Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
network-assets-reptile
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
liyang
network-assets-reptile
Commits
d7796167
Commit
d7796167
authored
Jul 27, 2023
by
liyang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix:ptt debug
parent
ad694283
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
20 additions
and
14 deletions
+20
-14
pc_ptt.py
pc_ptt.py
+3
-2
test.py
test.py
+17
-12
No files found.
pc_ptt.py
View file @
d7796167
...
...
@@ -39,8 +39,9 @@ def reptile(browser=None, search_word=""):
# 打开网页
browser
.
get
(
url
)
# log.debug("已打开浏览器")
classify_
item_list
=
browser
.
find_elements
(
'xpath'
,
"//div[@class='board-class']"
)
item_list
=
browser
.
find_elements
(
'xpath'
,
"//div[@class='board-class']"
)
# log.debug(classify_item_list)
classify_item_list
=
item_list
.
copy
()
length
=
len
(
classify_item_list
)
for
index
in
range
(
length
):
# 暂时先爬取 第2个 分类
...
...
@@ -209,7 +210,7 @@ def reptile(browser=None, search_word=""):
browser
.
back
()
time
.
sleep
(
0.1
)
# 重新获取
classify_item_list
=
browser
.
find_elements
(
'xpath'
,
"//div[@class='board-class']"
)
#
classify_item_list = browser.find_elements('xpath', "//div[@class='board-class']")
# 发送爬取数据到java服务
# print('----------------------')
...
...
test.py
View file @
d7796167
# set options to be headless, ..
from
selenium
import
webdriver
options
=
webdriver
.
ChromeOptions
()
options
.
add_argument
(
'--headless'
)
options
.
add_argument
(
'--no-sandbox'
)
options
.
add_argument
(
'--disable-dev-shm-usage'
)
# open it, go to a website, and get results
wd
=
webdriver
.
Chrome
(
options
=
options
)
wd
.
get
(
"https://www.youtube.com/results?search_query=俄乌战争"
)
print
(
wd
.
page_source
)
# results
\ No newline at end of file
classify_item_list
=
browser
.
find_elements
(
'xpath'
,
"//div[@class='board-class']"
)
# log.debug(classify_item_list)
length
=
len
(
classify_item_list
)
for
index
in
range
(
length
):
# 暂时先爬取 第2个 分类
if
0
<=
index
<
4
:
type_title
=
classify_item_list
[
index
]
.
text
classify_item_list
[
index
]
.
click
()
time
.
sleep
(
0.1
)
for
index_two
in
range
(
length_two
):
print
(
element_list
[
index_two
]
.
text
)
# 浏览器返回上一页
browser
.
back
()
if
index
==
0
:
browser
.
back
()
time
.
sleep
(
0.1
)
classify_item_list
=
browser
.
find_elements
(
'xpath'
,
"//div[@class='board-class']"
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment