Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
network-assets-reptile
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
liyang
network-assets-reptile
Commits
0af3679e
Commit
0af3679e
authored
Jul 28, 2023
by
liyang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix:ptt执行效率
parent
2732252c
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
27 additions
and
9 deletions
+27
-9
pc_twitter.py
pc_twitter.py
+27
-9
No files found.
pc_twitter.py
View file @
0af3679e
...
...
@@ -22,6 +22,7 @@ from selenium.webdriver.support import expected_conditions as EC
# --------------- selenium 依赖 end ----------------
import
platform
'''
打开指定网页,并使用 Selenium 模拟点击 "GDPR-accept" 按钮,然后循环点击 "search-show-more-button" 按钮来加载更多数据,直到按钮不再可点击为止。最后,获取完整的分页数据并关闭浏览器驱动。
'''
...
...
@@ -35,15 +36,19 @@ def reptile(browser=None, search_word=""):
"""
print
(
f
"搜索词:{search_word}"
)
base_url
=
"https://twitter.com/"
if
platform
.
system
()
==
"Windows"
:
browser
=
browser
or
create
(
no_headless
=
True
,
using_user_data
=
True
)
else
:
browser
=
browser
or
create
(
no_headless
=
True
,
using_user_data
=
True
)
browser
=
browser
or
create
(
no_headless
=
False
,
using_user_data
=
True
)
# print(browser)
# 打开网页
browser
.
get
(
base_url
)
time
.
sleep
(
2
)
try
:
try
:
login_button
=
browser
.
find_element
(
'xpath'
,
"//a[@href='/login']"
)
login_button
.
click
()
time
.
sleep
(
2
)
except
:
error
=
""
# wait = WebDriverWait(browser, 20)
# wait.until(EC.presence_of_element_located((By.XPATH, "//input[@autocomplete='username']")))
# 检测是否要登录
...
...
@@ -59,9 +64,11 @@ def reptile(browser=None, search_word=""):
# # 获取登录按钮
button_login
=
browser
.
find_element
(
'xpath'
,
"//div[@data-testid='LoginForm_Login_Button']"
)
button_login
.
click
()
time
.
sleep
(
1
)
time
.
sleep
(
2
)
except
:
print
(
"------"
)
# print("------")
error
=
""
url
=
'https://twitter.com/search?q='
+
search_word
+
'&src=typed_query'
browser
.
get
(
url
)
wait
=
WebDriverWait
(
browser
,
10
)
...
...
@@ -86,6 +93,15 @@ def reptile(browser=None, search_word=""):
except
:
link_str
=
""
timestamp
=
time
.
time
()
# 删除多余div
parent_div
=
soup
.
find
(
"div"
)
# 找到所有的 <div> 子元素
div_elements
=
parent_div
.
find_all
(
'div'
,
recursive
=
False
)
for
key
,
item
in
enumerate
(
div_elements
):
if
key
==
0
or
key
==
len
(
div_elements
)
-
1
:
item
.
extract
()
author
=
element_authors_list
[
index
]
.
text
# 标题取:作者+日期
title
=
f
"{author}-{datetime.fromtimestamp(int(timestamp))}"
...
...
@@ -113,8 +129,8 @@ def reptile(browser=None, search_word=""):
custom_video
[
"src"
]
=
""
parent_div
.
append
(
custom_video
)
else
:
print
(
""
)
#
print("")
error
=
""
picture_url
=
[]
if
len
(
image_list
)
>
0
:
for
key
,
element
in
enumerate
(
image_list
):
...
...
@@ -130,7 +146,9 @@ def reptile(browser=None, search_word=""):
element
[
'src'
]
=
access_address
picture_url
.
append
(
download_dir
)
else
:
print
(
""
)
# print("")
error
=
""
content
=
soup
.
prettify
()
# ---------------- 判断类型 start ----------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment