Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
network-assets-reptile
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
liyang
network-assets-reptile
Commits
73e25c43
Commit
73e25c43
authored
Jul 20, 2023
by
liyang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix:脚本执行异常未退出
parent
290453ef
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
32 additions
and
10 deletions
+32
-10
pc_facebook.py
pc_facebook.py
+9
-2
pc_ptt.py
pc_ptt.py
+8
-3
pc_twitter.py
pc_twitter.py
+7
-2
pc_youtube.py
pc_youtube.py
+8
-3
No files found.
pc_facebook.py
View file @
73e25c43
...
...
@@ -11,7 +11,7 @@ from datetime import datetime
from
utils.download_image
import
download_image
import
os
from
config.settings
import
get_base_file_url
import
sys
# 工具函数-下载图片
'''
打开指定网页,并使用 Selenium 模拟点击 "GDPR-accept" 按钮,然后循环点击 "search-show-more-button" 按钮来加载更多数据,直到按钮不再可点击为止。最后,获取完整的分页数据并关闭浏览器驱动。
...
...
@@ -162,7 +162,14 @@ def reptile(browser=None, search_word=""):
else
:
# 爬取数据为空
log
.
info
(
"未爬取到数据"
)
browser
.
quit
()
# 关闭浏览器驱动
try
:
browser
.
close
()
browser
.
quit
()
sys
.
exit
()
except
:
log
.
debug
(
"浏览器驱动关闭失败"
)
def
main
():
...
...
pc_ptt.py
View file @
73e25c43
import
io
import
json
import
re
import
sys
import
time
import
loguru
# import pymysql.cursors
import
requests
...
...
@@ -18,6 +18,7 @@ from utils.filse import save_json
import
os
from
config.settings
import
get_base_file_url
from
utils.download_image
import
download_image
'''
爬取台湾PTT论坛的热门帖子,包括帖子的标题、内容【文本、图片、视频】
...
...
@@ -202,8 +203,12 @@ def reptile(browser=None, search_word=""):
log
.
info
(
"未爬取到数据"
)
# 关闭浏览器驱动
# time.sleep(3)
browser
.
quit
()
try
:
browser
.
close
()
browser
.
quit
()
sys
.
exit
()
except
:
log
.
debug
(
"浏览器驱动关闭失败"
)
def
main
():
...
...
pc_twitter.py
View file @
73e25c43
...
...
@@ -8,6 +8,7 @@ from api.index import importJson, getReptileTask, importJsonPath
from
utils.index
import
convert_to_traditional
,
yt_dlp_download
,
convert_string_to_time
,
parse_twitter_time_string
# from pytube import YouTube
import
os
import
sys
from
datetime
import
datetime
from
utils.download_image
import
download_image
from
config.settings
import
get_base_file_url
...
...
@@ -163,8 +164,12 @@ def reptile(browser=None, search_word=""):
log
.
info
(
"未爬取到数据"
)
# 关闭浏览器驱动
# time.sleep(3)
browser
.
quit
()
try
:
browser
.
close
()
browser
.
quit
()
sys
.
exit
()
except
:
log
.
debug
(
"浏览器驱动关闭失败"
)
def
main
():
...
...
pc_youtube.py
View file @
73e25c43
...
...
@@ -11,7 +11,7 @@ from datetime import datetime
import
os
from
config.settings
import
get_base_file_url
from
selenium.webdriver.common.action_chains
import
ActionChains
import
sys
def
reptile
(
browser
=
None
,
search_word
=
""
):
"""
...
...
@@ -77,9 +77,14 @@ def reptile(browser=None, search_word=""):
else
:
# 爬取数据为空
log
.
info
(
"未爬取到数据"
)
browser
.
close
()
# 关闭浏览器驱动
browser
.
quit
()
try
:
browser
.
close
()
browser
.
quit
()
sys
.
exit
()
except
:
log
.
debug
(
"浏览器驱动关闭失败"
)
def
main
():
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment