Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
network-assets-reptile
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
liyang
network-assets-reptile
Commits
51830bd6
Commit
51830bd6
authored
Jul 25, 2023
by
liyang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
feat:ins爬虫数据条件过滤优化
parent
46ec2eee
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
17 additions
and
17 deletions
+17
-17
pc_dcard.py
pc_dcard.py
+1
-3
pc_facebook.py
pc_facebook.py
+1
-2
pc_instagram.py
pc_instagram.py
+2
-3
pc_ptt.py
pc_ptt.py
+1
-3
pc_twitter.py
pc_twitter.py
+1
-3
pc_youtube.py
pc_youtube.py
+1
-2
createBrowserDriver.py
utils/createBrowserDriver.py
+10
-1
No files found.
pc_dcard.py
View file @
51830bd6
...
...
@@ -35,9 +35,7 @@ def reptile(browser=None, search_word=""):
"""
print
(
f
"搜索词:{search_word}"
)
base_url
=
"https://www.dcard.tw"
option
=
[
'--headless'
]
# ['--headless']
browser
=
browser
or
create
(
None
,
False
)
browser
=
browser
or
create
(
no_headless
=
True
,
using_user_data
=
True
)
# 打开网页
# browser.get(base_url)
browser
.
get
(
f
"{base_url}/search?query={search_word}"
)
...
...
pc_facebook.py
View file @
51830bd6
...
...
@@ -23,8 +23,7 @@ import sys
def
reptile
(
browser
=
None
,
search_word
=
""
):
print
(
f
"搜索词:{search_word}"
)
url
=
"https://www.facebook.com/"
option
=
[
'--headless'
]
browser
=
browser
or
create
(
None
,
True
)
browser
=
browser
or
create
(
no_headless
=
True
,
using_user_data
=
True
)
# 打开网页
browser
.
get
(
url
)
try
:
...
...
pc_instagram.py
View file @
51830bd6
...
...
@@ -34,9 +34,8 @@ def reptile(browser=None, search_word=""):
"""
print
(
f
"搜索词:{search_word}"
)
base_url
=
"https://www.instagram.com/"
option
=
[
'--headless'
]
# ['--headless']
browser
=
browser
or
create
(
option
,
True
)
browser
=
browser
or
create
(
no_headless
=
True
,
using_user_data
=
True
)
# print(browser)
# 打开网页
browser
.
get
(
base_url
)
...
...
pc_ptt.py
View file @
51830bd6
...
...
@@ -28,9 +28,7 @@ from utils.download_image import download_image
def
reptile
(
browser
=
None
,
search_word
=
""
):
url
=
"https://www.ptt.cc/bbs/hotboards.html"
# 无头模式执行
option
=
[
'--headless'
]
browser
=
browser
or
create
(
option
,
True
)
browser
=
browser
or
create
(
no_headless
=
True
,
using_user_data
=
True
)
# 有头模式执行
# browser = browser or create()
# 打开网页
...
...
pc_twitter.py
View file @
51830bd6
...
...
@@ -28,9 +28,7 @@ def reptile(browser=None, search_word=""):
"""
print
(
f
"搜索词:{search_word}"
)
base_url
=
"https://twitter.com/"
option
=
[
'--headless'
]
# ['--headless']
browser
=
browser
or
create
(
option
,
True
)
browser
=
browser
or
create
(
no_headless
=
True
,
using_user_data
=
True
)
# print(browser)
# 打开网页
browser
.
get
(
base_url
)
...
...
pc_youtube.py
View file @
51830bd6
...
...
@@ -21,8 +21,7 @@ def reptile(browser=None, search_word=""):
:param search_word:
:return:
"""
option
=
[
'--headless'
]
browser
=
browser
or
create
([
'--headless'
],
True
)
browser
=
browser
or
create
(
no_headless
=
True
,
using_user_data
=
True
)
# print(browser)
# 打开网页
url
=
f
'https://www.youtube.com/results?search_query={search_word}'
...
...
utils/createBrowserDriver.py
View file @
51830bd6
...
...
@@ -29,10 +29,11 @@ from utils.index import get_screen_resolution
'''
def
create
(
option
=
None
,
using_user_data
=
True
,
web_browser
=
"firefox"
):
def
create
(
option
=
None
,
no_headless
=
False
,
using_user_data
=
True
,
web_browser
=
"firefox"
):
"""
生成selenium实例
:param no_headless:
:param web_browser:
:param using_user_data:
:param option:
...
...
@@ -54,6 +55,7 @@ def create(option=None, using_user_data=True, web_browser="firefox"):
options
=
webdriver
.
EdgeOptions
()
elif
web_browser
==
"chromium"
:
options
=
webdriver
.
ChromeOptions
()
if
option
is
not
None
:
for
value
in
option
:
options
.
add_argument
(
value
)
...
...
@@ -84,6 +86,13 @@ def create(option=None, using_user_data=True, web_browser="firefox"):
# chrome_options.add_argument('--headless')
# options.add_argument("--window-size=1920x1080") # 设置窗口大小,这是一个常见的完全无头模式的设置
# options.add_argument("--start-maximized") # 最大化窗口
if
no_headless
==
True
:
if
platform
.
system
()
==
"Linux"
and
platform
.
system
()
==
"Darwin"
:
# 开启无头模式
options
.
add_argument
(
"-headless"
)
elif
platform
.
system
()
==
"Windows"
and
web_browser
==
"firefox"
:
# windows系统、火狐浏览器不开启无头模式
print
(
""
)
if
option
!=
None
:
# 无头模式下禁用gpu加速
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment