Commit 51830bd6 authored by liyang

feat:ins爬虫数据条件过滤优化

parent 46ec2eee
......@@ -35,9 +35,7 @@ def reptile(browser=None, search_word=""):
"""
print(f"搜索词:{search_word}")
base_url = "https://www.dcard.tw"
option = ['--headless']
# ['--headless']
browser = browser or create(None, False)
browser = browser or create(no_headless=True,using_user_data=True)
# 打开网页
# browser.get(base_url)
browser.get(f"{base_url}/search?query={search_word}")
......
......@@ -23,8 +23,7 @@ import sys
def reptile(browser=None, search_word=""):
print(f"搜索词:{search_word}")
url = "https://www.facebook.com/"
option = ['--headless']
browser = browser or create(None, True)
browser = browser or create(no_headless=True,using_user_data=True)
# 打开网页
browser.get(url)
try:
......
......@@ -34,9 +34,8 @@ def reptile(browser=None, search_word=""):
"""
print(f"搜索词:{search_word}")
base_url = "https://www.instagram.com/"
option = ['--headless']
# ['--headless']
browser = browser or create(option, True)
browser = browser or create(no_headless=True,using_user_data=True)
# print(browser)
# 打开网页
browser.get(base_url)
......
......@@ -28,9 +28,7 @@ from utils.download_image import download_image
def reptile(browser=None, search_word=""):
url = "https://www.ptt.cc/bbs/hotboards.html"
# 无头模式执行
option = ['--headless']
browser = browser or create(option, True)
browser = browser or create(no_headless=True,using_user_data=True)
# 有头模式执行
# browser = browser or create()
# 打开网页
......
......@@ -28,9 +28,7 @@ def reptile(browser=None, search_word=""):
"""
print(f"搜索词:{search_word}")
base_url = "https://twitter.com/"
option = ['--headless']
# ['--headless']
browser = browser or create(option, True)
browser = browser or create(no_headless=True,using_user_data=True)
# print(browser)
# 打开网页
browser.get(base_url)
......
......@@ -21,8 +21,7 @@ def reptile(browser=None, search_word=""):
:param search_word:
:return:
"""
option = ['--headless']
browser = browser or create(['--headless'], True)
browser = browser or create(no_headless=True,using_user_data=True)
# print(browser)
# 打开网页
url = f'https://www.youtube.com/results?search_query={search_word}'
......
......@@ -29,10 +29,11 @@ from utils.index import get_screen_resolution
'''
def create(option=None, using_user_data=True, web_browser="firefox"):
def create(option=None, no_headless=False, using_user_data=True, web_browser="firefox"):
"""
生成selenium实例
:param no_headless:
:param web_browser:
:param using_user_data:
:param option:
......@@ -54,6 +55,7 @@ def create(option=None, using_user_data=True, web_browser="firefox"):
options = webdriver.EdgeOptions()
elif web_browser == "chromium":
options = webdriver.ChromeOptions()
if option is not None:
for value in option:
options.add_argument(value)
......@@ -84,6 +86,13 @@ def create(option=None, using_user_data=True, web_browser="firefox"):
# chrome_options.add_argument('--headless')
# options.add_argument("--window-size=1920x1080") # 设置窗口大小,这是一个常见的完全无头模式的设置
# options.add_argument("--start-maximized") # 最大化窗口
if no_headless == True:
if platform.system() == "Linux" and platform.system() == "Darwin":
# 开启无头模式
options.add_argument("-headless")
elif platform.system() == "Windows" and web_browser == "firefox":
# windows系统、火狐浏览器不开启无头模式
print("")
if option != None:
# 无头模式下禁用gpu加速
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment