Commit 51830bd6 authored by liyang

feat:ins爬虫数据条件过滤优化

parent 46ec2eee
...@@ -35,9 +35,7 @@ def reptile(browser=None, search_word=""): ...@@ -35,9 +35,7 @@ def reptile(browser=None, search_word=""):
""" """
print(f"搜索词:{search_word}") print(f"搜索词:{search_word}")
base_url = "https://www.dcard.tw" base_url = "https://www.dcard.tw"
option = ['--headless'] browser = browser or create(no_headless=True,using_user_data=True)
# ['--headless']
browser = browser or create(None, False)
# 打开网页 # 打开网页
# browser.get(base_url) # browser.get(base_url)
browser.get(f"{base_url}/search?query={search_word}") browser.get(f"{base_url}/search?query={search_word}")
......
...@@ -23,8 +23,7 @@ import sys ...@@ -23,8 +23,7 @@ import sys
def reptile(browser=None, search_word=""): def reptile(browser=None, search_word=""):
print(f"搜索词:{search_word}") print(f"搜索词:{search_word}")
url = "https://www.facebook.com/" url = "https://www.facebook.com/"
option = ['--headless'] browser = browser or create(no_headless=True,using_user_data=True)
browser = browser or create(None, True)
# 打开网页 # 打开网页
browser.get(url) browser.get(url)
try: try:
......
...@@ -34,9 +34,8 @@ def reptile(browser=None, search_word=""): ...@@ -34,9 +34,8 @@ def reptile(browser=None, search_word=""):
""" """
print(f"搜索词:{search_word}") print(f"搜索词:{search_word}")
base_url = "https://www.instagram.com/" base_url = "https://www.instagram.com/"
option = ['--headless']
# ['--headless'] browser = browser or create(no_headless=True,using_user_data=True)
browser = browser or create(option, True)
# print(browser) # print(browser)
# 打开网页 # 打开网页
browser.get(base_url) browser.get(base_url)
......
...@@ -28,9 +28,7 @@ from utils.download_image import download_image ...@@ -28,9 +28,7 @@ from utils.download_image import download_image
def reptile(browser=None, search_word=""): def reptile(browser=None, search_word=""):
url = "https://www.ptt.cc/bbs/hotboards.html" url = "https://www.ptt.cc/bbs/hotboards.html"
# 无头模式执行 browser = browser or create(no_headless=True,using_user_data=True)
option = ['--headless']
browser = browser or create(option, True)
# 有头模式执行 # 有头模式执行
# browser = browser or create() # browser = browser or create()
# 打开网页 # 打开网页
......
...@@ -28,9 +28,7 @@ def reptile(browser=None, search_word=""): ...@@ -28,9 +28,7 @@ def reptile(browser=None, search_word=""):
""" """
print(f"搜索词:{search_word}") print(f"搜索词:{search_word}")
base_url = "https://twitter.com/" base_url = "https://twitter.com/"
option = ['--headless'] browser = browser or create(no_headless=True,using_user_data=True)
# ['--headless']
browser = browser or create(option, True)
# print(browser) # print(browser)
# 打开网页 # 打开网页
browser.get(base_url) browser.get(base_url)
......
...@@ -21,8 +21,7 @@ def reptile(browser=None, search_word=""): ...@@ -21,8 +21,7 @@ def reptile(browser=None, search_word=""):
:param search_word: :param search_word:
:return: :return:
""" """
option = ['--headless'] browser = browser or create(no_headless=True,using_user_data=True)
browser = browser or create(['--headless'], True)
# print(browser) # print(browser)
# 打开网页 # 打开网页
url = f'https://www.youtube.com/results?search_query={search_word}' url = f'https://www.youtube.com/results?search_query={search_word}'
......
...@@ -29,10 +29,11 @@ from utils.index import get_screen_resolution ...@@ -29,10 +29,11 @@ from utils.index import get_screen_resolution
''' '''
def create(option=None, using_user_data=True, web_browser="firefox"): def create(option=None, no_headless=False, using_user_data=True, web_browser="firefox"):
""" """
生成selenium实例 生成selenium实例
:param no_headless:
:param web_browser: :param web_browser:
:param using_user_data: :param using_user_data:
:param option: :param option:
...@@ -54,6 +55,7 @@ def create(option=None, using_user_data=True, web_browser="firefox"): ...@@ -54,6 +55,7 @@ def create(option=None, using_user_data=True, web_browser="firefox"):
options = webdriver.EdgeOptions() options = webdriver.EdgeOptions()
elif web_browser == "chromium": elif web_browser == "chromium":
options = webdriver.ChromeOptions() options = webdriver.ChromeOptions()
if option is not None: if option is not None:
for value in option: for value in option:
options.add_argument(value) options.add_argument(value)
...@@ -84,6 +86,13 @@ def create(option=None, using_user_data=True, web_browser="firefox"): ...@@ -84,6 +86,13 @@ def create(option=None, using_user_data=True, web_browser="firefox"):
# chrome_options.add_argument('--headless') # chrome_options.add_argument('--headless')
# options.add_argument("--window-size=1920x1080") # 设置窗口大小,这是一个常见的完全无头模式的设置 # options.add_argument("--window-size=1920x1080") # 设置窗口大小,这是一个常见的完全无头模式的设置
# options.add_argument("--start-maximized") # 最大化窗口 # options.add_argument("--start-maximized") # 最大化窗口
if no_headless == True:
if platform.system() == "Linux" and platform.system() == "Darwin":
# 开启无头模式
options.add_argument("-headless")
elif platform.system() == "Windows" and web_browser == "firefox":
# windows系统、火狐浏览器不开启无头模式
print("")
if option != None: if option != None:
# 无头模式下禁用gpu加速 # 无头模式下禁用gpu加速
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment