Commit 2fe822c2 authored by liyang

feat:dcard 爬虫

parent db9c2b5b
...@@ -17,3 +17,4 @@ reptile_data/**/*.json ...@@ -17,3 +17,4 @@ reptile_data/**/*.json
*.exe *.exe
*.deb *.deb
browser/*chrome* browser/*chrome*
browser/**/chromedriver
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -3,18 +3,23 @@ import platform ...@@ -3,18 +3,23 @@ import platform
from selenium import webdriver from selenium import webdriver
# --------------- selenium 依赖 start ---------------- # --------------- selenium 依赖 start ----------------
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.chrome.service import Service as ChromeService from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.service import Service as ChromiumService from selenium.webdriver.chromium.service import ChromiumService
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.core.utils import ChromeType
from selenium.webdriver.firefox.service import Service as FirefoxService from selenium.webdriver.firefox.service import Service as FirefoxService
from webdriver_manager.firefox import GeckoDriverManager from selenium.webdriver.edge.service import Service as EdgeService
from selenium.webdriver.ie.service import Service as IeService
from selenium.webdriver.safari.service import Service as SafariService
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
# --------------- selenium 依赖 end ---------------- # --------------- selenium 依赖 end ----------------
# --------------- webdriver_manager 依赖 start -----------------
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
from webdriver_manager.microsoft import EdgeChromiumDriverManager
from webdriver_manager.opera import OperaDriverManager
# --------------- webdriver_manager 依赖 end -----------------
from utils.index import get_screen_resolution from utils.index import get_screen_resolution
# from mozprofile import FirefoxProfile # from mozprofile import FirefoxProfile
...@@ -23,7 +28,7 @@ from utils.index import get_screen_resolution ...@@ -23,7 +28,7 @@ from utils.index import get_screen_resolution
''' '''
def create(option=None, using_user_data=True, web_browser="firefox"): def create(option=None, using_user_data=True, web_browser="chrome_test"):
""" """
:param web_browser: :param web_browser:
...@@ -41,8 +46,10 @@ def create(option=None, using_user_data=True, web_browser="firefox"): ...@@ -41,8 +46,10 @@ def create(option=None, using_user_data=True, web_browser="firefox"):
user_data_dir = os.path.join(os.path.abspath("../"), 'network-assets-reptile', 'user_data') user_data_dir = os.path.join(os.path.abspath("../"), 'network-assets-reptile', 'user_data')
if web_browser == "firefox": if web_browser == "firefox":
options = webdriver.FirefoxOptions() options = webdriver.FirefoxOptions()
else: elif web_browser == "chrome" or web_browser == "chrome_test":
options = webdriver.ChromeOptions() options = webdriver.ChromeOptions()
elif web_browser == "edge":
options = webdriver.EdgeOptions()
if option is not None: if option is not None:
for value in option: for value in option:
...@@ -57,10 +64,12 @@ def create(option=None, using_user_data=True, web_browser="firefox"): ...@@ -57,10 +64,12 @@ def create(option=None, using_user_data=True, web_browser="firefox"):
# options.add_argument(f'--user-data-dir={user_data_dir}') # options.add_argument(f'--user-data-dir={user_data_dir}')
elif web_browser == "chrome": elif web_browser == "chrome":
options.add_argument(f'--user-data-dir={user_data_dir}') options.add_argument(f'--user-data-dir={user_data_dir}')
elif web_browser == "chromium": elif web_browser == "chrome_test":
options.add_argument(f'--user-data-dir={user_data_dir}') options.add_argument(f'--user-data-dir={user_data_dir}')
elif web_browser == "chrome_test": elif web_browser == "chrome_test":
options.add_argument(f'--user-data-dir={user_data_dir}') options.add_argument(f'--user-data-dir={user_data_dir}')
elif web_browser == "edge":
options.add_argument(f'--user-data-dir={user_data_dir}')
else: else:
print("") print("")
...@@ -85,22 +94,36 @@ def create(option=None, using_user_data=True, web_browser="firefox"): ...@@ -85,22 +94,36 @@ def create(option=None, using_user_data=True, web_browser="firefox"):
web_browser = webdriver.Chrome(options=options, service=ChromeService(ChromeDriverManager().install())) web_browser = webdriver.Chrome(options=options, service=ChromeService(ChromeDriverManager().install()))
elif web_browser == "chrome_test": elif web_browser == "chrome_test":
binary_location = "" binary_location = ""
webdriver_location = ""
if platform.system() == "Windows": if platform.system() == "Windows":
binary_location = os.path.join(os.path.abspath("../"), 'network-assets-reptile', 'browser', "chrome-win64", binary_location = os.path.join(os.path.abspath("../"), 'network-assets-reptile', 'browser', "chrome_win64",
"chrome") "chrome.exe")
webdriver_location = os.path.join(os.path.abspath("../"), 'network-assets-reptile', 'browser',"web-driver","chromedriver_win32",
"chromedriver.exe")
elif platform.system() == "Linux": elif platform.system() == "Linux":
binary_location = os.path.join(os.path.abspath("../"), 'network-assets-reptile', 'browser', binary_location = os.path.join(os.path.abspath("../"), 'network-assets-reptile', 'browser',
"chrome-linux64", "chrome") "chrome-linux64", "chrome")
webdriver_location = os.path.join(os.path.abspath("../"), 'network-assets-reptile', 'browser',"web-driver","chromedriver_linux64",
"chromedriver")
elif platform.system() == "Darwin": elif platform.system() == "Darwin":
binary_location = os.path.join(os.path.abspath("../"), 'network-assets-reptile', 'browser', binary_location = os.path.join(os.path.abspath("../"), 'network-assets-reptile', 'browser',
"chrome-mac-x64", "chrome") "chrome-mac-x64", "chrome.app")
webdriver_location = os.path.join(os.path.abspath("../"), 'network-assets-reptile', 'browser',"web-driver","chromedriver_mac64",
"chromedriver")
else: else:
print("") print("")
# 指定浏览器路径 # 指定浏览器路径
print(binary_location) # print(binary_location)
# 指定浏览器路径
options.binary_location = binary_location options.binary_location = binary_location
browser = webdriver.Chrome(options=options, service=ChromiumService( # options.browser_version = "114"
ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install())) # 设置驱动二进制可执行文件路径
# service = ChromeService(executable_path=webdriver_location)
service = ChromeService(executable_path=webdriver_location)
# service=service=ChromeService(ChromeDriverManager().install())
browser = webdriver.Chrome(options=options,service=service)
elif web_browser == "edge":
browser = webdriver.Edge(options=options,service=EdgeService(EdgeChromiumDriverManager().install()))
else: else:
print("") print("")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment