Commit 1526bcd6 authored by liyang's avatar liyang

feat:爬取instagram

parent 2fe822c2
......@@ -13,8 +13,5 @@ reptile_data/**/*.json
*.mp4
*.webm
*.jpg
*.app
*.exe
*.deb
browser/*chrome*
browser/**/chromedriver
\ No newline at end of file
......@@ -2,8 +2,26 @@
def get_log_path():
    """Return the configured log path (the relative parent directory)."""
    log_path = "../"
    return log_path
def get_base_url():
    """Return the configured base URL, including its trailing slash."""
    base_url = "http://192.168.0.118:8081/"
    return base_url
def get_base_file_url():
    """Return the configured base URL for files, including its trailing slash."""
    base_file_url = "http://192.168.0.118:8186/"
    return base_file_url
def get_account(name):
    """Return the login credentials configured for a platform.

    SECURITY: credentials used to be hard-coded in this function and were
    therefore committed to version control. They are now read from the
    environment so that no secret lives in the repository. The previously
    committed passwords must be considered leaked and should be rotated.

    For a platform ``<p>`` the following variables are consulted, where
    ``<P>`` is the upper-cased platform name:

    * ``<P>_ACCOUNT_NAME``      (e.g. ``TWITTER_ACCOUNT_NAME``)
    * ``<P>_ACCOUNT_PASSWORD``  (e.g. ``TWITTER_ACCOUNT_PASSWORD``)

    Args:
        name: Platform identifier; one of ``"twitter"``, ``"facebook"``
            or ``"instagram"``.

    Returns:
        dict: ``{"name": ..., "password": ...}`` for a supported platform.
        A value is the empty string when its environment variable is not
        set (a warning is written to stderr in that case). An empty dict
        is returned for an unsupported platform, as before.
    """
    # Function-scope imports keep this block self-contained; the module's
    # own import section is not visible from here.
    import os
    import sys

    supported_platforms = ("twitter", "facebook", "instagram")
    if name not in supported_platforms:
        # The original printed an empty string here, which told the operator
        # nothing; report the actual problem instead.
        print(f"get_account: unsupported platform {name!r}", file=sys.stderr)
        return {}

    prefix = name.upper()
    env_names = {
        "name": f"{prefix}_ACCOUNT_NAME",
        "password": f"{prefix}_ACCOUNT_PASSWORD",
    }
    data = {key: os.environ.get(var, "") for key, var in env_names.items()}

    # Surface misconfiguration early instead of failing silently at login.
    missing = [var for key, var in env_names.items() if not data[key]]
    if missing:
        print(
            f"get_account: environment variable(s) not set: {', '.join(missing)}",
            file=sys.stderr,
        )
    return data
......@@ -11,6 +11,7 @@ from datetime import datetime
from utils.download_image import download_image
import os
from config.settings import get_base_file_url
from config.settings import get_account
import sys
# 工具函数-下载图片
'''
......@@ -29,8 +30,8 @@ def reptile(browser=None, search_word=""):
# 检测是否要登录
login_input = browser.find_element('xpath', "//input[@name='email']")
password_input = browser.find_element('xpath', "//input[@name='pass']")
login_input.send_keys("liyang19970814@gmail.com")
password_input.send_keys("xn89kiPT/^Kaeg#")
login_input.send_keys(get_account("facebook")["name"])
password_input.send_keys(get_account("facebook")["password"])
# 获取登录按钮
button_login = browser.find_element('xpath', "//button[@name='login']")
button_login.click()
......
This diff is collapsed.
......@@ -12,7 +12,7 @@ import sys
from datetime import datetime
from utils.download_image import download_image
from config.settings import get_base_file_url
from config.settings import get_account
# 工具函数-下载图片
'''
打开指定网页,并使用 Selenium 模拟点击 "GDPR-accept" 按钮,然后循环点击 "search-show-more-button" 按钮来加载更多数据,直到按钮不再可点击为止。最后,获取完整的分页数据并关闭浏览器驱动。
......@@ -37,13 +37,13 @@ def reptile(browser=None, search_word=""):
try:
# 检测是否要登录
login_input = browser.find_element('xpath', "//input[@autocomplete='username']")
login_input.send_keys("liyang1851603")
login_input.send_keys(get_account("twitter")["name"])
# 获取下一步按钮
buttons = browser.find_element('xpath', "//div[@role='button'][2]")
buttons.click()
time.sleep(3)
password_input = browser.find_element('xpath', "//input[@autocomplete='current-password']")
password_input.send_keys("liyang19970814")
password_input.send_keys(get_account("twitter")["password"])
# # 获取登录按钮
button_login = browser.find_element('xpath', "//div[@data-testid='LoginForm_Login_Button']")
button_login.click()
......
APScheduler==3.10.1
asgiref==3.7.2
async-generator==1.10
attrs==23.1.0
beautifulsoup4==4.12.2
certifi==2023.5.7
charset-normalizer==3.1.0
Django==4.2.2
docopt==0.6.2
exceptiongroup==1.1.1
h11==0.14.0
idna==3.4
loguru==0.7.0
lxml==4.9.2
outcome==1.2.0
pipreqs==0.4.13
PyMySQL==1.1.0
PySocks==1.7.1
pytube==15.0.0
pytz==2023.3
requests==2.31.0
selenium==4.10.0
six==1.16.0
sniffio==1.3.0
sortedcontainers==2.4.0
soupsieve==2.4.1
sqlparse==0.4.4
trio==0.22.0
trio-websocket==0.10.3
typing_extensions==4.7.0
tzlocal==5.0.1
urllib3==2.0.3
wsproto==1.2.0
yarg==0.1.9
OpenCC~=1.1.1
python-dateutil~=2.8.2
\ No newline at end of file
......@@ -28,7 +28,7 @@ from utils.index import get_screen_resolution
'''
def create(option=None, using_user_data=True, web_browser="chrome_test"):
def create(option=None, using_user_data=True, web_browser="chromium"):
"""
:param web_browser:
......@@ -50,7 +50,8 @@ def create(option=None, using_user_data=True, web_browser="chrome_test"):
options = webdriver.ChromeOptions()
elif web_browser == "edge":
options = webdriver.EdgeOptions()
elif web_browser == "chromium":
options = webdriver.ChromeOptions()
if option is not None:
for value in option:
options.add_argument(value)
......@@ -64,7 +65,7 @@ def create(option=None, using_user_data=True, web_browser="chrome_test"):
# options.add_argument(f'--user-data-dir={user_data_dir}')
elif web_browser == "chrome":
options.add_argument(f'--user-data-dir={user_data_dir}')
elif web_browser == "chrome_test":
elif web_browser == "chromium":
options.add_argument(f'--user-data-dir={user_data_dir}')
elif web_browser == "chrome_test":
options.add_argument(f'--user-data-dir={user_data_dir}')
......@@ -92,7 +93,7 @@ def create(option=None, using_user_data=True, web_browser="chrome_test"):
elif web_browser == "chrome":
# 创建Chrome浏览器对象并传入选项
web_browser = webdriver.Chrome(options=options, service=ChromeService(ChromeDriverManager().install()))
elif web_browser == "chrome_test":
elif web_browser == "chromium":
binary_location = ""
webdriver_location = ""
if platform.system() == "Windows":
......@@ -115,7 +116,7 @@ def create(option=None, using_user_data=True, web_browser="chrome_test"):
# 指定浏览器路径
# print(binary_location)
# 指定浏览器路径
options.binary_location = binary_location
# options.binary_location = binary_location
# options.browser_version = "114"
# 设置驱动二进制可执行文件路径
# service = ChromeService(executable_path=webdriver_location)
......
......@@ -2,7 +2,7 @@ import os.path
import re
import time
import opencc
from hanziconv import HanziConv
import datetime
from pytube import YouTube
import ssl
......@@ -135,8 +135,8 @@ def convert_to_traditional(simplified_text):
Returns:
str: 转换后的繁体中文文本。
"""
converter = opencc.OpenCC('s2t.json') # 创建简体中文到繁体中文的转换器
traditional_text = converter.convert(simplified_text) # 进行转换
# converter = opencc.OpenCC('s2t.json') # 创建简体中文到繁体中文的转换器
traditional_text = HanziConv.toTraditional(simplified_text) # 进行转换
return traditional_text
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment