Commit e6a8964a authored by liyang's avatar liyang

feat:处理ins爬虫数据

parent 1526bcd6
...@@ -55,7 +55,6 @@ def reptile(browser=None, search_word=""): ...@@ -55,7 +55,6 @@ def reptile(browser=None, search_word=""):
# 评论 # 评论
element_comment_list = browser.find_elements('xpath', f"{base_xpath}/div[3]/div[2]/div/span") element_comment_list = browser.find_elements('xpath', f"{base_xpath}/div[3]/div[2]/div/span")
length = len(element_content_list) length = len(element_content_list)
for index in range(length): for index in range(length):
# 提取时间,并转为时间戳 # 提取时间,并转为时间戳
...@@ -77,7 +76,7 @@ def reptile(browser=None, search_word=""): ...@@ -77,7 +76,7 @@ def reptile(browser=None, search_word=""):
wait.until(EC.presence_of_element_located((By.XPATH, "//div[@data-testid='overlay']"))) wait.until(EC.presence_of_element_located((By.XPATH, "//div[@data-testid='overlay']")))
time.sleep(3) time.sleep(3)
click_dom = browser.find_element("xpath", click_dom = browser.find_element("xpath",
"//div[@data-testid='overlay']") "//div[@data-testid='overlay']")
# 处理弹窗内容加载失败的情况 # 处理弹窗内容加载失败的情况
try: try:
browser.find_element("xpath", "//div[@data-testid='overlay']//h2[text()='發生錯誤']") browser.find_element("xpath", "//div[@data-testid='overlay']//h2[text()='發生錯誤']")
...@@ -175,10 +174,14 @@ def reptile(browser=None, search_word=""): ...@@ -175,10 +174,14 @@ def reptile(browser=None, search_word=""):
log.debug('save file success') log.debug('save file success')
else: else:
log.debug('save file failed') log.debug('save file failed')
script_close(browser)
else: else:
# 爬取数据为空 # 爬取数据为空
log.info("未爬取到数据") log.info("未爬取到数据")
script_close(browser)
def script_close(browser):
# 关闭浏览器驱动 # 关闭浏览器驱动
try: try:
browser.close() browser.close()
......
...@@ -13,6 +13,7 @@ import os ...@@ -13,6 +13,7 @@ import os
from config.settings import get_base_file_url from config.settings import get_base_file_url
from config.settings import get_account from config.settings import get_account
import sys import sys
# 工具函数-下载图片 # 工具函数-下载图片
''' '''
打开指定网页,并使用 Selenium 模拟点击 "GDPR-accept" 按钮,然后循环点击 "search-show-more-button" 按钮来加载更多数据,直到按钮不再可点击为止。最后,获取完整的分页数据并关闭浏览器驱动。 打开指定网页,并使用 Selenium 模拟点击 "GDPR-accept" 按钮,然后循环点击 "search-show-more-button" 按钮来加载更多数据,直到按钮不再可点击为止。最后,获取完整的分页数据并关闭浏览器驱动。
...@@ -160,19 +161,23 @@ def reptile(browser=None, search_word=""): ...@@ -160,19 +161,23 @@ def reptile(browser=None, search_word=""):
log.debug('save file success') log.debug('save file success')
else: else:
log.debug('save file failed') log.debug('save file failed')
script_close(browser)
else: else:
# 爬取数据为空 # 爬取数据为空
log.info("未爬取到数据") log.info("未爬取到数据")
script_close(browser)
def script_close(browser):
# 关闭浏览器驱动 # 关闭浏览器驱动
try: try:
browser.close() browser.close()
browser.quit() browser.quit()
except: except:
log.debug("浏览器驱动关闭失败") log.debug("浏览器驱动关闭失败")
sys.exit() sys.exit()
def main(): def main():
""" """
......
...@@ -40,7 +40,10 @@ def reptile(browser=None, search_word=""): ...@@ -40,7 +40,10 @@ def reptile(browser=None, search_word=""):
# print(browser) # print(browser)
# 打开网页 # 打开网页
browser.get(base_url) browser.get(base_url)
time.sleep(3) # 等待加载完成
time.sleep(2)
# wait = WebDriverWait(browser, 10)
# wait.until(EC.presence_of_element_located((By.XPATH, "//input[@name='username']")))
try: try:
# 检测是否要登录 # 检测是否要登录
login_input = browser.find_element('xpath', "//input[@name='username']") login_input = browser.find_element('xpath', "//input[@name='username']")
...@@ -50,55 +53,62 @@ def reptile(browser=None, search_word=""): ...@@ -50,55 +53,62 @@ def reptile(browser=None, search_word=""):
# 获取登录按钮 # 获取登录按钮
button_login = browser.find_element('xpath', "//button[@type='submit']") button_login = browser.find_element('xpath', "//button[@type='submit']")
button_login.click() button_login.click()
time.sleep(3) time.sleep(2)
except: except:
print("------") print("------")
# print("1111") # print("1111")
url = f"{base_url}explore/tags/{search_word}/" url = f"{base_url}explore/tags/{search_word}/"
browser.get(url) browser.get(url)
wait = WebDriverWait(browser, 10)
wait.until(EC.presence_of_element_located((By.XPATH, "//article//a")))
# 链接 # 链接
element_link_list = browser.find_elements('xpath',"//article//a") element_link_list = browser.find_elements('xpath', "//article//a")
length = len(element_link_list) length = len(element_link_list)
for index in range(length): for index in range(length):
element_link_list[index].click() # element_link_list[index].click()
browser.execute_script("arguments[0].click();", element_link_list[index])
# 等待弹窗加载完成 # 等待弹窗加载完成
wait = WebDriverWait(browser,10) wait = WebDriverWait(browser, 10)
wait.until(EC.presence_of_element_located((By.XPATH, "//div[@role='dialog']/div/div[2]"))) wait.until(EC.presence_of_element_located((By.XPATH, "//div[@role='dialog']/div/div[2]")))
# 提取其他 # 提取其他
author = browser.find_element("xpath","//div[@role='dialog']/div//article/div/div[2]/div/div/div[1]//a") author = browser.find_element("xpath", "//div[@role='dialog']/div//article/div/div[2]/div/div/div[1]//a")
content_element = browser.find_element("xpath","//div[@role='dialog']/div//article/div/div[2]/div/div/div[2]/div[1]//div[@role='button']//h1") content_element = browser.find_element("xpath",
time_element = browser.find_element("xpath","//div[@role='dialog']/div//article/div/div[2]/div/div/div[2]/div[2]//time") "//div[@role='dialog']/div//article/div/div[2]/div/div/div[2]/div[1]//div[@role='button']//h1")
time_element = browser.find_element("xpath",
"//div[@role='dialog']/div//article/div/div[2]/div/div/div[2]/div[2]//time")
link_str = browser.current_url link_str = browser.current_url
# 提取时间,并转为时间戳 # 提取时间,并转为时间戳
timestamp = datetime.fromisoformat(time_element.get_attribute("datetime")[:-1]).timestamp() timestamp = datetime.fromisoformat(time_element.get_attribute("datetime")[:-1]).timestamp()
#提取图片、视频 # 提取弹窗内容
picture_url = []
img_list = browser.find_elements("xpath","//div[@role='dialog']/div//article/div/div[1]/div/div[1]//img")
for key,item in enumerate(img_list):
if key == 0:
title = item.get_attribute("alt")
# 下载图片至本地,替换标签中的src
id = str(int(time.time()))
# 下载地址
download_dir = f'{os.path.join(file_dir, f"{id}.jpg")}'
# 访问地址
access_address = f'{get_base_file_url()}{table_name.split("_")[1]}/{id}.jpg'
# 下载状态
status = download_image(item['src'], download_dir)
if status:
item['src'] = access_address
picture_url.append(access_address)
#提取弹窗内容
soup = BeautifulSoup(content_element.get_attribute("outerHTML"), "html.parser") soup = BeautifulSoup(content_element.get_attribute("outerHTML"), "html.parser")
# 将图片整合到内容中 # 提取图片、视频
picture_url = []
img_list = browser.find_elements("xpath", "//div[@role='dialog']/div//article/div/div[1]/div/div[1]//img")
# 过滤视频
video_list = browser.find_elements("xpath", "//div[@role='dialog']/div//article/div/div[1]/div/div[1]//video")
for key, item in enumerate(img_list): for key, item in enumerate(img_list):
img = BeautifulSoup(item.get_attribute("outerHTML"),"html.parser") if len(video_list) == 0:
soup.append(img) if key == 0:
title = item.get_attribute("alt")
# 下载图片至本地,替换标签中的src
id = str(int(time.time()))
# 下载地址
download_dir = f'{os.path.join(file_dir, f"{id}.jpg")}'
# 访问地址
access_address = f'{get_base_file_url()}{table_name.split("_")[1]}/{id}.jpg'
# 下载状态
status = download_image(item.get_attribute("src"), download_dir)
if status:
# 将图片追加到内容中
img_soup = BeautifulSoup(item.get_attribute("outerHTML"), "html.parser")
img_soup.img["src"] = access_address
# print(img_soup.prettify())
soup.append(img_soup)
picture_url.append(access_address)
content = soup.prettify() content = soup.prettify()
# 类型 # 类型
content_type = "图文" content_type = "图文"
...@@ -116,15 +126,12 @@ def reptile(browser=None, search_word=""): ...@@ -116,15 +126,12 @@ def reptile(browser=None, search_word=""):
# --------------- 组装数据 end--------------------- # --------------- 组装数据 end---------------------
data.append(obj) data.append(obj)
# 获取下一页按钮 # 获取下一页按钮
next_buttons = browser.find_elements("xpath","//div[@role='dialog']/div/div[1]//button") next_buttons = browser.find_elements("xpath", "//div[@role='dialog']/div/div[1]//button")
if index < length-1: if index < length - 1:
for key,item in enumerate(next_buttons): for key, item in enumerate(next_buttons):
if key+1 == len(next_buttons): if key + 1 == len(next_buttons):
item.click() item.click()
# 发送爬取数据到java服务
# print('----------------------')
# print(data)
if len(data) > 0: if len(data) > 0:
# 保存json文件到本地 # 保存json文件到本地
# log.debug(os.path.abspath("../")) # log.debug(os.path.abspath("../"))
...@@ -133,17 +140,29 @@ def reptile(browser=None, search_word=""): ...@@ -133,17 +140,29 @@ def reptile(browser=None, search_word=""):
log.debug('save file success') log.debug('save file success')
else: else:
log.debug('save file failed') log.debug('save file failed')
script_close(browser)
else: else:
# 爬取数据为空 # 爬取数据为空
log.info("未爬取到数据") log.info("未爬取到数据")
script_close(browser)
def script_close(browser):
# 关闭浏览器驱动 # 关闭浏览器驱动
try: try:
browser.close() browser.close()
browser.quit() browser.quit()
except: except:
log.debug("浏览器驱动关闭失败") log.debug("浏览器驱动关闭失败")
sys.exit() try:
# 一些代码...
sys.exit()
except SystemExit:
raise # 重新抛出SystemExit异常,让脚本退出
except Exception as e:
# 异常处理代码...
print("sys.exit() 执行失败")
def main(): def main():
""" """
......
# Import dependencies
import json
import time
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
from loguru import logger
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Utility function - download an image to disk
from utils.download_image import download_image
'''
Open the target page, use Selenium to click the "GDPR-accept" button, then
repeatedly click the "search-show-more-button" button to load more data until
the button is no longer clickable. Finally, scrape the fully paginated
results and close the browser driver.
'''
# Scraped results, serialized to JSON at the end
data = []
# Monotonic counter used to name downloaded images (1.jpg, 2.jpg, ...)
image_key = 0
fileDir = "./reptile_data/news/nytimes/"
year = datetime(2021, 1, 1)
startDate = datetime(2020, 12, 31)  # initial date
endDate = datetime(2020, 12, 31)  # end date
# Create the browser driver
browser = webdriver.Chrome()
for i in range(1):
    endDate = startDate = startDate + timedelta(days=i)
    # Open the search page for the current one-day window
    browser.get(
        f'https://www.nytimes.com/search?dropmab=false&endDate={endDate.strftime("%Y%m%d")}&query={year.strftime("%Y")}&sort=best&startDate={startDate.strftime("%Y%m%d")}&types=interactivegraphics%2Carticle')
    try:
        # Dismiss the GDPR consent banner if it appears
        accept = WebDriverWait(browser, 10).until(
            EC.presence_of_element_located((By.XPATH, "//button[@data-testid='GDPR-accept']")))
        accept.click()
    finally:
        logger.debug("")
    # Wait for the "load more" button to become clickable
    button = WebDriverWait(browser, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//button[@data-testid='search-show-more-button']")))
    # Click the button repeatedly to load every page of results
    while button.is_enabled():
        time.sleep(2)  # give the page time to finish rendering
        try:
            button.click()
            button = WebDriverWait(browser, 5).until(
                EC.element_to_be_clickable((By.XPATH, "//button[@data-testid='search-show-more-button']")))
        except Exception:
            # Button gone or no longer clickable: all results are loaded
            break
    # Grab the fully expanded page source
    page_content = browser.page_source
    soup = BeautifulSoup(page_content, 'html.parser')
    list_news = soup.find_all('li', {"class": "css-1l4w6pd"})
    for index, item in enumerate(list_news):
        logger.debug(item)
        # Scrape the article image (if any)
        image_key = image_key + 1
        url_element = item.find('img', {"class": "css-rq4mmj"})
        image_url = url_element['src'] if url_element else ""
        # Default to no image name so articles without an image don't raise
        # NameError when building the record below.
        filename = ""
        if image_url:
            filename = f"{image_key}.jpg"
            # Download to the generated file name (previously a placeholder path)
            download_image(image_url, f'{fileDir}images/{filename}')
        # Scrape the text fields; fall back to "" when an element is missing
        title_element = item.find('h4', {"class": "css-2fgx4k"})
        introduction_element = item.find('p', {"class": "css-16nhkrn"})
        title = title_element.get_text() if title_element else ""
        introduction = introduction_element.get_text() if introduction_element else ""
        news = {
            "title": title,
            "introduction": introduction,
            "imageName": filename
        }
        data.append(news)
# Persist every scraped item to a JSON file
with open(f'{fileDir}data.json', "w", encoding="utf-8") as file:
    json.dump(data, file, indent=2, ensure_ascii=False)
browser.close()
# Close the browser driver
browser.quit()
...@@ -200,10 +200,14 @@ def reptile(browser=None, search_word=""): ...@@ -200,10 +200,14 @@ def reptile(browser=None, search_word=""):
log.debug('save file success') log.debug('save file success')
else: else:
log.debug('save file failed') log.debug('save file failed')
script_close(browser)
else: else:
# 爬取数据为空 # 爬取数据为空
log.info("未爬取到数据") log.info("未爬取到数据")
script_close(browser)
def script_close(browser):
# 关闭浏览器驱动 # 关闭浏览器驱动
try: try:
browser.close() browser.close()
...@@ -212,7 +216,6 @@ def reptile(browser=None, search_word=""): ...@@ -212,7 +216,6 @@ def reptile(browser=None, search_word=""):
log.debug("浏览器驱动关闭失败") log.debug("浏览器驱动关闭失败")
sys.exit() sys.exit()
def main(): def main():
""" """
......
...@@ -13,6 +13,7 @@ from datetime import datetime ...@@ -13,6 +13,7 @@ from datetime import datetime
from utils.download_image import download_image from utils.download_image import download_image
from config.settings import get_base_file_url from config.settings import get_base_file_url
from config.settings import get_account from config.settings import get_account
# 工具函数-下载图片 # 工具函数-下载图片
''' '''
打开指定网页,并使用 Selenium 模拟点击 "GDPR-accept" 按钮,然后循环点击 "search-show-more-button" 按钮来加载更多数据,直到按钮不再可点击为止。最后,获取完整的分页数据并关闭浏览器驱动。 打开指定网页,并使用 Selenium 模拟点击 "GDPR-accept" 按钮,然后循环点击 "search-show-more-button" 按钮来加载更多数据,直到按钮不再可点击为止。最后,获取完整的分页数据并关闭浏览器驱动。
...@@ -159,10 +160,14 @@ def reptile(browser=None, search_word=""): ...@@ -159,10 +160,14 @@ def reptile(browser=None, search_word=""):
log.debug('save file success') log.debug('save file success')
else: else:
log.debug('save file failed') log.debug('save file failed')
script_close(browser)
else: else:
# 爬取数据为空 # 爬取数据为空
log.info("未爬取到数据") log.info("未爬取到数据")
script_close(browser)
def script_close(browser):
# 关闭浏览器驱动 # 关闭浏览器驱动
try: try:
browser.close() browser.close()
...@@ -171,6 +176,7 @@ def reptile(browser=None, search_word=""): ...@@ -171,6 +176,7 @@ def reptile(browser=None, search_word=""):
log.debug("浏览器驱动关闭失败") log.debug("浏览器驱动关闭失败")
sys.exit() sys.exit()
def main(): def main():
""" """
......
...@@ -75,15 +75,18 @@ def reptile(browser=None, search_word=""): ...@@ -75,15 +75,18 @@ def reptile(browser=None, search_word=""):
log.debug('save file success') log.debug('save file success')
else: else:
log.debug('save file failed') log.debug('save file failed')
script_close(browser)
else: else:
# 爬取数据为空 # 爬取数据为空
log.info("未爬取到数据") log.info("未爬取到数据")
script_close(browser)
def script_close(browser):
# 关闭浏览器驱动 # 关闭浏览器驱动
try: try:
browser.close() browser.close()
browser.quit() browser.quit()
except: except:
log.debug("浏览器驱动关闭失败") log.debug("浏览器驱动关闭失败")
sys.exit() sys.exit()
......
...@@ -5,12 +5,17 @@ ...@@ -5,12 +5,17 @@
import time # import time
from selenium.webdriver import Firefox # from selenium.webdriver import Firefox
from selenium import webdriver # from selenium import webdriver
driver = webdriver.Firefox() # driver = webdriver.Firefox()
driver.get("https://www.toutiao.com/a6969138023774667264/") # driver.get("https://www.toutiao.com/a6969138023774667264/")
time.sleep(2) # time.sleep(2)
html = driver.page_source # html = driver.page_source
print(html) # print(html)
driver.quit() # driver.quit()
\ No newline at end of file
var = {'title': 'Photo by 今周刊 on July 17, 2023. May be an illustration of poster and text.',
'content': '<h1 class="_aacl _aaco _aacu _aacx _aad7 _aade" dir="auto">\n 你有沒有想過,為什麼你的時間總是不夠用?為什麼你買稍微貴點的東西,都要不自覺地把它折算成自己多少天的收入?為什麼你以前看著數字就頭暈腦脹,現在願意對著數字精打細算,還時刻惦記著要怎麼花錢才能熬得到月尾?為什麼你正值青春的花樣年華,卻窮得只剩下理想,忙得沒時間生活?\n <br/>\n <br/>\n 實際上,絕大多數人的疲於奔命,不是因為忙,而是心態出了問題,是眼下的生活不能如人所願,是對當前的生活不知所措。\n <br/>\n <br/>\n 但是,如果你不能以一種主動的、有規劃的方式去對待生活和工作,那麼你即使什麼都不做,依然會覺得疲憊。\n <br/>\n <br/>\n 比如你忙著回覆一封又一封無關緊要的郵件,忙著參加一個又一個無聊的會議,忙著從一個聚會趕到另一個聚會,忙著在節假日跟社交軟體裡每一個熟悉的和不熟悉的人說沒完沒了的、便宜的祝福語……比如你每天兩點一線,在家和公司之間步履匆匆。\n <br/>\n <br/>\n 一大早忙著擠上即將關門開走的公車,好不容易來到公司忙著準備資料、製作檔案、接待客戶。終於熬到下班,行屍走肉樣的狀態卻不忘看看社群動態,在手機裡看著大家都在為生計而奔忙。\n <br/>\n <br/>\n 可如果誰要是問你,「怎麼你老是這麼忙?都做了些什麼?」你就算皺緊了眉頭,想破了腦袋也只能給出一個這樣的回答:「呃,我也記不住都忙什麼了,反正就是很忙!」\n <br/>\n \u200b\n <br/>\n 你呀,像極了一隻在泳池裡瞎撲騰的旱鴨子,一直抓住一個叫作「工作忙」的游泳圈不肯放手。\n <br/>\n <br/>\n 於是,「我好忙」變成了你的海洛因,變成了讓你麻木的精神撫慰品。它讓你忘記為了什麼而出發,忘記了你的最終目的是什麼,就像把你綁在了旋轉的音樂盒上,看起來美妙,聽著也舒服,卻是周而復始的、無意義的瞎打轉。\n <br/>\n <br/>\n 嗯,那你就接著懶吧,以後很失敗的時候,還有可以安慰一下自己的理由——萬一努力了還不成功,那不就尷尬了?\n <br/>\n <br/>\n 這世上真的沒有什麼搖身一變,更沒有什麼能拯救你的人,有的只是你看不到的低調努力。怕就怕,你只有低調,沒有努力。\n <br/>\n \u200b\n <br/>\n 📚本篇僅為部分節錄,摘自《裝睡的人叫不醒,再不清醒窮死你》\n <br/>\n \u200b\n <br/>\n <a class="x1i10hfl xjbqb8w x6umtig x1b1mbwd xaqea5y xav7gou x9f619 x1ypdohk xt0psk2 xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1a2a7pz _aa9_ _a6hd" href="/explore/tags/%E6%9B%B8%E6%91%98/" role="link" tabindex="0">\n #書摘\n </a>\n <a class="x1i10hfl xjbqb8w x6umtig x1b1mbwd xaqea5y xav7gou x9f619 x1ypdohk xt0psk2 xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1a2a7pz _aa9_ _a6hd" href="/explore/tags/%E9%96%B1%E8%AE%80/" role="link" tabindex="0">\n #閱讀\n </a>\n <a class="x1i10hfl xjbqb8w x6umtig x1b1mbwd xaqea5y xav7gou x9f619 x1ypdohk xt0psk2 xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1a2a7pz _aa9_ _a6hd" href="/explore/tags/reading/" role="link" tabindex="0">\n #reading\n </a>\n <a class="x1i10hfl xjbqb8w x6umtig x1b1mbwd 
xaqea5y xav7gou x9f619 x1ypdohk xt0psk2 xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1a2a7pz _aa9_ _a6hd" href="/explore/tags/%E4%BB%8A%E5%91%A8%E5%88%8A/" role="link" tabindex="0">\n #今周刊\n </a>\n <a class="x1i10hfl xjbqb8w x6umtig x1b1mbwd xaqea5y xav7gou x9f619 x1ypdohk xt0psk2 xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1a2a7pz _aa9_ _a6hd" href="/explore/tags/%E6%96%B0%E8%81%9E/" role="link" tabindex="0">\n #新聞\n </a>\n <a class="x1i10hfl xjbqb8w x6umtig x1b1mbwd xaqea5y xav7gou x9f619 x1ypdohk xt0psk2 xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1a2a7pz _aa9_ _a6hd" href="/explore/tags/news/" role="link" tabindex="0">\n #news\n </a>\n <a class="x1i10hfl xjbqb8w x6umtig x1b1mbwd xaqea5y xav7gou x9f619 x1ypdohk xt0psk2 xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1a2a7pz _aa9_ _a6hd" href="/explore/tags/%E5%AA%92%E9%AB%94/" role="link" tabindex="0">\n #媒體\n </a>\n <a class="x1i10hfl xjbqb8w x6umtig x1b1mbwd xaqea5y xav7gou x9f619 x1ypdohk xt0psk2 xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1a2a7pz _aa9_ _a6hd" href="/explore/tags/%E7%90%86%E8%B2%A1/" role="link" tabindex="0">\n #理財\n </a>\n <a class="x1i10hfl xjbqb8w x6umtig x1b1mbwd xaqea5y xav7gou x9f619 x1ypdohk xt0psk2 xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1a2a7pz _aa9_ _a6hd" href="/explore/tags/quotes/" role="link" tabindex="0">\n #quotes\n </a>\n <a class="x1i10hfl xjbqb8w x6umtig x1b1mbwd xaqea5y xav7gou x9f619 x1ypdohk xt0psk2 xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1a2a7pz _aa9_ _a6hd" href="/explore/tags/allianzgitw/" role="link" tabindex="0">\n #allianzgitw\n </a>\n <a class="x1i10hfl xjbqb8w x6umtig 
x1b1mbwd xaqea5y xav7gou x9f619 x1ypdohk xt0psk2 xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1a2a7pz _aa9_ _a6hd" href="/explore/tags/%E4%B8%BB%E5%8B%95%E8%AE%93%E6%8A%95%E8%B3%87%E7%99%BC%E6%8F%AE%E5%BD%B1%E9%9F%BF%E5%8A%9B/" role="link" tabindex="0">\n #主動讓投資發揮影響力\n </a>\n</h1>\n<img alt="Photo by 今周刊 on July 17, 2023. May be an illustration of poster and text." class="x5yr21d xu96u03 x10l6tqk x13vifvy x87ps6o xh8yej3" crossorigin="anonymous" decoding="auto" sizes="653.5999755859375px" src="https://scontent-hkg4-2.cdninstagram.com/v/t39.30808-6/361612960_677983984363846_6639522657508930508_n.jpg?stp=dst-jpg_e15_fr_p1080x1080&amp;_nc_ht=scontent-hkg4-2.cdninstagram.com&amp;_nc_cat=110&amp;_nc_ohc=NFrLOL0j0HoAX-dbhrX&amp;edm=AGyKU4gAAAAA&amp;ccb=7-5&amp;ig_cache_key=MzE0OTI2Nzg0NTc4NzA3NTU0NQ%3D%3D.2-ccb7-5&amp;oh=00_AfBdfyF7qUQQaVxi_e9z5aI4P6e6Hy9JIVtTl2YV9gXoJw&amp;oe=64C4688F&amp;_nc_sid=2011ad" srcset="https://scontent-hkg4-2.cdninstagram.com/v/t39.30808-6/361612960_677983984363846_6639522657508930508_n.jpg?stp=dst-jpg_e15_fr_p1080x1080&amp;_nc_ht=scontent-hkg4-2.cdninstagram.com&amp;_nc_cat=110&amp;_nc_ohc=NFrLOL0j0HoAX-dbhrX&amp;edm=AGyKU4gAAAAA&amp;ccb=7-5&amp;ig_cache_key=MzE0OTI2Nzg0NTc4NzA3NTU0NQ%3D%3D.2-ccb7-5&amp;oh=00_AfBdfyF7qUQQaVxi_e9z5aI4P6e6Hy9JIVtTl2YV9gXoJw&amp;oe=64C4688F&amp;_nc_sid=2011ad 1080w,https://scontent-hkg4-2.cdninstagram.com/v/t39.30808-6/361612960_677983984363846_6639522657508930508_n.jpg?stp=dst-jpg_e35_p750x750_sh0.08&amp;_nc_ht=scontent-hkg4-2.cdninstagram.com&amp;_nc_cat=110&amp;_nc_ohc=NFrLOL0j0HoAX-dbhrX&amp;edm=AGyKU4gAAAAA&amp;ccb=7-5&amp;ig_cache_key=MzE0OTI2Nzg0NTc4NzA3NTU0NQ%3D%3D.2-ccb7-5&amp;oh=00_AfDH1NfR4Ik2R9DwcoPB5XectpJqfeUOtbvrxtmRHDxOVg&amp;oe=64C4688F&amp;_nc_sid=2011ad 
750w,https://scontent-hkg4-2.cdninstagram.com/v/t39.30808-6/361612960_677983984363846_6639522657508930508_n.jpg?stp=dst-jpg_e35_p640x640_sh0.08&amp;_nc_ht=scontent-hkg4-2.cdninstagram.com&amp;_nc_cat=110&amp;_nc_ohc=NFrLOL0j0HoAX-dbhrX&amp;edm=AGyKU4gAAAAA&amp;ccb=7-5&amp;ig_cache_key=MzE0OTI2Nzg0NTc4NzA3NTU0NQ%3D%3D.2-ccb7-5&amp;oh=00_AfCc0UgUfQGa7N4QJeWgEMdqmiIDKwuO10SH_A5R1-q9EQ&amp;oe=64C4688F&amp;_nc_sid=2011ad 640w,https://scontent-hkg4-2.cdninstagram.com/v/t39.30808-6/361612960_677983984363846_6639522657508930508_n.jpg?stp=dst-jpg_e15_p480x480&amp;_nc_ht=scontent-hkg4-2.cdninstagram.com&amp;_nc_cat=110&amp;_nc_ohc=NFrLOL0j0HoAX-dbhrX&amp;edm=AGyKU4gAAAAA&amp;ccb=7-5&amp;ig_cache_key=MzE0OTI2Nzg0NTc4NzA3NTU0NQ%3D%3D.2-ccb7-5&amp;oh=00_AfAlY_Yxs-GAahDKxf4ijFWqjTERRFrRitvuXySepJR7hw&amp;oe=64C4688F&amp;_nc_sid=2011ad 480w,https://scontent-hkg4-2.cdninstagram.com/v/t39.30808-6/361612960_677983984363846_6639522657508930508_n.jpg?stp=dst-jpg_e15_p320x320&amp;_nc_ht=scontent-hkg4-2.cdninstagram.com&amp;_nc_cat=110&amp;_nc_ohc=NFrLOL0j0HoAX-dbhrX&amp;edm=AGyKU4gAAAAA&amp;ccb=7-5&amp;ig_cache_key=MzE0OTI2Nzg0NTc4NzA3NTU0NQ%3D%3D.2-ccb7-5&amp;oh=00_AfBuQrHYUVMkHj6NKLuNmNvSlyupwQFTY6e7lsdxwXmy5Q&amp;oe=64C4688F&amp;_nc_sid=2011ad 320w,https://scontent-hkg4-2.cdninstagram.com/v/t39.30808-6/361612960_677983984363846_6639522657508930508_n.jpg?stp=dst-jpg_e15_p240x240&amp;_nc_ht=scontent-hkg4-2.cdninstagram.com&amp;_nc_cat=110&amp;_nc_ohc=NFrLOL0j0HoAX-dbhrX&amp;edm=AGyKU4gAAAAA&amp;ccb=7-5&amp;ig_cache_key=MzE0OTI2Nzg0NTc4NzA3NTU0NQ%3D%3D.2-ccb7-5&amp;oh=00_AfD9YWNkm7j6GSh3CufCedWL0LlxHSEdLskO6PVsE4DA8Q&amp;oe=64C4688F&amp;_nc_sid=2011ad 
240w,https://scontent-hkg4-2.cdninstagram.com/v/t39.30808-6/361612960_677983984363846_6639522657508930508_n.jpg?stp=dst-jpg_e15_p150x150&amp;_nc_ht=scontent-hkg4-2.cdninstagram.com&amp;_nc_cat=110&amp;_nc_ohc=NFrLOL0j0HoAX-dbhrX&amp;edm=AGyKU4gAAAAA&amp;ccb=7-5&amp;ig_cache_key=MzE0OTI2Nzg0NTc4NzA3NTU0NQ%3D%3D.2-ccb7-5&amp;oh=00_AfAzad7jssscLrABgHQKNVi0CGOm5H1DZWIpqwwMMx5Kjw&amp;oe=64C4688F&amp;_nc_sid=2011ad 150w" style="object-fit: cover;"/>\n',
'link': 'https://www.instagram.com/p/Cu0cmuSssPZ/', 'reptileTime': '1690259090', 'type': '图文', 'author': '',
'releaseTime': '1689613204', 'picture_url': 'http://192.168.0.118:8186/instagram/1690259027.jpg'}
\ No newline at end of file
...@@ -28,7 +28,7 @@ from utils.index import get_screen_resolution ...@@ -28,7 +28,7 @@ from utils.index import get_screen_resolution
''' '''
def create(option=None, using_user_data=True, web_browser="chromium"): def create(option=None, using_user_data=True, web_browser="firefox"):
""" """
:param web_browser: :param web_browser:
...@@ -60,8 +60,10 @@ def create(option=None, using_user_data=True, web_browser="chromium"): ...@@ -60,8 +60,10 @@ def create(option=None, using_user_data=True, web_browser="chromium"):
# 添加用户数据目录参数,启用浏览器的持久性会话,可以保存登录状态和Cookie # 添加用户数据目录参数,启用浏览器的持久性会话,可以保存登录状态和Cookie
if web_browser == "firefox": if web_browser == "firefox":
# 将此处替换为你的Firefox用户数据目录路径 # 将此处替换为你的Firefox用户数据目录路径
profile = FirefoxProfile(profile_directory=user_data_dir) # profile = FirefoxProfile(profile_directory=user_data_dir)
options.profile = profile # options.profile = profile
options.add_argument("-profile")
options.add_argument(user_data_dir)
# options.add_argument(f'--user-data-dir={user_data_dir}') # options.add_argument(f'--user-data-dir={user_data_dir}')
elif web_browser == "chrome": elif web_browser == "chrome":
options.add_argument(f'--user-data-dir={user_data_dir}') options.add_argument(f'--user-data-dir={user_data_dir}')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment