Commit c712ff68 authored by liyang

feat: replace time.sleep() with WebDriverWait

parent 37ffd734
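The change is mechanical across the crawlers below: each fixed time.sleep() is replaced by an explicit WebDriverWait on an element that signals the page is ready. A minimal sketch of the pattern, with a hypothetical helper wait_for (the XPath and timeout are placeholders; each crawler in the diff uses its own):

# Minimal sketch of the pattern applied throughout this commit: block until the
# target element exists instead of sleeping for a fixed number of seconds.
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


def wait_for(browser, xpath, timeout=10):
    """Return the element once it is present in the DOM, or raise TimeoutException."""
    return WebDriverWait(browser, timeout).until(
        EC.presence_of_element_located((By.XPATH, xpath))
    )

# e.g. wait_for(browser, "//div[@role='feed']") right after browser.get(search_url)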
@@ -35,9 +35,10 @@ def reptile(browser=None, search_word=""):
     """
     print(f"搜索词:{search_word}")
     base_url = "https://www.dcard.tw"
-    browser = browser or create(no_headless=True,using_user_data=True)
+    browser = browser or create(no_headless=False, using_user_data=True)
     # Open the page
     # browser.get(base_url)
+    # time.sleep(3)
     browser.get(f"{base_url}/search?query={search_word}")
     base_xpath = "//div[@role='main']//div[@data-key]//article"
     # Content blocks
...
@@ -13,6 +13,12 @@ import os
 from config.settings import get_base_file_url
 from config.settings import get_account
 import sys
+# --------------- selenium dependencies start ----------------
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+# --------------- selenium dependencies end ----------------
 # Utility function - download images
 '''
@@ -23,7 +29,7 @@ import sys
 def reptile(browser=None, search_word=""):
     print(f"搜索词:{search_word}")
     url = "https://www.facebook.com/"
-    browser = browser or create(no_headless=True,using_user_data=True)
+    browser = browser or create(no_headless=False,using_user_data=True)
     # Open the page
     browser.get(url)
     try:
@@ -35,15 +41,19 @@ def reptile(browser=None, search_word=""):
         # Get the login button
         button_login = browser.find_element('xpath', "//button[@name='login']")
         button_login.click()
-        time.sleep(6)
+        wait = WebDriverWait(browser, 10)
+        wait.until(EC.presence_of_element_located((By.XPATH, "//div[@role='main']")))
     except:
         print("已登录")
+        log.debug("facebook login complete")
     url = f"https://www.facebook.com/search/top?q={search_word}"
     browser.get(url)
     # Use JavaScript to scroll the page to the bottom
     browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
-    time.sleep(3)
+    # Wait for the content to appear, with a maximum wait of 10 seconds
+    wait = WebDriverWait(browser, 10)
+    # Define the wait condition via expected_conditions, here taking an element of the popup content as an example
+    wait.until(EC.presence_of_element_located((By.XPATH, "//div[@role='feed']")))
     # Content
     element_content_list = browser.find_elements('xpath',
                                                   "//div[@role='feed']/div//div[@aria-describedby]/div/div/div/div/div/div[2]/div/div/div[3]")
...
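The login block above keeps the original bare except:. As a variant sketch (not part of the commit), catching the Selenium exceptions explicitly makes the "already logged in" branch visible without swallowing unrelated errors; facebook_login_if_needed is a hypothetical wrapper around the same steps:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException


def facebook_login_if_needed(browser):
    # Click the login button if the form is present; a missing form or a wait
    # timeout is treated as "already logged in" instead of being hidden by a
    # bare except.
    try:
        browser.find_element(By.XPATH, "//button[@name='login']").click()
        WebDriverWait(browser, 10).until(
            EC.presence_of_element_located((By.XPATH, "//div[@role='main']")))
    except (NoSuchElementException, TimeoutException):
        print("已登录")  # already logged in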
@@ -56,6 +56,7 @@ def reptile(browser=None, search_word=""):
     except:
         print("------")
         # print("1111")
+        log.debug("instagram login complete")
     url = f"{base_url}explore/tags/{search_word}/"
     browser.get(url)
     wait = WebDriverWait(browser, 10)
@@ -98,6 +99,7 @@ def reptile(browser=None, search_word=""):
         if len(title_str_list) >= 3:
             title = title_str_list[1]
         else:
+            # Extract the text from the image
             title = ""
         img_soup = BeautifulSoup(item.get_attribute("outerHTML"), "html.parser").find("img")
         del img_soup["srcset"]
...
@@ -33,7 +33,7 @@ def reptile(browser=None, search_word=""):
     # browser = browser or create()
     # Open the page
     browser.get(url)
-    log.debug("已打开浏览器")
+    # log.debug("已打开浏览器")
     classify_item_list = browser.find_elements('xpath', "//div[@class='board-class']")
     # log.debug(classify_item_list)
     length = len(classify_item_list)
...
@@ -14,7 +14,12 @@ from utils.download_image import download_image
 from config.settings import get_base_file_url
 from config.settings import get_account
-# Utility function - download images
+# --------------- selenium dependencies start ----------------
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+# --------------- selenium dependencies end ----------------
 '''
 Open the specified page, use Selenium to simulate a click on the "GDPR-accept" button, then click the "search-show-more-button" button in a loop to load more data until it can no longer be clicked. Finally, collect the complete paginated data and close the browser driver.
 '''
@@ -28,32 +33,35 @@ def reptile(browser=None, search_word=""):
     """
     print(f"搜索词:{search_word}")
     base_url = "https://twitter.com/"
-    browser = browser or create(no_headless=True,using_user_data=True)
+    browser = browser or create(no_headless=False, using_user_data=True)
     # print(browser)
     # Open the page
     browser.get(base_url)
-    time.sleep(3)
+    time.sleep(2)
     try:
+        # wait = WebDriverWait(browser, 20)
+        # wait.until(EC.presence_of_element_located((By.XPATH, "//input[@autocomplete='username']")))
         # Check whether login is required
         login_input = browser.find_element('xpath', "//input[@autocomplete='username']")
         login_input.send_keys(get_account("twitter")["name"])
         # Get the next-step button
         buttons = browser.find_element('xpath', "//div[@role='button'][2]")
         buttons.click()
-        time.sleep(3)
+        wait = WebDriverWait(browser, 10)
+        wait.until(EC.presence_of_element_located((By.XPATH, "//input[@autocomplete='current-password']")))
         password_input = browser.find_element('xpath', "//input[@autocomplete='current-password']")
         password_input.send_keys(get_account("twitter")["password"])
         # # Get the login button
         button_login = browser.find_element('xpath', "//div[@data-testid='LoginForm_Login_Button']")
         button_login.click()
-        time.sleep(1)
     except:
         print("------")
-        time.sleep(2)
-        # print("1111")
     url = 'https://twitter.com/search?q=' + search_word + '&src=typed_query'
     browser.get(url)
-    time.sleep(4)
+    wait = WebDriverWait(browser, 10)
+    wait.until(
+        EC.presence_of_element_located((By.XPATH, "//div[@data-testid='cellInnerDiv']//article/div/div/div[2]/div[2]")))
     base_xpath = "//div[@data-testid='cellInnerDiv']//article/div/div/div[2]/div[2]"
     # Content blocks
     element_content_list = browser.find_elements('xpath', base_xpath)
...
@@ -12,8 +12,14 @@ import os
 from config.settings import get_base_file_url
 from selenium.webdriver.common.action_chains import ActionChains
 import sys
+# --------------- selenium dependencies start ----------------
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+# --------------- selenium dependencies end ----------------
 def reptile(browser=None, search_word=""):
     """
@@ -21,12 +27,14 @@ def reptile(browser=None, search_word=""):
     :param search_word:
     :return:
     """
-    browser = browser or create(no_headless=True,using_user_data=True)
+    browser = browser or create(no_headless=False, using_user_data=True)
     # print(browser)
     # Open the page
     url = f'https://www.youtube.com/results?search_query={search_word}'
     browser.get(url)
-    # time.sleep(2)
+    wait = WebDriverWait(browser, 10)
+    wait.until(EC.presence_of_element_located((By.XPATH, "//div[@id='contents']")))
+    log.debug("youtube login complete")
     classify_video_list = browser.find_elements('xpath',
                                                  "//div[@id='contents']//ytd-video-renderer//div[@id='title-wrapper']//a")
     element_author_list = browser.find_elements('xpath',
@@ -54,6 +62,7 @@ def reptile(browser=None, search_word=""):
         # Download the video
         state_download = yt_dlp_download(url, 'youtube')
         video_url.append(download_dir)
+
         if state_download:
             # Assemble the data
             obj = {
@@ -68,8 +77,8 @@ def reptile(browser=None, search_word=""):
             }
             data.append(obj)
         else:
-            print("")
+            # print("")
+            error = ""
     if len(data) > 0:
         # Save the JSON file locally
         # log.debug(os.path.abspath("../"))
...
-# from utils.index import yt_dlp_download
-# status = yt_dlp_download("https://www.facebook.com/e5627ead-8b9a-48fd-820f-ee242cc08bbb", "facebook")
-# print(status)
-# import time
-# from selenium.webdriver import Firefox
-# from selenium import webdriver
-# driver = webdriver.Firefox()
-# driver.get("https://www.toutiao.com/a6969138023774667264/")
-# time.sleep(2)
-# html = driver.page_source
-# print(html)
-# driver.quit()
-# var = {'title': 'Photo by 今周刊 on July 17, 2023. May be an illustration of poster and text.', 'content': '<h1 class="_aacl _aaco _aacu _aacx _aad7 _aade" dir="auto">…</h1>\n<img alt="Photo by 今周刊 on July 17, 2023. May be an illustration of poster and text." src="https://scontent-hkg4-2.cdninstagram.com/…"/>\n', 'link': 'https://www.instagram.com/p/Cu0cmuSssPZ/', 'reptileTime': '1690259090', 'type': '图文', 'author': '', 'releaseTime': '1689613204', 'picture_url': 'http://192.168.0.118:8186/instagram/1690259027.jpg'}
-str = "https://www.instagram.com/p/Cs0YvVcJFF8/"
-list = str.split("/")
-print(list[len(list)-2])
\ No newline at end of file
+import os
+# import pytesseract
+from PIL import Image
+# Specify the execution path for Tesseract OCR (optional; not needed if the environment variable is already configured)
+cmd_path = "/usr/local/Cellar/tesseract/5.3.2/share/tessdata"
+img_path = os.path.join(os.path.abspath("../"), 'network-assets-reptile', 'reptile-data', "instagram","Cr8vg2MyNFz.jpg")
+pytesseract.pytesseract.tesseract_cmd = cmd_path
+# Open the image
+image = Image.open(img_path)
+# Run text recognition on the image
+text = pytesseract.image_to_string(image, lang='chi_sim')
+# Print the recognized text
+print(text)
\ No newline at end of file
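A note on the rewritten OCR scratch script: pytesseract.pytesseract.tesseract_cmd expects the path of the tesseract executable itself, while a tessdata directory such as the one assigned above is normally supplied via TESSDATA_PREFIX or the --tessdata-dir config option. A minimal sketch under that assumption (the Homebrew binary path is inferred from the Cellar layout above and may differ):

import os
import pytesseract
from PIL import Image

# Path to the tesseract binary (assumed from the Homebrew Cellar layout above).
pytesseract.pytesseract.tesseract_cmd = "/usr/local/Cellar/tesseract/5.3.2/bin/tesseract"
# Point Tesseract at the tessdata directory that holds chi_sim.traineddata.
config = '--tessdata-dir "/usr/local/Cellar/tesseract/5.3.2/share/tessdata"'

img_path = os.path.join(os.path.abspath("../"), 'network-assets-reptile',
                        'reptile-data', "instagram", "Cr8vg2MyNFz.jpg")
text = pytesseract.image_to_string(Image.open(img_path), lang='chi_sim', config=config)
print(text)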
@@ -87,7 +87,7 @@ def create(option=None, no_headless=False, using_user_data=True, web_browser="fi
     # options.add_argument("--window-size=1920x1080") # Set the window size, a common setting for fully headless mode
     # options.add_argument("--start-maximized") # Maximize the window
     if no_headless == True:
-        if platform.system() == "Linux" and platform.system() == "Darwin":
+        if platform.system() == "Linux" or platform.system() == "Darwin":
            # Enable headless mode
            options.add_argument("-headless")
        elif platform.system() == "Windows" and web_browser == "firefox":
...
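The hunk above fixes a condition that could never be true: platform.system() returns a single value, so requiring it to equal both "Linux" and "Darwin" skipped the headless branch on every OS. A small sketch of an equivalent check, using a hypothetical helper headless_flags:

import platform

def headless_flags():
    # platform.system() returns one value ("Linux", "Darwin", "Windows", ...),
    # so "Linux or macOS" needs `or` / membership, never `and`.
    if platform.system() in ("Linux", "Darwin"):
        return ["-headless"]
    return []

print(headless_flags())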
@@ -166,19 +166,15 @@ def pytube_download(link, file_dir):
 def yt_dlp_download(url, name):
     file_dir = os.path.abspath("../")
-    options = f'-v'
     network_options = f'-o "{os.path.join(file_dir, "network-assets-reptile", "reptile_data", name, "%(id)s.%(ext)s")}"'
-    geo = ""
-    # --get-url
-    video_selection = f''
     # Resolution
-    definition = f'18' # 360p
+    # definition = f'18' # 360p
     # definition = f'18' # 720p
     # definition = f'24' # 1080p
-    download_options = f'-f {definition} -vU' # f'-f 18 -vU'
-    other_options = f'--verbose'
+    download_options = f'-f mp4'
     # The shell command to execute
-    command = f'yt-dlp {options} {network_options} {geo} {video_selection} {download_options} {other_options} -- {url}'
+    command = f'yt-dlp -v {download_options} {network_options} --verbose -- {url}'
     # Invoke the shell command via subprocess
     result = subprocess.run(command, shell=True, capture_output=True, text=True)
...
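After this refactor the separate options/geo/video_selection variables are gone and -f mp4 replaces the hard-coded format id 18. A condensed sketch of what the new code assembles and runs (function name and URL are placeholders for illustration; paths mirror the repository layout referenced in the diff):

import os
import subprocess

def yt_dlp_download_sketch(url, name="youtube"):
    # Build the same command string as the new yt_dlp_download and run it.
    file_dir = os.path.abspath("../")
    output_template = os.path.join(file_dir, "network-assets-reptile", "reptile_data",
                                   name, "%(id)s.%(ext)s")
    command = f'yt-dlp -v -f mp4 -o "{output_template}" --verbose -- {url}'
    return subprocess.run(command, shell=True, capture_output=True, text=True)

# result = yt_dlp_download_sketch("https://www.youtube.com/watch?v=<video-id>", "youtube")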