Commit a94971b7 authored by liyang

fix:youtube 360P 每次6条

parent 7a510943
...@@ -5,10 +5,14 @@ from utils.Logger import log ...@@ -5,10 +5,14 @@ from utils.Logger import log
from utils.createBrowserDriver import create from utils.createBrowserDriver import create
from utils.filse import save_json from utils.filse import save_json
from api.index import importJson, getReptileTask, importJsonPath from api.index import importJson, getReptileTask, importJsonPath
from utils.index import convert_to_traditional, yt_dlp_download,convert_string_to_time from utils.index import convert_to_traditional, yt_dlp_download, convert_string_to_time
# from pytube import YouTube from pytube import YouTube
from datetime import datetime
import os import os
from config.settings import get_base_file_url from config.settings import get_base_file_url
from selenium.webdriver.common.action_chains import ActionChains
def reptile(browser=None, search_word=""): def reptile(browser=None, search_word=""):
""" """
...@@ -22,25 +26,39 @@ def reptile(browser=None, search_word=""): ...@@ -22,25 +26,39 @@ def reptile(browser=None, search_word=""):
# 打开网页 # 打开网页
url = f'https://www.youtube.com/results?search_query={search_word}' url = f'https://www.youtube.com/results?search_query={search_word}'
browser.get(url) browser.get(url)
# time.sleep(2)
classify_video_list = browser.find_elements('xpath', classify_video_list = browser.find_elements('xpath',
"//div[@id='contents']//ytd-video-renderer//div[@id='title-wrapper']//a") "//div[@id='contents']//ytd-video-renderer//div[@id='title-wrapper']//a")
element_author_list = browser.find_elements('xpath',"//div[@id='contents']//ytd-video-renderer//ytd-channel-name//yt-formatted-string/a") element_author_list = browser.find_elements('xpath',
element_time_list = browser.find_elements('xpath',"//div[@id='contents']//ytd-video-renderer//ytd-video-meta-block//div[@id='metadata-line']/span[2]") "//div[@id='contents']//ytd-video-renderer//ytd-channel-name//yt-formatted-string/a")
# print(classify_item_list) element_time_list = browser.find_elements('xpath',
"//div[@id='contents']//ytd-video-renderer//ytd-video-meta-block//div[@id='metadata-line']/span[2]")
# 时间长度集合
# elemnet_logtime_list = browser.find_elements('xpath',"//div[@id='contents']//ytd-video-renderer//ytd-thumbnail//ytd-thumbnail-overlay-time-status-renderer//span")
# 获取目录下所有文件名
# file_names = os.listdir(os.path.join(os.path.abspath("./"), "reptile_data", "youtube"))
length = len(classify_video_list) length = len(classify_video_list)
for index in range(length): for index in range(length):
if 0 <= index < length:
title = classify_video_list[index].get_attribute('title') title = classify_video_list[index].get_attribute('title')
link = classify_video_list[index].get_attribute('href') link = classify_video_list[index].get_attribute('href')
id = link.split("?")[1].split("&")[0].replace("v=", "")
url = f'https://www.youtube.com/watch?v={id}'
# is_repeat = False
# for item in file_names:
# # print("id——1:"+f'{id}.mp4')
# # print("id——2:" + item)
# if f'{id}.mp4' == item:
# is_repeat = True
# else:
# is_repeat = False
# print(is_repeat)
if index < 6 and YouTube(url).length // 60 < 60:
# yt = YouTube(link) # yt = YouTube(link)
# link = "https://www.youtube.com/watch?v=7q88m5MQRhE" # link = "https://www.youtube.com/watch?v=7q88m5MQRhE"
# print(link) # print(link)
# author = element_author_list[index].text # author = element_author_list[index].text
# file_url = './' + link + '.mp4' # file_url = './' + link + '.mp4'
id = link.split("?")[1].split("&")[0].replace("v=","")
url = f'https://www.youtube.com/watch?v={id}'
base_urr = get_base_file_url() base_urr = get_base_file_url()
log.debug(url)
releaseTime = "" releaseTime = ""
try: try:
releaseTime = str(convert_string_to_time(element_time_list[index].text)) releaseTime = str(convert_string_to_time(element_time_list[index].text))
...@@ -48,6 +66,7 @@ def reptile(browser=None, search_word=""): ...@@ -48,6 +66,7 @@ def reptile(browser=None, search_word=""):
releaseTime = str(time.time()) releaseTime = str(time.time())
# 下载视频 # 下载视频
state_download = yt_dlp_download(url, 'youtube') state_download = yt_dlp_download(url, 'youtube')
log.debug(url)
file_http_src = f'{base_urr}youtube/{id}.mp4' file_http_src = f'{base_urr}youtube/{id}.mp4'
if state_download: if state_download:
# 组装数据 # 组装数据
...@@ -62,8 +81,10 @@ def reptile(browser=None, search_word=""): ...@@ -62,8 +81,10 @@ def reptile(browser=None, search_word=""):
"releaseTime": releaseTime "releaseTime": releaseTime
} }
data.append(obj) data.append(obj)
else: else :
return False print("")
# return False
# log.debug("")
if len(data) > 0: if len(data) > 0:
# 保存json文件到本地 # 保存json文件到本地
......
...@@ -84,7 +84,11 @@ def yt_dlp_download(url, name): ...@@ -84,7 +84,11 @@ def yt_dlp_download(url, name):
geo = "" geo = ""
# --get-url # --get-url
video_selection = f'' video_selection = f''
download_options = f'-f mp4 -vU' # 清晰度
definition = f'18' # 360p
# definition = f'18' # 720p
# definition = f'24' # 1080p
download_options = f'-f {definition} -vU'
other_options = f'--verbose' other_options = f'--verbose'
# 要执行的 shell 命令 # 要执行的 shell 命令
command = f'yt-dlp {options} {network_options} {geo} {video_selection} {download_options} {other_options} -- {url}' command = f'yt-dlp {options} {network_options} {geo} {video_selection} {download_options} {other_options} -- {url}'
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment