Commit 0d1ca3e0 authored by liyang

feat:ins爬虫数据条件过滤优化

parent d801e8b0
......@@ -9,7 +9,7 @@ def get_base_url():
def get_base_file_url():
# return "http://192.168.0.118:8186/"
return "/"
return "/files/reptile_data/"
def get_account(name):
data = {}
......
......@@ -117,7 +117,7 @@ def reptile(browser=None, search_word=""):
status = download_image(element['src'], download_dir)
if status:
element['src'] = access_address
picture_url.append(access_address)
picture_url.append(download_dir)
else:
print("")
content = soup.prettify()
......
......@@ -122,7 +122,7 @@ def reptile(browser=None, search_word=""):
img_soup["src"] = access_address
# print(img_soup.prettify())
soup.append(img_soup)
picture_url.append(access_address)
picture_url.append(download_dir)
content = soup.prettify()
# 类型
......
......@@ -138,7 +138,7 @@ def reptile(browser=None, search_word=""):
status = download_image(element['src'], download_dir)
if status:
element['src'] = access_address
picture_url.append(access_address)
picture_url.append(download_dir)
else:
print("")
# ---------------- 判断类型 end ----------
......
......@@ -117,7 +117,7 @@ def reptile(browser=None, search_word=""):
status = download_image(element['src'], download_dir)
if status:
element['src'] = access_address
picture_url.append(access_address)
picture_url.append(download_dir)
else:
print("")
content = soup.prettify()
......
......@@ -47,16 +47,20 @@ def reptile(browser=None, search_word=""):
releaseTime = str(int(convert_string_to_time(element_time_list[index].text)))
except:
releaseTime = str(int(time.time()))
video_url = []
# 下载地址
download_dir = f'{os.path.join(file_dir, f"{id}.mp4")}'
# 访问地址
access_address = f'{get_base_file_url()}{table_name.split("_")[1]}/{id}.mp4'
# 下载视频
state_download = yt_dlp_download(url, 'youtube')
log.debug(url)
file_http_src = f'{base_urr}youtube/{id}.mp4'
video_url.append(download_dir)
if state_download:
# 组装数据
obj = {
"title": title,
"content": f"<video controls style='width:100%' src='{file_http_src}'></video>",
"videoUrl": file_http_src,
"content": f"<video controls style='width:100%' src='{access_address}'></video>",
"videoUrl": ",".join(video_url),
"link": link,
"reptileTime": str(int(time.time())),
"type": '视频',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment