Commit 0d1ca3e0 authored by liyang

feat:ins爬虫数据条件过滤优化

parent d801e8b0
...@@ -9,7 +9,7 @@ def get_base_url(): ...@@ -9,7 +9,7 @@ def get_base_url():
def get_base_file_url(): def get_base_file_url():
# return "http://192.168.0.118:8186/" # return "http://192.168.0.118:8186/"
return "/" return "/files/reptile_data/"
def get_account(name): def get_account(name):
data = {} data = {}
......
...@@ -117,7 +117,7 @@ def reptile(browser=None, search_word=""): ...@@ -117,7 +117,7 @@ def reptile(browser=None, search_word=""):
status = download_image(element['src'], download_dir) status = download_image(element['src'], download_dir)
if status: if status:
element['src'] = access_address element['src'] = access_address
picture_url.append(access_address) picture_url.append(download_dir)
else: else:
print("") print("")
content = soup.prettify() content = soup.prettify()
......
...@@ -122,7 +122,7 @@ def reptile(browser=None, search_word=""): ...@@ -122,7 +122,7 @@ def reptile(browser=None, search_word=""):
img_soup["src"] = access_address img_soup["src"] = access_address
# print(img_soup.prettify()) # print(img_soup.prettify())
soup.append(img_soup) soup.append(img_soup)
picture_url.append(access_address) picture_url.append(download_dir)
content = soup.prettify() content = soup.prettify()
# 类型 # 类型
......
...@@ -138,7 +138,7 @@ def reptile(browser=None, search_word=""): ...@@ -138,7 +138,7 @@ def reptile(browser=None, search_word=""):
status = download_image(element['src'], download_dir) status = download_image(element['src'], download_dir)
if status: if status:
element['src'] = access_address element['src'] = access_address
picture_url.append(access_address) picture_url.append(download_dir)
else: else:
print("") print("")
# ---------------- 判断类型 end ---------- # ---------------- 判断类型 end ----------
......
...@@ -117,7 +117,7 @@ def reptile(browser=None, search_word=""): ...@@ -117,7 +117,7 @@ def reptile(browser=None, search_word=""):
status = download_image(element['src'], download_dir) status = download_image(element['src'], download_dir)
if status: if status:
element['src'] = access_address element['src'] = access_address
picture_url.append(access_address) picture_url.append(download_dir)
else: else:
print("") print("")
content = soup.prettify() content = soup.prettify()
......
...@@ -47,16 +47,20 @@ def reptile(browser=None, search_word=""): ...@@ -47,16 +47,20 @@ def reptile(browser=None, search_word=""):
releaseTime = str(int(convert_string_to_time(element_time_list[index].text))) releaseTime = str(int(convert_string_to_time(element_time_list[index].text)))
except: except:
releaseTime = str(int(time.time())) releaseTime = str(int(time.time()))
video_url = []
# 下载地址
download_dir = f'{os.path.join(file_dir, f"{id}.mp4")}'
# 访问地址
access_address = f'{get_base_file_url()}{table_name.split("_")[1]}/{id}.mp4'
# 下载视频 # 下载视频
state_download = yt_dlp_download(url, 'youtube') state_download = yt_dlp_download(url, 'youtube')
log.debug(url) video_url.append(download_dir)
file_http_src = f'{base_urr}youtube/{id}.mp4'
if state_download: if state_download:
# 组装数据 # 组装数据
obj = { obj = {
"title": title, "title": title,
"content": f"<video controls style='width:100%' src='{file_http_src}'></video>", "content": f"<video controls style='width:100%' src='{access_address}'></video>",
"videoUrl": file_http_src, "videoUrl": ",".join(video_url),
"link": link, "link": link,
"reptileTime": str(int(time.time())), "reptileTime": str(int(time.time())),
"type": '视频', "type": '视频',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment