Commit 0832e447 authored by liyang

fix: twitter 过滤 (Twitter content filtering)

parent d13aeedc
...@@ -102,26 +102,15 @@ def reptile(browser=None, search_word=""): ...@@ -102,26 +102,15 @@ def reptile(browser=None, search_word=""):
# lth = len(ignore_list) # lth = len(ignore_list)
if len(video_list) > 0: if len(video_list) > 0:
# for key,element in enumerate(video_list): # for key,element in enumerate(video_list):
# div_elements = soup.find("div").findChildren("div", recursive=False) div_elements = soup.find("div").findChildren("div", recursive=False)
# div_tags = soup.find_all("div", recursive=False) # div_tags = soup.find_all("div", recursive=False)
for item in video_list: for item in video_list:
# 把video替换成img标签
# 创建 <img> 标签
img_tag = soup.new_tag('img') img_tag = soup.new_tag('img')
img_tag["src"] = item["poster"] img_tag["src"] = item["poster"]
item.replaceWith(img_tag) for items in div_elements:
# 确保列表中至少有两个 <div> 子元素 if hasattr(items,"aria-labelledby"):
# if len(div_elements) >= 2: # div[@aria-labelledby="xx"] 替换为img标签【内容含有视频的替换为img标签】
# # 获取第二个 <div> 元素,并将其从父级元素中移除 items.replaceWith(img_tag)
# for item in div_elements:
# if hasattr(item,"aria-labelledby"):
# item.extract()
# 删除
# div.decompose()
# 创建video标签占位
# custom_video = soup.new_tag("video")
# custom_video["src"] = ""
# soup.find("div").append(custom_video)
else: else:
# print("") # print("")
error = "" error = ""
...@@ -130,18 +119,22 @@ def reptile(browser=None, search_word=""): ...@@ -130,18 +119,22 @@ def reptile(browser=None, search_word=""):
picture_url = [] picture_url = []
if len(image_list) > 0: if len(image_list) > 0:
for key, element in enumerate(image_list): for key, element in enumerate(image_list):
# 下载图片至本地,替换标签中的src # 如果是svg,就删除
id = str(int(time.time())) if str(element['src']).find("svg") != -1:
image_type = extract_image_format(element['src']) element.extract()
# 下载地址 else:
download_dir = f'{os.path.join(file_dir, f"{id}.{image_type}")}' # 下载图片至本地,替换标签中的src
# 访问地址 id = str(int(time.time()))
access_address = f'{get_base_file_url()}{table_name.split("_")[1]}/{id}.{image_type}' image_type = extract_image_format(element['src'])
# 下载状态 # 下载地址
status = download_image(element['src'], download_dir) download_dir = f'{os.path.join(file_dir, f"{id}.{image_type}")}'
if status: # 访问地址
element['src'] = access_address access_address = f'{get_base_file_url()}{table_name.split("_")[1]}/{id}.{image_type}'
picture_url.append(download_dir) # 下载状态
status = download_image(element['src'], download_dir)
if status:
element['src'] = access_address
picture_url.append(download_dir)
else: else:
# print("") # print("")
error = "" error = ""
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment