Commit 835fc16a authored by liyang

feat:1.编写自由时报爬虫脚本

2.编写数据量统计脚本
parent 48c53bef
......@@ -57,7 +57,7 @@ def reptile(browser=None, search_word=""):
# 循环分页
for key, element in enumerate(page_list_element):
if key > 0 or key <= len(page_list_element) - 1:
if key > 0 and key <= len(page_list_element) - 1:
# 点击分页
browser.get(f"{url}&page={key+1}")
# element.click()
......@@ -90,7 +90,11 @@ def reptile(browser=None, search_word=""):
# 访问地址
access_address = f'{get_base_file_url()}{table_name.split("_")[1]}/{local_path_name}/{id}.jpg'
# 下载状态
status = download_image(element['src'], download_dir)
if "default" in element['src']:
status = False
else:
status = download_image(element['src'], download_dir)
if status:
# element['src'] = access_address
img_tag["src"] = access_address
......
......@@ -137,8 +137,8 @@ def parse_ltn_time_string(time_str):
except ValueError:
return None
else:
# print(time_str)
return datetime.datetime.strptime(time_str, '%Y/%m/%d')
dt_object = datetime.datetime.strptime(time_str, '%Y/%m/%d')
return dt_object.timestamp()
# 转换 youtube 的时间
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment