Commit f0e81304 authored by liyang

feat:1.编写自由时报爬虫脚本

2.编写数据量统计脚本
parent 15d41825
......@@ -49,7 +49,7 @@ def reptile(browser=None, search_word=""):
# browser = browser or create()
# 打开网页
browser.get(url+"&page=1")
time.sleep(3)
time.sleep(2)
# 获取分页
page_list_element = browser.find_elements("xpath", "//div[@data-desc='分頁']/a[@class='p_num' or @class='active']")
......@@ -61,7 +61,7 @@ def reptile(browser=None, search_word=""):
# 点击分页
browser.get(f"{url}&page={key+1}")
# element.click()
time.sleep(3)
time.sleep(2)
# 重新获取
page_list_element = browser.find_elements("xpath", "//div[@data-desc='分頁']/a")
elif key == len(page_list_element) - 1:
......@@ -110,9 +110,13 @@ def reptile(browser=None, search_word=""):
date_format = "%a %b %d %H:%M:%S %Y"
# 将日期字符串转换为datetime对象
date_time = parse_ltn_time_string(date_string)
# print(date_time)
# date_time = datetime.datetime.strptime(, date_format)
# 将datetime对象转换为时间戳(以秒为单位)
release_time = int(date_time)
try:
release_time = int(date_time)
except:
release_time = int(time.time())
# 过滤时间
if beginFiltrationTime <= release_time <= endFiltrationTime:
......@@ -189,7 +193,7 @@ def main():
log.debug("call success")
search_word = ""
for item in response['data']['rows']:
if item['name'] == 'libertyTimeNet-自由时报':
if item['name'] == 'ltn-自由时报':
search_word = item['keyword']
table_name = item['tableName']
status_task = int(item["status"])
......@@ -212,7 +216,7 @@ def main():
data = []
# 任务详情
task = {}
table_name = "pms_libertyTimeNet"
table_name = "pms_ltn"
# 全局字段
keyword = ""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment