Commit f0e81304 authored by liyang

feat:1.编写自由时报爬虫脚本

2.编写数据量统计脚本
parent 15d41825
...@@ -49,7 +49,7 @@ def reptile(browser=None, search_word=""): ...@@ -49,7 +49,7 @@ def reptile(browser=None, search_word=""):
# browser = browser or create() # browser = browser or create()
# 打开网页 # 打开网页
browser.get(url+"&page=1") browser.get(url+"&page=1")
time.sleep(3) time.sleep(2)
# 获取分页 # 获取分页
page_list_element = browser.find_elements("xpath", "//div[@data-desc='分頁']/a[@class='p_num' or @class='active']") page_list_element = browser.find_elements("xpath", "//div[@data-desc='分頁']/a[@class='p_num' or @class='active']")
...@@ -61,7 +61,7 @@ def reptile(browser=None, search_word=""): ...@@ -61,7 +61,7 @@ def reptile(browser=None, search_word=""):
# 点击分页 # 点击分页
browser.get(f"{url}&page={key+1}") browser.get(f"{url}&page={key+1}")
# element.click() # element.click()
time.sleep(3) time.sleep(2)
# 重新获取 # 重新获取
page_list_element = browser.find_elements("xpath", "//div[@data-desc='分頁']/a") page_list_element = browser.find_elements("xpath", "//div[@data-desc='分頁']/a")
elif key == len(page_list_element) - 1: elif key == len(page_list_element) - 1:
...@@ -110,9 +110,13 @@ def reptile(browser=None, search_word=""): ...@@ -110,9 +110,13 @@ def reptile(browser=None, search_word=""):
date_format = "%a %b %d %H:%M:%S %Y" date_format = "%a %b %d %H:%M:%S %Y"
# 将日期字符串转换为datetime对象 # 将日期字符串转换为datetime对象
date_time = parse_ltn_time_string(date_string) date_time = parse_ltn_time_string(date_string)
# print(date_time)
# date_time = datetime.datetime.strptime(, date_format) # date_time = datetime.datetime.strptime(, date_format)
# 将datetime对象转换为时间戳(以秒为单位) # 将datetime对象转换为时间戳(以秒为单位)
release_time = int(date_time) try:
release_time = int(date_time)
except:
release_time = int(time.time())
# 过滤时间 # 过滤时间
if beginFiltrationTime <= release_time <= endFiltrationTime: if beginFiltrationTime <= release_time <= endFiltrationTime:
...@@ -189,7 +193,7 @@ def main(): ...@@ -189,7 +193,7 @@ def main():
log.debug("call success") log.debug("call success")
search_word = "" search_word = ""
for item in response['data']['rows']: for item in response['data']['rows']:
if item['name'] == 'libertyTimeNet-自由时报': if item['name'] == 'ltn-自由时报':
search_word = item['keyword'] search_word = item['keyword']
table_name = item['tableName'] table_name = item['tableName']
status_task = int(item["status"]) status_task = int(item["status"])
...@@ -212,7 +216,7 @@ def main(): ...@@ -212,7 +216,7 @@ def main():
data = [] data = []
# 任务详情 # 任务详情
task = {} task = {}
table_name = "pms_libertyTimeNet" table_name = "pms_ltn"
# 全局字段 # 全局字段
keyword = "" keyword = ""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment