Commit 9ea6170b authored by liyang

fix:ptt 加速

parent 98dcc42f
......@@ -67,7 +67,14 @@ def reptile(browser=None, search_word=""):
log.debug(f"正在爬取分类:{type_title}-第{index_two + 1}条")
print("当前连接:"+str(browser.current_url))
print(data[len(data)-1]["title"])
# 使用正则表达式进行匹配关键词
if re.findall(search_word, element_list[index_two].text):
# log.debug(f"找到了匹配的字符串:{matches}")
error = ""
else:
# log.debug("未找到匹配的字符串")
# 退出本次迭代,进入下一次迭代
continue
# 标题不包含"公告"和"看板"
if re.findall("公告", element_list[index_two].text) or re.findall("看板", element_list[index_two].text):
a = 1
......@@ -190,17 +197,6 @@ def reptile(browser=None, search_word=""):
"picture_url": ",".join(picture_url)
}
# --------------- 组装数据 end---------------------
# 使用正则表达式进行匹配
# log.debug(f"关键词:{search_word}-{element_title.text}")
matches = re.findall(search_word, element_title.text)
# 打印匹配结果
if matches:
# log.debug(f"找到了匹配的字符串:{matches}")
data.append(obj)
else:
# log.debug("未找到匹配的字符串")
a = 3
# 浏览器返回上一页
browser.back()
time.sleep(0.1)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment