Commit d01df965 authored by liyang

fix:ptt debug

parent 1821fe14
...@@ -59,11 +59,12 @@ def reptile(browser=None, search_word=""): ...@@ -59,11 +59,12 @@ def reptile(browser=None, search_word=""):
element_list = browser.find_elements('xpath', "//div[@class='r-ent']//div[@class='title']//a") element_list = browser.find_elements('xpath', "//div[@class='r-ent']//div[@class='title']//a")
length_two = len(element_list) length_two = len(element_list)
for index_two in range(length_two): for index_two in range(length_two):
print(element_list[index_two].text)
# 标题不包含"公告"和"看板" # 标题不包含"公告"和"看板"
if re.findall("公告", element_list[index_two].text) or re.findall("看板", element_list[index_two].text): if re.findall("公告", element_list[index_two].text) or re.findall("看板", element_list[index_two].text):
a = 1 a = 1
else: else:
log.debug(f"正在爬取分类:{type_title}-第{index_two + 1}条") # log.debug(f"正在爬取分类:{type_title}-第{index_two + 1}条")
# 使用正则表达式进行匹配 # 使用正则表达式进行匹配
# matches = # matches =
# log.debug(element_list[index_two].text+str(matches)) # log.debug(element_list[index_two].text+str(matches))
...@@ -151,7 +152,8 @@ def reptile(browser=None, search_word=""): ...@@ -151,7 +152,8 @@ def reptile(browser=None, search_word=""):
element['src'] = access_address element['src'] = access_address
picture_url.append(download_dir) picture_url.append(download_dir)
else: else:
print("") # print("")
error = ""
# ---------------- 判断类型 end ---------- # ---------------- 判断类型 end ----------
# log.debug('开始内容过滤') # log.debug('开始内容过滤')
# ------------------ content 过滤 start-------------- # ------------------ content 过滤 start--------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment