Commit 5d9b5cc8 authored by liyang

fix:爬取数据入库

parent b6fa9839
...@@ -120,16 +120,16 @@ def reptile(browser=None, search_word=""): ...@@ -120,16 +120,16 @@ def reptile(browser=None, search_word=""):
log.debug('开始判断类型') log.debug('开始判断类型')
# ---------------- 判断类型 start ---------- # ---------------- 判断类型 start ----------
# 类型 # 类型
content_type = ""
try:
# 查找所有img标签
img_tags = soup.find_all('img')
if len(img_tags) > 0:
content_type = "图文"
else:
content_type = "文字"
except:
content_type = "文字" content_type = "文字"
# try:
# # 查找所有img标签
# img_tags = soup.find_all('img')
# if len(img_tags) > 0:
# content_type = "图文"
# else:
# content_type = "文字"
# except:
# content_type = "文字"
# ---------------- 判断类型 end ---------- # ---------------- 判断类型 end ----------
log.debug('开始内容过滤') log.debug('开始内容过滤')
# ------------------ content 过滤 start-------------- # ------------------ content 过滤 start--------------
...@@ -173,15 +173,17 @@ def reptile(browser=None, search_word=""): ...@@ -173,15 +173,17 @@ def reptile(browser=None, search_word=""):
} }
# --------------- 组装数据 end--------------------- # --------------- 组装数据 end---------------------
if search_word is "":
data.append(obj) data.append(obj)
else:
# 使用正则表达式进行匹配 # 使用正则表达式进行匹配
# matches = re.findall(search_word, element_title.text) matches = re.findall(search_word, element_title.text)
# 打印匹配结果 # 打印匹配结果
# if matches: if matches:
# # log.debug(f"找到了匹配的字符串:{matches}") # log.debug(f"找到了匹配的字符串:{matches}")
# data.append(obj) data.append(obj)
# else: else:
# log.debug("未找到匹配的字符串") log.debug("未找到匹配的字符串")
# 浏览器返回上一页 # 浏览器返回上一页
browser.back() browser.back()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment