Commit 8bbe2730 authored by liyang

fix:爬取数据入库

parent 4f920e0f
......@@ -133,6 +133,7 @@ def reptile(browser=None, search_word=""):
try:
# 查找所有的<a>标签
a_tags = soup.find_all('a', href=True)
log.debug("a标签数量:" + str(len(a_tags)))
# 循环遍历<a>标签,检查每个<a>标签是否包含<img>元素,如果包含则删除该<a>标签
for tag in a_tags:
tag.decompose()
......@@ -142,11 +143,13 @@ def reptile(browser=None, search_word=""):
try:
# 找到所有第一级标签为 `div` 的元素
div_elements = soup.find_all('div')
log.debug("一级div数量:" + str(len(div_elements)))
# 逐个删除这些元素
for div in div_elements:
div.extract()
# 删除第一级span
span_element = soup.find_all('span')
log.debug("一级span数量:" + str(len(span_element)))
for span in span_element:
span.extract()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment