Commit 290453ef authored by liyang

fix:selenium 驱动配置

parent 8ba9b922
...@@ -94,7 +94,22 @@ def reptile(browser=None, search_word=""): ...@@ -94,7 +94,22 @@ def reptile(browser=None, search_word=""):
# 将datetime对象转换为时间戳(以秒为单位) # 将datetime对象转换为时间戳(以秒为单位)
release_time = int(date_time.timestamp()) release_time = int(date_time.timestamp())
# log.debug('开始判断类型') # log.debug('开始判断类型')
try:
# 找到所有第一级标签为 `div` 的元素
div_elements = soup.find_all('div')
# log.debug("一级div数量:" + str(len(div_elements)))
# 逐个删除这些元素
for div in div_elements:
div.extract()
# 删除第一级span
span_element = soup.find_all('span')
# log.debug("一级span数量:" + str(len(span_element)))
for span in span_element:
span.extract()
except:
# log.debug("删除第一级div失败")
a = 2
# ---------------- 判断类型 start ---------- # ---------------- 判断类型 start ----------
# 类型 # 类型
content_type = "" content_type = ""
...@@ -136,22 +151,6 @@ def reptile(browser=None, search_word=""): ...@@ -136,22 +151,6 @@ def reptile(browser=None, search_word=""):
except: except:
# log.debug("查找所有的<a>标签失败") # log.debug("查找所有的<a>标签失败")
a = 1 a = 1
try:
# 找到所有第一级标签为 `div` 的元素
div_elements = soup.find_all('div')
# log.debug("一级div数量:" + str(len(div_elements)))
# 逐个删除这些元素
for div in div_elements:
div.extract()
# 删除第一级span
span_element = soup.find_all('span')
# log.debug("一级span数量:" + str(len(span_element)))
for span in span_element:
span.extract()
except:
# log.debug("删除第一级div失败")
a = 2
html = soup.prettify().replace('amp;', '') html = soup.prettify().replace('amp;', '')
# ------------------ content 过滤 end-------------- # ------------------ content 过滤 end--------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment