Commit 0e2d5311 authored by liyang

feat:ins爬虫数据条件过滤优化

parent a9600993
...@@ -36,7 +36,7 @@ def reptile(browser=None, search_word=""): ...@@ -36,7 +36,7 @@ def reptile(browser=None, search_word=""):
base_url = "https://www.instagram.com/" base_url = "https://www.instagram.com/"
option = ['--headless'] option = ['--headless']
# ['--headless'] # ['--headless']
browser = browser or create(None, True) browser = browser or create(option, True)
# print(browser) # print(browser)
# 打开网页 # 打开网页
browser.get(base_url) browser.get(base_url)
...@@ -97,7 +97,7 @@ def reptile(browser=None, search_word=""): ...@@ -97,7 +97,7 @@ def reptile(browser=None, search_word=""):
if key == 0: if key == 0:
title_str_list = item.get_attribute("alt").split("'") title_str_list = item.get_attribute("alt").split("'")
if len(title_str_list) >= 3: if len(title_str_list) >= 3:
title = title_str_list[3] title = title_str_list[1]
else: else:
title = "" title = ""
img_soup = BeautifulSoup(item.get_attribute("outerHTML"), "html.parser").find("img") img_soup = BeautifulSoup(item.get_attribute("outerHTML"), "html.parser").find("img")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment