Commit d7796167 authored by liyang

fix:ptt debug

parent ad694283
......@@ -39,8 +39,9 @@ def reptile(browser=None, search_word=""):
# 打开网页
browser.get(url)
# log.debug("已打开浏览器")
classify_item_list = browser.find_elements('xpath', "//div[@class='board-class']")
item_list = browser.find_elements('xpath', "//div[@class='board-class']")
# log.debug(classify_item_list)
classify_item_list = item_list.copy()
length = len(classify_item_list)
for index in range(length):
# 暂时先爬取 第2个 分类
......@@ -209,7 +210,7 @@ def reptile(browser=None, search_word=""):
browser.back()
time.sleep(0.1)
# 重新获取
classify_item_list = browser.find_elements('xpath', "//div[@class='board-class']")
# classify_item_list = browser.find_elements('xpath', "//div[@class='board-class']")
# 发送爬取数据到java服务
# print('----------------------')
......
# Scratch script: load a YouTube search results page in headless Chrome and
# print the page source to stdout.
from selenium import webdriver

# Chrome is started with the headless, no-sandbox and disable-dev-shm-usage
# switches.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# Open the browser, go to the search page, and print the result.
wd = webdriver.Chrome(options=options)
try:
    wd.get("https://www.youtube.com/results?search_query=俄乌战争")
    print(wd.page_source)  # results
finally:
    # Always end the WebDriver session so the browser and driver processes
    # do not outlive the script, even when the page load raises.
    wd.quit()
\ No newline at end of file
# Scratch fragment: collect the elements matched by the 'board-class' XPath,
# click into the first four of them, print element texts, then navigate back.
# NOTE(review): indentation was lost in this copy, so the nesting of the
# statements below has to be confirmed against the real file. `browser`,
# `time`, `length_two` and `element_list` are not defined in the visible lines.
classify_item_list = browser.find_elements('xpath', "//div[@class='board-class']")
# log.debug(classify_item_list)
length = len(classify_item_list)
for index in range(length):
# Original note: "for now, only crawl the 2nd category".
# NOTE(review): the condition below actually accepts indices 0 through 3.
if 0 <= index < 4:
# Read the element's text before clicking it.
type_title = classify_item_list[index].text
classify_item_list[index].click()
time.sleep(0.1)
for index_two in range(length_two):
print(element_list[index_two].text)
# Navigate the browser back to the previous page.
browser.back()
# NOTE(review): index 0 triggers a second back(); the reason is not visible
# here — confirm.
if index == 0:
browser.back()
time.sleep(0.1)
# Query the category elements again after navigating back.
# NOTE(review): presumably the earlier element references are no longer
# usable after navigation — confirm.
classify_item_list = browser.find_elements('xpath', "//div[@class='board-class']")
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment