Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
network-assets-reptile
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
liyang
network-assets-reptile
Commits
c9b62377
Commit
c9b62377
authored
Jul 13, 2023
by
liyang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix:youtube 360P 每次6条
parent
a94971b7
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
6 additions
and
5 deletions
+6
-5
pc_ptt.py
pc_ptt.py
+6
-5
.gitkeep
reptile_data/youtube/.gitkeep
+0
-0
No files found.
pc_ptt.py
View file @
c9b62377
...
@@ -4,7 +4,7 @@ import re
...
@@ -4,7 +4,7 @@ import re
import
time
import
time
import
loguru
import
loguru
import
pymysql.cursors
#
import pymysql.cursors
import
requests
import
requests
from
bs4
import
BeautifulSoup
from
bs4
import
BeautifulSoup
from
datetime
import
datetime
from
datetime
import
datetime
...
@@ -37,9 +37,10 @@ def reptile(browser=None, search_word=""):
...
@@ -37,9 +37,10 @@ def reptile(browser=None, search_word=""):
# log.debug(classify_item_list)
# log.debug(classify_item_list)
length
=
len
(
classify_item_list
)
length
=
len
(
classify_item_list
)
for
index
in
range
(
length
):
for
index
in
range
(
length
):
# 暂时先爬取 第2个 分类
if
1
<
index
<
3
:
if
1
<
index
<
3
:
classify_item_list
[
index
]
.
click
()
classify_item_list
[
index
]
.
click
()
time
.
sleep
(
1
)
# time.sleep(0.
1)
element_list
=
browser
.
find_elements
(
'xpath'
,
"//div[@class='r-ent']//div[@class='title']//a"
)
element_list
=
browser
.
find_elements
(
'xpath'
,
"//div[@class='r-ent']//div[@class='title']//a"
)
length_two
=
len
(
element_list
)
length_two
=
len
(
element_list
)
for
index_two
in
range
(
length_two
):
for
index_two
in
range
(
length_two
):
...
@@ -51,7 +52,7 @@ def reptile(browser=None, search_word=""):
...
@@ -51,7 +52,7 @@ def reptile(browser=None, search_word=""):
# if matches:
# if matches:
# log.debug(f"找到了匹配的字符串:{matches}")
# log.debug(f"找到了匹配的字符串:{matches}")
element_list
[
index_two
]
.
click
()
element_list
[
index_two
]
.
click
()
time
.
sleep
(
1
)
# time.sleep(0.
1)
# 原链接
# 原链接
browser_current_url
=
browser
.
current_url
browser_current_url
=
browser
.
current_url
log
.
debug
(
'网页链接'
+
str
(
browser_current_url
))
log
.
debug
(
'网页链接'
+
str
(
browser_current_url
))
...
@@ -139,7 +140,7 @@ def reptile(browser=None, search_word=""):
...
@@ -139,7 +140,7 @@ def reptile(browser=None, search_word=""):
}
}
# --------------- 组装数据 end---------------------
# --------------- 组装数据 end---------------------
if
search_word
is
""
:
if
search_word
==
""
:
data
.
append
(
obj
)
data
.
append
(
obj
)
else
:
else
:
# 使用正则表达式进行匹配
# 使用正则表达式进行匹配
...
@@ -156,7 +157,7 @@ def reptile(browser=None, search_word=""):
...
@@ -156,7 +157,7 @@ def reptile(browser=None, search_word=""):
element_list
=
browser
.
find_elements
(
'xpath'
,
"//div[@class='r-ent']//div[@class='title']//a"
)
element_list
=
browser
.
find_elements
(
'xpath'
,
"//div[@class='r-ent']//div[@class='title']//a"
)
# 浏览器返回上一页
# 浏览器返回上一页
browser
.
back
()
browser
.
back
()
time
.
sleep
(
1
)
#
time.sleep(1)
# 重新获取
# 重新获取
classify_item_list
=
browser
.
find_elements
(
'xpath'
,
"//div[@class='board-class']"
)
classify_item_list
=
browser
.
find_elements
(
'xpath'
,
"//div[@class='board-class']"
)
...
...
reptile_data/youtube/.gitkeep
deleted
100644 → 0
View file @
a94971b7
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment