Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
network-assets-reptile
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
liyang
network-assets-reptile
Commits
f0e81304
Commit
f0e81304
authored
Aug 01, 2023
by
liyang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
feat:1.编写自由时报爬虫脚本
2.编写数据量统计脚本
parent
15d41825
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
9 additions
and
5 deletions
+9
-5
pc_ltn.py
pc_ltn.py
+9
-5
.gitkeep
reptile_data/ltn/.gitkeep
+0
-0
No files found.
pc_l
ibertyTimeNet
.py
→
pc_l
tn
.py
View file @
f0e81304
...
...
@@ -49,7 +49,7 @@ def reptile(browser=None, search_word=""):
# browser = browser or create()
# 打开网页
browser
.
get
(
url
+
"&page=1"
)
time
.
sleep
(
3
)
time
.
sleep
(
2
)
# 获取分页
page_list_element
=
browser
.
find_elements
(
"xpath"
,
"//div[@data-desc='分頁']/a[@class='p_num' or @class='active']"
)
...
...
@@ -61,7 +61,7 @@ def reptile(browser=None, search_word=""):
# 点击分页
browser
.
get
(
f
"{url}&page={key+1}"
)
# element.click()
time
.
sleep
(
3
)
time
.
sleep
(
2
)
# 重新获取
page_list_element
=
browser
.
find_elements
(
"xpath"
,
"//div[@data-desc='分頁']/a"
)
elif
key
==
len
(
page_list_element
)
-
1
:
...
...
@@ -110,9 +110,13 @@ def reptile(browser=None, search_word=""):
date_format
=
"
%
a
%
b
%
d
%
H:
%
M:
%
S
%
Y"
# 将日期字符串转换为datetime对象
date_time
=
parse_ltn_time_string
(
date_string
)
# print(date_time)
# date_time = datetime.datetime.strptime(, date_format)
# 将datetime对象转换为时间戳(以秒为单位)
release_time
=
int
(
date_time
)
try
:
release_time
=
int
(
date_time
)
except
:
release_time
=
int
(
time
.
time
())
# 过滤时间
if
beginFiltrationTime
<=
release_time
<=
endFiltrationTime
:
...
...
@@ -189,7 +193,7 @@ def main():
log
.
debug
(
"call success"
)
search_word
=
""
for
item
in
response
[
'data'
][
'rows'
]:
if
item
[
'name'
]
==
'l
ibertyTimeNet
-自由时报'
:
if
item
[
'name'
]
==
'l
tn
-自由时报'
:
search_word
=
item
[
'keyword'
]
table_name
=
item
[
'tableName'
]
status_task
=
int
(
item
[
"status"
])
...
...
@@ -212,7 +216,7 @@ def main():
data
=
[]
# 任务详情
task
=
{}
table_name
=
"pms_l
ibertyTimeNet
"
table_name
=
"pms_l
tn
"
# 全局字段
keyword
=
""
...
...
reptile_data/l
ibertyTimeNet
/.gitkeep
→
reptile_data/l
tn
/.gitkeep
View file @
f0e81304
File moved
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment