Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
network-assets-reptile
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
liyang
network-assets-reptile
Commits
0d1ca3e0
Commit
0d1ca3e0
authored
Jul 25, 2023
by
liyang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
feat:ins爬虫数据条件过滤优化
parent
d801e8b0
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
13 additions
and
9 deletions
+13
-9
settings.py
config/settings.py
+1
-1
pc_facebook.py
pc_facebook.py
+1
-1
pc_instagram.py
pc_instagram.py
+1
-1
pc_ptt.py
pc_ptt.py
+1
-1
pc_twitter.py
pc_twitter.py
+1
-1
pc_youtube.py
pc_youtube.py
+8
-4
No files found.
config/settings.py
View file @
0d1ca3e0
...
@@ -9,7 +9,7 @@ def get_base_url():
...
@@ -9,7 +9,7 @@ def get_base_url():
def
get_base_file_url
():
def
get_base_file_url
():
# return "http://192.168.0.118:8186/"
# return "http://192.168.0.118:8186/"
return
"/"
return
"/
files/reptile_data/
"
def
get_account
(
name
):
def
get_account
(
name
):
data
=
{}
data
=
{}
...
...
pc_facebook.py
View file @
0d1ca3e0
...
@@ -117,7 +117,7 @@ def reptile(browser=None, search_word=""):
...
@@ -117,7 +117,7 @@ def reptile(browser=None, search_word=""):
status
=
download_image
(
element
[
'src'
],
download_dir
)
status
=
download_image
(
element
[
'src'
],
download_dir
)
if
status
:
if
status
:
element
[
'src'
]
=
access_address
element
[
'src'
]
=
access_address
picture_url
.
append
(
access_address
)
picture_url
.
append
(
download_dir
)
else
:
else
:
print
(
""
)
print
(
""
)
content
=
soup
.
prettify
()
content
=
soup
.
prettify
()
...
...
pc_instagram.py
View file @
0d1ca3e0
...
@@ -122,7 +122,7 @@ def reptile(browser=None, search_word=""):
...
@@ -122,7 +122,7 @@ def reptile(browser=None, search_word=""):
img_soup
[
"src"
]
=
access_address
img_soup
[
"src"
]
=
access_address
# print(img_soup.prettify())
# print(img_soup.prettify())
soup
.
append
(
img_soup
)
soup
.
append
(
img_soup
)
picture_url
.
append
(
access_address
)
picture_url
.
append
(
download_dir
)
content
=
soup
.
prettify
()
content
=
soup
.
prettify
()
# 类型
# 类型
...
...
pc_ptt.py
View file @
0d1ca3e0
...
@@ -138,7 +138,7 @@ def reptile(browser=None, search_word=""):
...
@@ -138,7 +138,7 @@ def reptile(browser=None, search_word=""):
status
=
download_image
(
element
[
'src'
],
download_dir
)
status
=
download_image
(
element
[
'src'
],
download_dir
)
if
status
:
if
status
:
element
[
'src'
]
=
access_address
element
[
'src'
]
=
access_address
picture_url
.
append
(
access_address
)
picture_url
.
append
(
download_dir
)
else
:
else
:
print
(
""
)
print
(
""
)
# ---------------- 判断类型 end ----------
# ---------------- 判断类型 end ----------
...
...
pc_twitter.py
View file @
0d1ca3e0
...
@@ -117,7 +117,7 @@ def reptile(browser=None, search_word=""):
...
@@ -117,7 +117,7 @@ def reptile(browser=None, search_word=""):
status
=
download_image
(
element
[
'src'
],
download_dir
)
status
=
download_image
(
element
[
'src'
],
download_dir
)
if
status
:
if
status
:
element
[
'src'
]
=
access_address
element
[
'src'
]
=
access_address
picture_url
.
append
(
access_address
)
picture_url
.
append
(
download_dir
)
else
:
else
:
print
(
""
)
print
(
""
)
content
=
soup
.
prettify
()
content
=
soup
.
prettify
()
...
...
pc_youtube.py
View file @
0d1ca3e0
...
@@ -47,16 +47,20 @@ def reptile(browser=None, search_word=""):
...
@@ -47,16 +47,20 @@ def reptile(browser=None, search_word=""):
releaseTime
=
str
(
int
(
convert_string_to_time
(
element_time_list
[
index
]
.
text
)))
releaseTime
=
str
(
int
(
convert_string_to_time
(
element_time_list
[
index
]
.
text
)))
except
:
except
:
releaseTime
=
str
(
int
(
time
.
time
()))
releaseTime
=
str
(
int
(
time
.
time
()))
video_url
=
[]
# 下载地址
download_dir
=
f
'{os.path.join(file_dir, f"{id}.mp4")}'
# 访问地址
access_address
=
f
'{get_base_file_url()}{table_name.split("_")[1]}/{id}.mp4'
# 下载视频
# 下载视频
state_download
=
yt_dlp_download
(
url
,
'youtube'
)
state_download
=
yt_dlp_download
(
url
,
'youtube'
)
log
.
debug
(
url
)
video_url
.
append
(
download_dir
)
file_http_src
=
f
'{base_urr}youtube/{id}.mp4'
if
state_download
:
if
state_download
:
# 组装数据
# 组装数据
obj
=
{
obj
=
{
"title"
:
title
,
"title"
:
title
,
"content"
:
f
"<video controls style='width:100
%
' src='{
file_http_src
}'></video>"
,
"content"
:
f
"<video controls style='width:100
%
' src='{
access_address
}'></video>"
,
"videoUrl"
:
file_http_src
,
"videoUrl"
:
","
.
join
(
video_url
)
,
"link"
:
link
,
"link"
:
link
,
"reptileTime"
:
str
(
int
(
time
.
time
())),
"reptileTime"
:
str
(
int
(
time
.
time
())),
"type"
:
'视频'
,
"type"
:
'视频'
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment