Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
N
network-assets-reptile
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
liyang
network-assets-reptile
Commits
0832e447
Commit
0832e447
authored
Jul 28, 2023
by
liyang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix:twitter 过滤
parent
d13aeedc
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
21 additions
and
28 deletions
+21
-28
pc_twitter.py
pc_twitter.py
+21
-28
No files found.
pc_twitter.py
View file @
0832e447
...
@@ -102,26 +102,15 @@ def reptile(browser=None, search_word=""):
...
@@ -102,26 +102,15 @@ def reptile(browser=None, search_word=""):
# lth = len(ignore_list)
# lth = len(ignore_list)
if
len
(
video_list
)
>
0
:
if
len
(
video_list
)
>
0
:
# for key,element in enumerate(video_list):
# for key,element in enumerate(video_list):
#
div_elements = soup.find("div").findChildren("div", recursive=False)
div_elements
=
soup
.
find
(
"div"
)
.
findChildren
(
"div"
,
recursive
=
False
)
# div_tags = soup.find_all("div", recursive=False)
# div_tags = soup.find_all("div", recursive=False)
for
item
in
video_list
:
for
item
in
video_list
:
# 把video替换成img标签
# 创建 <img> 标签
img_tag
=
soup
.
new_tag
(
'img'
)
img_tag
=
soup
.
new_tag
(
'img'
)
img_tag
[
"src"
]
=
item
[
"poster"
]
img_tag
[
"src"
]
=
item
[
"poster"
]
item
.
replaceWith
(
img_tag
)
for
items
in
div_elements
:
# 确保列表中至少有两个 <div> 子元素
if
hasattr
(
items
,
"aria-labelledby"
):
# if len(div_elements) >= 2:
# div[@aria-labelledby="xx"] 替换为img标签【内容含有视频的替换为img标签】
# # 获取第二个 <div> 元素,并将其从父级元素中移除
items
.
replaceWith
(
img_tag
)
# for item in div_elements:
# if hasattr(item,"aria-labelledby"):
# item.extract()
# 删除
# div.decompose()
# 创建video标签占位
# custom_video = soup.new_tag("video")
# custom_video["src"] = ""
# soup.find("div").append(custom_video)
else
:
else
:
# print("")
# print("")
error
=
""
error
=
""
...
@@ -130,18 +119,22 @@ def reptile(browser=None, search_word=""):
...
@@ -130,18 +119,22 @@ def reptile(browser=None, search_word=""):
picture_url
=
[]
picture_url
=
[]
if
len
(
image_list
)
>
0
:
if
len
(
image_list
)
>
0
:
for
key
,
element
in
enumerate
(
image_list
):
for
key
,
element
in
enumerate
(
image_list
):
# 下载图片至本地,替换标签中的src
# 如果是svg,就删除
id
=
str
(
int
(
time
.
time
()))
if
str
(
element
[
'src'
])
.
find
(
"svg"
)
!=
-
1
:
image_type
=
extract_image_format
(
element
[
'src'
])
element
.
extract
()
# 下载地址
else
:
download_dir
=
f
'{os.path.join(file_dir, f"{id}.{image_type}")}'
# 下载图片至本地,替换标签中的src
# 访问地址
id
=
str
(
int
(
time
.
time
()))
access_address
=
f
'{get_base_file_url()}{table_name.split("_")[1]}/{id}.{image_type}'
image_type
=
extract_image_format
(
element
[
'src'
])
# 下载状态
# 下载地址
status
=
download_image
(
element
[
'src'
],
download_dir
)
download_dir
=
f
'{os.path.join(file_dir, f"{id}.{image_type}")}'
if
status
:
# 访问地址
element
[
'src'
]
=
access_address
access_address
=
f
'{get_base_file_url()}{table_name.split("_")[1]}/{id}.{image_type}'
picture_url
.
append
(
download_dir
)
# 下载状态
status
=
download_image
(
element
[
'src'
],
download_dir
)
if
status
:
element
[
'src'
]
=
access_address
picture_url
.
append
(
download_dir
)
else
:
else
:
# print("")
# print("")
error
=
""
error
=
""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment