fix:爬取数据入库

5d9b5cc8 · liyang · b6fa9839 · 5d9b5cc8
Commit 5d9b5cc8 authored Jul 11, 2023 by liyang
Hide whitespace changes
Inline Side-by-side

Showing with 21 additions and 19 deletions

pc_ptt.py +21 -19

No files found.
--- a/pc_ptt.py
+++ b/pc_ptt.py
@@ -120,16 +120,16 @@ def reptile(browser=None, search_word=""):
                log.debug('开始判断类型')
                # ---------------- 判断类型 start ----------
                # 类型
-                content_type = "文字"
+                content_type = ""
-                # try:
+                try:
-                #     # 查找所有img标签
+                    # 查找所有img标签
-                #     img_tags = soup.find_all('img')
+                    img_tags = soup.find_all('img')
-                #     if len(img_tags) > 0:
+                    if len(img_tags) > 0:
-                #         content_type = "图文"
+                        content_type = "图文"
-                #     else:
+                    else:
-                #         content_type = "文字"
+                        content_type = "文字"
-                # except:
+                except:
-                #     content_type = "文字"
+                    content_type = "文字"
                # ---------------- 判断类型 end ----------
                log.debug('开始内容过滤')
                # ------------------ content 过滤 start--------------
@@ -173,15 +173,17 @@ def reptile(browser=None, search_word=""):
                }
                # --------------- 组装数据 end---------------------
-                data.append(obj)
+                if search_word is "":
-                # 使用正则表达式进行匹配
+                    data.append(obj)
-                # matches = re.findall(search_word, element_title.text)
+                else:
-                # 打印匹配结果
+                    # 使用正则表达式进行匹配
-                # if matches:
+                    matches = re.findall(search_word, element_title.text)
-                #     # log.debug(f"找到了匹配的字符串：{matches}")
+                    # 打印匹配结果
-                #     data.append(obj)
+                    if matches:
-                # else:
+                        # log.debug(f"找到了匹配的字符串：{matches}")
-                #     log.debug("未找到匹配的字符串")
+                        data.append(obj)
+                    else:
+                        log.debug("未找到匹配的字符串")
                # 浏览器返回上一页
                browser.back()