views.py 4.27 KB
Newer Older
liyang's avatar
liyang committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
import http.client
import json
import sys
import time

import pymysql.cursors
from django.http import HttpResponse
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options


def index(request):
    """Placeholder index view for the polls app."""
    greeting = "Hello, world. You're at the polls index."
    return HttpResponse(greeting)


def user(request):
    """Return the first row of the `user` table as a JSON HTTP response.

    Connects to a MySQL server with pymysql, runs a single SELECT, and
    serializes the first row (a dict, thanks to DictCursor) with json.dumps.
    If the table is empty, fetchone() returns None and the response body
    is the JSON literal "null".

    NOTE(review): host/user/password are hard-coded — move them to Django
    settings or environment variables before deploying.
    """
    # DictCursor makes fetchone() return a dict keyed by column name,
    # which serializes directly with json.dumps.
    connection = pymysql.connect(host='10.211.55.34',
                                 user='root',
                                 password='123456',
                                 database='test',
                                 cursorclass=pymysql.cursors.DictCursor)

    # Both `with` blocks release their resource on exit (connection and
    # cursor respectively), so no explicit close() calls are needed —
    # the original close() calls after `return` were unreachable dead code.
    with connection:
        print("已连接")
        with connection.cursor() as cursor:
            # Read a single record.
            sql = "SELECT `id`,`password`,`name` FROM `user`"
            cursor.execute(sql)
            result = cursor.fetchone()
            print(result)
            return HttpResponse(json.dumps(result))


def pc(request):
    """Open twitter.com with Selenium, click the login link, wait for the
    login form to render, and return the page HTML as an HTTP response.

    On Linux the Chrome driver runs headless (required on a server with no
    display); elsewhere a regular Chrome window is used.

    NOTE(review): the original docstring described a GDPR/"show more" flow
    on nytimes.com, but the code actually drives the Twitter login page.
    """
    url = "https://twitter.com/"

    if sys.platform.startswith('linux'):
        # Headless flags required to run Chrome on a display-less server.
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--disable-dev-shm-usage')
        # chromedriver default lookup: ./chromedriver.exe on Windows,
        # /usr/bin/chromedriver on Linux; customizable via executable_path.
        browser = webdriver.Chrome(options=chrome_options)
    else:
        # Local development: visible browser window.
        browser = webdriver.Chrome()
    print(browser)

    try:
        browser.get(url)

        # Open the login dialog.
        open_button_login = WebDriverWait(browser, 10).until(
            EC.presence_of_element_located((By.XPATH, "//a[@data-testid='login']")))
        open_button_login.click()
        time.sleep(5)  # give the login dialog time to render

        # Wait until the username input and the dialog buttons exist so the
        # page source captured below contains the fully rendered login form.
        WebDriverWait(browser, 10).until(
            EC.presence_of_element_located((By.XPATH, "//input[@autocomplete='username']")))
        WebDriverWait(browser, 10).until(
            EC.presence_of_all_elements_located((By.XPATH, "//div[@role='button']")))

        page_content = browser.page_source
        soup = BeautifulSoup(page_content, 'html.parser')
    finally:
        # quit() (not just close()) terminates the chromedriver process;
        # with the original close()-only path, every request — and every
        # timeout raised by the waits above — leaked a driver process.
        browser.quit()

    print(soup)
    return HttpResponse(soup)