一个简单的python爬虫

大小: 4.29KB

文件类型: .py

金币: 1

下载: 0 次

发布日期: 2024-05-11
语言: Python
标签: python 简单 py 爬虫

高速下载

资源简介

一个单文件爬虫，实现监听页面变化并发送邮件。

资源截图

小图大图

代码片段和文件信息

import urllib
from urllib import request
from bs4 import BeautifulSoup
from datetime import datetime
import random
import time
import pymysql
import smtplib
import sys
import requests
import json
from email.header import Header
from email.mime.text import MIMEText

# 连接mysql
def get_mysql():
    """Open a connection to the local MySQL ``test`` database.

    The connection settings (host, port, credentials, database, charset)
    are hard-coded below.

    Returns:
        A ``(db, cursor)`` tuple: the pymysql connection and a cursor
        created from it. The connection is configured with
        ``pymysql.cursors.DictCursor``. Neither object is closed here.
    """
    db = pymysql.connect(host='localhost',
                         port=3306,
                         user='root', passwd='root',
                         db='test', charset='utf8',
                         cursorclass=pymysql.cursors.DictCursor)
    cursor = db.cursor()
    return db, cursor

# 爬虫
def get_spiderMsg（）:
    host = {}
    title = {}
    lastest_title = {}
    host[0] = ‘http://cjxy.hebtu.edu.cn/a/zxks/tzgg/index.html‘
    title[0] = ‘自考实践通知‘
    # 获取数据库中保存的最新标题
    db mysql = get_mysql（）
    select_sql = “SELECT title FROM test where url = ‘“+host[0]+“‘ORDER BY updated_at desc“
    mysql.execute（select_sql）
    res = mysql.fetchone（）
    lastest_title[0] = ‘‘
    if res:
        lastest_title[0] = res[‘title‘];

    # email相关
    mail_host = “smtp.163.com“      # SMTP服务器
    mail_user = “xxxxxxx“                  # 用户名
    mail_pass = “xxxxxxxxx“               # 授权密码，非登录密码
    sender = ‘xxxxxx@163.com‘    # 发件人邮箱（最好写全 不然会失败）
    receivers = [‘1111111111@qq.com‘]  # 接收邮件，可设置为你的QQ邮箱或者其他邮箱

    header_list = [‘Mozilla/5.0 （Windows NT 6.1; WOW64; rv:54.0） Gecko/20100101 Firefox/54.000‘
                   ‘Mozilla/5.0 （Windows NT 6.1; WOW64） AppleWebKit/537.36 （KHTML like Gecko） Chrome/57.0.2987.133 Safari/537.36‘
                   ‘Mozilla/5.0 （Windows NT 6.1; WOW64） AppleWebKit/537.36 （KHTML like Gecko） Chrome/53.0.2785.104 Safari/537.36 Core/1.53.2759.400 QQBrowser/9.6.11220.400‘
                   ‘Mozilla/5.0 （Windows NT 6.1; WOW64; Trident/7.0; rv:11.0） like Gecko‘]
    switch = True
    while switch:
        time.sleep（3）
        sys.stdout.flush（）
        i = 0

        range_header = random.randint（0 3）
        user_agent = header_list[range_header]
        accept = ‘text/htmlapplication/xhtml+xmlapplication/xml;q=0.9image/

上一篇：groupby函数在Excel多个文件簿中数值求和及文字归纳
下一篇：KNN算法实战

共有条评论

一个简单的python爬虫

资源简介

资源截图

代码片段和文件信息

评论

相关资源