使用 Python 实现小说更新提醒

总管写的书一直都很喜欢,从《雪中悍刀行》到《剑来》。
其实我还是最喜欢那个雪中的鼠标垫,哈哈哈

针对笔趣阁小说进行数据爬取

上源码

#filename=get_data.py
# -*-coding:utf-8 -*-
# BY WANGCC


from bs4 import BeautifulSoup
import urllib.request
import os
from send_mail import sms
from ip_to_mysql import mysql_proxies
import logger
# Module-wide logger created from the project-local `logger` module.
log = logger.Logger("debug")


# Local file holding the chapter title seen on the previous run; readfile()
# and write2file() read and overwrite it to detect a new chapter.
test_file="剑来" + ".txt"
def gain_html_content(url):
    """Download a page through a proxy taken from the database.

    The proxy string comes from ``mysql_proxies()`` and is expected to look
    like ``"http://111.26.9.26:80"``.

    Args:
        url: Address of the page to download.

    Returns:
        The response body decoded as UTF-8 text.

    Raises:
        ValueError: If the proxy string is not of the form
            ``scheme://host[:port]``.
        urllib.error.URLError: If the request itself fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36",
    }

    # Split "http://host:port" into the scheme and the "host:port" part that
    # ProxyHandler expects, without relying on fixed character offsets.
    proxy_url = mysql_proxies()
    scheme, separator, host_port = proxy_url.partition("://")
    if not separator or not host_port:
        raise ValueError("unexpected proxy format: %r" % (proxy_url,))

    # NOTE(review): ProxyHandler applies an entry only to requests whose URL
    # scheme equals the key, so an "http" proxy is not used for an "https://"
    # target - confirm this is the intended behavior.
    proxy_handler = urllib.request.ProxyHandler({scheme: host_port})
    opener = urllib.request.build_opener(proxy_handler)
    request = urllib.request.Request(url=url, headers=headers)

    # The with block closes the response (and its socket) even if reading or
    # decoding raises.
    with opener.open(request) as response:
        log.info('获取代理成功,请求页面成功!')
        content = response.read().decode('utf-8')
    return content


def get_chapter(content):
    """Extract the latest chapter title from a book page.

    The title is read from the ``content`` attribute of the
    ``<meta property="og:novel:latest_chapter_name">`` tag.

    Args:
        content: HTML text of the page.

    Returns:
        The value of the tag's ``content`` attribute.

    Raises:
        ValueError: If the page has no such meta tag, or the tag has no
            ``content`` attribute (for example when a proxy returned an
            error page instead of the book page).
    """
    soup = BeautifulSoup(content, "lxml")
    meta_tag = soup.find("meta", property="og:novel:latest_chapter_name")
    chapter_name = None if meta_tag is None else meta_tag.get("content")
    if chapter_name is None:
        raise ValueError(
            "meta tag og:novel:latest_chapter_name not found in page"
        )
    return chapter_name

def readfile(content):
    """Return the chapter title stored in the local record file.

    When the record file does not exist yet it is first created from
    ``content``, so the value read back equals ``content`` on the first run.
    """
    record_missing = not os.path.exists(test_file)
    if record_missing:
        write2file(content)
        log.info('将当前内容写入文档,生成剑来.txt文档')

    with open(test_file, mode='r', encoding='utf-8') as record:
        saved_text = record.read()
        log.info('读取剑来.txt文档')
    return saved_text


def write2file(content):
    """Overwrite the local record file with ``content`` (UTF-8 encoded)."""
    with open(test_file, mode='w', encoding='utf-8') as record:
        record.write(content)

    log.info('将小说写入本地文件,生成剑来.txt文档')



def main():
    """Check the book page once and send a mail if the latest chapter changed."""
    book_url = 'https://www.qu.la/book/31177/'
    page_html = gain_html_content(book_url)
    log.info('页面下载完成')

    latest_chapter = get_chapter(page_html)
    saved_chapter = readfile(latest_chapter)

    # Nothing new: the stored title already matches the page.
    if latest_chapter == saved_chapter:
        log.info("没更新呢!")
        return

    # New chapter: remember it first, then notify.
    write2file(latest_chapter)
    sms(latest_chapter)
    log.info('发送邮件提醒')

#main()

# Run one update check only when this file is executed as a script.
if __name__ == "__main__":
    main()
发送邮件部分(send_mail.py)
# -*-coding:utf-8 -*-
# BY WANGCC
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import logger
# Module-wide logger created from the project-local `logger` module.
log = logger.Logger("debug")


# SMTP server and account settings for the notification mail.
# NOTE(review): the values appear to be redacted placeholders; real
# credentials should not be hard-coded in source - consider reading them from
# the environment or an untracked config file.
smtpserver = 'smtp.163.com'
username = 'xxxxx@163.com'
password = 'xxxxxx'
sender = 'xxxx@163.com'
# receiver='XXX@126.com'
# Several recipients are given as a list.
receiver = ['xxxxxxx@139.com','xxxxx@wo.cn']
# Using mailboxes provided by a mobile carrier makes it possible to configure
# SMS notification for new mail, so the e-mail works like a text-message alert.

def sms(contect):
    """Send the update notification e-mail to every address in ``receiver``.

    Args:
        contect: Text used as the mail subject.

    Raises:
        smtplib.SMTPException: If login or delivery is rejected by the server.
        OSError: If the connection to the SMTP server cannot be established.
    """
    print("input sms...")

    # Subject, From and To below are the headers shown in the mail client.
    msg = MIMEMultipart('mixed')
    msg['Subject'] = contect
    msg['From'] = 'wangcc <wangcc7777@163.com>'
    # RFC 5322 separates the addresses of an address list with commas.
    msg['To'] = ", ".join(receiver)

    # Plain-text body of the mail.
    msg.attach(MIMEText("小说更新了!", 'plain', 'utf-8'))

    # Passing host and port to the constructor already opens the connection,
    # so no separate connect() call is needed. The with block sends QUIT and
    # closes the socket even when login or sendmail raises.
    with smtplib.SMTP_SSL(host=smtpserver, port=465) as smtp:
        smtp.login(username, password)
        print("进入发送")
        smtp.sendmail(sender, receiver, msg.as_string())
        print('success....')
        log.info('发送提醒邮件给:' + str(receiver))

# Manual smoke test: send one test mail when this file is run directly.
if __name__ == "__main__":
    sms('c测试~~')
数据库连接部分(ip_to_mysql.py)
# -*-coding:utf-8 -*-
# BY WANGCC

import pymysql,datetime
import logger,random

# Module-wide logger created from the project-local `logger` module.
log = logger.Logger("debug")

# Connection settings handed to pymysql.connect() by the database functions
# in this file.
# NOTE(review): the values look like redacted placeholders. In particular
# `xxxxx` for "port" is a bare name, so evaluating this dict raises NameError
# until it is replaced with the real integer port.
DB_CONFIG = {
    "host": "xxxxxxxx",
    "port": xxxxx,
    "user": "xxxx",
    "passwd": "111111111",
    "db": "xxxxx",
    "charset": "utf8"
}

def get_random():
    """Return a random integer from 1 to 9, both ends included."""
    return random.randint(1, 9)

def mysql(ip_list):
    """Insert the proxies from ``ip_list`` that are not in ``ip_original`` yet.

    Each address is looked up first; only unknown addresses are inserted,
    together with the current timestamp. A failing insert is logged and
    rolled back, and processing continues with the next address.

    Args:
        ip_list: Iterable of proxy address strings such as
            ``"http://117.191.11.108:80"``.
    """
    # Placeholders let the driver escape the values instead of splicing them
    # into the SQL text.
    check_sql = "select count(*) from ip_original where ip=%s"
    insert_sql = "insert into ip_original(ip,date)value (%s,%s)"

    # DB_CONFIG holds exactly the keyword arguments pymysql.connect() needs.
    db = pymysql.connect(**DB_CONFIG)
    try:
        cursor = db.cursor()
        date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        for ip in ip_list:
            cursor.execute(check_sql, (ip,))
            if cursor.fetchone()[0] != 0:
                log.info(ip + ': is existence !!')
                continue

            try:
                cursor.execute(insert_sql, (ip, date))
                db.commit()
                # Report success only once the row is really committed.
                log.info(ip + 'insert to ip_original success!')
            except pymysql.MySQLError as e:
                log.info('执行sql-->' + insert_sql + ' ' + repr((ip, date))
                         + 'fail: ' + str(e))
                db.rollback()
    finally:
        # Close the connection even if a query raised.
        db.close()

#采集用一个ip代理
def mysql_proxies():
    """Return one randomly chosen, already verified proxy.

    Up to ten rows of ``ip_original`` whose ``check_date`` is not NULL are
    fetched in random order and one of them is picked.

    Returns:
        The value of the second column of the chosen row.

    Raises:
        IndexError: If no verified proxy is stored in the table.
    """
    check_sql = "SELECT * FROM ip_original where check_date is not NULL ORDER BY RAND() LIMIT 10 "

    # DB_CONFIG holds exactly the keyword arguments pymysql.connect() needs.
    db = pymysql.connect(**DB_CONFIG)
    try:
        cursor = db.cursor()
        cursor.execute(check_sql)
        rows = cursor.fetchmany(10)
    finally:
        # Close the connection even if the query raised.
        db.close()

    if not rows:
        raise IndexError("no verified proxy available in ip_original")

    # Choose among the rows actually returned: a fixed 1..9 index fails when
    # fewer than ten rows exist and can never select the first row.
    # NOTE(review): assumes column 1 of ip_original is the proxy address -
    # confirm against the table schema.
    proxies = random.choice(rows)[1]
    print(proxies)
    return proxies

#验证用一个ip代理
def mysql_old():
    """Return one randomly chosen proxy, verified or not.

    Up to ten rows of ``ip_original`` are fetched in random order and one of
    them is picked.

    Returns:
        The value of the second column of the chosen row.

    Raises:
        IndexError: If the table is empty.
    """
    check_sql = "SELECT * FROM ip_original ORDER BY RAND() LIMIT 10 "

    # DB_CONFIG holds exactly the keyword arguments pymysql.connect() needs.
    db = pymysql.connect(**DB_CONFIG)
    try:
        cursor = db.cursor()
        cursor.execute(check_sql)
        rows = cursor.fetchmany(10)
    finally:
        # Close the connection even if the query raised.
        db.close()

    if not rows:
        raise IndexError("no proxy available in ip_original")

    # Choose among the rows actually returned: a fixed 1..9 index fails when
    # fewer than ten rows exist and can never select the first row.
    # NOTE(review): assumes column 1 of ip_original is the proxy address -
    # confirm against the table schema.
    proxies = random.choice(rows)[1]
    print(proxies)
    return proxies



#删除一条数据
def mysql_delete(proxies):
    """Delete the row of ``ip_original`` whose ``ip`` equals ``proxies``.

    Args:
        proxies: Proxy address to remove.

    Returns:
        The ``proxies`` argument, unchanged.
    """
    # The placeholder lets the driver escape the value instead of splicing it
    # into the SQL text.
    delete_sql = "delete  from ip_original  where ip = %s"
    params = (proxies,)

    # DB_CONFIG holds exactly the keyword arguments pymysql.connect() needs.
    db = pymysql.connect(**DB_CONFIG)
    try:
        cursor = db.cursor()
        # mogrify() returns the statement as it will be sent, for the log.
        log.info('delete ip-->' + cursor.mogrify(delete_sql, params))
        cursor.execute(delete_sql, params)
        db.commit()
    finally:
        # Close the connection even if the statement raised.
        db.close()
    return proxies

#更新来源和验证时间
def mysql_update(str_from, proxies_yuan):
    """Store origin and verification time for one proxy in ``ip_original``.

    Sets ``from_area`` to ``str_from`` and ``check_date`` to the current
    timestamp on the row whose ``ip`` equals ``proxies_yuan``. A failing
    update is logged, printed and rolled back instead of being raised.

    Args:
        str_from: Value written to the ``from_area`` column.
        proxies_yuan: Proxy address identifying the row to update.
    """
    # Placeholders let the driver escape the values instead of splicing them
    # into the SQL text.
    update_sql = "update  ip_original set from_area=%s,check_date=%s where ip=%s"

    # DB_CONFIG holds exactly the keyword arguments pymysql.connect() needs.
    db = pymysql.connect(**DB_CONFIG)
    try:
        cursor = db.cursor()
        date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        params = (str_from, date, proxies_yuan)
        try:
            # mogrify() returns the statement as it will be sent.
            print(cursor.mogrify(update_sql, params))
            cursor.execute(update_sql, params)
            db.commit()
            # Report success only once the change is really committed.
            log.info(proxies_yuan+'---->'+str_from+'--> updata success!')
        except pymysql.MySQLError as e:
            log.info(str_from+'failed')
            print(e)
            db.rollback()
    finally:
        # Close the connection even if something above raised.
        db.close()

# Manual entry point: when this file is run directly, fetch one verified proxy.
# The sample list below can be inserted by enabling the mysql(ip_list) call.
if  __name__=="__main__":
    # Sample proxy addresses in "scheme://host:port" form.
    ip_list = ['http://117.191.11.108:80', 'http://134.209.15.143:8080', 'http://157.230.232.130:80',
               'http://111.206.6.100:80', 'http://159.138.5.222:80', 'http://178.128.12.118:8080',
               'http://83.142.126.147:80', 'http://150.109.55.190:83', 'http://165.227.62.167:8080',
               'http://167.114.153.18:80', 'http://39.137.69.10:8080', 'http://111.206.6.101:80',
               'http://165.227.29.189:8080', 'http://175.139.252.192:80', 'http://103.42.213.176:8080',
               'http://211.23.149.29:80', 'http://211.23.149.28:80', 'http://47.94.57.119:80',
               'http://175.139.252.194:80', 'http://47.94.217.37:80']
    #mysql(ip_list)
    number=mysql_proxies()

思路

每次爬取时,从数据库中随机抽取一个代理 IP 来使用;如果该代理不可用,就把它从数据库中删除。
爬取到最新章节名后,保存在本地 txt 文件中,留着和下一次爬取的结果作比对:如果不一致,说明小说更新了,则更新本地文件并发送邮件提醒。