python配置《剑来》小说更新提醒 | ntssl.cn

python配置《剑来》小说更新提醒

写在前面

总管写的书一直都很喜欢,从《雪中悍刀行》到《剑来》。
其实我还是最喜欢那个雪中的鼠标垫,哈哈哈

针对笔趣阁小说进行数据爬取

上源码

源码文件:get_data.py
#filename=get_data.py
# -*-coding:utf-8 -*-
# BY WANGCC


from bs4 import BeautifulSoup
import urllib.request
import os
from send_mail import sms
from ip_to_mysql import mysql_proxies
import logger
log = logger.Logger("debug")


# Local text file that readfile() and write2file() use to keep the last
# chapter title seen, so the next run can compare against it.
test_file="剑来" + ".txt"
def gain_html_content(url):
    """Download a page through a proxy taken from the database.

    The proxy string returned by ``mysql_proxies()`` is expected to look like
    ``http://111.26.9.26:80``. It is split into its scheme and its
    ``host:port`` part, which is the mapping ``urllib.request.ProxyHandler``
    takes.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        ValueError: If the proxy string is not of the form
            ``scheme://host:port``.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36"
    }

    proxy_url = mysql_proxies()
    # partition() keeps everything after "://" intact, instead of relying on
    # the string containing exactly two colons and a leading "//".
    scheme, separator, host_port = proxy_url.partition("://")
    if not separator or not scheme or ":" not in host_port:
        raise ValueError("unexpected proxy format: %r" % (proxy_url,))

    # NOTE(review): ProxyHandler keys are the scheme of the *target* URL, so
    # an "http" entry is presumably not applied to https:// pages - confirm.
    proxy_handler = urllib.request.ProxyHandler({scheme: host_port})
    opener = urllib.request.build_opener(proxy_handler)
    request = urllib.request.Request(url=url, headers=headers)

    # The context manager closes the response (and its socket) even when
    # reading or decoding fails.
    with opener.open(request) as response:
        log.info('获取代理成功,请求页面成功!')
        content = response.read().decode('utf-8')
    return content


def get_chapter(content):
    """Extract the latest chapter title from a book page.

    The title is read from the ``content`` attribute of the
    ``<meta property="og:novel:latest_chapter_name">`` tag.

    Args:
        content: HTML text of the page.

    Returns:
        The value of the tag's ``content`` attribute.

    Raises:
        ValueError: If the page has no such meta tag.
    """
    soup = BeautifulSoup(content, "lxml")
    latest_meta = soup.find("meta", property="og:novel:latest_chapter_name")
    if latest_meta is None:
        # find() returns None when nothing matches; fail with a message that
        # names the missing tag instead of an opaque TypeError on None[...].
        raise ValueError(
            "meta tag 'og:novel:latest_chapter_name' not found in page")
    return latest_meta['content']

def readfile(content):
    """Return the text stored in ``test_file``.

    When the file does not exist yet it is first created with *content*, so
    the value returned on that first call is *content* itself.

    Args:
        content: Text used to seed the file if it is missing.

    Returns:
        The full text currently stored in ``test_file``.
    """
    file_missing = not os.path.exists(test_file)
    if file_missing:
        write2file(content)
        log.info('将当前内容写入文档,生成剑来.txt文档')

    with open(test_file, 'r', encoding='utf-8') as handle:
        saved_text = handle.read()
    log.info('读取剑来.txt文档')
    return saved_text


def write2file(content):
    """Overwrite ``test_file`` with *content*, encoded as UTF-8.

    Args:
        content: Text to store.
    """
    with open(test_file, 'w', encoding='utf-8') as sink:
        sink.write(content)
    log.info('将小说写入本地文件,生成剑来.txt文档')



def main():
    """Check the book page for a new chapter and send a reminder if found.

    The latest chapter title on the page is compared with the title stored
    locally by the previous run. When they differ, a reminder is sent and the
    stored title is replaced.
    """
    tar_url = 'https://www.qu.la/book/31177/'
    page_html = gain_html_content(tar_url)
    log.info('页面下载完成')

    latest_title = get_chapter(page_html)
    stored_title = readfile(latest_title)
    if latest_title == stored_title:
        log.info("没更新呢!")
        return

    # Send the reminder before saving the new title. If sending fails, the
    # stored title stays unchanged and the next run tries again, instead of
    # silently treating the chapter as already announced.
    sms(latest_title)
    log.info('发送邮件提醒')
    write2file(latest_title)


if __name__ == "__main__":
    main()

发送邮件部分
源码文件:send_mail.py
# -*-coding:utf-8 -*-
# BY WANGCC
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import logger
log = logger.Logger("debug")


# SMTP account settings read by sms(). The values shown are redacted
# placeholders and have to be replaced with real credentials.
smtpserver = 'smtp.163.com'
username = 'xxxxx@163.com'
password = 'xxxxxx'
sender = 'xxxx@163.com'
# receiver='XXX@126.com'
# Several recipients are given as a list.
receiver = ['xxxxxxx@139.com','xxxxx@wo.cn']
# Mailboxes run by mobile carriers can be set up to forward new mail as an
# SMS, so the reminder arrives like a text message.

def sms(contect):
    """Send the update reminder e-mail to every address in ``receiver``.

    The message has *contect* as its subject and a fixed plain-text body.
    It is delivered over SSL on port 465 using the module-level account
    settings (``smtpserver``, ``username``, ``password``, ``sender``).

    Args:
        contect: Text used as the mail subject.
    """
    print("input sms...")

    # Subject, From and To below are what the mail client displays.
    msg = MIMEMultipart('mixed')
    msg['Subject'] = contect
    # NOTE(review): this display address is hard-coded and differs from the
    # ``sender`` setting used as the envelope sender - confirm it is intended.
    msg['From'] = 'wangcc <wangcc7777@163.com>'
    # Address lists in mail headers are comma separated.
    msg['To'] = ", ".join(receiver)
    msg.attach(MIMEText("小说更新了!", 'plain', 'utf-8'))

    # SMTP_SSL connects in its constructor when a host is given, so a second
    # connect() call is unnecessary. The context manager issues QUIT and
    # closes the socket even if login or sending raises.
    with smtplib.SMTP_SSL(host=smtpserver, port=465) as smtp:
        # smtp.set_debuglevel(1) would print the whole SMTP conversation.
        smtp.login(username, password)
        print("进入发送")
        smtp.sendmail(sender, receiver, msg.as_string())
        print('success....')
        log.info('发送提醒邮件给:' + str(receiver))


if __name__ == "__main__":
    sms('c测试~~')
数据库连接
源码文件:ip_to_mysql.py
# -*-coding:utf-8 -*-
# BY WANGCC

import pymysql,datetime
import logger,random

log = logger.Logger("debug")

# Connection parameters handed to pymysql.connect() by the helpers in this
# file. All values are redacted placeholders; the bare ``xxxxx`` given for the
# port is not a number and has to be replaced with the real integer port
# before this module can be imported.
DB_CONFIG = dict(
    host="xxxxxxxx",
    port=xxxxx,
    user="xxxx",
    passwd="111111111",
    db="xxxxx",
    charset="utf8",
)

def get_random():
    """Return a random integer from 1 to 9 inclusive."""
    return random.randrange(1, 10)

def mysql(ip_list):
    """Insert every proxy address from *ip_list* that is not stored yet.

    Each address is looked up in ``ip_original``. Addresses that are missing
    are inserted together with the current timestamp; addresses already
    present are only logged. A failed insert is rolled back and logged, and
    processing continues with the next address.

    Args:
        ip_list: Iterable of proxy address strings, for example
            ``'http://117.191.11.108:80'``.
    """
    db = pymysql.connect(
        host=DB_CONFIG["host"],
        port=DB_CONFIG["port"],
        user=DB_CONFIG["user"],
        passwd=DB_CONFIG["passwd"],
        db=DB_CONFIG["db"],
        charset=DB_CONFIG["charset"])
    try:
        cursor = db.cursor()
        date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # Values are passed as query parameters so the driver escapes them;
        # the addresses come from outside and must not be pasted into SQL.
        check_sql = "select count(*) from ip_original where ip=%s"
        insert_sql = "insert into ip_original(ip,date) values (%s,%s)"
        for ip in ip_list:
            cursor.execute(check_sql, (ip,))
            if cursor.fetchone()[0] != 0:
                log.info(ip+': is existence !!')
                continue
            # Rendered only for the failure log below.
            rendered_sql = cursor.mogrify(insert_sql, (ip, date))
            try:
                cursor.execute(insert_sql, (ip, date))
                db.commit()
                log.info(ip+'insert to ip_original success!')
            except Exception as e:
                log.info('执行sql-->'+rendered_sql+'fail: '+str(e))
                db.rollback()
    finally:
        # Close the connection even if a query raises unexpectedly.
        db.close()

def mysql_proxies():
    """Return one randomly chosen proxy for scraping.

    Up to ten random rows whose ``check_date`` is set are read from
    ``ip_original`` and one of them is picked uniformly.

    Returns:
        The second column of the chosen row (presumably the proxy address;
        the query uses ``SELECT *``, so verify against the table schema).

    Raises:
        IndexError: If the query returns no rows.
    """
    db = pymysql.connect(
        host=DB_CONFIG["host"],
        port=DB_CONFIG["port"],
        user=DB_CONFIG["user"],
        passwd=DB_CONFIG["passwd"],
        db=DB_CONFIG["db"],
        charset=DB_CONFIG["charset"])
    try:
        cursor = db.cursor()
        check_sql="SELECT * FROM ip_original where check_date is not NULL ORDER BY RAND() LIMIT 10 "
        cursor.execute(check_sql)
        rows = cursor.fetchmany(10)
    finally:
        # Close the connection even if the query raises.
        db.close()

    if not rows:
        raise IndexError("no proxy with check_date set found in ip_original")
    # Choose among the rows actually returned. A fixed index from 1 to 9
    # never selects the first row and fails when fewer than ten rows match.
    proxies = random.choice(rows)[1]
    print(proxies)
    return proxies

def mysql_old():
    """Return one randomly chosen proxy that is to be validated.

    Up to ten random rows are read from ``ip_original``, whether or not their
    ``check_date`` is set, and one of them is picked uniformly.

    Returns:
        The second column of the chosen row (presumably the proxy address;
        the query uses ``SELECT *``, so verify against the table schema).

    Raises:
        IndexError: If the table returns no rows.
    """
    db = pymysql.connect(
        host=DB_CONFIG["host"],
        port=DB_CONFIG["port"],
        user=DB_CONFIG["user"],
        passwd=DB_CONFIG["passwd"],
        db=DB_CONFIG["db"],
        charset=DB_CONFIG["charset"])
    try:
        cursor = db.cursor()
        check_sql="SELECT * FROM ip_original ORDER BY RAND() LIMIT 10 "
        cursor.execute(check_sql)
        rows = cursor.fetchmany(10)
    finally:
        # Close the connection even if the query raises.
        db.close()

    if not rows:
        raise IndexError("no proxy found in ip_original")
    # Choose among the rows actually returned. A fixed index from 1 to 9
    # never selects the first row and fails when fewer than ten rows exist.
    proxies = random.choice(rows)[1]
    print(proxies)
    return proxies



def mysql_delete(proxies):
    """Delete the row for one proxy address from ``ip_original``.

    Args:
        proxies: Proxy address whose row is removed.

    Returns:
        *proxies*, unchanged.
    """
    db = pymysql.connect(
        host=DB_CONFIG["host"],
        port=DB_CONFIG["port"],
        user=DB_CONFIG["user"],
        passwd=DB_CONFIG["passwd"],
        db=DB_CONFIG["db"],
        charset=DB_CONFIG["charset"])
    try:
        cursor = db.cursor()
        # The address is passed as a query parameter so the driver escapes it.
        check_sql = "delete from ip_original where ip = %s"
        # mogrify() renders the statement as it will be sent, for the log only.
        log.info('delete ip-->'+cursor.mogrify(check_sql, (proxies,)))
        cursor.execute(check_sql, (proxies,))
        db.commit()
    finally:
        # Close the connection even if the statement raises.
        db.close()
    return proxies

def mysql_update(str_from,proxies_yuan):
    """Record where a proxy comes from and when it was checked.

    Sets ``from_area`` to *str_from* and ``check_date`` to the current time
    on the ``ip_original`` row whose ``ip`` equals *proxies_yuan*. A failure
    is logged, printed and rolled back rather than raised.

    Args:
        str_from: Value stored in ``from_area``.
        proxies_yuan: Proxy address identifying the row to update.
    """
    db = pymysql.connect(
        host=DB_CONFIG["host"],
        port=DB_CONFIG["port"],
        user=DB_CONFIG["user"],
        passwd=DB_CONFIG["passwd"],
        db=DB_CONFIG["db"],
        charset=DB_CONFIG["charset"])
    try:
        cursor = db.cursor()
        date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # Values are passed as query parameters so the driver escapes them.
        update_sql = "update ip_original set from_area=%s,check_date=%s where ip=%s"
        params = (str_from, date, proxies_yuan)
        try:
            # mogrify() renders the statement as it will be sent.
            print(cursor.mogrify(update_sql, params))
            cursor.execute(update_sql, params)
            db.commit()
            log.info(proxies_yuan+'---->'+str_from+'--> updata success!')
        except Exception as e:
            log.info(str_from+'failed')
            print(e)
            db.rollback()
    finally:
        # Close the connection on every path.
        db.close()

if __name__=="__main__":
    # Sample proxy addresses that can be loaded into the table with mysql().
    ip_list = [
        'http://117.191.11.108:80',
        'http://134.209.15.143:8080',
        'http://157.230.232.130:80',
        'http://111.206.6.100:80',
        'http://159.138.5.222:80',
        'http://178.128.12.118:8080',
        'http://83.142.126.147:80',
        'http://150.109.55.190:83',
        'http://165.227.62.167:8080',
        'http://167.114.153.18:80',
        'http://39.137.69.10:8080',
        'http://111.206.6.101:80',
        'http://165.227.29.189:8080',
        'http://175.139.252.192:80',
        'http://103.42.213.176:8080',
        'http://211.23.149.29:80',
        'http://211.23.149.28:80',
        'http://47.94.57.119:80',
        'http://175.139.252.194:80',
        'http://47.94.217.37:80',
    ]
    # mysql(ip_list)  # left disabled, as in the original
    number=mysql_proxies()

思路

每次爬取,从数据库随机抽一个代理ip来用,如果没用就销毁。
数据爬取后,把最新章节名存在本地txt,留着和下次爬取的结果作比对;如果不一致,说明小说更新了,则更新本地文件,并发送邮件。