设为首页 加入收藏

TOP

python爬取英语学习资料并发送邮件
2018-12-06 22:08:50 浏览:42
Tags:python 英语 学习资料 发送 邮件

新建发送邮件类

import smtplib
from email.mime.text import MIMEText
from email.header import Header

class SendMail:
    """Send plain-text e-mail through a fixed SMTP account.

    The account, recipients and server are placeholder values assigned in
    ``__init__``; overwrite the attributes on an instance before sending.
    """

    def __init__(self):
        self.sender = 'xx@qq.com'
        # Recipients; may be your own QQ mailbox or any other address.
        self.receivers = ['xx1@qq.com', 'xx2@qq.com']
        self.smtp_server = 'smtp.qq.com'
        self.smtp_pwd = 'xx'
        # NOTE: the attribute name is misspelled ("stmp"); it is kept as-is
        # so code that already reads or sets it keeps working.
        self.stmp_port = 25

    def _build_message(self, title, msg):
        """Return a UTF-8 plain-text MIME message for *title* and *msg*."""
        # Three arguments: the text body, the 'plain' subtype, and the
        # 'utf-8' charset used to encode the body.
        message = MIMEText(msg, 'plain', 'utf-8')
        message['From'] = self.sender
        # Address lists in mail headers are comma-separated; a semicolon is
        # not a valid separator between mailboxes.
        message['To'] = ', '.join(self.receivers)
        message['Subject'] = Header(title, 'utf-8')
        return message

    def sendMessage(self, title, msg):
        """Send *msg* with subject *title* to every address in ``receivers``.

        Args:
            title: Subject line of the e-mail.
            msg: Plain-text body of the e-mail.

        Raises:
            smtplib.SMTPException: If the TLS upgrade, login or delivery fails.
            OSError: If the connection to the server cannot be established.
        """
        message = self._build_message(title, msg)

        # The context manager issues QUIT and closes the socket even when
        # login or delivery raises, so no connection is leaked.
        with smtplib.SMTP(self.smtp_server, self.stmp_port) as smtp:
            smtp.starttls()
            smtp.login(self.sender, self.smtp_pwd)
            smtp.sendmail(self.sender, self.receivers, message.as_string())
        print('success')

爬取英语学习资料

例如,爬取英语学习资料列表页 http://www.hjenglish.com/new/c1020/,抓取当前页中的每篇文章,并将其发送到指定邮箱:

from bs4 import BeautifulSoup
import time, os
import xlwt
import requests
import datetime
import threading
import schedule
from mymodule.SendMail import *

def getLinks(url):
    """Fetch a listing page and return the absolute URLs of its articles.

    Args:
        url: Address of the listing page to download.

    Returns:
        A list with one URL per element matching ``.big-link.title-article``
        that carries an ``href``. When the page cannot be fetched or parsed
        the error is printed and an empty list is returned, so callers can
        always iterate the result.
    """
    # Local import: only this function needs it.
    from urllib.parse import urljoin

    base = 'http://www.hjenglish.com'
    headers = {
        'Host': 'www.hjenglish.com',
        'Referer': 'http://www.hjenglish.com/new/cet/',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
    }
    try:
        # Without a timeout a stalled server would block the caller forever.
        res = requests.get(url, headers=headers, timeout=10)
        try:
            res.raise_for_status()
            page = BeautifulSoup(res.text, 'lxml')
        finally:
            # Release the connection even when the status check fails.
            res.close()

        # urljoin leaves absolute hrefs untouched and resolves relative ones
        # against the site root, unlike plain string concatenation.
        return [
            urljoin(base, adom['href'])
            for adom in page.select('.big-link.title-article')
            if adom.has_attr('href')
        ]
    except Exception as err:
        # Best-effort crawl: report the problem and hand back "no links".
        print(err)
        return []

def spiderLink(url, lock):
    """Download one article page and e-mail every article found on it.

    Relies on two module-level names that the ``__main__`` guard sets up:
    ``sender`` (a ``SendMail`` instance) and ``total`` (a counter shared by
    all worker threads).

    Args:
        url: Address of the article page.
        lock: Lock guarding updates to the shared ``total`` counter.
    """
    global total
    print('当前线程', threading.current_thread().name)

    headers = {
        'Host': 'www.hjenglish.com',
        'Referer': 'http://www.hjenglish.com/new/cet/',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
    }
    try:
        # Without a timeout a stalled server would block this thread forever.
        res = requests.get(url, headers=headers, timeout=10)
        try:
            if res.status_code != 200:
                return
            page = BeautifulSoup(res.text, "lxml")
        finally:
            # Release the connection on every path, including non-200.
            res.close()

        titles = [title.getText() for title in page.select('.article-header .title')]
        contents = [contentDom.getText() for contentDom in page.select('#J-article-content')]
        print(titles, contents)

        # zip pairs each title with its content and stops at the shorter
        # list, so a page with mismatched counts cannot index out of range.
        for title, content in zip(titles, contents):
            with lock:
                total = total + 1
            print(title, content)
            sender.sendMessage(title, content)
    except Exception as err:
        # Best-effort worker: report the failure and let other threads go on.
        print(err)


def my_job():
    """Crawl the listing page and process each linked article in its own thread.

    Starts one thread per link, waits for all of them, prints the elapsed
    seconds and resets the shared ``total`` counter. On any error the
    message is printed and the process exits immediately.
    """
    global total
    try:
        starttime = datetime.datetime.now()
        url = 'http://www.hjenglish.com/new/c1020/'
        lock = threading.Lock()

        # Guard against a falsy result (e.g. None after a failed fetch).
        spider_links = getLinks(url) or []
        threads = [
            threading.Thread(target=spiderLink, args=(link, lock))
            for link in spider_links
        ]
        for worker in threads:
            worker.start()
        for worker in threads:
            worker.join()

        endtime = datetime.datetime.now()
        print('have spend ', str((endtime - starttime).seconds) + 's')
        total = 0
    except Exception as err:
        print(err)
        # NOTE(review): exits with status 0 even though this is the failure
        # path — confirm that is intended.
        os._exit(0)


if __name__ == '__main__':
    try:
        sender = SendMail()
        total = 0
        my_job()
    except Exception as err:
        print(err)
        os._exit(0)

编程开发网
【打印繁体】【投稿】【收藏】【推荐】【举报】【评论】【关闭】【返回顶部】
上一篇python初识参数 下一篇4 列表

评论

帐  号: 密码: (新用户注册)
验 证 码:
表  情:
内  容:

array(4) { ["type"]=> int(8) ["message"]=> string(24) "Undefined variable: jobs" ["file"]=> string(32) "/mnt/wp/cppentry/do/bencandy.php" ["line"]=> int(217) }