cp(2,3)
# 用生产者消费者模式实现爬虫的例子
from multiprocessing import Process,Queue
import requests
import re
import json
def producer(q, url, timeout=10):
    """Download one page and hand its HTML to the consumer through ``q``.

    Args:
        q: Queue-like object with a blocking ``put``; receives the response
            body as text.
        url: Address of the page to fetch.
        timeout: Seconds ``requests`` waits on the server before raising.
            Without a timeout a single stalled connection would block this
            producer (and whoever joins it) indefinitely.
    """
    response = requests.get(url, timeout=timeout)
    q.put(response.text)
def consumer(q):
    """Parse movie entries out of HTML pages taken from ``q`` and print them.

    Blocks on ``q.get()`` and keeps processing pages until it receives the
    ``None`` sentinel. For every entry matched in a page, one dict with the
    keys ``id``, ``title``, ``rating_num`` and ``comment_num`` is printed.

    Args:
        q: Queue-like object with a blocking ``get``; yields HTML strings and
            finally ``None`` to signal that no more pages will arrive.
    """
    # Compile once, outside the loop. Raw strings keep ``\d`` a regex escape
    # instead of an (invalid) string escape.
    pattern = re.compile(
        r'<div class="item">.*?<div class="pic">.*?<em .*?>(?P<id>\d+).*?<span class="title">(?P<title>.*?)</span>'
        r'.*?<span class="rating_num" .*?>(?P<rating_num>.*?)</span>.*?<span>(?P<comment_num>.*?)评价</span>',
        re.S,
    )
    fields = ("id", "title", "rating_num", "comment_num")
    while True:
        page = q.get()
        # Only the explicit sentinel stops the consumer; an empty page body
        # simply yields no matches and must not end the loop early.
        if page is None:
            break
        for match in pattern.finditer(page):
            print({key: match.group(key) for key in fields})
if __name__ == '__main__':
    # Bounded queue: producers block once three pages are waiting.
    q = Queue(3)
    p_l = []
    # Ten pages, with the ``start`` offset going 0, 25, ..., 225.
    for start in range(0, 250, 25):
        url = 'https://movie.douban.com/top250?start=%s&filter=' % start
        # Process.start() returns None, so keep the Process object itself;
        # otherwise the join() below would be called on None.
        p = Process(target=producer, args=(q, url))
        p.start()
        p_l.append(p)
    c = Process(target=consumer, args=(q,))
    c.start()
    for p in p_l:
        p.join()
    # All producers are done: tell the consumer to stop, then wait for it.
    q.put(None)
    c.join()
11. JoinableQueue类
JoinableQueue 与 Queue 使用成本一样，但 JoinableQueue 更严谨一些：消费者每处理完一个数据就调用 task_done()，生产者可以用 join() 阻塞，直到队列中的所有数据都被处理完。
import time
import random
from multiprocessing import JoinableQueue,Process
def producer(q, name, food, count=10, max_delay=1.0):
    """Put ``count`` numbered items on ``q``, then wait until all are consumed.

    Each item is the string ``food`` followed by its index (``food0``,
    ``food1``, ...). After the last ``put`` the producer calls ``q.join()``,
    which blocks until ``task_done()`` has been called for every item.

    Args:
        q: Queue with ``put`` and ``join`` (e.g. ``JoinableQueue``).
        name: Producer name used in the progress message.
        food: Base name of the produced items.
        count: Number of items to produce.
        max_delay: Upper bound, in seconds, of the random pause taken before
            each item.
    """
    for i in range(count):
        time.sleep(random.random() * max_delay)
        fd = '%s%s' % (food, i)
        q.put(fd)
        print('%s生产了一个%s' % (name, food))
    # Do not return until the consumer has acknowledged every item.
    q.join()
def consumer(q, name, max_delay=1.0):
    """Take items from ``q`` forever, acknowledging each with ``task_done``.

    The loop never exits on its own, so the caller is expected to run it in
    a daemon process or thread.

    Args:
        q: Queue with ``get`` and ``task_done`` (e.g. ``JoinableQueue``).
        name: Consumer name used in the progress message.
        max_delay: Upper bound, in seconds, of the random pause taken while
            "eating" each item.
    """
    while True:
        food = q.get()
        time.sleep(random.random() * max_delay)
        print('%s吃了%s' % (name, food))
        # Acknowledge only after the item is fully handled, so a q.join()
        # on the producer side unblocks at the right moment.
        q.task_done()
if __name__ == '__main__':
    jq = JoinableQueue()
    p = Process(target=producer, args=(jq, 'wusir', '烧饼'))
    p.start()
    c = Process(target=consumer, args=(jq, 'alex'))
    # The consumer loops forever; as a daemon it is terminated when the main
    # process exits.
    c.daemon = True
    c.start()
    # The producer returns only after its q.join() succeeds, i.e. once the
    # consumer has called task_done() for every item.
    p.join()
12. 进程之间的数据共享-Manager类
multiprocessing 中有一个 Manager 类，封装了所有和进程相关的（包括：数据共享、数据传递）、与共享相关的数据类型。但是对字典、列表这一类数据进行操作时会产生数据不安全，需要加锁解决问题，并且应尽量少使用这种方式。
```python
from multiprocessing import Manager,Process,Lock
def func(dic, lock):
    """Decrement ``dic['count']`` by one while holding ``lock``.

    ``-=`` on a mapping is a read followed by a write, so the lock is held
    across both steps to keep concurrent callers from losing updates.

    Args:
        dic: Mutable mapping with an integer ``'count'`` entry.
        lock: Context-manager lock shared by all callers.
    """
    with lock:
        dic['count'] -= 1
# The dunder underscores were lost in the original text (``name == 'main'``),
# which raises NameError instead of guarding the entry point.
if __name__ == '__main__':
    with Manager() as m:
        l = Lock()
        dic = m.dict({'count': 100})
        p_l = []
        for i in range(100):
            p = Process(target=func, args=(dic, l))
            p.start()
            p_l.append(p)
        for p in p_l:
            p.join()
        # Printed inside the ``with`` block, while the manager is still running.
        print(dic)
```