1 import requests
2 import json
3 import os
4 import shutil
5 import time
6
# QQ number of the account whose posts are scraped; also used to name the
# text output file.
qq = 627911861

# HTTP headers sent with every feed request (mobile Chrome user agent).
# NOTE(review): the 'cookie' value looks like a placeholder -- presumably it
# must be replaced with a real logged-in session cookie before running.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.8',
    'cache-control': 'max-age=0',
    'cookie': 'xxxxxx',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Mobile Safari/537.36'
}

# The feed URL is split in two halves; the paging offset (`numbers`) is
# spliced between them when a page is requested.
url_x = 'https://mobile.qzone.qq.com/list?qzonetoken=9d29961d6fbb88be6236636010e0d4fde43a5b77d57ef984938b5aa0cb695e28c258a4d86b8c02a545bbcce970ff&g_tk=1573033187&res_attach=att%3D'
url_y = '%26tl%3D1508257557&format=json&list_type=shuoshuo&action=0&res_uin=627911861&count=40'

numbers = 0        # paging offset used for "load more"
img_set = set()    # collected image URLs (de-duplicated)
word_count = 0     # counter of text posts
words = ""         # accumulated text of all posts
images = ""        # accumulated image URLs, one per line

total_count = 1761  # total number of posts to fetch
page_size = 40      # posts per request (matches `count=40` in url_y)

# Ceiling division: a trailing partial page still needs its own request.
# (Plain truncation gave 44 pages and silently dropped the last post.)
page = (total_count + page_size - 1) // page_size
27
28
# Walk the feed page by page, collecting picture URLs into `img_set` and
# numbered post text into `words`.
for _ in range(page):
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url_x + str(numbers) + url_y,
                                headers=headers, timeout=30)
        data = json.loads(response.content)

        for vFeed in data['data']['vFeeds']:
            if 'pic' in vFeed:
                for pic in vFeed['pic']['picdata']['pic']:
                    img_set.add(pic['photourl']['0']['url'])

            if 'summary' in vFeed:
                words += str(word_count) + '. ' + vFeed['summary']['summary'] + '\r\n'
                word_count += 1
    except (requests.RequestException, ValueError, KeyError, TypeError) as exc:
        # Best effort: a failed or malformed page is reported and skipped so
        # the remaining pages are still fetched. Ctrl-C is no longer swallowed.
        print('error', exc)

    # Advance to the next page and pause between requests.
    numbers += 40
    time.sleep(10)
49
# Persist the collected text posts and image URLs in the current directory.
try:
    # os.path.join instead of a hard-coded '\\' so the files land inside the
    # working directory on every platform, not only on Windows.
    with open(os.path.join(os.getcwd(), str(qq) + '.txt'), 'wb') as fo:
        fo.write(words.encode('utf-8'))
    print("文字说说写入完毕")

    # Build the URL listing once (one URL per line) and write it once.
    images = ''.join(imgUrl + '\r\n' for imgUrl in img_set)
    with open(os.path.join(os.getcwd(), 'images_url'), 'wb') as foImg:
        foImg.write(images.encode('utf-8'))
    print("图片写入完毕")

except OSError as exc:
    # Only file-system failures are expected here; report the cause.
    print('写入数据出错', exc)
63
64
# Download every collected image into ./images as 1.jpg, 2.jpg, ...
if not img_set:
    print(u'不存在图片说说')
else:
    # os.path.join avoids the invalid '\i' escape and mixed path separators.
    image_path = os.path.join(os.getcwd(), 'images')
    os.makedirs(image_path, exist_ok=True)
    for x, imgUrl in enumerate(img_set, 1):
        temp = os.path.join(image_path, '%s.jpg' % x)
        print(u'正在下载地%s张图片' % x)
        try:
            # The context manager releases the streamed connection even when
            # the copy fails; the timeout prevents hanging on a dead server.
            with requests.get(imgUrl, stream=True, timeout=30) as r:
                if r.status_code == 200:
                    with open(temp, 'wb') as f:
                        # Let urllib3 undo gzip/deflate while streaming.
                        r.raw.decode_content = True
                        shutil.copyfileobj(r.raw, f)
                else:
                    # Non-200 responses used to be skipped silently.
                    print(u'该图片下载失败:%s' % imgUrl)
        except Exception:
            # Best effort per image: reading r.raw can raise urllib3 errors
            # as well as requests/OS errors, so catch broadly here -- but not
            # KeyboardInterrupt/SystemExit as the bare except did.
            print(u'该图片下载失败:%s' % imgUrl)