"""Download every image listed on one pic.netbian.com gallery page.

The script fetches the listing page at ``url``, collects the ``<li>`` items
inside ``<div class="slist">``, and saves each item's ``<img>`` into
``folder`` using the image's ``alt`` text as the file name.
"""
import os.path

import requests
from lxml import etree

# Listing page to scrape.
url = 'https://pic.netbian.com/4kmeinv/'

# Send a browser User-Agent instead of the default python-requests one.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0'
}

# Directory the images are written to.
folder = './4kMeinv'

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script forever.
timeout = 10

# Characters that are path separators or are rejected in file names on
# common file systems; each one is replaced by "_" in the saved name.
_UNSAFE_FILENAME_CHARS = str.maketrans({char: '_' for char in '\\/:*?"<>|'})

# Fetch the listing page and fail loudly on an HTTP error status.
response = requests.get(url=url, headers=headers, timeout=timeout)
response.raise_for_status()
# The image titles are GBK text. Declaring the encoding before reading
# ``.text`` decodes them correctly in one step, whatever charset requests
# guessed from the response headers.
response.encoding = 'gbk'
page_text = response.text

# Parse the HTML and select every gallery item.
tree = etree.HTML(page_text)
li_list = tree.xpath('//div[@class="slist"]//li')

# exist_ok avoids the check-then-create race of exists() + mkdir().
os.makedirs(folder, exist_ok=True)

for li in li_list:
    src_list = li.xpath('./a/img/@src')
    alt_list = li.xpath('./a/img/@alt')
    # Skip list items that do not carry an image (e.g. ads or spacers).
    if not src_list or not alt_list:
        continue

    # The src attribute is site-relative; prepend the host to get a full URL.
    src = 'https://pic.netbian.com' + src_list[0]
    # Build the file name from the alt text, neutralising unsafe characters.
    name = alt_list[0].translate(_UNSAFE_FILENAME_CHARS) + '.jpg'

    # Download the image bytes; a failure on one image must not abort the
    # rest of the page, and an error response must not be saved as a .jpg.
    try:
        image = requests.get(url=src, headers=headers, timeout=timeout)
        image.raise_for_status()
    except requests.RequestException as exc:
        print(name, ', download failed:', exc)
        continue

    # Persist the image to disk.
    path = os.path.join(folder, name)
    with open(path, 'wb') as fp:
        fp.write(image.content)
    print(name, ', 保存成功!!!')