"""Download every chapter linked from a gushiwen.cn book index into one file.

The script fetches the index page at ``url``, collects the chapter links
matched by ``.bookcont > ul > span > a``, downloads each chapter page and
appends its title and the text of its ``div.contson`` element to
``sanguo.txt`` (UTF-8).
"""
import sys
from urllib.parse import urljoin

from bs4 import BeautifulSoup
import lxml  # noqa: F401 -- not referenced directly; backs the 'lxml' parser name
import requests

url = 'https://so.gushiwen.cn/guwen/book_46653FD803893E4F7F702BCF1F7CCE17.aspx'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0'
}

# requests applies no timeout by default, so a stalled server would hang the
# script forever; bound every request explicitly (seconds).
REQUEST_TIMEOUT = 10
OUTPUT_PATH = 'sanguo.txt'


def _fetch_soup(session, page_url):
    """Fetch *page_url* through *session* and return the parsed document.

    Raises:
        requests.HTTPError: the server answered with a 4xx/5xx status.
        requests.RequestException: connection failure or timeout.
    """
    response = session.get(page_url, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of parsing an error page as if it were content.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'lxml')


def main():
    """Scrape all chapters listed on the index page into ``OUTPUT_PATH``.

    Chapters whose link or content container is missing are reported on
    stderr and skipped so one malformed page does not abort the whole run.
    """
    # One session reuses the underlying connection for every chapter request.
    with requests.Session() as session:
        session.headers.update(headers)

        soup = _fetch_soup(session, url)
        chapters = soup.select('.bookcont > ul > span > a')
        print(chapters)

        with open(OUTPUT_PATH, 'w', encoding='utf-8') as fp:
            for a in chapters:
                # get_text() equals .string for a single text child but never
                # returns None when the anchor contains nested markup.
                title = a.get_text()

                href = a.get('href')
                if not href:
                    print(f'skipping {title!r}: anchor has no href',
                          file=sys.stderr)
                    continue
                # hrefs may be relative; resolve them against the index URL.
                link = urljoin(url, href)

                chapter_soup = _fetch_soup(session, link)
                content = chapter_soup.find('div', class_='contson')
                if content is None:
                    print(f'skipping {title!r}: no div.contson at {link}',
                          file=sys.stderr)
                    continue

                fp.write(title + ':\n' + content.text + '\n')
                print(title, ' 写入成功!!!')


if __name__ == '__main__':
    main()