def handle_starttag(self, tag, attrs): if tag in CustomParser.selected: self._level_stack.append(tag) def handle_endtag(self, tag): if self._level_stack and tag in CustomParser.selected and tag == self._level_stack[-1]: self._level_stack.pop() def handle_data(self, data): #我们将需要获取的数据放到一个list中,同时每一个漏洞的数据会放到一个小的listz中 #如[[名称,CVE,风险],[名称,CVE,风险]],这里拿到的是全部HTML中的数据 if "/".join(self._level_stack) in CustomParser.selected_a and not CustomParser.sigle_cve: print self._level_stack, data.decode('gbk').encode('utf-8') CustomParser.sigle_cve.append(data.decode('gbk').encode('utf-8').strip()) elif "/".join(self._level_stack) in CustomParser.selected_a: print self._level_stack, data.decode('gbk').encode('utf-8').strip() CustomParser.sigle_cve.append(data.decode('gbk').encode('utf-8').strip()) elif "/".join(self._level_stack) in CustomParser.selected_font and CustomParser.sigle_cve: print self._level_stack, data.decode('gbk').encode('utf-8').strip() CustomParser.sigle_cve.append(data.decode('gbk').encode('utf-8').strip()) CustomParser.cve_list.append(CustomParser.sigle_cve) CustomParser.sigle_cve = [] if __name__ == '__main__': ''' 读取,判断是否为高风险,是的打印出来 ''' try: fd = open('test.html','r') except Exception,error: print error html_string = fd.read() ht = CustomParser() ht.feed(html_string) get_list = ht.cve_list for item in get_list: if item[-1] == '高风险': print item fd.close()