设为首页 加入收藏

TOP

Python解析xml文件操作的例子
2015-02-02 14:27:57 来源: 作者: 【 】 浏览:10
Tags:Python 解析 xml 文件 操作 例子

Python解析xml文件操作实例,操作XML文件的常见技巧。


xml文件内容:





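<?xml version="1.0" encoding="UTF-8"?>
<book>
  <title>sample xml thing</title>
  <author>
    <name>
      <first>ma</first>
      <last>xiaoju</last>
    </name>
    <affiliation>Springs Widgets, Inc.</affiliation>
  </author>
  <chapter number="1">
    <title>First</title>
    <para>
      I think widgets are greate.You should buy lots of them forom
      <company>Spirngy Widgts, Inc</company>
    </para>
  </chapter>
</book>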




python代码:


from xml.dom import minidom, Node
import re, textwrap ## www.jbxue.com


class SampleScanner:
    """Walk a parsed ``<book>`` XML document and print a summary of it.

    Constructing an instance performs the whole scan: every top-level
    ``<book>`` element of the document is visited and its title, author,
    and chapter details are written to standard output.

    The ``print`` calls deliberately take one pre-formatted string so the
    output is identical whether the file runs under Python 2 or Python 3.
    """

    def __init__(self, doc):
        """Scan *doc* and print what is found.

        Args:
            doc: a ``minidom.Document``.

        Raises:
            AssertionError: if *doc* is not a ``minidom.Document``.
        """
        assert isinstance(doc, minidom.Document)
        for child in doc.childNodes:
            if child.nodeType == Node.ELEMENT_NODE and \
                    child.tagName == "book":
                self.handle_book(child)

    def handle_book(self, node):
        """Print the title of a ``<book>`` and dispatch its authors/chapters.

        Only direct element children of *node* are considered.
        """
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "title":
                print("Book titile is: %s" % self.gettext(child.childNodes))
            elif child.tagName == "author":
                self.handle_author(child)
            elif child.tagName == "chapter":
                self.handle_chapter(child)

    def handle_chapter(self, node):
        """Print a ``<chapter>``'s ``number`` attribute and title, then
        process each direct ``<para>`` child.
        """
        number = node.getAttribute("number")
        print("number: %s" % number)
        # getElementsByTagName searches all descendants, not only direct
        # children, so the text of every nested <title> is included.
        title_nodes = node.getElementsByTagName("title")
        print("title: %s" % self.gettext(title_nodes))

        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "para":
                self.handle_chapter_para(child)

    def handle_chapter_para(self, node):
        """Print the text of the ``<company>`` elements inside a ``<para>``."""
        company = self.gettext(node.getElementsByTagName("company"))
        print("chapter:para:company %s" % company)

    def handle_author(self, node):
        """Print the name and affiliation found directly under ``<author>``."""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "name":
                self.handle_author_name(child)
            elif child.tagName == "affiliation":
                print("affiliation: %s" % self.gettext(child.childNodes))

    def handle_author_name(self, node):
        """Print the ``<first>`` and ``<last>`` parts of a ``<name>``.

        A part that is absent is printed as an empty string.
        """
        first = ""
        last = ""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "first":
                first = self.gettext(child.childNodes)
            elif child.tagName == "last":
                last = self.gettext(child.childNodes)

        print("firstname:%s,lastname:%s" % (first, last))

    def gettext(self, nodelist):
        """Return the concatenated text beneath every node in *nodelist*.

        Text and CDATA nodes contribute their own data; any other node that
        has children is descended into recursively.  Each run of whitespace
        in the result is collapsed to a single space (the result is not
        stripped).

        Args:
            nodelist: an iterable of DOM nodes (e.g. ``childNodes`` or the
                result of ``getElementsByTagName``).

        Returns:
            The collected text as one string; empty if nothing was found.
        """
        pieces = []
        for node in nodelist:
            if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                # Use the node's own data rather than wholeText: wholeText
                # already spans all adjacent text/CDATA siblings, so
                # collecting it per node would repeat the same text once
                # for every sibling in the run.
                pieces.append(node.data)
            elif node.hasChildNodes():
                pieces.append(self.gettext(node.childNodes))

        return re.sub(r"\s+", " ", "".join(pieces))


if __name__ == "__main__":
    # Parse simple.xml from the current working directory and scan it;
    # SampleScanner prints its findings while it is being constructed.
    doc = minidom.parse("simple.xml")
    sample = SampleScanner(doc)


?


】【打印繁体】【投稿】【收藏】 【推荐】【举报】【评论】 【关闭】 【返回顶部
分享到: 
上一篇C++多态实现的机制 下一篇支持https但不验证证书的HttpClie..

评论

帐  号: 密码: (新用户注册)
验 证 码:
表  情:
内  容: