This is a summary of the general approach to reading XML using the DOM (`xml.dom.minidom`) in Python:
#!/usr/bin/env python3
# Skeleton showing each step of reading one element's text and one attribute
# from an XML file with xml.dom.minidom.

# Load minidom
from xml.dom import minidom

# Read a filename from the command line
import sys

progName = sys.argv.pop(0)  # first entry of sys.argv
fileName = sys.argv.pop(0)  # next entry: the XML file to parse

# Parse the file into a DOM document
doc = minidom.parse(fileName)

# Collect all elements named 'tagname'
tagnameElementSet = doc.getElementsByTagName('tagname')

# Take the first match. Two equivalent spellings are shown; only one is
# needed, so the alternative is left as a comment.
tagnameElement = tagnameElementSet.item(0)
# - or -
# tagnameElement = tagnameElementSet[0]

# The text lives in a child node of the element, not on the element itself
tagnameTextNode = tagnameElement.firstChild
tagnameText = tagnameTextNode.data

# Attributes are read directly from the element
attributeText = tagnameElement.getAttribute('attr-name')
This is the 'obvious' way to do it: index the first matching element directly with `[0]`.
#!/usr/bin/env python3
# Print "common-name (name) status" for every <species> element in test.xml,
# taking the first <common-name> and <conservation> match for each species.
from xml.dom import minidom

doc = minidom.parse('test.xml')
for speciesElement in doc.getElementsByTagName('species'):
    scientificName = speciesElement.getAttribute('name')

    # Index straight into each result list; this assumes every species has
    # at least one <common-name> (with text) and one <conservation> element.
    firstCommonName = speciesElement.getElementsByTagName('common-name')[0]
    commonText = firstCommonName.firstChild.data

    firstConservation = speciesElement.getElementsByTagName('conservation')[0]
    statusText = firstConservation.getAttribute('status')

    print("%s (%s) %s" % (commonText, scientificName, statusText))
This is now my preferred approach: don't worry about getting the first item; simply do everything as a loop, so that the loops exactly mirror the structure of the XML.
#!/usr/bin/env python3
# Print "common-name (name) status" for every <species> element in test.xml,
# using one loop per element name instead of indexing the first match.
from xml.dom import minidom

doc = minidom.parse('test.xml')
for species in doc.getElementsByTagName('species'):
    speciesName = species.getAttribute('name')

    # Reset for each species: a loop below runs zero times when the species
    # has no matching element, and without these defaults the value from the
    # previous species would be printed (or a NameError raised on the first).
    commonName = ''
    conservation = ''

    # If several elements match, the last one wins.
    for commonNameElement in species.getElementsByTagName('common-name'):
        commonName = commonNameElement.firstChild.data
    for conservationElement in species.getElementsByTagName('conservation'):
        conservation = conservationElement.getAttribute('status')

    print("%s (%s) %s" % (commonName, speciesName, conservation))