Friday, January 25, 2013

Read Zip and Look At XML in Zip To Count Elements With A Specific Attribute

import xml.sax
import zipfile
import re

# Running total of elements that carry a "color" attribute, summed over every
# MyContentHandler that has handled a document in this process.
counter = 0


class MyContentHandler(xml.sax.ContentHandler):
    """SAX handler that counts elements having a ``color`` attribute.

    Every match increments the module-level ``counter`` (kept so existing
    callers keep working) and this handler's own ``count`` attribute, so the
    result of one parse can be read without relying on global state.
    """

    def __init__(self):
        xml.sax.ContentHandler.__init__(self)
        self.count = 0  # matches seen by this handler instance only

    def startElement(self, name, attrs):
        """Record the element if it carries a ``color`` attribute."""
        global counter
        # ``in`` is supported by SAX attribute objects on Python 2 and 3,
        # whereas ``has_key`` is not available on Python 3.
        if "color" in attrs:
            counter += 1
            self.count += 1

def count(myZip):
    """Count ``color``-attributed elements in the first ``myXML.xml`` of a zip.

    Walks the archive's member names in order and parses the first one whose
    name contains ``myXML.xml`` with ``MyContentHandler``. The number of
    matching elements found in that member is printed and returned.

    myZip -- path of the zip archive, handed to ``zipfile.ZipFile``.

    Returns the match count for the parsed member, or ``None`` (printing
    nothing) when no member name matches.
    """
    # The dot is escaped so names such as "myXMLxxml" no longer match; a
    # leading ".*" is unnecessary because search() scans the whole name.
    wanted = re.compile(r"myXML\.xml")
    # Context managers close the archive and the member even if parsing fails.
    with zipfile.ZipFile(myZip) as archive:
        for member_name in archive.namelist():
            if wanted.search(member_name) is None:
                continue
            # The handler adds to the module-level counter, which is never
            # reset; measuring the increase keeps repeated calls from
            # reporting an accumulated total.
            already_counted = counter
            with archive.open(member_name) as member:
                xml.sax.parse(member, MyContentHandler())
            found = counter - already_counted
            print(found)
            return found
    return None

if __name__ == '__main__':
    # Command-line entry point: expects exactly one argument, the zip path.
    import os
    import sys

    if len(sys.argv) == 2:
        try:
            count(sys.argv[1])
        except IOError:
            # Raised when the archive given on the command line cannot be opened.
            # print() with one parenthesised argument prints identically on
            # Python 2 and Python 3.
            print('Filename not found.')
    else:
        print('usage: %s zip' % os.path.basename(sys.argv[0]))

No comments:

Post a Comment