#!/usr/local/bin/python
"""Load the tracks of an iTunes XML library file into a PostgreSQL table.

Usage: pass the path of the library XML file (for example
'iTunes Music Library.xml') as the only command-line argument.

The script empties the target table, then inserts one row per track
dictionary found in the XML, using `keysToDBKeys` to decide which track
properties are stored and under which column names.
"""

import sys, os, re
import getopt, time, socket
import xml.dom.minidom, Debug
from pyPgSQL import PgSQL

# Debug logger shared by the functions below; main() replaces it with a
# Debug.Debug instance before any of them is used.
d = None

# Maps a track property name in the XML to:
#   dbName   - column name in the database table
#   type     - 'integer' or 'string'; controls conversion and SQL quoting
#   function - optional callable applied to the (already converted) value
keysToDBKeys = {"Track ID" : {"dbName": "trackid", "type":"integer"},
                "Name" : {"dbName": "name", "type":"string"},
                "Artist" : {"dbName": "artist", "type":"string"},
                "Album" : {"dbName" : "album", "type":"string"},
                "Track Number" : {"dbName" : "tracknumber", "type":"integer"},
                "Play Count" : {"dbName" : "playcount", "type":"integer"},
                "Rating" : {"dbName" : "rating", "type":"integer"},
                "Comments" : {"dbName" : "comments", "type":"string"},
                "Genre" : {"dbName" : "genre", "type":"string"},
                # Divides by 1000 - presumably milliseconds to seconds;
                # TODO confirm against the library file format.
                "Total Time" : {"dbName": "length", "type":"integer",
                                "function": (lambda x: int(x/1000))},
                "Location" : {"dbName": "location", "type": "string"}}

dbName = "iTunesTracks"
tableName = "tracks"


def _findTrackData(document):
    """Return the element holding the per-track dictionaries, or None.

    Walks document -> second child -> second child and returns the LAST
    child of that node whose name is 'dict'.
    """
    # NOTE(review): the fixed [1] indexes assume the layout of the library
    # file (presumably DOCTYPE first, then the root element, whose second
    # child is the top-level dict) - confirm against a real export.
    root = document.childNodes[1]
    topDict = root.childNodes[1]
    trackData = None
    for node in topDict.childNodes:
        if node.nodeName == 'dict':
            trackData = node
    return trackData


def _parseTrackDict(trackDict):
    """Return {property name: text value} for one track's <dict> element.

    Each <key> child is paired with the node that immediately follows it;
    the text of that node becomes the value.
    """
    trackMap = {}
    for node in trackDict.childNodes:
        if node.nodeName != 'key':
            continue
        valueNode = node.nextSibling
        if valueNode is None:
            # A key with nothing after it has no value to record.
            continue
        trackMap[getText(node.childNodes)] = getText(valueNode.childNodes)
    return trackMap


def _buildInsertQuery(trackMap):
    """Return the INSERT statement for one track, or None if no known keys.

    Only properties listed in keysToDBKeys are used.  Integer values are
    converted with int() (which raises ValueError on non-numeric text);
    string values are wrapped in single quotes with embedded single quotes
    doubled.  Exits the process with status 1 if keysToDBKeys contains an
    unrecognized 'type'.
    """
    columns = []
    values = []
    for plistKey in keysToDBKeys:
        if plistKey not in trackMap:
            continue
        spec = keysToDBKeys[plistKey]
        valueType = spec['type']
        value = trackMap[plistKey]
        if valueType == 'integer':
            value = int(value)
        if 'function' in spec:
            value = spec['function'](value)
        if valueType == 'integer':
            literal = str(value)
        elif valueType == 'string':
            # NOTE(review): the statement is assembled as text and only
            # single quotes are escaped.  How backslashes in a value are
            # treated depends on the server's string-literal settings -
            # confirm, or switch to a parameterized interface.
            literal = "'" + value.replace("'", "''") + "'"
        else:
            d.dprint(0, "ERROR - got unrecognized value type %s for key %s" %
                     (valueType, plistKey))
            sys.exit(1)
        columns.append(spec['dbName'])
        values.append(literal)
    if not columns:
        return None
    return "INSERT INTO %s %s VALUES %s" % (tableName,
                                            "(" + ",".join(columns) + ")",
                                            "(" + ",".join(values) + ")")


def main(iTunesFileName):
    """Replace the contents of the tracks table with the tracks in the file.

    iTunesFileName -- path of the library XML file to read.

    Deletes every row of `tableName`, runs vacuumdb, inserts one row per
    track, then runs vacuumdb again.  Exits with status 1 on the first
    failed INSERT.  If no track data is found the database is not touched.
    """
    global d
    d = Debug.Debug(3)

    iTunesFile = open(iTunesFileName, 'r')
    try:
        iTunesString = iTunesFile.read()
    finally:
        # Close the file even if the read fails.
        iTunesFile.close()
    d.dprint(2, "iTunesString is %d long." % len(iTunesString))

    trackData = _findTrackData(xml.dom.minidom.parseString(iTunesString))
    if trackData is None:
        d.dprint(0, "ERROR - no track data found in %s" % iTunesFileName)
        return

    db = PgSQL.PQconnectdb('dbname=%s' % dbName)
    d.dprint(2, "trackData: type is %d, name is %s, value is %s" %
             (trackData.nodeType, trackData.nodeName, trackData.nodeValue))
    d.dprint(2,"Got track data - has %d children" % len(trackData.childNodes))
    db.query('DELETE FROM %s' % tableName)
    os.system('vacuumdb %s' % dbName)

    numTracks = 0
    for trackDict in trackData.childNodes:
        if trackDict.nodeName != 'dict':
            continue
        dbQuery = _buildInsertQuery(_parseTrackDict(trackDict))
        if dbQuery is None:
            # Nothing to insert; an empty column list is not valid SQL.
            d.dprint(1, "Skipping track with no recognized keys.")
            continue
        try:
            query = db.query(dbQuery)
        except PgSQL.Error:
            # sys.exc_info() instead of "except X, e" / "except X as e" so
            # the file parses on every Python version.
            msg = sys.exc_info()[1]
            print("Failed with message %s" % msg)
            print("Query was %s" % dbQuery)
            del db
            sys.exit(1)
        if query.resultStatus != PgSQL.COMMAND_OK:
            print("Got bad resultStatus %d!  Command was %s" %
                  (query.resultStatus, dbQuery))
            del db
            sys.exit(1)
        numTracks = numTracks + 1
        if numTracks % 100 == 0:
            d.dprint(1, "Done %d tracks..." % numTracks)

    d.dprint(1, "Got %d tracks." % numTracks)
    del db
    os.system('vacuumdb %s' % dbName)


def getText(nodelist):
    """Return the UTF-8 encoded text of all TEXT nodes in nodelist, joined."""
    parts = []
    for node in nodelist:
        if node.nodeType == node.TEXT_NODE:
            parts.append(node.data.encode('utf-8'))
    return "".join(parts)


def attributesString(attributes):
    """Return a debug string listing every attribute's name and value.

    attributes -- a DOM attribute map (has .length and .item(i)), or None,
                  in which case '' is returned.
    """
    if attributes is None:
        return ''
    d.dprint(3, "attributes.length is %d" % attributes.length)
    parts = []
    for i in range(attributes.length):
        attribute = attributes.item(i)
        parts.append("{name = %s, value = %s}" %
                     (attribute.nodeName, attribute.value))
    return ''.join(parts)


def printChildren(dom):
    """Debug-print the type, name and value of each direct child of dom."""
    for child in dom.childNodes:
        d.dprint(3, "childNode: type is %d, name is %s, value is %s" %
                 (child.nodeType, child.nodeName, child.nodeValue))


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("requires name of .xml file")
        sys.exit(1)
    else:
        main(sys.argv[1])