#!/usr/bin/python3 import sys, os, re import getopt, time, socket import Debug import xml.sax, xml.sax.handler from pyPgSQL import PgSQL #iTunesFileName = 'iTunes Music Library.xml' global d keysToDBKeys = {"Track ID" : {"dbName": "trackid", "type":"integer"}, "Name" : {"dbName": "name", "type":"string"}, "Artist" : {"dbName": "artist", "type":"string"}, "Album" : {"dbName" : "album", "type":"string"}, "Track Number" : {"dbName" : "tracknumber", "type":"integer"}, "Play Count" : {"dbName" : "playcount", "type":"integer"}, "Rating" : {"dbName" : "rating", "type":"integer"}, "Comments" : {"dbName" : "comments", "type":"string"}, "Genre" : {"dbName" : "genre", "type":"string"}, "Total Time" : {"dbName": "length", "type":"integer", "function": (lambda x: int(x/1000))}, "Location" : {"dbName": "location", "type": "string"}} dbName = "iTunesTracks" defaultTableName = "tracks" defaultPlaylistsTableName = "playlists" defaultPlaylistsNamesTableName = "playlistsnames" tableCreationString = "(trackid INTEGER PRIMARY KEY, tracknumber INTEGER, playcount INTEGER, rating INTEGER DEFAULT 0, name VARCHAR(300), genre VARCHAR(100), length INTEGER, comments VARCHAR(300), artist VARCHAR(300), album VARCHAR(300), location VARCHAR(450));" topartistString = "CREATE FUNCTION topartist%s (text) RETURNS VARCHAR(300) AS 'SELECT topartist.artist FROM (SELECT artist, COUNT(*) AS acount FROM %s WHERE album=$1 GROUP BY artist ORDER BY acount DESC LIMIT 1) AS topartist WHERE topartist.acount * 2 >= (SELECT COUNT(*) FROM %s WHERE album=$1);' LANGUAGE SQL;" playlistsNamesTableCreationString = "(playlistid INTEGER PRIMARY KEY, playlistname VARCHAR(200));" playlistsTableCreationString = "(playlistid INTEGER, trackid INTEGER);" playlistdataString = "CREATE FUNCTION playlistdata%s (text) RETURNS playlistresults AS 'SELECT * FROM (SELECT playlistname FROM %s WHERE playlistid=$1) AS tempname, (SELECT COUNT(*) FROM %s WHERE playlistid=$1) AS tempblah, (SELECT COUNT(*), ROUND(AVG(rating/20.0),5), ROUND(STDDEV(rating/20.0),5) FROM (SELECT trackid FROM %s WHERE playlistid=$1) AS foo INNER JOIN (SELECT * from %s) AS bar USING (trackid) WHERE rating > 0) AS baz;' LANGUAGE SQL;" videoFileRe = re.compile("video file") def safeQuery(db, queryString): try: # TODO - batch queries? query = db.query(queryString) except PgSQL.Error as msg: del db raise Exception("Query failed with message '%s' - query was '%s'" % (msg, queryString)) if (query.resultStatus != PgSQL.COMMAND_OK): del db raise Exception("Query got bad resultStatus %d! Command was %s" % (query.resultStatus, queryString)) return query def createTable(newTableName, playlistsNamesTableName, playlistsTableName, sessionName): db = PgSQL.PQconnectdb('dbname=%s' % dbName) db.query("CREATE TABLE %s %s" % (newTableName, tableCreationString)) db.query("CREATE TABLE %s %s" % (playlistsNamesTableName, playlistsNamesTableCreationString)) db.query("CREATE TABLE %s %s" % (playlistsTableName, playlistsTableCreationString)) db.query(topartistString % (sessionName, newTableName, newTableName)) db.query(playlistdataString % (sessionName, playlistsNamesTableName, playlistsTableName, playlistsTableName, newTableName)) del db def removeTables(tableNames, session): db = PgSQL.PQconnectdb('dbname=%s' % dbName) # We may be removing because of an error, so don't stress if we can't remove these. for tableName in tableNames: try: db.query("DROP TABLE %s" % tableName) except: pass try: db.query("DROP FUNCTION topartist%s(text);" % session) except: pass try: db.query("DROP FUNCTION playlistdata%s(text);" % session) except: pass del db class State: Begin = 0 InStartTracks = 1 InTracks = 2 InTrackData = 3 AfterTracks = 4 InStartPlaylists = 5 InPlaylistHeader = 6 InPlaylistItems = 7 class Handler(xml.sax.handler.ContentHandler): def __init__(self, tableName, playlistsNamesTableName, playlistsTableName, session, statusShelf, db): self.tableName = tableName self.playlistsNamesTableName = playlistsNamesTableName self.playlistsTableName = playlistsTableName self.session = session self.statusShelf = statusShelf self.db = db self.state = State.Begin self.lastKeyName = '' self.inKey = False self.inValue = False self.lastTrackKey = None self.tracksNestingLevel = 0 self.lastValue = '' self.curTrackData = {} self.numTracks = 0 self.playlistName = None self.playlistID = None def startElement(self, name, attrs): if name.lower() == 'key': if self.state == State.InTrackData: self.curTrackData[self.lastKeyName] = self.lastValue elif self.state == State.InPlaylistHeader: if self.lastKeyName.lower() == 'name': self.playlistName = self.lastValue elif self.lastKeyName.lower() == 'playlist id': self.playlistID = self.lastValue self.inKey = True self.lastKeyName = '' elif name.lower() == 'dict': if self.state == State.InStartTracks: self.state = State.InTracks elif self.state == State.InTracks or self.state == State.InTrackData: self.tracksNestingLevel += 1 if self.state == State.InTracks: self.state = State.InTrackData self.lastTrackKey = self.lastKeyName elif self.state == State.InTrackData or self.state == State.InPlaylistHeader or self.state == State.InPlaylistItems: self.inValue = True self.lastValue = '' elif self.state == State.AfterTracks and self.lastKeyName.lower() == 'playlists' and name.lower() == 'array': if (self.playlistsNamesTableName): self.db.query('DELETE FROM %s' % self.playlistsTableName) self.db.query('DELETE FROM %s' % self.playlistsNamesTableName) self.state = State.InPlaylistHeader if (self.statusShelf): self.statusShelf[self.session]['status'] = 'Adding playlist data...' self.statusShelf.sync() def endElement(self, name): nameLower = name.lower() if self.inKey and nameLower == 'key': self.inKey = False if self.state == State.Begin and self.lastKeyName.lower() == 'tracks': self.state = State.InStartTracks elif self.state == State.InPlaylistHeader and self.lastKeyName.lower() == 'playlist items': dbQuery = "INSERT INTO %s(playlistid, playlistname) VALUES (%s, '%s')" % (self.playlistsNamesTableName, self.playlistID, self.playlistName.replace("'", "''")) query = safeQuery(self.db, dbQuery) self.state = State.InPlaylistItems elif nameLower == 'dict' and (self.state == State.InTracks or self.state == State.InTrackData): self.tracksNestingLevel -= 1 if self.state == State.InTrackData: self.curTrackData[self.lastKeyName] = self.lastValue self.processTrack(self.lastTrackKey, self.curTrackData) self.curTrackData = {} self.state = State.InTracks if self.tracksNestingLevel < 0: self.state = State.AfterTracks elif self.state == State.InPlaylistItems: if nameLower == 'array': self.state = State.InPlaylistHeader elif nameLower == 'dict' and self.lastKeyName == 'Track ID': dbQuery = "INSERT INTO %s (playlistid, trackid) VALUES (%s, %s)" % (self.playlistsTableName, self.playlistID, self.lastValue) query = safeQuery(self.db, dbQuery) self.inValue = False def characters(self, content): if self.inKey: self.lastKeyName += content elif (self.state == State.InTrackData or self.state == State.InPlaylistHeader or self.state == State.InPlaylistItems) and self.inValue: self.lastValue += content def ignorableWhitespace(self, whitespace): # TODO? pass def processTrack(self, trackKey, trackMap): #print "key: %s data: %s" % (trackKey, trackMap) if ('Kind' in trackMap and not videoFileRe.search(trackMap['Kind'])): pgKeyList = "(" pgValueList = "(" for pgsqlKey in list(keysToDBKeys.keys()): if pgsqlKey in list(trackMap.keys()): # Got a key to use. pgKeyList = pgKeyList + keysToDBKeys[pgsqlKey]['dbName'] + "," valueToUse = trackMap[pgsqlKey] if (keysToDBKeys[pgsqlKey]['type'] == 'integer'): valueToUse = int(valueToUse) if ('function' in keysToDBKeys[pgsqlKey]): valueToUse = keysToDBKeys[pgsqlKey]['function'](valueToUse) if (keysToDBKeys[pgsqlKey]['type'] == 'integer'): pgValueList = pgValueList + str(valueToUse) + "," elif (keysToDBKeys[pgsqlKey]['type'] == 'string'): valueToUse = valueToUse.replace("'", "''") #valueToUse = valueToUse.replace('"', '\\"') pgValueList = pgValueList + "'" + valueToUse + "'," else: raise Exception("ERROR - got unrecognized value type %s for key %s" % (keysToDBKeys[pgsqlKey][type], keysToDBKeys[pgsqlKey])) pgKeyList = pgKeyList[:-1] + ')' pgValueList = pgValueList[:-1] + ')' #d.dprint(2, "keyList is %s, valueList is %s" % (pgKeyList, pgValueList)) dbQuery = "INSERT INTO %s %s VALUES %s" % (self.tableName, pgKeyList, pgValueList) #print "dbQuery is %s" % dbQuery query = safeQuery(self.db, dbQuery) #commandLine = "psql -d %s -c \"%s\"" % (dbName, dbQuery) #if (os.system(commandLine) != 0): #print 'Got error running "%s"! Exiting.' % commandLine #sys.exit(1) self.numTracks = self.numTracks + 1 if (self.numTracks % 100 == 0): d.dprint(1, "Done %d tracks..." % (self.numTracks)) if (self.statusShelf): self.statusShelf[self.session]['status'] = 'Adding tracks to database - done %d tracks...' % (self.numTracks) self.statusShelf.sync() def main(iTunesFileName, tableName, playlistsNamesTableName, playlistsTableName, session=None, statusShelf=None): global d if (statusShelf != None): d = Debug.Debug(0) else: d = Debug.Debug(2) if (statusShelf): statusShelf[session]['status'] = 'Parsing file (this may take a few minutes)...' statusShelf.sync() #d.dprint(2, "iTunesString is %d long." % len(iTunesString)) db = PgSQL.PQconnectdb('dbname=%s' % dbName) handler = Handler(tableName, playlistsNamesTableName, playlistsTableName, session, statusShelf, db) db.query('DELETE FROM %s' % tableName) xml.sax.parse(iTunesFileName, handler) del db if (__name__ == '__main__'): if (len(sys.argv) < 2): print("requires name of .xml file") sys.exit(1) else: main(sys.argv[1], defaultTableName, defaultPlaylistsNamesTableName, defaultPlaylistsTableName)