#!/usr/bin/python3
import os, time
from xml.sax import saxutils
import psycopg2
# From http://pypgsql.sourceforge.net/pypgsql-faq.html
#pyPgSQL.fetchReturnsList = 1
#pyPgSQL.noPostgresCursor = 1
defaultDbName = "iTunesTracks"
defaultTableName = "tracks"
defaultTopArtistName = "topartist"
defaultPlaylistsNamesTableName = "playlistsnames"
defaultPlaylistsTableName = "playlists"
defaultPlaylistDataName = "playlistdata"
defaultOutputFileName = "analysis.html"
def makeSummize(output, ratingInfo, width=150):
total = 0
for i in range(6):
if (i not in ratingInfo):
ratingInfo[i] = 0
else:
total += ratingInfo[i]
output.append('')
cumWidth = 0
for i in range(6):
if (ratingInfo[i] > 0):
newWidth = (ratingInfo[i] * width)/total
songWord = "songs"
if (ratingInfo[i] == 1):
songWord = "song"
output.append('' % (i, ratingInfo[i], songWord, i, newWidth, cumWidth))
cumWidth += newWidth
output.append('\n')
curIndex = 0
def chartCallback(connection, index, tableName, column):
def tempCallback(output, item, whereInTable):
global curIndex
if (whereInTable == 0):
if (index == curIndex):
if (item == None or item == 'None'):
whereString = "%s IS NULL" % column
else:
whereString = "%s='%s'" % (column, str(item).replace("'", "''"))
sqlQuery = "SELECT rating/20 AS rating, COUNT(*) AS numTracks FROM %s WHERE %s GROUP BY rating ORDER BY rating;" % (tableName, whereString)
tempCursor = connection.cursor()
tempCursor.execute(sqlQuery)
ratingInfo = {}
for record in tempCursor:
ratingInfo[int(record[0])] = int(record[1])
output.append('
')
makeSummize(output, ratingInfo)
output.append(' | ')
curIndex += 1
else:
curIndex = 0
return tempCallback
curIndex = 0
def playlistCallback(connection, index, tableName, playlistsNamesTableName, playlistsTableName):
def tempCallback(output, item, whereInTable):
global curIndex
if (whereInTable == 0):
if (index == curIndex):
if (item == None):
item = ''
# SELECT rating/20 AS rating, COUNT(*) AS num FROM ((SELECT playlistid FROM playlistsnames WHERE playlistname='Guitar Hero') AS baz INNER JOIN (SELECT trackid FROM playlists) AS foo USING (playlistid)) INNER JOIN (SELECT * from tracks) AS bar USING (trackid) GROUP BY rating;
#sqlQuery = "SELECT rating/20 AS rating, COUNT(*) AS numTracks FROM (SELECT trackid from %s WHERE playlistid='%s') AS foo INNER JOIN (SELECT * from %s) AS bar USING (trackid) GROUP BY rating;" % (tableName, column, str(item).replace("'", "''"))
# TODO
sqlQuery = "SELECT rating/20 AS rating, COUNT(*) FROM %s, %s, %s WHERE %s.playlistname='%s' AND %s.playlistid=%s.playlistid AND %s.trackid=%s.trackid GROUP BY rating;" % (tableName, playlistsNamesTableName, playlistsTableName, playlistsNamesTableName, str(item).replace("'", "''"), playlistsNamesTableName, playlistsTableName, tableName, playlistsTableName)
tempCursor = connection.cursor()
tempCursor.execute(sqlQuery)
ratingInfo = {}
for record in tempCursor:
ratingInfo[int(record[0])] = int(record[1])
output.append('')
makeSummize(output, ratingInfo)
output.append(' | ')
curIndex += 1
else:
curIndex = 0
return tempCallback
overallRatingInfo = {}
def overallRatingCallback(output, row, whereInTable):
if (row != None):
overallRatingInfo[row[0]] = int(row[1])
else:
if (whereInTable == 2):
output.append('')
makeSummize(output, overallRatingInfo)
output.append('
')
#maxNum = 0
#for i in range(6):
# if (i not in ratingInfo):
# ratingInfo[i] = 0
# else:
# maxNum = max(ratingInfo[i], maxNum)
## Figure out how to make the graph.
#mainOutput.append('\n')
#for i in range(6):
# mainOutput.append('- %d
\n' % (i+1, i))
# mainOutput.append('%d
\n' % (int(300 * (float(ratingInfo[i])/float(maxNum * 1.1))), ratingInfo[i]))
#mainOutput.append('- \n')
#mainOutput.append('\n' % int(maxNum * 1.1))
#mainOutput.append('\n' % int(maxNum * 1.1 * .8))
#mainOutput.append('\n' % int(maxNum * 1.1 * .6))
#mainOutput.append('\n' % int(maxNum * 1.1 * .4))
#mainOutput.append('\n' % int(maxNum * 1.1 * .2))
#mainOutput.append('
\n')
#mainOutput.append('
\n')
#print "got row: %s" % row
linksOutput = []
mainOutput = []
def main(dbName, tableName, topArtistName, playlistsNamesTableName, playlistsTableName, playlistDataName, outputFileName, session=None, statusShelf=None):
if (os.path.exists(outputFileName)):
os.remove(outputFileName)
outputFile = open(outputFileName, 'a')
if (statusShelf != None):
statusShelf['data']['status'] = 'Performing analysis on data...'
statusShelf.sync()
connection = psycopg2.connect('dbname=%s' % dbName)
connection.set_client_encoding('UNICODE')
cursor = connection.cursor()
outputFile.write('\n')
outputFile.write('\niTunes Rating Analysis\n')
outputFile.write('\n')
outputFile.write('\n')
outputFile.write('\n')
outputFile.write('\n')
outputFile.write('\n')
outputFile.write('This analysis done by Greg\'s iTunes Rating Analysis on %s
\n' % time.asctime())
findFavoriteGroups(cursor, tableName, topArtistName, outputFile)
outputFile.write('Click on the headers to sort by column. The little charts are based on summize.com - each color represents a different rating (black is 0, red is 1, etc.). You can mouse over the colors to see the exact number of songs that had that rating.
\n')
linksOutput.append('')
executeCommand(cursor, "SELECT foo.averagerating, foo.stdDev, foo.numRatedTracks, bar.totalTracks, baz.trackstorate FROM (SELECT AVG(rating/20.0) AS averagerating, STDDEV(rating/20.0) AS stdDev, COUNT(*) AS numRatedTracks FROM %s WHERE rating > 20) AS foo, (SELECT COUNT(*) AS totalTracks FROM %s) AS bar, (SELECT COUNT(*) AS trackstorate FROM %s WHERE rating = 0 AND (genre IS NULL OR genre<>'Books & Spoken')) AS baz;" % (tableName, tableName, tableName), "Average rating of all tracks", ['Average rating', 'Standard deviation', 'Rated tracks', 'Total tracks', 'Tracks to rate'], 'avgRating')
executeCommand(cursor, "SELECT rating/20 AS rating, COUNT(*) AS numTracks FROM %s GROUP BY rating ORDER BY rating;" % (tableName), "Number of rated tracks", ['Rating', 'Number of tracks'], 'ratingTracks', overallRatingCallback, False)
executeCommand(cursor, "SELECT SUM(playcount*length)/(3600*24) AS days, (SUM(playcount*length)-((3600*24)*(SUM(playcount*length)/(3600*24))))/3600 AS hours, (SUM(playcount*length)-(3600*(SUM(playcount*length)/3600)))/60 AS minutes, (SUM(playcount*length)-(60*(SUM(playcount*length)/60))) AS seconds FROM %s;" % (tableName) , "Total time listened to music", ['Days', 'Hours', 'Minutes', 'Seconds'], 'timeListened')
executeCommand(cursor, "SELECT SUM(length)/(3600*24) AS days, (SUM(length)-((3600*24)*(SUM(length)/(3600*24))))/3600 AS hours, (SUM(length)-(3600*(SUM(length)/3600)))/60 AS minutes, (SUM(length)-(60*(SUM(length)/60))) AS seconds FROM %s;" % (tableName), "Total time of music in library", ['Days', 'Hours', 'Minutes', 'Seconds'], 'totalTime')
executeCommand(cursor, "SELECT SUM(length)/(3600*24) AS days, (SUM(length)-((3600*24)*(SUM(length)/(3600*24))))/3600 AS hours, (SUM(length)-(3600*(SUM(length)/3600)))/60 AS minutes, (SUM(length)-(60*(SUM(length)/60))) AS seconds FROM %s WHERE rating = 0 AND (genre IS NULL OR genre <> 'Books & Spoken');" % (tableName), "Total time of unrated music", ['Days', 'Hours', 'Minutes', 'Seconds'], 'unratedTime')
executeCommand(cursor, "SELECT * FROM (SELECT artist, COUNT(*) AS numTracks, ROUND(AVG(rating/20.0),5) AS averageRating, ROUND(STDDEV(rating/20.0),5) AS stdDev FROM %s WHERE rating > 20 GROUP BY artist) AS foo INNER JOIN (SELECT artist, COUNT(*) AS totalTracks FROM %s GROUP BY artist) AS bar USING (artist) WHERE numTracks > 4 ORDER BY foo.averageRating DESC, foo.numTracks DESC;" % (tableName, tableName), "Highest average rated artists with more than four tracks rated", ['Artist', 'Chart', 'Rated tracks', 'Average rating', 'Standard deviation', 'Total tracks'], 'bestArtists1', chartCallback(connection, 0, tableName, 'artist'))
executeCommand(cursor, "SELECT * FROM (SELECT artist, COUNT(*) AS numTracks, ROUND(AVG(rating/20.0),5) AS averageRating, ROUND(STDDEV(rating/20.0),5) AS stdDev FROM %s WHERE rating > 20 GROUP BY artist) AS foo INNER JOIN (SELECT artist, COUNT(*) AS totalTracks FROM %s GROUP BY artist) AS bar USING (artist) WHERE numTracks > 1 AND numTracks < 5 ORDER BY foo.averageRating DESC, foo.numTracks DESC;" % (tableName, tableName), "Highest average rated artists with two to four tracks rated", ['Artist', 'Chart', 'Tracks', 'Average rating', 'Standard deviation', 'Total tracks'], 'bestArtists2', chartCallback(connection, 0, tableName, 'artist'))
executeCommand(cursor, "SELECT * FROM (SELECT album, COUNT(*) AS numTracks, ROUND(AVG(rating/20.0),5) AS averageRating, ROUND(STDDEV(rating/20.0),5) AS stdDev FROM %s WHERE rating > 20 GROUP BY album) AS foo INNER JOIN (SELECT album, COUNT(*) AS totalTracks, %s(album) AS mainartist FROM %s GROUP BY album) AS bar USING (album) WHERE numTracks > 1 ORDER BY foo.averageRating DESC, foo.numTracks DESC;" % (tableName, topArtistName, tableName), "Highest average rated albums with at least one track rated", ['Album', 'Chart', 'Rated tracks', 'Average rating', 'Standard deviation', 'Total tracks', 'Main artist'], 'bestAlbums', chartCallback(connection, 0, tableName, 'album'))
# SELECT COUNT(*), ROUND(AVG(rating/20.0),5) FROM (SELECT trackid FROM playlists WHERE playlistid=38072) AS foo INNER JOIN (SELECT * from tracks) AS bar USING (trackid) WHERE rating > 0; -- gets for that playlist
# CREATE FUNCTION playlistdata (integer) RETURNS playlistresults AS 'SELECT * FROM (SELECT playlistname FROM playlistsnames WHERE playlistid=$1) AS tempname, (SELECT COUNT(*) FROM playlists WHERE playlistid=$1) AS tempblah, (SELECT COUNT(*), ROUND(AVG(rating/20.0),5), ROUND(STDDEV(rating/20.0),5) FROM (SELECT trackid FROM playlists WHERE playlistid=$1) AS foo INNER JOIN (SELECT * from tracks) AS bar USING (trackid) WHERE rating > 0) AS baz;' LANGUAGE SQL;
# CREATE TABLE playlistresults (playlistname VARCHAR(200), totalcount bigint, ratedcount bigint, avgRating NUMERIC, stdDev NUMERIC);
# SELECT avgrating(a) FROM (SELECT playlistdata(playlistid) AS a FROM playlists GROUP BY playlistid) AS foo;
if (playlistsNamesTableName):
executeCommand(cursor, 'SELECT playlistname(a), avgrating(a) AS avgrating, stddev(a), ratedcount(a), totalcount(a) FROM (SELECT %s(playlistid) AS a FROM %s GROUP BY playlistid) AS foo ORDER BY avgrating DESC;' % (playlistDataName, playlistsTableName) , "Highest rated playlists", ['Playlist name', 'Chart', 'Average rating', 'Standard deviation', 'Rated tracks', 'Total tracks'], 'bestPlaylists', playlistCallback(connection, 0, tableName, playlistsNamesTableName, playlistsTableName))
executeCommand(cursor, "SELECT bar.genre, foo.averageRating, foo.stdDev, foo.numRatedTracks, bar.totalTracks FROM (SELECT genre, ROUND(AVG(rating/20.0),5) AS averageRating, ROUND(STDDEV(rating/20.0),5) AS stdDev, COUNT(*) AS numRatedTracks FROM %s WHERE rating > 20 GROUP BY genre) AS foo RIGHT JOIN (SELECT genre, COUNT(*) AS totalTracks FROM %s GROUP BY genre) AS bar ON (foo.genre=bar.genre OR (foo.genre IS NULL AND bar.genre IS NULL)) ORDER BY averageRating DESC;" % (tableName, tableName), "Genres in rating order", ['Genre', 'Chart', 'Average rating', 'Standard deviation', 'Rated tracks', 'Total tracks'], 'bestGenres', chartCallback(connection, 0, tableName, 'genre'))
executeCommand(cursor, "SELECT * FROM (SELECT artist, COUNT(*) AS num4and5StarTracks FROM %s WHERE rating >= 80 GROUP BY artist) AS foo INNER JOIN (SELECT artist, COUNT(*) AS ratedTracks FROM %s WHERE rating > 0 GROUP BY artist) AS baz USING (artist) INNER JOIN (SELECT artist, COUNT(*) AS totalTracks FROM %s GROUP BY artist) AS bar USING (artist) ORDER BY num4and5StarTracks DESC, totalTracks;" % (tableName, tableName, tableName), "Artists with most 4 and 5 star tracks", ['Artist', 'Chart', '4 & 5 star tracks', 'Rated tracks', 'Total tracks'], 'fourAndFiveStarArtists', chartCallback(connection, 0, tableName, 'artist'))
executeCommand(cursor, "SELECT * FROM (SELECT artist, COUNT(*) AS num5StarTracks FROM %s WHERE rating = 100 GROUP BY artist) AS foo INNER JOIN (SELECT artist, COUNT(*) AS ratedTracks FROM %s WHERE rating > 0 GROUP BY artist) AS baz USING (artist) INNER JOIN (SELECT artist, COUNT(*) AS totalTracks FROM %s GROUP BY artist) AS bar USING (artist) ORDER BY num5StarTracks DESC, totalTracks;" % (tableName, tableName, tableName), "Artists with most 5 star tracks", ['Artist', 'Chart', '5 star tracks', 'Rated tracks', 'Total tracks'], 'fiveStarArtists', chartCallback(connection, 0, tableName, 'artist'))
executeCommand(cursor, "SELECT * FROM (SELECT album, COUNT(*) AS num5StarTracks FROM %s WHERE rating = 100 GROUP BY album) AS foo INNER JOIN (SELECT album, COUNT(*) AS totalTracks, %s(album) AS mainartist FROM %s GROUP BY album) AS bar USING (album) ORDER BY num5StarTracks DESC, totalTracks;" % (tableName, topArtistName, tableName), "Albums with most 5 star tracks", ['Album', 'Chart', '5 star tracks', 'Total tracks', 'Main artist'], 'fiveStarAlbums', chartCallback(connection, 0, tableName, 'album'))
executeCommand(cursor, "SELECT bar.comments, foo.averageRating, foo.stdDev, foo.numRatedTracks, bar.totalTracks FROM (SELECT comments, ROUND(AVG(rating/20.0),5) AS averageRating, ROUND(STDDEV(rating/20.0),5) AS stdDev, COUNT(*) AS numRatedTracks FROM %s WHERE rating > 20 GROUP BY comments) AS foo RIGHT JOIN (SELECT comments, COUNT(*) AS totalTracks FROM %s GROUP BY comments) AS bar ON (foo.comments=bar.comments OR (foo.comments IS NULL AND bar.comments IS NULL)) WHERE foo.numRatedTracks > 1 ORDER BY averageRating DESC;" % (tableName, tableName), "Comments in rating order", ['Comments', 'Average rating', 'Standard deviation', 'Rated tracks', 'Total tracks'], 'commentsRating')
executeCommand(cursor, "SELECT * FROM (SELECT artist, COUNT(*) AS numTracks FROM %s GROUP BY artist) AS foo WHERE numTracks > 1 ORDER BY numTracks DESC;" % (tableName), "Artists with more than one song", ['Artist', 'Total tracks'], 'totalArtists')
executeCommand(cursor, "SELECT SUM(playcount) FROM %s;" % (tableName), "Total number of tracks played", ['Tracks'], 'tracksPlayed')
linksOutput.append('
')
for line in linksOutput:
outputFile.write(line)
for line in mainOutput:
outputFile.write(line)
outputFile.write('\n')
outputFile.write('
\n')
if (outputFileName == defaultOutputFileName):
# Add tracking, etc. to local one
outputFile.write('')
outputFile.write('\n')
outputFile.close()
def findFavoriteGroups(cursor, tableName, topArtistName, outputFile):
# Find top 5 artists by number of 4 and 5 rated songs and by average rating
# (with more than 4 songs rated)
artists = set()
cursor.execute("SELECT artist, count(*) as numtracks FROM %s WHERE rating >= 80 GROUP BY artist ORDER BY numtracks DESC LIMIT 5;" % tableName)
for record in cursor:
artists.update([record[0]])
cursor.execute("SELECT artist, count(*) as numtracks FROM %s WHERE rating = 100 GROUP BY artist ORDER BY numtracks DESC LIMIT 5;" % tableName)
for record in cursor:
artists.update([record[0]])
cursor.execute("SELECT * FROM (SELECT artist, COUNT(*) AS numTracks, AVG(rating) AS avgRating FROM %s WHERE rating > 20 GROUP BY artist) AS foo WHERE foo.numTracks > 4 ORDER BY foo.avgRating DESC LIMIT 5;" % tableName)
for record in cursor:
artists.update([record[0]])
artistsList = list(artists)
artistsList.sort()
outputFile.write('Favorite artists: %s
' % (', '.join(artistsList)))
# TODO - topartist() function is really slow here
cursor.execute("SELECT foo.album, %s(foo.album) FROM (SELECT album, COUNT(*) AS numTracks, AVG(rating) AS avgRating FROM %s WHERE rating > 20 GROUP BY album) AS foo WHERE foo.numTracks > 4 ORDER BY foo.avgRating DESC LIMIT 5;" % (topArtistName, tableName))
albumsList = []
for record in cursor:
albumsList.append((record[0], record[1]))
def artistString(artistTuple):
if (artistTuple[1] != '' and artistTuple[1] != 'None'):
return "%s (by %s)" % artistTuple
else:
return "%s" % artistTuple[0]
outputFile.write('Favorite albums: %s
' % (', '.join([artistString(x) for x in albumsList])))
def executeCommand(cursor, sqlQuery, description, headers, hrefName, rowCallback=None, callAfterEachItem=True):
cursor.execute(sqlQuery)
linksOutput.append('%s' % (hrefName, description))
classStr = ''
# Make the table sortable if there's more than one result.
if (cursor.rowcount > 1):
classStr = 'class="sortable" '
mainOutput.append('%s\n' % (classStr, hrefName, saxutils.escape(description)))
mainOutput.append("")
for header in headers:
mainOutput.append("%s | " % saxutils.escape(header))
mainOutput.append("
\n")
for record in cursor:
mainOutput.append("")
if (rowCallback != None):
row = []
# TODO - make constants for whereInTable
for item in record:
if (rowCallback != None):
row.append(item)
mainOutput.append("%s | " % saxutils.escape(str(item)))
if (callAfterEachItem and rowCallback):
rowCallback(mainOutput, item, 0)
if (not callAfterEachItem and rowCallback):
rowCallback(mainOutput, row, 1)
mainOutput.append("
\n")
if (callAfterEachItem and rowCallback):
rowCallback(mainOutput, None, 1)
mainOutput.append("
\n")
if (rowCallback):
rowCallback(mainOutput, None, 2)
#commandLine = "psql -d %s -c \"%s\" >> %s" % (dbName, sqlQuery, outputFileName)
#os.system(commandLine)
if (__name__ == '__main__'):
main(defaultDbName, defaultTableName, defaultTopArtistName, defaultPlaylistsNamesTableName, defaultPlaylistsTableName, defaultPlaylistDataName, defaultOutputFileName)