#!/usr/bin/python
import base64
import csv
import functools
import os
import os.path
import re
import sys

import rpy2.robjects as robjects

import numacomp

# Module-wide settings. 'separator' and 'sort_keys' are read by classifyNames();
# the remaining entries are consumed by code further down the file.
options = {
    'separator': ':',
    'ctree_mincriterion': .999,
    'title': 'Benchmark analysis',
    'num_multiple_trials': 1,
    'decisiontree_height': 800,
    'decisiontree_width': 800,
    'boxplot_height': 100,
    'boxplot_width': 800,
    'sort_keys': True,
}


def getDataURL(pngFilename):
    """Return the contents of the file *pngFilename* as a base64 PNG data URL.

    The result has the form ``data:image/png;base64,<payload>``.
    """
    # Binary mode: the payload is image data, not text, and the ``with``
    # block guarantees the handle is closed even if the read fails.
    with open(pngFilename, 'rb') as pngFile:
        data = pngFile.read()
    encoded = base64.b64encode(data)
    if not isinstance(encoded, str):
        # Python 3 hands back bytes; decode so '%s' does not render "b'...'".
        encoded = encoded.decode('ascii')
    return 'data:image/png;base64,%s' % encoded


def makeSafeFilename(str):
    """Return *str* with every '/' character removed."""
    return str.replace('/', '')


def minMaxAvg(ratios):
    """Return ``(minimum, maximum, mean)`` of the numbers in *ratios*.

    Raises ZeroDivisionError when *ratios* is empty (the mean is undefined).
    """
    count = len(ratios)
    if count == 0:
        raise ZeroDivisionError('minMaxAvg() needs at least one ratio')
    # The builtins handle any magnitude; no sentinel bounds are needed.
    return (min(ratios), max(ratios), float(sum(ratios)) / count)


def median(ratios):
    """Return the median of *ratios* without modifying the input.

    For an even number of values the mean of the two middle values is
    returned as a float. Raises IndexError when *ratios* is empty.
    """
    ordered = sorted(ratios)
    middle = len(ordered) // 2  # floor division keeps the index an int
    if len(ordered) % 2 == 1:
        return ordered[middle]
    # Divide by 2.0 so integer inputs are not truncated.
    return (ordered[middle - 1] + ordered[middle]) / 2.0


def CommonPrefixString2(cStr1, cStr2):
    """Return the common prefix, if any, of both strings."""
    nMatched = 0
    for c1, c2 in zip(cStr1, cStr2):
        if c1 != c2:
            break
        nMatched += 1
    return cStr1[:nMatched]


def CommonPrefixString(*cStrs):
    """Return the common prefix, if any, of the strings.

    Raises TypeError when called with no strings at all.
    """
    return functools.reduce(CommonPrefixString2, cStrs)


def numacompwrapper(x, y):
    """Three-way comparison of two ``(key, index)`` pairs.

    The keys are compared with ``numacomp.numacomp`` (whose result is used as
    a negative/zero/positive sign); when it reports them equal, the indices
    decide the order.
    """
    r = numacomp.numacomp(x[0], y[0])
    if r != 0:
        return r
    # Equivalent of cmp(x[1], y[1]) without the Python-2-only builtin.
    return (x[1] > y[1]) - (x[1] < y[1])


def classifyNames(rowNames):
    """Split row names into components and encode each row as index features.

    Every name in *rowNames* is split on ``options['separator']`` and each
    component is stripped of surrounding whitespace. For each component
    position the distinct values are collected; positions with at most one
    distinct value are discarded.

    Returns a tuple ``(keys, features, labels)``:

    * ``keys[p]`` is the list of distinct values seen at (kept) position p,
      ordered with numacompwrapper() when ``options['sort_keys']`` is true and
      in first-seen order otherwise;
    * ``features[r][p]`` is the index into ``keys[p]`` of row r's value;
    * ``labels[p]`` is the common prefix of ``keys[p]`` (whitespace-stripped,
      trailing digits removed) when that is non-empty and not already used
      as a label, else ``'Feature <p>'``.
    """
    keys = []
    features = []
    for row in rowNames:
        # TODO - this should do something awesome
        components = [x.strip() for x in row.split(options['separator'])]
        feature = []
        for i, c in enumerate(components):
            if i >= len(keys):
                keys.append([])
            seen = keys[i]
            if c not in seen:
                seen.append(c)
            feature.append(seen.index(c))
        features.append(feature)

    # Remove all keys that only have one possible value. Rows that have
    # fewer components than the longest row simply keep what they have.
    keep = [i for i, k in enumerate(keys) if len(k) > 1]
    keys = [keys[i] for i in keep]
    features = [[f[i] for i in keep if i < len(f)] for f in features]

    labels = ['Feature %d' % x for x in range(len(keys))]
    for keyindex, keylist in enumerate(keys):
        # Make sure prefix doesn't end in a number
        prefix = CommonPrefixString(*keylist).strip().rstrip('0123456789')
        if prefix != '' and prefix not in labels:
            labels[keyindex] = prefix

    # Sort the keys if requested.
    if options['sort_keys']:
        compareKey = functools.cmp_to_key(numacompwrapper)
        for doingkey, values in enumerate(keys):
            ranked = sorted(
                ((value, oldIndex) for oldIndex, value in enumerate(values)),
                key=compareKey)
            # transformmap[old position] -> new position after sorting
            transformmap = [0] * len(ranked)
            for newIndex, (value, oldIndex) in enumerate(ranked):
                transformmap[oldIndex] = newIndex
            values[:] = [value for value, oldIndex in ranked]
            # Fix up the features
            for f in features:
                if doingkey < len(f):
                    f[doingkey] = transformmap[f[doingkey]]

    return keys, features, labels


def stripLeading(target, label, index):
    """Return *target* without its first ``len(label)`` characters.

    When *label* is the default ``'Feature <index>'`` label, *target* is
    returned unchanged. No check is made that *target* actually starts with
    *label*.
    """
    if label == 'Feature %d' % index:
        return target
    return target[len(label):]


# NOTE(review): the definition that follows here is truncated in this copy of
# the file -- its string literal and the rest of its body are missing, and the
# lines below this point are garbled residue. The surviving fragment is
# preserved verbatim (commented out) rather than guessed at:
#     def makeBaselineSelect(column, dataKeys, datas): thisData = datas[column] toReturn = '

' % options['num_multiple_trials'] print '

' % options['title'] filecontentsstr = base64.b64encode('\n'.join(filecontents)) print '

Feature labels:

' print '' print '

Baseline comparisons:

' print '' print '

' % options['ctree_mincriterion'] print '

' % ('checked="checked"' if options['sort_keys'] else '') print '

x

' % (options['boxplot_width'], options['boxplot_height']) print '

x

' % (options['decisiontree_width'], options['decisiontree_height']) print '

' % options['separator'] print '' % filecontentsstr print '

' print '' print '

Benchmark results:

# NOTE(review): the text that precedes main() on this line is the tail of a
# definition whose body is garbled/truncated in this copy of the file. It is
# preserved verbatim (commented out) rather than guessed at:
#     ' print '' print '' print ''


def main(filename):
    """Read all lines of *filename* and pass them to analyzefile().

    analyzefile() is called with the list of lines and an empty dict.
    """
    # ``with`` closes the file even when reading raises.
    with open(filename) as f:
        contents = f.readlines()
    analyzefile(contents, {})


if __name__ == '__main__':
    # Use the first command-line argument as the input file, falling back to
    # a built-in default name.
    if len(sys.argv) > 1:
        name = sys.argv[1]
    else:
        #name = 'colorhistogrambenchmarks.csv'
        name = 'PM multicore benchmarks.csv'
    main(name)