#!/usr/bin/python3
import csv
import os
import sys
import re
import os.path
import base64
import rpy2.robjects as robjects
import naturalsort
from functools import reduce

# Default analysis options. analyzefile() overlays caller-supplied values onto
# this dict with options.update(), so these act as module-wide defaults.
# Within the helpers below only 'separator' and 'sort_keys' are consulted.
options = {
    'separator': ':',
    'ctree_mincriterion': .999,
    'title': 'Benchmark analysis',
    'num_multiple_trials': 1,
    'decisiontree_height': 800,
    'decisiontree_width': 800,
    'boxplot_height': 100,
    'boxplot_width': 800,
    'sort_keys': True,
}


def getDataURL(pngFilename):
    """Return the contents of the file at pngFilename as a PNG ``data:`` URL.

    The file is read in binary mode, base64-encoded, and embedded in a
    ``data:image/png;base64,...`` string.  The handle is closed before
    returning, including when reading raises.
    """
    with open(pngFilename, 'rb') as pngFile:
        data = pngFile.read()
    dataBytes = base64.b64encode(data)
    return 'data:image/png;base64,%s' % dataBytes.decode('ascii')


def makeSafeFilename(str):
    """Return the given string with every '/' character removed.

    The parameter keeps its original name (which shadows the builtin inside
    this function only) so that keyword callers continue to work.
    """
    return str.replace('/', '')


def minMaxAvg(ratios):
    """Return ``(minimum, maximum, average)`` of the values in ratios.

    ratios must be a non-empty sized collection of numbers.  The extremes are
    taken from the data itself, so values of any magnitude are handled.

    Raises:
        ZeroDivisionError: if ratios is empty.
    """
    # Computed first so an empty input fails with ZeroDivisionError rather
    # than the ValueError that min()/max() would raise.
    average = float(sum(ratios)) / len(ratios)
    return (min(ratios), max(ratios), average)


def median(ratios):
    """Return the median of ratios without modifying the input.

    For an even number of values the two middle values are averaged.

    Raises:
        IndexError: if ratios is empty.
    """
    ordered = sorted(ratios)
    middle = len(ordered) // 2
    if len(ordered) % 2 == 1:
        return ordered[middle]
    return (ordered[middle - 1] + ordered[middle]) / 2


def CommonPrefixString2(cStr1, cStr2):
    """Return the common prefix, if any, of both strings."""
    nCommon = 0
    for c1, c2 in zip(cStr1, cStr2):
        if c1 != c2:
            break
        nCommon += 1
    return cStr1[:nCommon]


def CommonPrefixString(*cStrs):
    """Return the common prefix, if any, of the strings.

    With a single argument that argument is returned unchanged; with no
    arguments the empty string is returned.
    """
    if not cStrs:
        return ''
    return reduce(CommonPrefixString2, cStrs)


def classifyNames(rowNames):
    """Split row names into components and encode each as a feature index.

    Each name is split on ``options['separator']`` and every component is
    stripped of surrounding whitespace.  Component position ``i`` forms a
    "key" column; a row's feature value for that column is the index of its
    component within the column's list of distinct values.

    Columns with only one distinct value are dropped.  Each remaining column
    is labelled ``'Feature <n>'`` unless its values share a non-empty common
    prefix (after trimming whitespace and trailing ASCII digits) that is not
    already in use as a label.  When ``options['sort_keys']`` is true, each
    column's values are put in natural-sort order and the feature indices are
    remapped to match.

    Rows may have differing numbers of components; a row simply has no entry
    for columns beyond its own length.

    Returns:
        ``(keys, features, labels)`` where keys is a list of per-column value
        lists, features is a list of per-row index lists, and labels is a list
        of per-column label strings.
    """
    keys = []
    features = []
    for row in rowNames:
        # TODO - this should do something awesome
        components = [x.strip() for x in row.split(options['separator'])]
        feature = []
        for i, c in enumerate(components):
            if len(keys) <= i:
                keys.append([])
            if c not in keys[i]:
                keys[i].append(c)
            feature.append(keys[i].index(c))
        features.append(feature)

    # Remove all keys that only have one possible value.  Selecting by
    # original column index keeps rows aligned even when some rows are shorter
    # than others.
    keptColumns = [i for i, k in enumerate(keys) if len(k) > 1]
    keys = [keys[i] for i in keptColumns]
    features = [[f[i] for i in keptColumns if i < len(f)] for f in features]

    labels = ['Feature %d' % x for x in range(len(keys))]
    for keyindex, keylist in enumerate(keys):
        prefix = CommonPrefixString(*keylist).strip()
        # Make sure prefix doesn't end in a number
        prefix = prefix.rstrip('0123456789')
        if prefix != '' and prefix not in labels:
            labels[keyindex] = prefix

    # Sort the keys if requested.
    if options['sort_keys']:
        for doingkey, column in enumerate(keys):
            # Old indices listed in their new (naturally sorted) order; ties
            # are broken by original position.
            order = sorted(
                range(len(column)),
                key=lambda idx, column=column: (
                    naturalsort.natural_key(column[idx]), idx),
            )
            newPosition = [0] * len(column)
            for newIdx, oldIdx in enumerate(order):
                newPosition[oldIdx] = newIdx
            keys[doingkey] = [column[oldIdx] for oldIdx in order]
            # Fix up the features
            for f in features:
                if doingkey < len(f):
                    f[doingkey] = newPosition[f[doingkey]]

    return keys, features, labels


def stripLeading(target, label, index):
    """Return target with the first ``len(label)`` characters removed.

    When label is the placeholder ``'Feature <index>'``, target is returned
    unchanged.  No check is made that target actually starts with label.
    """
    if label == 'Feature %d' % index:
        return target
    return target[len(label):]


def makeBaselineSelect(column, dataKeys, datas):
    """Return an empty string (the body is an unfinished placeholder).

    Raises:
        KeyError: if column is not a key of datas.
    """
    # The looked-up value is not used yet; the lookup is kept so a missing
    # column still raises KeyError as before.
    thisData = datas[column]
    toReturn = ''
    return toReturn


# NOTE(review): analyzefile() begins at this point in the original (collapsed)
# source and continues on the following lines, beyond this block.  Its opening
# text is reproduced verbatim below, unmodified, so that nothing is lost; it
# must be uncommented and re-indented when the rest of the function is restored.
# def analyzefile(filecontents, newOptions): options.update(newOptions) rowNames = [] trials = [] datas = {} # Read the data from the file csvReader = csv.reader(filecontents) first = True baseline = None for row in csvReader: #print row if first: headers = [x.strip() for x in row] lastContiguousHeader = None for i in range(len(headers)): h = headers[i] if h != '': datas[h] = {'index': i, 'data': []} if first: baseline = h first = False datas[h]['isBaseline'] = True else: trials.append(h) if lastContiguousHeader == None:
lastContiguousHeader = h datas[h]['baseline'] = baseline else: datas[h]['baseline'] = lastContiguousHeader datas[lastContiguousHeader]['isBaseline'] = True else: lastContiguousHeader = None else: if len(row) > 0 and row[0].strip() != '': rowNames.append(row[0].strip()) for h in datas: datas[h]['data'].append(float(row[datas[h]['index']])) #print datas # Classify the row names. keys, features, labels = classifyNames(rowNames) originalLabels = list(labels) for i in range(len(labels)): if 'label%d' % i in options: labels[i] = options['label%d' % i] # Fix up baseline comparisons. for key in datas: optionToCheck = 'baseline%d' % datas[key]['index'] if optionToCheck in options: try: newBaseline = int(options[optionToCheck]) if newBaseline == -1: datas[key]['isBaseline'] = True if 'baseline' in datas[key]: del datas[key]['baseline'] else: # Find what should be its baseline baselineKey = None for bKey in datas: if datas[bKey]['index'] == newBaseline: baselineKey = bKey if baselineKey != None: if 'isBaseline' in datas[key]: del datas[key]['isBaseline'] datas[key]['baseline'] = baselineKey except ValueError: # bad baseline, skip it pass # Now, consolidate the data into vectors. allRatios = {} for h in datas: if 'isBaseline' not in datas[h]: allRatios[h] = [] thisBaseline = baseline if 'baseline' in datas[h]: thisBaseline = datas[h]['baseline'] for (base, new) in zip(datas[thisBaseline]['data'], datas[h]['data']): # FFV - is this formula right? allRatios[h].append(base*100/new) # Using keys, features, labels, allRatios print('
') print('') print('') print('') print('') print('%s Change options
' % ('-' if 'firstTime' in options else '+')) print('') print('Benchmark results:
') print('