#!/usr/bin/env python
"""Compute and plot client download-latency deciles from gzipped logs.

This is a Python 2 script (it relies on Numeric, Gnuplot.py and
``raw_input``).  For each client count it reads the matching
``exps/base-*C1M-seed-*.log.gz`` files, pairs every client's
BEGIN_TRANSFER time with its "GOT 1000000" time, derives the latency
deciles and writes several PostScript plots through Gnuplot.

Command line options:
    --use-cp        reuse checkpoint files written by an earlier run
    --maxseeds N    use at most N seed files per experiment

NOTE: checkpoints are read with pickle, which can execute arbitrary
code.  Only load checkpoint files that this script wrote itself.
"""
import string
import os
import time
import commands
import glob
import gzip
import sys
import xreadlines
import re
import struct
import math
import pickle

from Numeric import *
import Gnuplot, Gnuplot.funcutils


# Start out simple, it's my zcat
def proc_str(str):
    """Write one chunk of text to standard output."""
    sys.stdout.write(str)


def proc_file(filename):
    """Write every line of the gzipped file `filename` to standard output.

    The original loop read the next line before handling the current one,
    which dropped the first line and handed the empty end-of-file string
    to proc_str; each line is now processed exactly once.
    """
    myfile = gzip.GzipFile(filename, "rb")
    try:
        line = myfile.readline()
        while line:
            proc_str(line)
            line = myfile.readline()
    finally:
        myfile.close()


class NotImplemented(Exception):
    """Raised when a result-set base class is instantiated directly.

    The name shadows the ``NotImplemented`` builtin inside this module; it
    is kept because it is part of the module's existing interface.
    """

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)


class ExpResults:
    """Common accessors for one experiment's result set.

    Subclasses are expected to provide their own constructor; this one
    only fills in placeholder values and then raises.
    """

    def __init__(self):
        self.name = "Error!  Name undefined!"
        self.files = "Woot!  No files!"
        self.clients = 0
        self.clientbandwidth = 0
        self.filesize = 0
        raise NotImplemented("ExpResults subtypes need better constructors!")

    def get_name(self):
        """Return the experiment name."""
        return self.name

    def get_files(self):
        """Return the list of log files belonging to the experiment."""
        return self.files

    def get_clients(self):
        """Return the number of clients per sub-experiment."""
        return self.clients

    def get_bw(self):
        """Return the per-client bandwidth.

        The original returned ``self.bandwidth``, an attribute that is
        never assigned anywhere, so every call raised AttributeError.
        """
        return self.clientbandwidth


class BaseResults(ExpResults):
    """Result set for the 'base' experiment with a 1 MB file."""

    def __init__(self, clients, pipe, dir, max_seeds):
        """Locate the log files of one 'base' experiment.

        clients   -- number of clients in the experiment
        pipe      -- 'smallpipe' selects the 1000 kbps server link; any
                     other value selects the 10000 kbps one
        dir       -- directory that holds the ``*.log.gz`` files
        max_seeds -- upper bound on the number of seed files used
        """
        self.max_seeds = max_seeds
        self.filesize = 1000000  # bytes
        self.name = 'base'
        self.clients = clients
        if pipe == 'smallpipe':
            self.serverbandwidth = 1000  # kbps
            self.pipename = 'smallpipe'
        else:
            self.serverbandwidth = 10000  # kbps
            self.pipename = ''
        self.clientbandwidth = 100  # kbps
        self.searchname = (dir + '/' +
                           self.name + '-' +
                           self.pipename +
                           repr(self.clients) + 'C1M' +
                           '-seed-*.log.gz')
        # Expand the pattern in-process instead of parsing `ls` output:
        # with no match `ls` prints an error message that used to be split
        # into bogus file names, and the shell mangled unusual paths.
        # Names are sorted (plain string order) so that the max_seeds cut
        # is deterministic.
        matches = glob.glob(self.searchname)
        matches.sort()
        self.files = matches[0:max_seeds]

    def get_pipename(self):
        """Return 'smallpipe' for the small server link, '' otherwise."""
        return self.pipename


def getlines(filename):
    """Return all lines of the gzipped file `filename` as a list.

    Fixes the original off-by-one, which skipped the first line and
    appended the empty end-of-file string instead.
    """
    myfile = gzip.GzipFile(filename, "rb")
    lines = []
    try:
        line = myfile.readline()
        while line:
            lines.append(line)
            line = myfile.readline()
    finally:
        myfile.close()
    return lines


def alt_getlines(filename):
    """Return all lines of the gzipped file `filename`, showing progress.

    A '.' is written to standard output after every 10000 lines read.
    """
    myfile = gzip.GzipFile(filename, "rb")
    lines = []
    count = 0
    try:
        for line in xreadlines.xreadlines(myfile):
            count = count + 1
            lines.append(line)
            if (count % 10000) == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
    finally:
        myfile.close()
    return lines


def _read_transfer_times(rset):
    """Scan every log file of `rset` for transfer start and end times.

    Returns a pair of Numeric arrays (starts, ends) holding one slot per
    client per log file; a slot stays 0.0 when no matching line was seen.
    """
    clients = rset.get_clients()
    files = rset.get_files()
    subexpcount = len(files)
    starts = zeros(clients * subexpcount, Float)
    ends = zeros(clients * subexpcount, Float)
    begin = re.compile(r"^CLIENT\s+(\S+)\s+BEGIN_TRANSFER\s+(\S+)$")
    done = re.compile(r"^CLIENT\s+(\S+)\s+GOT\s+1000000\s+at\s+(\S+)$")
    for i in range(subexpcount):
        print('  Part ' + repr(i + 1) + ' of ' + repr(subexpcount))
        sys.stdout.write('    Reading    ')
        # Each log file owns a contiguous run of `clients` slots.
        offset = i * clients
        ls = alt_getlines(files[i])
        print('')
        print('      ' + repr(len(ls)) + ' lines read.')
        sys.stdout.write('    Processing ')
        for j in range(len(ls)):
            line = ls[j]
            b_m = begin.match(line)
            d_m = done.match(line)
            if b_m:
                starts[offset + int(b_m.group(1))] = float(b_m.group(2))
            if d_m:
                ends[offset + int(d_m.group(1))] = float(d_m.group(2))
            if (j % 10000) == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
        print('')
        print("      " + repr(len(ls)) + " lines processed.")
    return starts, ends


def compute_latencies(rset, loadcp):
    """Return ``[clients, deciles]`` for the experiment described by `rset`.

    rset   -- result set providing get_name(), get_pipename(),
              get_clients() and get_files()
    loadcp -- when true, reuse a readable checkpoint file instead of
              re-parsing the logs

    `deciles` is a list with the 10th to 90th percentile latency (end time
    minus start time) over all clients that have both a start and an end
    time.  Freshly parsed results are saved to a gzipped pickle checkpoint
    in the current directory.

    Raises ValueError when no client has both times.
    """
    cpver = 0  # What CP version are we on?
    clients = rset.get_clients()
    subexpcount = len(rset.get_files())
    print('Processing experiment \"' +
          rset.get_name() + ' ' +
          rset.get_pipename() + '\":' +
          repr(clients) + ' clients.')
    checkPointFilename = ('CP-V-' + repr(cpver) + '-' +
                          rset.get_name() + '-' +
                          rset.get_pipename() +
                          repr(clients) + 'S1-' +
                          repr(subexpcount) + '.summary.gz')

    checkpointread = 0
    if loadcp and os.access(checkPointFilename, os.R_OK):
        sys.stdout.write("using CP file " + checkPointFilename + ". ")
        try:
            # pickle is only safe on files this script produced itself.
            ifile = gzip.GzipFile(checkPointFilename, 'rb')
            try:
                ustarts = pickle.load(ifile)
                uends = pickle.load(ifile)
            finally:
                ifile.close()
            checkpointread = 1
        except Exception:
            # An unreadable checkpoint is not fatal: parse the logs again.
            sys.stdout.write('Error!')
        sys.stdout.flush()
        print('')

    if not checkpointread:
        starts, ends = _read_transfer_times(rset)
        sys.stdout.write('Post-processing experiment')
        sys.stdout.flush()
        # 0.0 is the "never seen" marker left by zeros(), so nonzero()
        # selects the clients that actually reported a time.
        goodstarti = nonzero(starts)
        ustarts = take(starts, goodstarti)
        uends = take(ends, goodstarti)
        print('Of ' + repr(len(starts)) + ' exps, ' +
              repr(len(ustarts)) + ' have valid start times.')
        goodendi = nonzero(uends)
        ustarts = take(ustarts, goodendi)
        uends = take(uends, goodendi)
        print('Of those, ' + repr(len(uends)) + ' have valid end times.')
        ofile = gzip.GzipFile(checkPointFilename, "wb", 9)
        try:
            pickle.dump(ustarts, ofile)
            pickle.dump(uends, ofile)
        finally:
            ofile.close()

    latencies = uends - ustarts
    slatencies = sort(latencies)
    # Index into the samples that survived filtering.  The original used
    # clients * subexpcount, which overruns the array (or picks the wrong
    # percentile) as soon as any client lacks a start or end time.
    count = len(slatencies)
    if count == 0:
        raise ValueError('no client has both a start and an end time for ' +
                         checkPointFilename)

    def tile(n):  # 0 < n < 1
        return slatencies[int(math.floor(count * n))]

    deciles = [tile(n) for n in (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)]
    print('')
    return [clients, deciles]


# Latency deciles of the 'base' experiment for every client count
def proc_base(loadcp, maxseeds, pipe):
    """Return the ``[clients, deciles]`` records for all client counts.

    loadcp   -- passed through to compute_latencies
    maxseeds -- maximum number of seed files per experiment
    pipe     -- 'smallpipe' or '' (see BaseResults)
    """
    clientcounts = [128, 256, 512, 1024, 2048]
    # Build every result set first, then process them in order, exactly as
    # the original did.
    resultsets = [BaseResults(n, pipe, 'exps', maxseeds) for n in clientcounts]
    return [compute_latencies(rset, loadcp) for rset in resultsets]


def _decile_matrix(results):
    """Convert ``[[clients, deciles], ...]`` records into a Numeric array.

    Row 0 of the result holds the client counts; rows 1 to 9 hold the
    10th to 90th percentile latencies, one column per record.
    """
    records = [[entry[0]] + list(entry[1]) for entry in results]
    return transpose(array(records))


class Application:
    """Parse the command line, compute the deciles and draw the plots."""

    def __init__(self, args):
        """Run the whole program.

        args -- full argument vector; args[0] (the program name) is
                ignored.
        """
        self.maxseeds = 1000  # Effectively infinite
        self.usecheckpoints = 0
        self.cpver = 0
        self._process_args(args[1:])

        base_latency_deciles = self._base_deciles()
        small_deciles = proc_base(self.usecheckpoints, self.maxseeds,
                                  'smallpipe')

        b = _decile_matrix(base_latency_deciles)
        print(repr(b))
        xvals = b[0, :]
        c = _decile_matrix(small_deciles)
        # The original printed `b` again and took the small-pipe x values
        # from `b`; both now come from the small-pipe matrix.
        print(repr(c))
        xvals2 = c[0, :]

        ds = []
        ods = []
        for i in range(1, 10):
            print(repr(b[i, :]))
            ds.append(Gnuplot.Data(xvals, b[i, :],
                                   title=('10mbps-' + repr(i * 10) +
                                          'th %ile')))
        for i in range(1, 10):
            print(repr(c[i, :]))
            ods.append(Gnuplot.Data(xvals2, c[i, :],
                                    title=('1mbps-' + repr(i * 10) +
                                           'th %ile')))

        g = Gnuplot.Gnuplot(debug=1)
        g.title('Latency as a Function of Clients')
        g.xlabel('# of Clients arriving in 0.5 seconds')
        g.ylabel('Latency in seconds')
        g('set data style linespoints')
        g('set key right bottom')
        g('set nologscale')
        g.plot(*ds)
        g.hardcopy('base-test-full.ps', enhanced=1, color=1)
        g.plot(ds[0], ds[4], ds[8])
        g.hardcopy('base-test-pruned.ps', enhanced=1, color=1)

        fBps = 10000000 // 8  # (10mbps / 8bits per byte)
        # The original reused the 10mbps figure here and then never used
        # the small-pipe expression at all.
        sBps = 1000000 // 8  # (1mbps / 8bits per byte)
        optimal_fatpipe = ('(1000000 * x)/ ' + repr(fBps))
        optimal_smallpipe = ('(1000000 * x)/ ' + repr(sBps))
        g.plot(ds[0], ds[4], ds[8],
               Gnuplot.Func(optimal_fatpipe, title='10mbps Optimal Median'))
        g.hardcopy('base-test-pruned-opt.ps', enhanced=1, color=1)
        g.plot(ds[0], ds[4], ds[8],
               Gnuplot.Func(optimal_fatpipe, title='10mbps Optimal Median'),
               ods[0], ods[4], ods[8],
               Gnuplot.Func(optimal_smallpipe, title='1mbps Optimal Median'))
        g.hardcopy('base-small-pruned-opt.ps', enhanced=1, color=1)
        raw_input('bob')  # wait for the user before switching scales
        g('set logscale')
        g.title('Latency as a Function of Clients (log/log)')
        g.plot(ds[0], ds[4], ds[8],
               Gnuplot.Func(optimal_fatpipe, title='10mbps Optimal Median'),
               ods[0], ods[4], ods[8],
               Gnuplot.Func(optimal_smallpipe, title='1mbps Optimal Median'))
        g.hardcopy('base-small-logs-pruned-opt.ps', enhanced=1, color=1)
        raw_input('bob')  # keep gnuplot alive until the user confirms

    def _process_args(self, args):
        """Apply the command line options in `args` to this instance."""
        while args:
            if args[0] == '--use-cp':
                self.usecheckpoints = 1
                args = args[1:]
            elif args[0] == '--maxseeds':
                if len(args) < 2:
                    # The original died with an IndexError here.
                    print("--maxseeds needs a number after it")
                    args = args[1:]
                else:
                    self.maxseeds = int(args[1])
                    args = args[2:]
            else:
                print("Don't know how to deal with arguments like %s"
                      % args[0])
                args = args[1:]

    def _base_deciles(self):
        """Return the fat-pipe decile records, from checkpoint if allowed.

        The records are recomputed and the checkpoint rewritten when
        checkpoints are disabled, when the file is missing, or when it
        cannot be read.
        """
        base_name = ('base-deciles-s' + repr(self.maxseeds) +
                     '-v' + repr(self.cpver) + '.gz')
        deciles = None
        if self.usecheckpoints and os.access(base_name, os.R_OK):
            sys.stdout.write('Reading experimental deciles: ' + base_name)
            print('')
            sys.stdout.flush()
            try:
                # pickle is only safe on files this script produced itself.
                ifile = gzip.GzipFile(base_name, 'rb')
                try:
                    deciles = pickle.load(ifile)
                finally:
                    ifile.close()
            except Exception:
                # Previously an empty list was plotted after a failed
                # load, which crashed later; recompute instead.
                print('Error!')
                deciles = None
        if deciles is None:
            deciles = proc_base(self.usecheckpoints, self.maxseeds, '')
            ofile = gzip.GzipFile(base_name, "wb", 9)
            try:
                pickle.dump(deciles, ofile)
            finally:
                ofile.close()
        return deciles


# Runs at import time, exactly as the original script did.
Application(sys.argv)