In [1]:
import numpy as np
import pandas as pd

In [95]:
def extract_script_info(df_s, n):
    """Summarise one script of 'M' / 'S' / 'D' / 'I' operations.

    The script is walked as a path over a (row, col) grid starting at (0, 0):
    'M' and 'S' advance both coordinates, 'D' advances only the row and 'I'
    advances only the column. Any other symbol moves nothing.
    (Presumably M/S/D/I are match / substitution / deletion / insertion of an
    edit script -- confirm against the producer of the CSV.)

    Parameters
    ----------
    df_s : mapping-like (e.g. a DataFrame row)
        Must provide "s" (the iterable of operations) and "d1", "d2", which
        are copied into the result unchanged.
    n : number
        ``int(n/2)`` is used as the border row of the grid.

    Returns
    -------
    dict
        Keys, in insertion order: "DI_half" (only when the path steps out of
        row ``int(n/2) - 1`` with a non-'I' operation), then "nM", "nS", "nD",
        "nI", "d", "d1", "d2", "edges", "j_cross", "border_I", "Delta".
    """
    ops = df_s["s"]
    half = int(n/2)

    counts = {"M": 0, "S": 0, "D": 0, "I": 0}
    row = 0
    col = 0
    crossing_col = -1        # stays -1 when the border row is never reached
    border_insertions = 0    # number of 'I' steps taken while sitting on row `half`
    summary = {}

    for op in ops:
        # Step that leaves row half-1: record where the path lands on the
        # border row and the deletion/insertion imbalance up to that point.
        if op != 'I' and row == half - 1:
            is_deletion = 1 if op == 'D' else 0
            crossing_col = col + 1 - is_deletion
            summary["DI_half"] = counts["D"] + is_deletion - counts["I"]

        if op == "I":
            if row == half:
                border_insertions += 1
            counts["I"] += 1
            col += 1
        elif op == "D":
            counts["D"] += 1
            row += 1
        elif op in ("M", "S"):
            counts[op] += 1
            row += 1
            col += 1

    edits = counts["S"] + counts["D"] + counts["I"]

    # "DI_half" (if any) is already in `summary`, so it stays the first key.
    summary.update({
        "nM": counts["M"],
        "nS": counts["S"],
        "nD": counts["D"],
        "nI": counts["I"],
        "d": edits,
        "d1": df_s["d1"],
        "d2": df_s["d2"],
        "edges": edits + counts["M"],
        "j_cross": crossing_col,
        "border_I": border_insertions,
    })

    # Move the crossing column towards `half` by the number of border
    # insertions, but never past it; Delta is the remaining signed offset.
    if crossing_col <= half:
        nearest = min(crossing_col + border_insertions, half)
    else:
        nearest = max(crossing_col - border_insertions, half)
    summary["Delta"] = half - nearest

    return summary

In [96]:
def infos_on_file(file_name):
    """Build a per-row summary table for a CSV of scripts.

    Reads ``file_name`` with ``pd.read_csv`` and runs ``extract_script_info``
    on every row. The length of the first row's ``x`` value is passed as ``n``
    for all rows.

    Parameters
    ----------
    file_name : str or path-like
        CSV file with at least the column ``x`` plus whatever columns
        ``extract_script_info`` reads from a row.

    Returns
    -------
    pandas.DataFrame
        One row per input row (same index as the CSV frame), one column per
        key returned by ``extract_script_info``. An empty frame is returned
        for a CSV without data rows.
    """
    df = pd.read_csv(file_name)
    if df.empty:
        # Nothing to summarise; there is no first row to take `n` from.
        return pd.DataFrame()

    n = len(df["x"].iloc[0])

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0, and growing a frame row by row is quadratic.
    records = [extract_script_info(df.loc[i], n) for i in df.index]
    return pd.DataFrame(records, index=df.index)

In [97]:
# Load one summary table per sequence length (64, 256, 1024, 4096).
info_64, info_256, info_1024, info_4096 = map(
    infos_on_file,
    (
        "/tmp/Delta64.csv",
        "/tmp/Delta256.csv",
        "/tmp/Delta1024.csv",
        "/tmp/Delta4096.csv",
    ),
)

In [98]:
# Add the same two derived columns to every per-length summary table (in place).
for summary in (info_64, info_256, info_1024, info_4096):
    # G: d1 + d2 minus the total number of non-match operations d.
    summary["G"] = summary["d1"] + summary["d2"] - summary["d"]
    # AbsDelta: magnitude of the signed Delta offset.
    summary["AbsDelta"] = summary["Delta"].abs()

In [99]:
# Mean |Delta| for each sequence length, one line per table.
for template, frame in (
    ("64    {0:.6f}", info_64),
    ("256   {0:.6f}", info_256),
    ("1024  {0:.6f}", info_1024),
    ("4096  {0:.6f}", info_4096),
):
    print(template.format(frame["AbsDelta"].mean()))


64    2.827200
256   7.598200
1024  19.581500
4096  49.750600

In [100]:
# Summary statistics with every column divided by 256.
info_256.div(256).describe()


Out[100]:
DI_half nM nS nD nI d d1 d2 edges j_cross border_I Delta G AbsDelta
count 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000
mean -0.000579 0.578223 0.304326 0.117451 0.117451 0.539228 0.275552 0.275667 1.117451 0.500579 0.000428 -0.000599 0.011991 0.029680
std 0.037915 0.020080 0.031901 0.014847 0.014847 0.015153 0.011775 0.011707 0.014847 0.037915 0.001507 0.037615 0.008330 0.023114
min -0.140625 0.507812 0.167969 0.070312 0.070312 0.472656 0.230469 0.226562 1.070312 0.378906 0.000000 -0.140625 0.000000 0.000000
25% -0.027344 0.566406 0.281250 0.105469 0.105469 0.527344 0.269531 0.269531 1.105469 0.476562 0.000000 -0.027344 0.003906 0.011719
50% 0.000000 0.578125 0.304688 0.117188 0.117188 0.539062 0.277344 0.277344 1.117188 0.500000 0.000000 0.000000 0.011719 0.023438
75% 0.023438 0.593750 0.324219 0.128906 0.128906 0.550781 0.285156 0.285156 1.128906 0.527344 0.000000 0.023438 0.015625 0.042969
max 0.121094 0.656250 0.417969 0.179688 0.179688 0.593750 0.320312 0.312500 1.179688 0.640625 0.023438 0.121094 0.062500 0.140625

In [101]:
# Summary statistics for the 1024 table (raw values, no rescaling).
info_1024.describe()


Out[101]:
DI_half nM nS nD nI d d1 d2 edges j_cross border_I Delta G AbsDelta
count 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000 10000.00000
mean 0.128800 605.782000 297.664600 120.553400 120.553400 538.771400 272.146100 272.041900 1144.553400 511.871200 0.113900 0.128900 5.416600 19.58150
std 24.608482 9.309981 15.652732 7.461294 7.461294 6.299628 4.971783 4.860381 7.461294 24.608482 0.406132 24.519295 3.399824 14.75598
min -90.000000 572.000000 228.000000 95.000000 95.000000 513.000000 252.000000 252.000000 1119.000000 434.000000 0.000000 -90.000000 0.000000 0.00000
25% -16.000000 599.000000 287.000000 116.000000 116.000000 535.000000 269.000000 269.000000 1140.000000 495.000000 0.000000 -16.000000 3.000000 8.00000
50% 0.000000 606.000000 298.000000 120.000000 120.000000 539.000000 272.000000 272.000000 1144.000000 512.000000 0.000000 0.000000 5.000000 17.00000
75% 17.000000 612.000000 308.000000 125.000000 125.000000 543.000000 276.000000 275.000000 1149.000000 528.000000 0.000000 17.000000 8.000000 29.00000
max 78.000000 640.000000 353.000000 160.000000 160.000000 561.000000 292.000000 288.000000 1184.000000 602.000000 6.000000 78.000000 22.000000 90.00000

In [108]:
# Add |DI_half| so it can be compared side by side with |Delta|.
info_4096["DI_abs"] = info_4096["DI_half"].abs()
info_4096.loc[:, ["Delta", "DI_half", "AbsDelta", "DI_abs", "nD"]].describe()


Out[108]:
Delta DI_half AbsDelta DI_abs nD
count 10000.000000 10000.000000 10000.000000 10000.000000 10000.000000
mean 0.729200 0.734400 49.750600 49.876800 481.950900
std 61.963506 62.062103 36.941008 36.936433 14.944511
min -209.000000 -210.000000 0.000000 0.000000 423.000000
25% -42.000000 -42.000000 20.000000 20.000000 472.000000
50% 1.000000 1.000000 42.000000 43.000000 482.000000
75% 43.000000 43.000000 73.000000 73.000000 492.000000
max 209.000000 209.000000 209.000000 210.000000 542.000000

In [ ]: