DATA PROCESSING FOR NEW DATA


In [41]:
import os
from os.path import join
def list_files_recursive(top):
    """Return the full path of every file found under ``top``.

    Sub-directories and file names are visited in sorted order so the result
    does not depend on the order in which the operating system lists entries.
    A ``top`` that does not exist yields an empty list, because ``os.walk``
    ignores directory-scan errors by default.
    """
    found = []
    for root, dirs, files in os.walk(top):
        # Sorting ``dirs`` in place steers the (top-down) traversal order.
        dirs.sort()
        found.extend(join(root, name) for name in sorted(files))
    return found


dest = "F:\\data\\newData"
AllFile = list_files_recursive(dest)
# Print once, after the walk has finished; printing inside the walk loop
# would repeat the growing list once per visited directory.
print(AllFile)


['F:\\data\\newData\\48378xTNTE-1.txt', 'F:\\data\\newData\\48378xTNTE-10.txt', 'F:\\data\\newData\\48378xTNTE-2.txt', 'F:\\data\\newData\\48378xTNTE-3.txt', 'F:\\data\\newData\\48378xTNTE-4.txt', 'F:\\data\\newData\\48378xTNTE-5.txt', 'F:\\data\\newData\\48378xTNTE-6.txt', 'F:\\data\\newData\\48378xTNTE-7.txt', 'F:\\data\\newData\\48378xTNTE-8.txt', 'F:\\data\\newData\\48378xTNTE-9.txt']

In [42]:
# Legacy split, kept for reference: paths whose length was exactly 26
# characters went to `firstData`, all others to `secondData`.
# firstData = []
# for i in AllFile:
#     if(len(i)==26):
#         firstData.append(i)
#     else:
#         secondData.append(i)

# Currently every discovered file goes into `secondData`
# (a new list holding the same path strings as AllFile).
secondData = list(AllFile)

In [43]:
# firstData

In [44]:
# Show the list of file paths collected in `secondData`.
secondData


Out[44]:
['F:\\data\\newData\\48378xTNTE-1.txt',
 'F:\\data\\newData\\48378xTNTE-10.txt',
 'F:\\data\\newData\\48378xTNTE-2.txt',
 'F:\\data\\newData\\48378xTNTE-3.txt',
 'F:\\data\\newData\\48378xTNTE-4.txt',
 'F:\\data\\newData\\48378xTNTE-5.txt',
 'F:\\data\\newData\\48378xTNTE-6.txt',
 'F:\\data\\newData\\48378xTNTE-7.txt',
 'F:\\data\\newData\\48378xTNTE-8.txt',
 'F:\\data\\newData\\48378xTNTE-9.txt']

In [45]:
# def read_dta(dta):
#     """read the dta file and return numerical array"""
#     with open(dta) as f:
#         newData = []
#         data  = f.readlines()
#         for i in data[6:]:
#               newData.append(float(i[0:6]))
#         return newData

In [46]:
# datatest = read_dta(secondData[1])

In [47]:
# def getPIValue(data):
#     PIValue = []
#     for j in range(9):
#         upT = 0
#         downT = 0
#         for i in range(0+j*1200,1200+j*1200):
#             if (data[i]>1488 and data[i]<2000) or (data[i]>2516 or data[i] <976):
#                 upT+=1
#             else:
#                 downT+=1
#         print(upT)
#         print(downT)
#         print(upT+downT)
#         PIValue.append((upT-downT)/(upT+downT))
#     return PIValue

In [48]:
# firstDataPIValue = []
# for i in firstData:
#     data = read_dta(i)
#     PI = getPIValue(data)
#     firstDataPIValue.append(PI)
# print(firstDataPIValue)

In [49]:
# for i in range(9):
#     firstDataPIValue[4][i] = -firstDataPIValue[4][i]
    
# for i in firstDataPIValue:
#     print(i)

In [51]:
PI_LINE_INDEX = 5   # the values are read from the 6th line of each file
PI_PREFIX_LEN = 13  # characters skipped at the start of that line
PI_SUFFIX_LEN = 2   # characters dropped from its end
                    # (presumably a closing character plus the newline -- confirm)


def read_pi_values(path):
    """Parse the comma-separated numbers stored on line 6 of ``path``.

    The line is stripped of its first ``PI_PREFIX_LEN`` and last
    ``PI_SUFFIX_LEN`` characters, split on commas, and each field is
    converted with ``float``.

    Raises:
        IndexError: if the file has fewer than 6 lines.
        ValueError: if any field is not a valid float.
    """
    with open(path) as f:
        pi_line = f.readlines()[PI_LINE_INDEX]
    payload = pi_line[PI_PREFIX_LEN:-PI_SUFFIX_LEN]
    return [float(field) for field in payload.split(",")]


# One list of floats per input file, in the same order as `secondData`.
secondPIValue = [read_pi_values(path) for path in secondData]

print(secondPIValue)


[[0.16, -0.2, 0.69, 0.84, 0.19, 0.6, 0.71, 0.07, -0.02], [0.04, 0.18, 0.32, 0.12, 0.14, 0.12, 0.13, 0.06, 0.12], [0.13, -0.07, 0.5, 0.29, -0.09, 0.09, 0.77, 0.46, 0.09], [-0.24, 0.0, 0.07, 0.53, -0.19, 0.6, -0.08, 0.11, 0.39], [0.04, -0.07, 0.7, 0.63, -0.36, 0.68, 0.4, 0.17, 0.01], [0.0, -0.12, -0.08, 0.26, 0.09, -0.24, 0.01, 0.03, -0.37], [-0.23, -0.25, 0.03, 0.63, 0.01, 0.32, 0.22, 0.19, -0.37], [-0.05, -0.2, 0.55, 0.55, 0.37, 0.65, 0.14, -0.2, -0.12], [0.2, -0.11, 0.17, -0.25, 0.15, -0.2, 0.1, 0.14, 0.17], [-0.3, -0.17, 0.32, 0.15, -0.16, 0.21, 0.05, 0.27, 0.22]]

In [ ]:


In [12]:
# firstDataPIValue


Out[12]:
[]

In [53]:
# New outer list that shares the per-file inner lists with `secondPIValue`.
firstDataPIValue = list(secondPIValue)

In [54]:
# Inspect the copied table: one inner list of values per input file.
firstDataPIValue


Out[54]:
[[0.16, -0.2, 0.69, 0.84, 0.19, 0.6, 0.71, 0.07, -0.02],
 [0.04, 0.18, 0.32, 0.12, 0.14, 0.12, 0.13, 0.06, 0.12],
 [0.13, -0.07, 0.5, 0.29, -0.09, 0.09, 0.77, 0.46, 0.09],
 [-0.24, 0.0, 0.07, 0.53, -0.19, 0.6, -0.08, 0.11, 0.39],
 [0.04, -0.07, 0.7, 0.63, -0.36, 0.68, 0.4, 0.17, 0.01],
 [0.0, -0.12, -0.08, 0.26, 0.09, -0.24, 0.01, 0.03, -0.37],
 [-0.23, -0.25, 0.03, 0.63, 0.01, 0.32, 0.22, 0.19, -0.37],
 [-0.05, -0.2, 0.55, 0.55, 0.37, 0.65, 0.14, -0.2, -0.12],
 [0.2, -0.11, 0.17, -0.25, 0.15, -0.2, 0.1, 0.14, 0.17],
 [-0.3, -0.17, 0.32, 0.15, -0.16, 0.21, 0.05, 0.27, 0.22]]

In [56]:
# Print one inner list per line, which is easier to scan than the nested repr.
for pi_row in firstDataPIValue:
    print(pi_row)


[0.16, -0.2, 0.69, 0.84, 0.19, 0.6, 0.71, 0.07, -0.02]
[0.04, 0.18, 0.32, 0.12, 0.14, 0.12, 0.13, 0.06, 0.12]
[0.13, -0.07, 0.5, 0.29, -0.09, 0.09, 0.77, 0.46, 0.09]
[-0.24, 0.0, 0.07, 0.53, -0.19, 0.6, -0.08, 0.11, 0.39]
[0.04, -0.07, 0.7, 0.63, -0.36, 0.68, 0.4, 0.17, 0.01]
[0.0, -0.12, -0.08, 0.26, 0.09, -0.24, 0.01, 0.03, -0.37]
[-0.23, -0.25, 0.03, 0.63, 0.01, 0.32, 0.22, 0.19, -0.37]
[-0.05, -0.2, 0.55, 0.55, 0.37, 0.65, 0.14, -0.2, -0.12]
[0.2, -0.11, 0.17, -0.25, 0.15, -0.2, 0.1, 0.14, 0.17]
[-0.3, -0.17, 0.32, 0.15, -0.16, 0.21, 0.05, 0.27, 0.22]

In [17]:
# for i in range(9):
#     firstDataPIValue[18][i] = -firstDataPIValue[18][i]

In [ ]:


In [57]:
import pandas as pd

# Key each file's list by its position so that every file becomes one
# DataFrame column and every list position becomes one row.
data = dict(enumerate(firstDataPIValue))
pdData = pd.DataFrame(data)
pdData


Out[57]:
0 1 2 3 4 5 6 7 8 9
0 0.16 0.04 0.13 -0.24 0.04 0.00 -0.23 -0.05 0.20 -0.30
1 -0.20 0.18 -0.07 0.00 -0.07 -0.12 -0.25 -0.20 -0.11 -0.17
2 0.69 0.32 0.50 0.07 0.70 -0.08 0.03 0.55 0.17 0.32
3 0.84 0.12 0.29 0.53 0.63 0.26 0.63 0.55 -0.25 0.15
4 0.19 0.14 -0.09 -0.19 -0.36 0.09 0.01 0.37 0.15 -0.16
5 0.60 0.12 0.09 0.60 0.68 -0.24 0.32 0.65 -0.20 0.21
6 0.71 0.13 0.77 -0.08 0.40 0.01 0.22 0.14 0.10 0.05
7 0.07 0.06 0.46 0.11 0.17 0.03 0.19 -0.20 0.14 0.27
8 -0.02 0.12 0.09 0.39 0.01 -0.37 -0.37 -0.12 0.17 0.22

In [58]:
# After transposing, each column is one list position and each row one file,
# so describe() summarises every position across the files.
s = pdData.transpose().describe()

In [59]:
# Display the summary statistics table held in `s`.
s


Out[59]:
0 1 2 3 4 5 6 7 8
count 10.000000 10.000000 10.000000 10.000000 10.00000 10.000000 10.000000 10.000000 10.000000
mean -0.025000 -0.101000 0.327000 0.375000 0.01500 0.283000 0.245000 0.130000 0.012000
std 0.177153 0.123868 0.277931 0.321049 0.21614 0.344901 0.290718 0.170489 0.244758
min -0.300000 -0.250000 -0.080000 -0.250000 -0.36000 -0.240000 -0.080000 -0.200000 -0.370000
25% -0.185000 -0.192500 0.095000 0.177500 -0.14250 0.097500 0.062500 0.062500 -0.095000
50% 0.020000 -0.115000 0.320000 0.410000 0.05000 0.265000 0.135000 0.125000 0.050000
75% 0.107500 -0.070000 0.537500 0.610000 0.14750 0.600000 0.355000 0.185000 0.157500
max 0.200000 0.180000 0.700000 0.840000 0.37000 0.680000 0.770000 0.460000 0.390000

In [60]:
import matplotlib.pyplot as plt
# numpy is imported here because this cell uses np.sqrt and runs before the
# later cell that imports numpy; without it a fresh kernel raises NameError.
import numpy as np

# One tick label per bar; bars are coloured by their label.
name_list = ['test','test','tr','tr','test','tr','tr','test','test']
label_colors = {'test': 'c', 'tr': 'seagreen'}
bar_colors = [label_colors[label] for label in name_list]

num_list = s.loc["mean"]
# Standard error of the mean, using each column's own sample count rather
# than the count of the first column for all of them.
yerr = s.loc["std"] / np.sqrt(s.loc["count"])
# Shared x positions so bars and error bars always line up.
positions = range(len(num_list))

fig, ax = plt.subplots(figsize=(10, 8), dpi=80)
ax.bar(positions, num_list, tick_label=name_list, color=bar_colors)
ax.set_xlabel("Sequence")
ax.set_ylabel("Performance Index")
ax.set_title("PI Figure")
ax.set_ylim(-0.6, 0.8)
# ls='none' draws only the error bars, without a line joining the means.
ax.errorbar(positions, num_list, yerr=yerr, ls='none', fillstyle='none',
            ms=9, mew=1.3, color='r')
ax.grid(color="grey", linewidth=0.1)
plt.show()



In [61]:
import numpy as np

# Stand-alone demo of matplotlib error-bar limit markers on three shifted
# copies of the same sine segment.
fig = plt.figure(0)
x = np.arange(10.0)
sine_segment = np.sin(x / 20.0 * np.pi)

# Plain symmetric error bars.
plt.errorbar(x, sine_segment, yerr=0.1)

# Same curve shifted up by 1, every point flagged as an upper limit.
plt.errorbar(x, sine_segment + 1, yerr=0.1, uplims=True)

# Shifted up by 2, alternating upper and lower limit flags point by point.
y = sine_segment + 2
upperlimits = np.tile([1, 0], 5)
lowerlimits = np.tile([0, 1], 5)
plt.errorbar(x, y, yerr=0.1, uplims=upperlimits, lolims=lowerlimits)

plt.xlim(-1, 10)
plt.show()