Labeling



In [2]:
import pandas as pd
import numpy as np
import csv
import gc
import os

In [14]:
# Make sure the folder that receives the labeled target files exists.
# exist_ok=True replaces the separate os.path.exists() check: it is a no-op
# when the directory is already there, has no check-then-create race, and
# raises FileExistsError if a regular file is squatting on the path instead
# of silently carrying on.
directory = '../GeneInteractionsBN_Datasets/Labeled'
os.makedirs(directory, exist_ok=True)

In [26]:
# Turn the 'Pvalue' column of every raw CSV into a three-way 'Target' label
# and write one '<name>_Target.csv' per input file:
#     Pvalue <= 0.01              ->  1
#     Pvalue >= 0.5               ->  0
#     anything else (incl. NaN)   -> -1
# For each file the number of rows labeled 0 or 1 is printed, followed by the
# grand total over all files.
directory = '../GeneInteractionsBN_Datasets/DataGeneCausality/Raw/'
labeled_dir = '../GeneInteractionsBN_Datasets/Labeled'
# Create the output folder here as well so this cell works when run on its own.
os.makedirs(labeled_dir, exist_ok=True)
total = 0

for filename in os.listdir(directory):
    # Only CSV files carry a 'Pvalue' column; anything else (the original
    # also let '.py' files through) would fail inside the loop body.
    if not filename.endswith('.csv'):
        continue

    raw_path = os.path.join(directory, filename)
    print(raw_path)

    # Bound to `raw`, not `csv`, so the imported csv module is not shadowed.
    raw = pd.read_csv(filepath_or_buffer=raw_path, sep=';')
    pvalues = raw['Pvalue'].to_numpy()

    # np.select takes the first matching condition, mirroring the original
    # "1 if x<=0.01 else (0 if x>=0.5 else -1)" chain. NaN fails both
    # comparisons and therefore falls through to the default of -1.
    target = np.select([pvalues <= 0.01, pvalues >= 0.5], [1, 0], default=-1)

    # splitext drops only the final extension, so names containing extra dots
    # keep their full stem and cannot collide in the output folder.
    stem = os.path.splitext(filename)[0]
    pd.DataFrame({'Target': target}).to_csv(
        os.path.join(labeled_dir, stem + '_Target.csv'),
        index=True,
        index_label='Index',
    )

    # Labels are restricted to {1, 0, -1}, so "not -1" counts the 1s plus 0s.
    pseudo_total = int(np.count_nonzero(target != -1))
    total += pseudo_total
    print(pseudo_total)

    # Release the per-file data before the next file is loaded.
    del raw, pvalues, target
    gc.collect()

print(total)


../GeneInteractionsBN_Datasets/DataGeneCausality/Raw/LEF1.csv
68070
../GeneInteractionsBN_Datasets/DataGeneCausality/Raw/MYC.csv
64002
../GeneInteractionsBN_Datasets/DataGeneCausality/Raw/CTNNB1.csv
82038
../GeneInteractionsBN_Datasets/DataGeneCausality/Raw/Erk.csv
140730
../GeneInteractionsBN_Datasets/DataGeneCausality/Raw/IRF4.csv
96654
../GeneInteractionsBN_Datasets/DataGeneCausality/Raw/Ikk2.csv
108984
../GeneInteractionsBN_Datasets/DataGeneCausality/Raw/Jnk.csv
213102
773580

In [ ]:


In [ ]: