Labeling-checkpoint



In [1]:
import pandas as pd
import numpy as np
import csv
import gc
import os

In [2]:
# Make sure the output directory for the labeled target files exists.
# exist_ok=True makes this safe to re-run and avoids the race between a
# separate existence check and the creation call.
directory = '../GeneInteractionsBN Datasets/Labeled'
os.makedirs(directory, exist_ok=True)

In [6]:
# Label every raw p-value table and write one `<name>_Target.csv` per input.
directory = '../GeneInteractionsBN Datasets/DataGeneCausality/Raw/'
labeled_directory = '../GeneInteractionsBN Datasets/Labeled'


def label_pvalue(pvalue):
    """Map a single p-value to its target label.

    Returns 1 when ``pvalue <= 0.01``, 0 when ``pvalue >= 0.5`` and NaN for
    anything in between. A NaN input fails both comparisons and therefore
    stays NaN.
    """
    if pvalue <= 0.01:
        return 1
    if pvalue >= 0.5:
        return 0
    return np.nan


for filename in os.listdir(directory):
    # Only CSV inputs can be parsed as ';'-separated tables with a 'Pvalue'
    # column; anything else in the directory is skipped.
    if not filename.endswith(".csv"):
        continue

    source_path = os.path.join(directory, filename)
    print(source_path)

    # Named `frame` rather than `csv` so the imported `csv` module is not
    # shadowed (and later deleted) by the loaded table.
    frame = pd.read_csv(filepath_or_buffer=source_path, sep=';')
    frame['Target'] = frame['Pvalue'].map(label_pvalue)
    target = frame['Target']

    # splitext strips only the final extension, so inputs whose names contain
    # extra dots keep distinct output names.
    stem = os.path.splitext(filename)[0]
    target.to_csv(os.path.join(labeled_directory, stem + '_Target.csv'), index=True)

    # Summary per file: positives, negatives, unlabeled (NaN), total rows.
    print((target == 1).sum())
    print((target == 0).sum())
    print(target.isna().sum())
    print(len(target))

    # Drop the references before loading the next file to keep peak memory down.
    del frame, target
    gc.collect()


../GeneInteractionsBN Datasets/DataGeneCausality/Raw/LEF1.csv
132
67938
32076
100146
../GeneInteractionsBN Datasets/DataGeneCausality/Raw/MYC.csv
5598
58404
36144
100146
../GeneInteractionsBN Datasets/DataGeneCausality/Raw/CTNNB1.csv
60
81978
18108
100146
../GeneInteractionsBN Datasets/DataGeneCausality/Raw/Erk.csv
264
140466
59562
200292
../GeneInteractionsBN Datasets/DataGeneCausality/Raw/IRF4.csv
66
96588
3492
100146
../GeneInteractionsBN Datasets/DataGeneCausality/Raw/Ikk2.csv
4548
104436
91308
200292
../GeneInteractionsBN Datasets/DataGeneCausality/Raw/Jnk.csv
7110
205992
87336
300438

In [ ]:
# For opening one of these target files.
# The labeling cell writes its outputs into the Labeled directory, so the file
# is read from there rather than from the notebook's working directory.
# NOTE(review): header=None makes pandas treat the first line as data. Whether
# the target files start with a header row depends on the pandas version that
# wrote them (Series.to_csv writes one by default in recent versions) - confirm.
output = pd.read_csv(os.path.join('../GeneInteractionsBN Datasets/Labeled', 'CTNNB1_Target.csv'), header=None)

In [ ]: