In [28]:
"""
Data parsers for files generated from different types of assays:
- ELISA (Enzyme-linked immunosorbent assay):
    -> data files are generated by a microplate reader from 96-well plates
- Softmax:
    -> another microplate reader for ELISA assays that allows for more extensive 
       data analysis for 96 and 384-well plates
- FACS (Fluorescence-activated cell sorting):
    -> data files are generated by different FACS machines and preprocessed with FlowJo. 
    -> the output data files are parsed
- Biacore/Biacore4000
    -> a molecular interaction analysis system for rapid screening and characterization.
    -> the output plate data is parsed
- MSD:
    -> Electrochemiluminescence detection system
    -> the output plate data is parsed

Some of the machine-generated data is sometimes preprocessed with a specific software, i.e. FlowJo for FACS. 
"""

from StringIO import StringIO
import re
import os
import csv
import pprint

# init pprint
pp = pprint.PrettyPrinter(indent=2)

In [29]:
def straight_elisa_parse_file(data_file):
    """ Parsing Straight ELISA files. Used for a single and multiple plates in one file.
    Args:
        data_file: file to Parse
    Returns:
        plates: a dictionary of plate, position, values
    """
    data_file.seek(0)
    data_file = StringIO(data_file.read().replace('\r', '\n'))
    plate_pattern = re.compile(r"^Plate:\t([^\t]+)\t")
    values_pattern = re.compile(r"^\t[^\t()A-Za-z]*((\t\d+(\.\d+)?){%d})\s*$" % 12)
    plates = {}
    for line in data_file:
        plate_match = plate_pattern.match(line)
        if plate_match:
            # Reset count
            last_position = 1
            plate_name = plate_match.group(1).strip()
            plates[plate_name] = []
        else:
            values_match = values_pattern.match(line)
            if values_match:
                values = values_match.group(1).strip().split("\t")
                for position, value in zip( range(last_position, last_position + len(values)), values):
                    plates[plate_name].append(float(value))
                last_position += len(values)
    return plates

# test a single 96 ELISA plate
#file_name = 'Data/1_straight_elisa.txt'
# test multiple 96 ELISA plates | NOTE: make sure to test with an original file (not splitted) 
file_name = 'Data/9_splitted_384.txt'
with open(file_name) as data_file: 
    plates = straight_elisa_parse_file(data_file)
    for antigen, values in plates.iteritems():
        print antigen, "=>", values
        print "==="*30


A3792.Ag25-01 => [0.0468, 0.0438, 0.0449, 0.0439, 0.0439, 0.0467, 0.0439, 0.0433, 0.0446, 0.0445, 0.0441, 0.0457, 0.0449, 0.044, 0.0455, 0.0442, 0.0522, 0.045, 0.0442, 0.0441, 0.0539, 0.0459, 0.0463, 0.0446, 0.0452, 0.0459, 0.045, 0.0464, 0.0536, 0.0452, 0.0445, 0.0447, 0.0532, 0.0455, 0.0446, 0.0458, 0.0454, 0.0443, 0.0444, 0.0442, 0.0442, 0.0448, 0.0442, 0.0441, 0.0536, 0.0442, 0.0451, 0.0463, 0.0437, 0.0432, 0.0449, 0.0432, 0.0449, 0.0426, 0.0442, 0.0436, 0.054, 0.0437, 0.0446, 0.0444, 0.0472, 0.0477, 0.0443, 0.0439, 0.0461, 0.061, 0.0442, 0.0443, 0.0536, 0.0433, 0.0446, 0.047, 0.0464, 0.0454, 0.0449, 0.0441, 0.053, 0.0457, 0.0431, 0.0444, 0.0476, 0.0437, 0.0451, 0.0446, 0.0477, 0.0465, 0.0458, 0.0462, 0.0537, 0.0454, 0.0458, 0.0573, 0.0504, 0.0453, 0.0477, 0.0447]
==========================================================================================
A3792.Ag25-02 => [0.0439, 0.0448, 0.048, 0.0448, 0.0441, 0.0457, 0.0531, 0.0445, 0.0461, 0.0446, 0.0447, 0.0454, 0.0444, 0.0468, 0.0451, 0.0532, 0.0455, 0.0449, 0.0456, 0.0466, 0.0455, 0.0446, 0.0447, 0.0453, 0.0486, 0.0457, 0.0451, 0.0514, 0.0454, 0.0446, 0.0445, 0.0444, 0.0513, 0.0444, 0.0456, 0.0538, 0.0447, 0.0466, 0.0442, 0.0528, 0.0446, 0.0447, 0.0438, 0.0475, 0.0447, 0.0446, 0.0627, 0.0539, 0.0457, 0.0432, 0.0436, 0.05, 0.0437, 0.0441, 0.0444, 0.0461, 0.0455, 0.049, 0.0444, 0.0451, 0.0442, 0.0444, 0.0456, 0.0485, 0.0449, 0.0452, 0.0445, 0.0415, 0.0448, 0.045, 0.0449, 0.0463, 0.046, 0.0449, 0.044, 0.0516, 0.0445, 0.0443, 0.0435, 0.0443, 0.0439, 0.0437, 0.0437, 0.0444, 0.0475, 0.0453, 0.0456, 0.0466, 0.0451, 0.047, 0.0453, 0.0448, 0.0453, 0.0478, 0.0437, 0.0519]
==========================================================================================
A3792.Ag-79-01 => [0.1133, 0.116, 0.101, 0.1139, 0.1051, 0.1179, 0.1309, 0.1234, 0.1148, 0.121, 0.1157, 0.1172, 0.1273, 0.1197, 0.1164, 0.1178, 0.1102, 0.0914, 0.117, 0.1223, 0.1287, 0.1292, 0.1287, 0.1244, 0.1293, 0.108, 0.1143, 0.1014, 0.0968, 0.1014, 0.1115, 0.112, 0.1049, 0.1096, 0.1222, 0.1133, 0.1043, 0.1086, 0.1107, 0.111, 0.1084, 0.0913, 0.1067, 0.1001, 0.1015, 0.0965, 0.0996, 0.099, 0.1036, 0.085, 0.0801, 0.089, 0.0854, 0.0991, 0.1041, 0.102, 0.1047, 0.1046, 0.1078, 0.1021, 0.1116, 0.1244, 0.11, 0.1038, 0.1043, 0.109, 0.1186, 0.1116, 0.1334, 0.1106, 0.1175, 0.1182, 0.1293, 0.1218, 0.1283, 0.1232, 0.1182, 0.1116, 0.1261, 0.1213, 0.1225, 0.1252, 0.1273, 0.128, 0.1086, 0.1063, 0.118, 0.1134, 0.105, 0.1078, 0.1212, 0.1113, 0.1083, 0.1185, 0.1185, 2.402]
==========================================================================================
A3792.Ag-79-02 => [0.0917, 0.0959, 0.0869, 0.1029, 0.0977, 0.103, 0.0987, 0.0937, 0.0944, 0.0989, 0.098, 0.0973, 0.0853, 0.0845, 0.0814, 0.0848, 0.0775, 0.0776, 0.0837, 0.0883, 0.0818, 0.0847, 0.0825, 0.0802, 0.1162, 0.1067, 0.1038, 0.1133, 0.0958, 0.0926, 0.0958, 0.1046, 0.1097, 0.1047, 0.1035, 0.1037, 0.1104, 0.108, 0.1102, 0.1168, 0.0912, 0.1057, 0.1088, 0.1032, 0.1149, 0.113, 0.1127, 0.1116, 0.107, 0.1007, 0.1068, 0.1096, 0.0871, 0.105, 0.102, 0.1045, 0.1077, 0.1143, 0.1099, 0.1087, 0.096, 0.0928, 0.0996, 0.0992, 0.0967, 0.0924, 0.0971, 0.1099, 0.1023, 0.1023, 0.1043, 0.1048, 0.0838, 0.0833, 0.0869, 0.0901, 0.0792, 0.0827, 0.0849, 0.0927, 0.0854, 0.0972, 0.089, 0.0918, 0.1004, 0.0974, 0.0993, 0.1088, 0.0932, 0.0975, 0.0971, 0.0955, 0.0976, 0.0976, 0.1029, 2.1608]
==========================================================================================
A3793.Ag25-01 => [0.044, 0.0436, 0.0445, 0.044, 0.052, 0.0434, 0.0449, 0.0445, 0.0523, 0.044, 0.0446, 0.044, 0.045, 0.0469, 0.0458, 0.0445, 0.0518, 0.0451, 0.0437, 0.0445, 0.0532, 0.0445, 0.0452, 0.052, 0.0452, 0.0529, 0.0445, 0.0456, 0.0472, 0.0468, 0.0446, 0.0424, 0.0526, 0.0444, 0.0447, 0.0445, 0.0451, 0.0447, 0.044, 0.044, 0.0446, 0.0439, 0.0443, 0.0441, 0.054, 0.0446, 0.0443, 0.0539, 0.0449, 0.0434, 0.0432, 0.0427, 0.0493, 0.046, 0.0449, 0.0433, 0.0499, 0.0438, 0.0448, 0.0441, 0.0458, 0.0455, 0.0437, 0.0436, 0.0507, 0.0447, 0.045, 0.0452, 0.0542, 0.0448, 0.0457, 0.046, 0.0475, 0.0468, 0.0461, 0.0451, 0.0522, 0.0443, 0.041, 0.0452, 0.0451, 0.0442, 0.045, 0.045, 0.0523, 0.0468, 0.0469, 0.0478, 0.055, 0.0455, 0.0496, 0.0451, 0.0516, 0.0453, 0.0445, 0.0432]
==========================================================================================
A3793.Ag25-02 => [0.0437, 0.0432, 0.044, 0.0497, 0.044, 0.0434, 0.0456, 0.0441, 0.044, 0.0456, 0.0451, 0.0439, 0.0445, 0.0446, 0.0449, 0.0541, 0.044, 0.0448, 0.0434, 0.0449, 0.0534, 0.0445, 0.0449, 0.0458, 0.0488, 0.0454, 0.0446, 0.0514, 0.0443, 0.0444, 0.0439, 0.0439, 0.0467, 0.0453, 0.0447, 0.0481, 0.0452, 0.0451, 0.0442, 0.0527, 0.044, 0.0444, 0.044, 0.0492, 0.048, 0.0443, 0.045, 0.0542, 0.0447, 0.0435, 0.0434, 0.0518, 0.0434, 0.0437, 0.0441, 0.043, 0.0462, 0.0437, 0.0438, 0.0445, 0.0448, 0.044, 0.0434, 0.0467, 0.0446, 0.0447, 0.043, 0.0449, 0.044, 0.0457, 0.0452, 0.0457, 0.0463, 0.046, 0.0454, 0.0507, 0.0448, 0.0444, 0.0441, 0.045, 0.0445, 0.0444, 0.047, 0.046, 0.048, 0.0485, 0.0467, 0.0461, 0.0457, 0.0488, 0.0461, 0.0449, 0.0444, 0.0447, 0.0437, 0.0492]
==========================================================================================
A3793.Ag79-01 => [0.1149, 0.116, 0.1034, 0.1083, 0.1135, 0.145, 0.123, 0.1112, 0.1231, 0.1124, 0.1174, 0.1146, 0.1147, 0.1097, 0.1069, 0.1054, 0.1057, 0.1015, 0.1244, 0.1197, 0.124, 0.115, 0.1129, 0.1391, 0.1053, 0.1078, 0.0997, 0.102, 0.093, 0.0983, 0.1084, 0.1066, 0.1029, 0.1037, 0.1017, 0.1089, 0.127, 0.1127, 0.1146, 0.12, 0.1209, 0.1116, 0.1292, 0.1184, 0.1192, 0.1203, 0.1178, 0.117, 0.0803, 0.0777, 0.0818, 0.0789, 0.0772, 0.0814, 0.0835, 0.0821, 0.0807, 0.0844, 0.0836, 0.0876, 0.1193, 0.1147, 0.1198, 0.1079, 0.1159, 0.1134, 0.1234, 0.1158, 0.1178, 0.1154, 0.1265, 0.1179, 0.0887, 0.0812, 0.0827, 0.0885, 0.086, 0.0847, 0.0969, 0.0884, 0.0897, 0.0887, 0.0898, 0.1119, 0.0844, 0.0856, 0.087, 0.0885, 0.0807, 0.0939, 0.0945, 0.0845, 0.0819, 0.0908, 0.0906, 1.937]
==========================================================================================
A3793.Ag79-02 => [0.0909, 0.0919, 0.0873, 0.0891, 0.0873, 0.0976, 0.1008, 0.0938, 0.0963, 0.0886, 0.0972, 0.0944, 0.0981, 0.1, 0.0981, 0.1006, 0.1062, 0.0918, 0.1024, 0.1022, 0.1017, 0.1022, 0.0992, 0.099, 0.1067, 0.1125, 0.1097, 0.1192, 0.099, 0.1049, 0.1119, 0.1089, 0.1179, 0.1096, 0.1168, 0.1055, 0.0866, 0.0876, 0.0813, 0.0968, 0.0879, 0.0845, 0.0875, 0.089, 0.0879, 0.0846, 0.0867, 0.0938, 0.1203, 0.1189, 0.1177, 0.1236, 0.1164, 0.1215, 0.101, 0.1278, 0.1248, 0.1253, 0.1262, 0.1243, 0.098, 0.0887, 0.092, 0.0949, 0.0897, 0.0912, 0.0917, 0.1026, 0.0988, 0.0946, 0.0976, 0.0967, 0.1213, 0.1156, 0.1186, 0.1247, 0.1182, 0.112, 0.1233, 0.1264, 0.1168, 0.1207, 0.1223, 0.1221, 0.1274, 0.1221, 0.1273, 0.133, 0.1163, 0.1271, 0.1231, 0.1298, 0.1193, 0.1249, 0.1297, 2.3882]
==========================================================================================

In [30]:
def softmax_parse_file(data_file):
    """ Parsing 384 ELISA files. Used for a single and multiple plates in one file.
    Args:
        data_file: file to Parse
    Returns:
        plates: a dictionary of plate, position, values
    """
    data_file = StringIO(data_file.read().replace('\r', '\n'))
    cols = 24
    plate_pattern = re.compile(r"^Plate:\t([^\t]+)\t")
    values_pattern = re.compile(r"^\t[^\t()A-Za-z]*((\t\d+(\.\d+)?){%d})\s*$" % cols)
    plate_names = []
    col_headers = '123456789101112131415161718192021222324' # 1-24 numeric col headers
    plates = {}
    for line in data_file:
        plate_match = plate_pattern.match(line)
        value_match = values_pattern.match(line)
        if plate_match:
            last_position = 1
            plate_name = plate_match.group(1).strip()
            plate_names.append(plate_name)
        elif value_match:
            values = value_match.group(1).strip().split("\t")
            current_plate_name = plate_names[-1]
            plates.setdefault(current_plate_name, [])
            for value in values:
                plates[current_plate_name].append(float(value))
    return plates

# test a single 384 ELISA plate
#file_name = 'Data/2_elisa_384.txt'
file_name = 'Data/2a_multiple_elisa_384.txt'
#file_name = 'Data/5_elisa_384_no_temp.txt'
with open(file_name) as data_file:
    plates = softmax_parse_file(data_file)
    for plate_name, values in plates.iteritems():
        print plate_name, "=>", values
        print "==="*30


PTH cross screen S3210-01 and 02 => [0.0548, 0.0511, 0.0546, 0.0551, 0.117, 0.0568, 0.1388, 0.0957, 0.3267, 0.2228, 0.1439, 0.0761, 0.2641, 0.13, 0.0924, 0.1247, 0.1051, 0.0799, 0.1259, 0.1028, 0.453, 0.0716, 0.1445, 0.0636, 0.0516, 0.0488, 0.0522, 0.0523, 0.0984, 0.0537, 0.1241, 0.0803, 0.2265, 0.2136, 0.1365, 0.0668, 0.2241, 0.1297, 0.0914, 0.0818, 0.1106, 0.0853, 0.1208, 0.088, 0.4499, 0.0682, 0.0994, 0.0715, 0.1326, 0.0526, 0.0857, 0.0521, 0.0638, 0.0511, 0.0686, 0.0535, 0.0647, 0.0538, 0.1989, 0.0538, 0.1713, 0.0594, 0.0886, 0.0598, 0.1646, 0.0603, 0.099, 0.0647, 0.0974, 0.0668, 0.2022, 0.0689, 0.1276, 0.052, 0.0883, 0.0518, 0.063, 0.0521, 0.0666, 0.0522, 0.0647, 0.0533, 0.2082, 0.0552, 0.1865, 0.0617, 0.0931, 0.0627, 0.1534, 0.0617, 0.1001, 0.063, 0.0948, 0.0657, 0.2291, 0.0712, 0.0636, 0.0508, 0.0617, 0.0525, 0.0571, 0.052, 0.0564, 0.0525, 0.0945, 0.0537, 0.3076, 0.0547, 0.0759, 0.0595, 0.0748, 0.0577, 0.098, 0.0633, 0.1012, 0.0655, 0.096, 0.0673, 0.0724, 0.0697, 0.0635, 0.0511, 0.0547, 0.0513, 0.0562, 0.0514, 0.0555, 0.0511, 0.0789, 0.0533, 0.4249, 0.06, 0.0756, 0.0609, 0.0758, 0.0571, 0.1073, 0.0634, 0.0935, 0.0653, 0.0943, 0.0659, 0.0734, 0.0695, 0.0569, 0.0517, 0.0791, 0.0523, 0.0568, 0.0505, 0.1491, 0.0525, 0.0699, 0.0522, 0.077, 0.0537, 0.5475, 0.0613, 0.0908, 0.0588, 0.2709, 0.0621, 0.1356, 0.0621, 0.0927, 0.0634, 0.2799, 0.0655, 0.0561, 0.0505, 0.0808, 0.0518, 0.0578, 0.0511, 0.1509, 0.0522, 0.0708, 0.0539, 0.0817, 0.0582, 0.4668, 0.0698, 0.0874, 0.0584, 0.2609, 0.0629, 0.1374, 0.0613, 0.0847, 0.0637, 0.307, 0.0683, 0.083, 0.0516, 0.0504, 0.0519, 0.0531, 0.0517, 0.0736, 0.0519, 0.0553, 0.0527, 0.0763, 0.0558, 0.134, 0.0572, 0.3625, 0.0552, 0.1052, 0.064, 0.0695, 0.0674, 0.073, 0.0654, 0.0986, 0.0661, 0.0844, 0.0494, 0.051, 0.0502, 0.0537, 0.0527, 0.0747, 0.053, 0.056, 0.0506, 0.0611, 0.0553, 0.1386, 0.0563, 0.3768, 0.0553, 0.1061, 0.0605, 0.0711, 0.0637, 0.0731, 0.0643, 0.0983, 0.0702, 0.0567, 0.0522, 0.0553, 0.0518, 0.0887, 0.0544, 0.0576, 0.0559, 0.2339, 0.0536, 0.1422, 0.0605, 0.065, 0.0597, 0.0674, 0.0649, 0.1275, 0.0633, 0.1083, 0.0689, 0.1423, 0.0663, 0.169, 0.0723, 0.0555, 0.0536, 0.0555, 0.0522, 0.0831, 0.0521, 0.057, 0.0528, 0.2429, 0.0551, 0.1296, 0.0577, 0.0632, 0.0587, 0.0668, 0.062, 0.113, 0.063, 0.1095, 0.0628, 0.1374, 0.0662, 0.1444, 0.0715, 0.0815, 0.055, 0.0551, 0.0534, 0.8665, 0.0512, 0.1493, 0.0546, 0.0707, 0.0568, 0.0801, 0.0574, 0.0747, 0.0602, 0.0747, 0.0592, 0.0645, 0.065, 0.0647, 0.0666, 0.0652, 0.0674, 0.0812, 0.0716, 0.0777, 0.0554, 0.0563, 0.0539, 0.8789, 0.058, 0.1444, 0.0578, 0.0712, 0.0565, 0.0807, 0.058, 0.0754, 0.0698, 0.0782, 0.0593, 0.0673, 0.0647, 0.0682, 0.0649, 0.0788, 0.0666, 0.0818, 0.066, 0.0564, 0.0522, 0.0777, 0.0517, 0.1293, 0.0542, 0.1226, 0.0589, 0.1842, 0.0601, 0.0902, 0.0613, 0.1497, 0.0667, 0.086, 0.0677, 0.0982, 0.0604, 0.1655, 0.0666, 0.0876, 0.0714, 0.1001, 0.1057, 0.0598, 0.0534, 0.0804, 0.0533, 0.149, 0.0558, 0.1509, 0.0618, 0.1851, 0.0592, 0.0844, 0.0748, 0.1712, 0.0637, 0.0914, 0.0632, 0.1033, 0.0665, 0.1804, 0.0723, 0.0797, 0.0703, 0.1054, 0.1252]
==========================================================================================
PTH plate 2 testing => [0.0548, 0.0511, 0.0546, 0.0551, 0.117, 0.0568, 0.1388, 0.0957, 0.3267, 0.2228, 0.1439, 0.0761, 0.2641, 0.13, 0.0924, 0.1247, 0.1051, 0.0799, 0.1259, 0.1028, 0.453, 0.0716, 0.1445, 0.0636, 0.0516, 0.0488, 0.0522, 0.0523, 0.0984, 0.0537, 0.1241, 0.0803, 0.2265, 0.2136, 0.1365, 0.0668, 0.2241, 0.1297, 0.0914, 0.0818, 0.1106, 0.0853, 0.1208, 0.088, 0.4499, 0.0682, 0.0994, 0.0715, 0.1326, 0.0526, 0.0857, 0.0521, 0.0638, 0.0511, 0.0686, 0.0535, 0.0647, 0.0538, 0.1989, 0.0538, 0.1713, 0.0594, 0.0886, 0.0598, 0.1646, 0.0603, 0.099, 0.0647, 0.0974, 0.0668, 0.2022, 0.0689, 0.1276, 0.052, 0.0883, 0.0518, 0.063, 0.0521, 0.0666, 0.0522, 0.0647, 0.0533, 0.2082, 0.0552, 0.1865, 0.0617, 0.0931, 0.0627, 0.1534, 0.0617, 0.1001, 0.063, 0.0948, 0.0657, 0.2291, 0.0712, 0.0636, 0.0508, 0.0617, 0.0525, 0.0571, 0.052, 0.0564, 0.0525, 0.0945, 0.0537, 0.3076, 0.0547, 0.0759, 0.0595, 0.0748, 0.0577, 0.098, 0.0633, 0.1012, 0.0655, 0.096, 0.0673, 0.0724, 0.0697, 0.0635, 0.0511, 0.0547, 0.0513, 0.0562, 0.0514, 0.0555, 0.0511, 0.0789, 0.0533, 0.4249, 0.06, 0.0756, 0.0609, 0.0758, 0.0571, 0.1073, 0.0634, 0.0935, 0.0653, 0.0943, 0.0659, 0.0734, 0.0695, 0.0569, 0.0517, 0.0791, 0.0523, 0.0568, 0.0505, 0.1491, 0.0525, 0.0699, 0.0522, 0.077, 0.0537, 0.5475, 0.0613, 0.0908, 0.0588, 0.2709, 0.0621, 0.1356, 0.0621, 0.0927, 0.0634, 0.2799, 0.0655, 0.0561, 0.0505, 0.0808, 0.0518, 0.0578, 0.0511, 0.1509, 0.0522, 0.0708, 0.0539, 0.0817, 0.0582, 0.4668, 0.0698, 0.0874, 0.0584, 0.2609, 0.0629, 0.1374, 0.0613, 0.0847, 0.0637, 0.307, 0.0683, 0.083, 0.0516, 0.0504, 0.0519, 0.0531, 0.0517, 0.0736, 0.0519, 0.0553, 0.0527, 0.0763, 0.0558, 0.134, 0.0572, 0.3625, 0.0552, 0.1052, 0.064, 0.0695, 0.0674, 0.073, 0.0654, 0.0986, 0.0661, 0.0844, 0.0494, 0.051, 0.0502, 0.0537, 0.0527, 0.0747, 0.053, 0.056, 0.0506, 0.0611, 0.0553, 0.1386, 0.0563, 0.3768, 0.0553, 0.1061, 0.0605, 0.0711, 0.0637, 0.0731, 0.0643, 0.0983, 0.0702, 0.0567, 0.0522, 0.0553, 0.0518, 0.0887, 0.0544, 0.0576, 0.0559, 0.2339, 0.0536, 0.1422, 0.0605, 0.065, 0.0597, 0.0674, 0.0649, 0.1275, 0.0633, 0.1083, 0.0689, 0.1423, 0.0663, 0.169, 0.0723, 0.0555, 0.0536, 0.0555, 0.0522, 0.0831, 0.0521, 0.057, 0.0528, 0.2429, 0.0551, 0.1296, 0.0577, 0.0632, 0.0587, 0.0668, 0.062, 0.113, 0.063, 0.1095, 0.0628, 0.1374, 0.0662, 0.1444, 0.0715, 0.0815, 0.055, 0.0551, 0.0534, 0.8665, 0.0512, 0.1493, 0.0546, 0.0707, 0.0568, 0.0801, 0.0574, 0.0747, 0.0602, 0.0747, 0.0592, 0.0645, 0.065, 0.0647, 0.0666, 0.0652, 0.0674, 0.0812, 0.0716, 0.0777, 0.0554, 0.0563, 0.0539, 0.8789, 0.058, 0.1444, 0.0578, 0.0712, 0.0565, 0.0807, 0.058, 0.0754, 0.0698, 0.0782, 0.0593, 0.0673, 0.0647, 0.0682, 0.0649, 0.0788, 0.0666, 0.0818, 0.066, 0.0564, 0.0522, 0.0777, 0.0517, 0.1293, 0.0542, 0.1226, 0.0589, 0.1842, 0.0601, 0.0902, 0.0613, 0.1497, 0.0667, 0.086, 0.0677, 0.0982, 0.0604, 0.1655, 0.0666, 0.0876, 0.0714, 0.1001, 0.1057, 0.0598, 0.0534, 0.0804, 0.0533, 0.149, 0.0558, 0.1509, 0.0618, 0.1851, 0.0592, 0.0844, 0.0748, 0.1712, 0.0637, 0.0914, 0.0632, 0.1033, 0.0665, 0.1804, 0.0723, 0.0797, 0.0703, 0.1054, 0.1252]
==========================================================================================

In [31]:
def get_matrix_pos(col, row):
    """ From col and row show position in the matrix. """
    return col + 24 * (row - 1)

def split_4_quadrants(data_file):
    """ Split 384 file into 4 plates represented by each quadrant: 
            UL (Upper Left) | UR (Upper Right)
            LL (Lower Left) | LR (Lower Right)
    Args:
        data_file: ELISA 384-well plate
    Returns:
        An ordered dictionary of quadrant_platename_1: {pos_1: value, ...},
                                 quadrant_platename_2: {pos_1: value, ...}
    """
    plates = softmax_parse_file(data_file)
    quadrants = {'UL': {'x': [1, 12], 'y': [1, 8]},
                 'UR': {'x': [13, 24], 'y': [1, 8]},
                 'LL': {'x': [1, 12], 'y': [9, 16]},
                 'LR': {'x': [13, 24], 'y': [9, 16]}
                 }
    quadrant_plates = {}
    for plate, values in plates.iteritems():
        for quadrant, coordinates in quadrants.iteritems():
            # plate name is: "quadrant_plate"
            plate_name = "{0}_{1}".format(quadrant, plate)
            # get the list of predefined coordinates on x and y axis
            cols_coordinates = coordinates['x']
            rows_coordinates = coordinates['y']
            quadrant_plates.setdefault(plate_name, [])
            # walk through coordinates, get the position on the matrix, and show the correspoinding value
            for row in range(rows_coordinates[0], rows_coordinates[1] + 1):
                for col in range(cols_coordinates[0], cols_coordinates[1] + 1):
                    pos = get_matrix_pos(col, row)
                    quadrant_plates[plate_name].append(values[pos-1])
    return quadrant_plates

# test a single 384 ELISA - splitting into 4 quadrants
file_name = 'Data/2_elisa_384.txt'
with open(file_name) as data_file: 
    plates = split_4_quadrants(data_file)
    for plate_name, values in plates.iteritems():
        print plate_name, "=>", values
        print "==="*30


UR_PTH cross screen S3210-01 and 02 => [0.2641, 0.13, 0.0924, 0.1247, 0.1051, 0.0799, 0.1259, 0.1028, 0.453, 0.0716, 0.1445, 0.0636, 0.2241, 0.1297, 0.0914, 0.0818, 0.1106, 0.0853, 0.1208, 0.088, 0.4499, 0.0682, 0.0994, 0.0715, 0.1713, 0.0594, 0.0886, 0.0598, 0.1646, 0.0603, 0.099, 0.0647, 0.0974, 0.0668, 0.2022, 0.0689, 0.1865, 0.0617, 0.0931, 0.0627, 0.1534, 0.0617, 0.1001, 0.063, 0.0948, 0.0657, 0.2291, 0.0712, 0.0759, 0.0595, 0.0748, 0.0577, 0.098, 0.0633, 0.1012, 0.0655, 0.096, 0.0673, 0.0724, 0.0697, 0.0756, 0.0609, 0.0758, 0.0571, 0.1073, 0.0634, 0.0935, 0.0653, 0.0943, 0.0659, 0.0734, 0.0695, 0.5475, 0.0613, 0.0908, 0.0588, 0.2709, 0.0621, 0.1356, 0.0621, 0.0927, 0.0634, 0.2799, 0.0655, 0.4668, 0.0698, 0.0874, 0.0584, 0.2609, 0.0629, 0.1374, 0.0613, 0.0847, 0.0637, 0.307, 0.0683]
==========================================================================================
LL_PTH cross screen S3210-01 and 02 => [0.083, 0.0516, 0.0504, 0.0519, 0.0531, 0.0517, 0.0736, 0.0519, 0.0553, 0.0527, 0.0763, 0.0558, 0.0844, 0.0494, 0.051, 0.0502, 0.0537, 0.0527, 0.0747, 0.053, 0.056, 0.0506, 0.0611, 0.0553, 0.0567, 0.0522, 0.0553, 0.0518, 0.0887, 0.0544, 0.0576, 0.0559, 0.2339, 0.0536, 0.1422, 0.0605, 0.0555, 0.0536, 0.0555, 0.0522, 0.0831, 0.0521, 0.057, 0.0528, 0.2429, 0.0551, 0.1296, 0.0577, 0.0815, 0.055, 0.0551, 0.0534, 0.8665, 0.0512, 0.1493, 0.0546, 0.0707, 0.0568, 0.0801, 0.0574, 0.0777, 0.0554, 0.0563, 0.0539, 0.8789, 0.058, 0.1444, 0.0578, 0.0712, 0.0565, 0.0807, 0.058, 0.0564, 0.0522, 0.0777, 0.0517, 0.1293, 0.0542, 0.1226, 0.0589, 0.1842, 0.0601, 0.0902, 0.0613, 0.0598, 0.0534, 0.0804, 0.0533, 0.149, 0.0558, 0.1509, 0.0618, 0.1851, 0.0592, 0.0844, 0.0748]
==========================================================================================
UL_PTH cross screen S3210-01 and 02 => [0.0548, 0.0511, 0.0546, 0.0551, 0.117, 0.0568, 0.1388, 0.0957, 0.3267, 0.2228, 0.1439, 0.0761, 0.0516, 0.0488, 0.0522, 0.0523, 0.0984, 0.0537, 0.1241, 0.0803, 0.2265, 0.2136, 0.1365, 0.0668, 0.1326, 0.0526, 0.0857, 0.0521, 0.0638, 0.0511, 0.0686, 0.0535, 0.0647, 0.0538, 0.1989, 0.0538, 0.1276, 0.052, 0.0883, 0.0518, 0.063, 0.0521, 0.0666, 0.0522, 0.0647, 0.0533, 0.2082, 0.0552, 0.0636, 0.0508, 0.0617, 0.0525, 0.0571, 0.052, 0.0564, 0.0525, 0.0945, 0.0537, 0.3076, 0.0547, 0.0635, 0.0511, 0.0547, 0.0513, 0.0562, 0.0514, 0.0555, 0.0511, 0.0789, 0.0533, 0.4249, 0.06, 0.0569, 0.0517, 0.0791, 0.0523, 0.0568, 0.0505, 0.1491, 0.0525, 0.0699, 0.0522, 0.077, 0.0537, 0.0561, 0.0505, 0.0808, 0.0518, 0.0578, 0.0511, 0.1509, 0.0522, 0.0708, 0.0539, 0.0817, 0.0582]
==========================================================================================
LR_PTH cross screen S3210-01 and 02 => [0.134, 0.0572, 0.3625, 0.0552, 0.1052, 0.064, 0.0695, 0.0674, 0.073, 0.0654, 0.0986, 0.0661, 0.1386, 0.0563, 0.3768, 0.0553, 0.1061, 0.0605, 0.0711, 0.0637, 0.0731, 0.0643, 0.0983, 0.0702, 0.065, 0.0597, 0.0674, 0.0649, 0.1275, 0.0633, 0.1083, 0.0689, 0.1423, 0.0663, 0.169, 0.0723, 0.0632, 0.0587, 0.0668, 0.062, 0.113, 0.063, 0.1095, 0.0628, 0.1374, 0.0662, 0.1444, 0.0715, 0.0747, 0.0602, 0.0747, 0.0592, 0.0645, 0.065, 0.0647, 0.0666, 0.0652, 0.0674, 0.0812, 0.0716, 0.0754, 0.0698, 0.0782, 0.0593, 0.0673, 0.0647, 0.0682, 0.0649, 0.0788, 0.0666, 0.0818, 0.066, 0.1497, 0.0667, 0.086, 0.0677, 0.0982, 0.0604, 0.1655, 0.0666, 0.0876, 0.0714, 0.1001, 0.1057, 0.1712, 0.0637, 0.0914, 0.0632, 0.1033, 0.0665, 0.1804, 0.0723, 0.0797, 0.0703, 0.1054, 0.1252]
==========================================================================================

In [32]:
def prepare_default_dict(plates):
    """ Quadrants are used to separate plate names; user-defined plate names will be used in the future.
    Args:
        plates: a list of plate names being parsed
    """
    quadrants = ['UL', 'UR', 'LL', 'LR']
    quadrant_plates = {}
    for plate in plates:
        for quadrant in quadrants:
            plate_name = "{0}_{1}".format(quadrant, plate)
            quadrant_plates.setdefault(plate_name, [])
    return quadrant_plates

def get_matrix(rows, cols):
    """
    Args:
        rows: a number of rows in the matrix
        cols: a number of cols in the matrix
    Return:
        A matrix for rows x cols and take into account offset of 1.
    """
    return [(i, j) for i in range(1, cols + 1) for j in range(1, rows + 1)]

def update_pos_value(plate_name, quadrant, pos, values, quadrant_plates):
    """ Add pos:value to a plate.
    """
    plate_name = "{0}_{1}".format(quadrant, plate_name)
    return quadrant_plates[plate_name].append(values[pos-1])

def split_96_quadrants(data_file):
    """ Split 384 plate into 96 quadrant in the following format:
          1 2 3 4 ...
        1 A B A B ...
        2 C D C D ...
        .
        .
        where A == Plate 1, B == Plate 2, C == Plate 3, D == Plate 4
    """
    # get the plates
    plates = softmax_parse_file(data_file)
    # generate default dict
    quadrant_plates = prepare_default_dict(plates.keys())
    # generate the matrix for the 384 plates
    matrix = get_matrix(24, 16)
    # add the approapiate data to each plate
    for plate, values in plates.iteritems():
        for pos in matrix:
            pos_y = pos[0]
            pos_x = pos[1]
            # algorithm to determine which data is related to which plate
            # based on (x, y) coordinates in the 384 plate matrix
            if pos_x % 2 != 0 and pos_y % 2 != 0:
                pos = get_matrix_pos(pos_x, pos_y)
                update_pos_value(plate, 'UL', pos, values, quadrant_plates)
            elif pos_x % 2 != 0 and pos_y % 2 == 0:
                pos = get_matrix_pos(pos_x, pos_y)
                update_pos_value(plate, 'UR', pos, values, quadrant_plates)
            elif pos_x % 2 == 0 and pos_y % 2 != 0:
                pos = get_matrix_pos(pos_x, pos_y)
                update_pos_value(plate, 'LL', pos, values, quadrant_plates)
            elif pos_x % 2 == 0 and pos_y % 2 == 0:
                pos = get_matrix_pos(pos_x, pos_y)
                update_pos_value(plate, 'LR', pos, values, quadrant_plates)

    return quadrant_plates

file_name = 'Data/2_elisa_384.txt'
with open(file_name) as data_file:
    plates = split_96_quadrants(data_file)
    for plate_name, values in plates.iteritems():
        print plate_name, "=>", values
        print "***"*10


UR_PTH cross screen S3210-01 and 02 => [0.0516, 0.0522, 0.0984, 0.1241, 0.2265, 0.1365, 0.2241, 0.0914, 0.1106, 0.1208, 0.4499, 0.0994, 0.1276, 0.0883, 0.063, 0.0666, 0.0647, 0.2082, 0.1865, 0.0931, 0.1534, 0.1001, 0.0948, 0.2291, 0.0635, 0.0547, 0.0562, 0.0555, 0.0789, 0.4249, 0.0756, 0.0758, 0.1073, 0.0935, 0.0943, 0.0734, 0.0561, 0.0808, 0.0578, 0.1509, 0.0708, 0.0817, 0.4668, 0.0874, 0.2609, 0.1374, 0.0847, 0.307, 0.0844, 0.051, 0.0537, 0.0747, 0.056, 0.0611, 0.1386, 0.3768, 0.1061, 0.0711, 0.0731, 0.0983, 0.0555, 0.0555, 0.0831, 0.057, 0.2429, 0.1296, 0.0632, 0.0668, 0.113, 0.1095, 0.1374, 0.1444, 0.0777, 0.0563, 0.8789, 0.1444, 0.0712, 0.0807, 0.0754, 0.0782, 0.0673, 0.0682, 0.0788, 0.0818, 0.0598, 0.0804, 0.149, 0.1509, 0.1851, 0.0844, 0.1712, 0.0914, 0.1033, 0.1804, 0.0797, 0.1054]
******************************
LL_PTH cross screen S3210-01 and 02 => [0.0511, 0.0551, 0.0568, 0.0957, 0.2228, 0.0761, 0.13, 0.1247, 0.0799, 0.1028, 0.0716, 0.0636, 0.0526, 0.0521, 0.0511, 0.0535, 0.0538, 0.0538, 0.0594, 0.0598, 0.0603, 0.0647, 0.0668, 0.0689, 0.0508, 0.0525, 0.052, 0.0525, 0.0537, 0.0547, 0.0595, 0.0577, 0.0633, 0.0655, 0.0673, 0.0697, 0.0517, 0.0523, 0.0505, 0.0525, 0.0522, 0.0537, 0.0613, 0.0588, 0.0621, 0.0621, 0.0634, 0.0655, 0.0516, 0.0519, 0.0517, 0.0519, 0.0527, 0.0558, 0.0572, 0.0552, 0.064, 0.0674, 0.0654, 0.0661, 0.0522, 0.0518, 0.0544, 0.0559, 0.0536, 0.0605, 0.0597, 0.0649, 0.0633, 0.0689, 0.0663, 0.0723, 0.055, 0.0534, 0.0512, 0.0546, 0.0568, 0.0574, 0.0602, 0.0592, 0.065, 0.0666, 0.0674, 0.0716, 0.0522, 0.0517, 0.0542, 0.0589, 0.0601, 0.0613, 0.0667, 0.0677, 0.0604, 0.0666, 0.0714, 0.1057]
******************************
UL_PTH cross screen S3210-01 and 02 => [0.0548, 0.0546, 0.117, 0.1388, 0.3267, 0.1439, 0.2641, 0.0924, 0.1051, 0.1259, 0.453, 0.1445, 0.1326, 0.0857, 0.0638, 0.0686, 0.0647, 0.1989, 0.1713, 0.0886, 0.1646, 0.099, 0.0974, 0.2022, 0.0636, 0.0617, 0.0571, 0.0564, 0.0945, 0.3076, 0.0759, 0.0748, 0.098, 0.1012, 0.096, 0.0724, 0.0569, 0.0791, 0.0568, 0.1491, 0.0699, 0.077, 0.5475, 0.0908, 0.2709, 0.1356, 0.0927, 0.2799, 0.083, 0.0504, 0.0531, 0.0736, 0.0553, 0.0763, 0.134, 0.3625, 0.1052, 0.0695, 0.073, 0.0986, 0.0567, 0.0553, 0.0887, 0.0576, 0.2339, 0.1422, 0.065, 0.0674, 0.1275, 0.1083, 0.1423, 0.169, 0.0815, 0.0551, 0.8665, 0.1493, 0.0707, 0.0801, 0.0747, 0.0747, 0.0645, 0.0647, 0.0652, 0.0812, 0.0564, 0.0777, 0.1293, 0.1226, 0.1842, 0.0902, 0.1497, 0.086, 0.0982, 0.1655, 0.0876, 0.1001]
******************************
LR_PTH cross screen S3210-01 and 02 => [0.0488, 0.0523, 0.0537, 0.0803, 0.2136, 0.0668, 0.1297, 0.0818, 0.0853, 0.088, 0.0682, 0.0715, 0.052, 0.0518, 0.0521, 0.0522, 0.0533, 0.0552, 0.0617, 0.0627, 0.0617, 0.063, 0.0657, 0.0712, 0.0511, 0.0513, 0.0514, 0.0511, 0.0533, 0.06, 0.0609, 0.0571, 0.0634, 0.0653, 0.0659, 0.0695, 0.0505, 0.0518, 0.0511, 0.0522, 0.0539, 0.0582, 0.0698, 0.0584, 0.0629, 0.0613, 0.0637, 0.0683, 0.0494, 0.0502, 0.0527, 0.053, 0.0506, 0.0553, 0.0563, 0.0553, 0.0605, 0.0637, 0.0643, 0.0702, 0.0536, 0.0522, 0.0521, 0.0528, 0.0551, 0.0577, 0.0587, 0.062, 0.063, 0.0628, 0.0662, 0.0715, 0.0554, 0.0539, 0.058, 0.0578, 0.0565, 0.058, 0.0698, 0.0593, 0.0647, 0.0649, 0.0666, 0.066, 0.0534, 0.0533, 0.0558, 0.0618, 0.0592, 0.0748, 0.0637, 0.0632, 0.0665, 0.0723, 0.0703, 0.1252]
******************************

In [33]:
FACS_PATTERN = re.compile(r'[\w\d_-]+_[A-Z]\d+_(?P<well>[A-Z]\d{2})')
ANTIGEN_PATTERN = re.compile(r'(Ag\d+)')
ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"


def well_name_position(well_name, columns=12):
    '''Convert a well name (A01-K12) or (1-96) to its position (1-96)
    '''
    match = re.match( r"^(?P<row>[A-W])?(?P<column>\d{1,2})$", well_name.upper())
    if match:
        # If there is an alphanumeric row position
        if match.group("row"):
            row = ALPHABET.index(match.group("row"))
            column = int(match.group("column"))
            return (row * columns) + column
        # Otherwise the only position is an absolute one
        else:
            return int(match.group("column"))
    else:
        raise ValueError("Invalid well_name: %s" % str(well_name))


def antigen_header(antigen_header):
    """ Return antigen name
    """
    match = ANTIGEN_PATTERN.search(antigen_header)
    return match.group(1) if match else antigen_header


def facs_parse_file(data):
    """ Parsing ACCURI and FLOWJO FACS files.
    Args:
        data_file: file to Parse
    Returns:
        plates: a dictionary of plate, position, values
    """
    data_file.seek(0)
    filename = os.path.split(data_file.name)[1]
    data = data_file.read().replace('\r', '\n')
    reader = csv.reader(StringIO(data), dialect=csv.Sniffer().sniff(data))
    rows = [ row for row in reader if row ]
    default_name = os.path.splitext(filename)[0]
    antigen_names = [antigen_header(c) for c in rows[0][1:]]
    plates = {}
    for row in rows[1:]:
        match = FACS_PATTERN.search(row[0])
        if match:
            well = match.groupdict()['well']
            position = well_name_position(well)
            for column, ag_name in zip(row[1:], antigen_names):
                plate_name = match.groupdict().get('plate', default_name)
                full_plate_name = "{0} - {1}".format(plate_name, ag_name)
                plates.setdefault(full_plate_name, []).append(float(column))
    return plates

# file_name = 'Data/6_facs_96.txt'
file_name = 'Data/7_facs_384.txt'
with open(file_name) as data_file:
    plates = facs_parse_file(data_file)
    for plate_name, values in plates.iteritems():
        print plate_name, "=>", values
        print "==="*30


7_facs_384 - Ratio => [2.05, 2.05, 283.0, 392.0, 1.36, 1.43, 2.06, 8.77, 1.66, 0.58, 1.16, 1.62, 1.19, 1.97, 1.67, 2.94, 1.2, 1.17, 1.96, 1.33, 1.29, 1.27, 1.55, 1.32, 1.88, 2.25, 282.0, 246.0, 1.55, 1.21, 1.32, 2.14, 1.69, 1.47, 1.2, 1.16, 1.36, 1.5, 1.45, 2.04, 1.65, 1.08, 1.23, 1.38, 1.42, 1.44, 1.34, 1.09, 1.52, 1.22, 1.1, 1.47, 1.43, 1.54, 1.38, 1.41, 2.87, 1.18, 1.78, 3.03, 1.37, 1.35, 2.0, 1.22, 1.46, 1.33, 1.46, 1.34, 1.28, 2.03, 1.37, 1.58, 1.82, 1.3, 1.34, 1.44, 1.39, 1.18, 1.32, 1.24, 1.04, 1.39, 1.35, 1.72, 1.76, 2.77, 1.16, 2.57, 1.31, 1.26, 0.68, 1.14, 1.14, 1.43, 1.4, 1.54, 1.52, 2.98, 4.57, 14.3, 1.83, 0.79, 1.32, 1.32, 2.31, 3.09, 3.0, 1.94, 1.5, 1.29, 1.37, 1.25, 1.23, 1.34, 0.92, 1.62, 1.78, 1.07, 2.6, 2.0, 1.67, 3.14, 0.71, 1.6, 1.06, 1.1, 0.89, 1.49, 1.17, 1.17, 1.53, 1.29, 2.77, 2.42, 1.39, 1.38, 1.99, 2.04, 0.85, 1.27, 1.27, 0.93, 1.89, 1.34, 1.58, 1.25, 1.16, 0.67, 1.33, 1.35, 1.36, 1.62, 1.36, 1.72, 1.29, 2.78, 1.65, 3.08, 1.79, 1.78, 1.37, 1.19, 1.45, 1.24, 1.21, 1.36, 1.21, 1.17, 1.67, 2.39, 1.37, 1.42, 1.35, 1.67, 1.12, 1.84, 1.24, 1.19, 1.03, 1.64, 1.61, 2.35, 1.33, 1.2, 1.55, 1.27, 1.72, 1.59, 1.32, 1.18, 1.12, 0.41, 1.64, 1.17, 1.5, 2.94, 1.31, 2.23, 1.17, 1.67, 1.69, 1.47, 1.28, 2.82, 1.99, 1.44, 1.62, 1.42, 1.28, 1.43, 1.26, 1.09, 1.28, 1.1, 1.48, 1.44, 1.18, 1.2, 1.55, 1.02, 3.03, 1.16, 2.85, 1.55, 0.85, 1.34, 1.11, 1.28, 2.02, 1.44, 1.62, 1.13, 1.24, 1.25, 3.11, 1.5, 1.28, 1.32, 2.96, 1.42, 3.02, 1.07, 0.54, 1.2, 1.33, 1.65, 1.42, 1.26, 2.3, 1.07, 1.7, 1.15, 1.9, 1.19, 1.2, 1.33, 1.26, 1.29, 1.21, 1.21, 1.31, 2.96, 1.64, 1.2, 1.24, 3.75, 1.52, 1.53, 1.3, 1.32, 3.75, 1.17, 1.27, 1.2, 2.39, 1.32, 1.34, 1.41, 1.4, 1.16, 2.83, 1.14, 1.33, 1.5, 1.23, 1.19, 1.45, 1.62, 0.91, 1.55, 1.44, 1.39, 3.6, 0.94, 1.38, 1.74, 1.74, 1.18, 0.84, 1.18, 1.37, 0.78, 2.38, 1.53, 1.3, 1.07, 1.16, 1.53, 1.05, 1.05, 2.18, 1.19, 1.27, 1.17, 1.55, 1.22, 1.24, 1.01, 1.48, 1.26, 1.69, 1.26, 3.83, 1.44, 2.36, 1.12, 1.45, 1.36, 1.24, 3.49, 0.9, 1.22, 0.9, 1.16, 4.06, 1.47, 1.38, 1.19, 1.08, 1.01, 1.12, 2.07, 0.76, 3.97, 1.17, 2.72, 1.28, 1.35, 1.72, 1.43, 1.36, 1.29, 1.57, 1.84, 1.1, 1.42, 1.28, 0.79, 6.39, 11.1, 0.69, 0.44, 1.35, 1.75, 0.58, 1.43, 1.34, 1.04, 3.04, 0.52, 1.17, 3.26, 1.5, 1.16, 1.37, 1.14, 0.75, 0.84, 4.44, 1.18, 3.11, 1.58, 6.28, 13.9]
==========================================================================================
7_facs_384 - CHOKO 22 HuLPAR1 => [285.0, 271.0, 39570.0, 39570.0, 1778.0, 2651.0, 257.0, 10519.0, 637.0, 2366.0, 4265.0, 490.0, 350.0, 232.0, 307.0, 2574.0, 3464.0, 1943.0, 356.0, 349.0, 213.0, 274.0, 355.0, 6910.0, 244.0, 268.0, 34400.0, 35930.0, 648.0, 270.0, 2082.0, 616.0, 264.0, 187.0, 225.0, 285.0, 198.0, 282.0, 552.0, 829.0, 712.0, 295.0, 5522.0, 301.0, 3247.0, 1561.0, 282.0, 318.0, 805.0, 5346.0, 8396.0, 213.0, 197.0, 294.0, 249.0, 2183.0, 2309.0, 190.0, 467.0, 1292.0, 223.0, 4423.0, 262.0, 4895.0, 245.0, 3130.0, 207.0, 319.0, 2486.0, 327.0, 371.0, 319.0, 474.0, 5542.0, 3081.0, 235.0, 2362.0, 6002.0, 268.0, 5080.0, 300.0, 249.0, 270.0, 235.0, 287.0, 1674.0, 261.0, 1363.0, 2141.0, 5818.0, 6918.0, 5481.0, 327.0, 663.0, 250.0, 277.0, 248.0, 2012.0, 4166.0, 14108.0, 513.0, 4019.0, 267.0, 125.0, 902.0, 2110.0, 552.0, 252.0, 294.0, 268.0, 857.0, 276.0, 320.0, 2127.0, 2543.0, 282.0, 518.0, 416.0, 341.0, 457.0, 240.0, 3470.0, 3387.0, 187.0, 181.0, 376.0, 412.0, 2451.0, 1707.0, 5467.0, 1465.0, 287.0, 1554.0, 979.0, 301.0, 2173.0, 531.0, 411.0, 656.0, 5471.0, 346.0, 281.0, 5818.0, 269.0, 316.0, 401.0, 340.0, 3641.0, 286.0, 5160.0, 965.0, 196.0, 225.0, 224.0, 2717.0, 961.0, 263.0, 1964.0, 266.0, 232.0, 310.0, 1776.0, 292.0, 6043.0, 213.0, 2559.0, 4704.0, 4205.0, 261.0, 933.0, 1153.0, 201.0, 1185.0, 205.0, 5549.0, 322.0, 2245.0, 5260.0, 366.0, 248.0, 371.0, 575.0, 455.0, 244.0, 302.0, 341.0, 268.0, 986.0, 3187.0, 355.0, 342.0, 1311.0, 226.0, 4340.0, 2049.0, 743.0, 2546.0, 692.0, 368.0, 355.0, 318.0, 222.0, 221.0, 1550.0, 705.0, 323.0, 507.0, 430.0, 3483.0, 276.0, 3526.0, 460.0, 253.0, 6130.0, 1766.0, 225.0, 5163.0, 5732.0, 401.0, 204.0, 1674.0, 7223.0, 1424.0, 282.0, 329.0, 6739.0, 303.0, 333.0, 259.0, 267.0, 312.0, 4265.0, 261.0, 6910.0, 1570.0, 555.0, 3592.0, 2221.0, 1974.0, 1402.0, 2616.0, 416.0, 2700.0, 242.0, 425.0, 258.0, 437.0, 554.0, 1015.0, 252.0, 240.0, 221.0, 340.0, 271.0, 5018.0, 256.0, 4583.0, 317.0, 6024.0, 293.0, 220.0, 794.0, 175.0, 581.0, 6698.0, 2568.0, 378.0, 1352.0, 263.0, 432.0, 1132.0, 280.0, 246.0, 5739.0, 864.0, 264.0, 1964.0, 266.0, 5612.0, 1592.0, 1300.0, 5099.0, 181.0, 1747.0, 5262.0, 280.0, 2213.0, 242.0, 246.0, 273.0, 392.0, 5300.0, 1280.0, 1783.0, 4427.0, 462.0, 426.0, 459.0, 576.0, 227.0, 5355.0, 2704.0, 635.0, 260.0, 258.0, 193.0, 310.0, 732.0, 3985.0, 232.0, 513.0, 4629.0, 4968.0, 4208.0, 218.0, 213.0, 219.0, 200.0, 1048.0, 4532.0, 245.0, 4786.0, 3385.0, 221.0, 267.0, 285.0, 190.0, 318.0, 5359.0, 1901.0, 202.0, 469.0, 253.0, 5746.0, 2357.0, 419.0, 4403.0, 4849.0, 227.0, 341.0, 277.0, 427.0, 242.0, 3423.0, 5861.0, 280.0, 4937.0, 5118.0, 285.0, 1413.0, 262.0, 1119.0, 1822.0, 250.0, 211.0, 649.0, 239.0, 2439.0, 2734.0, 2141.0, 395.0, 2434.0, 338.0, 233.0, 590.0, 196.0, 422.0, 307.0, 1169.0, 248.0, 492.0, 692.0, 228.0, 246.0, 2064.0, 230.0, 291.0, 308.0, 259.0, 334.0, 2759.0, 429.0, 2149.0, 2154.0]
==========================================================================================
7_facs_384 - CHOKO 22 => [139.0, 132.0, 140.0, 101.0, 1311.0, 1851.0, 125.0, 1199.0, 383.0, 4100.0, 3663.0, 303.0, 293.0, 118.0, 184.0, 877.0, 2878.0, 1656.0, 182.0, 262.0, 165.0, 215.0, 229.0, 5221.0, 130.0, 119.0, 122.0, 146.0, 417.0, 223.0, 1578.0, 288.0, 156.0, 127.0, 188.0, 245.0, 146.0, 188.0, 382.0, 407.0, 432.0, 272.0, 4476.0, 218.0, 2287.0, 1084.0, 211.0, 291.0, 531.0, 4383.0, 7650.0, 145.0, 138.0, 191.0, 181.0, 1550.0, 804.0, 161.0, 262.0, 427.0, 163.0, 3275.0, 131.0, 4019.0, 168.0, 2357.0, 142.0, 238.0, 1948.0, 161.0, 271.0, 202.0, 261.0, 4255.0, 2295.0, 163.0, 1702.0, 5074.0, 203.0, 4089.0, 289.0, 179.0, 200.0, 137.0, 163.0, 604.0, 225.0, 530.0, 1632.0, 4606.0, 10162.0, 4789.0, 286.0, 465.0, 179.0, 180.0, 163.0, 675.0, 912.0, 985.0, 281.0, 5093.0, 202.0, 94.5, 390.0, 682.0, 184.0, 130.0, 196.0, 208.0, 626.0, 220.0, 261.0, 1582.0, 2767.0, 174.0, 291.0, 388.0, 131.0, 228.0, 144.0, 1105.0, 4763.0, 117.0, 170.0, 342.0, 461.0, 1646.0, 1463.0, 4692.0, 959.0, 222.0, 562.0, 404.0, 216.0, 1573.0, 267.0, 201.0, 775.0, 4308.0, 273.0, 302.0, 3072.0, 200.0, 200.0, 320.0, 294.0, 5440.0, 215.0, 3830.0, 710.0, 121.0, 165.0, 130.0, 2101.0, 346.0, 159.0, 637.0, 149.0, 130.0, 226.0, 1493.0, 202.0, 4864.0, 176.0, 1883.0, 3873.0, 3596.0, 156.0, 391.0, 844.0, 142.0, 878.0, 123.0, 4962.0, 175.0, 1809.0, 4405.0, 355.0, 151.0, 231.0, 245.0, 343.0, 204.0, 195.0, 268.0, 156.0, 621.0, 2410.0, 302.0, 305.0, 3164.0, 138.0, 3714.0, 1370.0, 253.0, 1940.0, 311.0, 314.0, 212.0, 188.0, 151.0, 172.0, 550.0, 355.0, 225.0, 313.0, 303.0, 2723.0, 193.0, 2791.0, 421.0, 197.0, 5563.0, 1193.0, 156.0, 4394.0, 4757.0, 258.0, 200.0, 552.0, 6238.0, 500.0, 182.0, 386.0, 5011.0, 273.0, 261.0, 128.0, 186.0, 193.0, 3769.0, 211.0, 5519.0, 505.0, 370.0, 2801.0, 1684.0, 667.0, 989.0, 867.0, 389.0, 4962.0, 202.0, 319.0, 156.0, 308.0, 440.0, 442.0, 235.0, 141.0, 192.0, 179.0, 227.0, 4182.0, 192.0, 3650.0, 245.0, 4974.0, 242.0, 168.0, 268.0, 107.0, 483.0, 5393.0, 684.0, 249.0, 883.0, 202.0, 327.0, 302.0, 239.0, 193.0, 4792.0, 362.0, 200.0, 1467.0, 189.0, 4009.0, 1369.0, 459.0, 4482.0, 136.0, 1164.0, 4276.0, 235.0, 1528.0, 149.0, 269.0, 176.0, 272.0, 3816.0, 356.0, 1894.0, 3199.0, 265.0, 245.0, 389.0, 683.0, 193.0, 3902.0, 3474.0, 267.0, 170.0, 199.0, 181.0, 268.0, 479.0, 3792.0, 222.0, 235.0, 3878.0, 3921.0, 3596.0, 141.0, 174.0, 177.0, 198.0, 707.0, 3610.0, 145.0, 3806.0, 884.0, 153.0, 113.0, 255.0, 131.0, 233.0, 4329.0, 544.0, 224.0, 383.0, 281.0, 4974.0, 580.0, 286.0, 3195.0, 4079.0, 211.0, 338.0, 247.0, 206.0, 319.0, 862.0, 5011.0, 103.0, 3863.0, 3797.0, 166.0, 989.0, 192.0, 866.0, 1164.0, 136.0, 191.0, 457.0, 186.0, 3072.0, 428.0, 193.0, 571.0, 5535.0, 250.0, 133.0, 1024.0, 137.0, 316.0, 294.0, 384.0, 478.0, 421.0, 212.0, 152.0, 212.0, 1506.0, 202.0, 390.0, 368.0, 58.3, 283.0, 887.0, 272.0, 342.0, 155.0]
==========================================================================================

In [34]:
def biacore_parse_file(data_file):
    """ Parsing Biacore files.
    Args:
        data_file: file to Parse
    Returns:
        plates: a dictionary of plate, position, values
    """
    data_file.seek(0)
    data = data_file.read().replace('\r', '\n')
    reader = csv.reader(StringIO(data), dialect=csv.Sniffer().sniff(data))
    rows = [row for row in reader if row]
    plates = {}
    for row in rows:
        if len(row) != 3:
            raise ValueError("Every row should have 3 columns")
        name = row[0].strip()
        position = well_name_position(row[1])
        value = float(row[2])
        plates.setdefault(name, []).append(value)
    return plates


def biacore4000_parse_file(data_file):
    """ Parsing Biacore4000 files.
    Args:
        data_file: file to Parse
    Returns:
        plates: a dictionary of plate, position, values
    """
    data_file.seek(0)
    data = data_file.read().replace('\r', '\n')
    reader = csv.reader(StringIO(data), dialect=csv.Sniffer().sniff(data))
    rows = [row for row in reader if row]
    plates = {}
    # skip the first line
    for row in rows[1:]:
        sample_name = row[7]
        antigen = row[5]
        value = row[3]
        match = re.match(r'^(.+)_(\w\d{2})$', sample_name)
        if match:
            plate_name = '{0} - {1}'.format(match.group(1), antigen)
            plates.setdefault(plate_name, []).append(float(value))
    return plates


print "Biacore: " + "==="*30
#file_name = 'Data/3_biacore.csv'
file_name_biacore = 'Data/3a_biacore_multiple.csv'
with open(file_name_biacore) as data_file:
    plates = biacore_parse_file(data_file)
    for plate_name, values in plates.iteritems():
        print plate_name, "=>", values
        print "***"*10
    print "\n\n"


print "Biacore4000: " + "==="*30

file_name_biacore_4000 = 'Data/4_biacore_4000.txt'
with open(file_name_biacore_4000) as data_file:
    plates = biacore4000_parse_file(data_file)
    for plate_name, values in plates.iteritems():
        print plate_name, "=>", values
        print "***"*10


Biacore: ==========================================================================================
260.B1.8B2.Ag112-02 => [12.8, 24.9, 69.2, 30.3, 103.3, 15.7, 12.4, 56.2, 14.5, 44.0, 46.5, 76.3, 60.0, 31.7, 23.3, 33.0, 14.3, 15.8, 12.7, 59.5, 12.9, 39.8, 10.7, 52.3]
******************************
260.B1.8B2.Ag104-01 => [7.7, 8.0, 470.5, 856.9, 871.9, 818.7, 716.5, 514.9, 432.4, 594.8, 587.4, 938.0, 245.0, 428.5, 587.7, 226.9, 195.9, 400.3, 329.1, 452.0, 436.3, 449.8, 739.6, 827.5, 543.9, 269.4, 147.1, 125.2, 412.8, 258.8]
******************************



Biacore4000: ==========================================================================================
428_B1_1_01 - Ag124-Fc => [-3.3, -5.0, -0.1, -5.3, -3.0, -3.9, -1.3, -4.7, -3.9, -2.2, -2.8, -4.3, -2.5, -2.5, -1.1, -3.2, -1.3, -3.1, -0.6, -3.9, -1.6, 0.8, -1.0, -2.9, 26.3, -2.5, 25.4, -2.4, -5.6, -1.6, -4.4, 30.2, -3.3, -2.2, -0.6, -7.0, -1.3, -0.1, -0.7, -3.3, 26.3, -3.6, -0.9, -0.7, -4.6, -2.7, 1.7, 0.1, -2.7, 1.4, 0.8, -1.2, -3.3, -2.6, -1.2, -1.4, -2.0, 4.2, 0.7, 18.6, -1.2, -3.3, 13.0, -3.5, 0.9, -2.2, 1.2, -1.3, 12.5, -0.6, 6.6, -1.5, -2.6, -1.2, -1.0, -0.9, -2.4, -1.4, -0.6, -1.2, -1.6, 2.9, 18.7, -0.7, -2.6, 2.9, -1.1, -1.0, -1.6, -0.8, -1.3, 2.9, -2.0, -2.5, 0.2, -4.5]
******************************
426_B1_1_01 - Ag124 IgV => [-4.4, 76.7, 15.9, 17.8, -3.2, 65.9, 47.1, 59.8, 48.8, 25.2, 54.1, 39.4, 67.5, 75.1, 52.6, 27.4, 47.1, 35.2, 30.9, 15.1, 6.8, 10.2, 17.9, 20.4, 75.7, 19.9, 90.6, 78.2, 27.0, 66.9, 34.9, 38.5, 67.6, 28.9, 56.7, 56.6, 68.3, 48.9, 8.1, 64.7, 0.8, 43.3, 8.5, 51.0, 68.9, 77.4, 72.3, 56.2, 63.2, 32.1, 34.0, 12.7, 57.4, 14.3, 59.8, 25.6, 40.1, 87.7, 5.3, 8.9, 27.8, 22.7, 75.5, 64.7, 9.5, 66.0, 24.9, 38.8, 44.4, 50.7, 63.5, 11.7, 19.4, 7.1, 10.4, 59.2, 44.5, 67.3, 19.2, 56.9, 18.6, 48.3, 50.0, 53.1, 14.3, 46.7, 11.4, 61.9, 65.4, 26.7, 40.6, 46.5, 1.7, 64.7, 12.3, -10.9]
******************************
426_B1_1_01 - Ag124-Fc => [-4.3, 31.6, 156.3, 3.3, -3.1, 361.9, 883.3, 22.3, 41.6, 248.6, 414.0, 678.1, 560.5, 377.9, 421.8, 5.9, 17.1, 213.0, 63.9, -0.2, -3.3, -5.5, 1.3, 3.3, 30.5, 1.4, 44.2, 392.2, 7.4, 26.7, 451.3, 924.7, 258.8, 7.2, 422.5, 412.0, 468.9, 891.6, -4.3, 1074.9, -8.0, 31.4, -3.1, 314.9, 240.1, 371.0, 29.4, 513.4, 359.9, 366.0, 793.0, -2.2, 357.3, -3.0, 14.6, 5.2, 12.8, 36.7, -4.7, -2.3, 4.6, 5.1, 1087.3, 24.5, -4.4, 389.6, -73.4, 1238.4, 798.4, 399.6, 21.2, -15.9, -5.3, -5.2, -3.8, 24.5, 828.1, 25.6, 2.7, 526.5, -5.5, 13.1, 441.6, 70.0, 0.4, 848.5, -2.1, 402.0, 459.9, -1.1, 77.7, 583.8, -8.2, 250.8, -2.6, -11.8]
******************************
428_B1_1_01 - Ag124 IgV => [-3.2, 4.8, 3.1, 2.3, -3.5, 5.9, 7.5, 5.0, 5.1, 9.9, 8.2, 6.4, 7.7, 11.3, 11.3, 8.8, 10.3, 10.8, 12.5, 8.3, 9.2, 19.2, 10.5, 7.8, 63.5, 10.8, 60.0, 9.6, 5.1, 14.0, 5.6, 68.5, 6.6, 16.5, 13.7, 0.1, 10.2, 12.7, 11.3, 6.2, 61.0, 4.6, 7.6, 8.8, 7.6, 2.8, 13.1, 12.3, 3.4, 12.8, 11.2, 9.6, 4.2, 12.0, 7.0, 8.6, 8.9, 23.2, 10.6, 51.6, 9.1, 9.3, 31.9, 6.9, 17.1, 12.6, 11.7, 10.3, 35.7, 13.7, 25.6, 9.9, 6.5, 15.5, 9.6, 12.1, 8.6, 14.6, 10.1, 11.8, 8.7, 19.3, 41.0, 11.7, 8.4, 19.0, 11.0, 12.4, 11.3, 11.3, 8.1, 16.7, 10.6, 11.4, 12.2, -4.1]
******************************

In [35]:
def msd_parse_file(data_file):
    """ Parsing MSD files.
    Args:
        data_file: file to Parse
    Returns:
        plates: a dictionary of plate, position, values
    """
    data_file = StringIO(data_file.read().replace('\r', '\n'))
    plate_pattern = re.compile(r"^Plate #\s+:\s+([^\s]+)\s*$")
    values_pattern = re.compile(r"^[A-Z]+((\s+-?\d+){%d})\s*$" % 12)
    plate_names = []
    plates = {}
    for line in data_file:
        plate_match = plate_pattern.match(line)
        if plate_match:
            plate_name = plate_match.group(1)
            plate_names.append(plate_name)
        else:
            values_match = values_pattern.match(line)
            if values_match:
                values = values_match.group(1).strip().split()
                current_plate_name = plate_names[-1]
                plates.setdefault(current_plate_name, [])
                for value in values:
                    plates[current_plate_name].append(float(value))
    return plates

# file_name = 'Data/8_msd_single_plate.txt'
file_name = 'Data/8a_msd_multiple_plates.txt'
with open(file_name) as data_file:
    plates = msd_parse_file(data_file)
    for plate, values in plates.iteritems():
        print plate, " : ", len(values)
        print values


test-plate-1  :  96
[46.0, 41.0, 44.0, 58.0, 39.0, 49.0, 41.0, 36.0, 42.0, 40.0, 43.0, 34.0, 32.0, 38.0, 36.0, 27.0, 34.0, 37.0, 40.0, 29.0, 263.0, 41.0, 85.0, 63.0, 51.0, 68.0, 52.0, 81.0, 69.0, 57.0, 48.0, 33.0, 93.0, 97.0, 38.0, 79.0, 936.0, 138.0, 224.0, 42.0, 123.0, 46.0, 216.0, 42.0, 227.0, 80.0, 52.0, 37.0, 62.0, 291.0, 42.0, 46.0, 53.0, 36.0, 35754.0, 37.0, 32.0, 37.0, 40.0, 34.0, -2.0, 3.0, 2.0, -3.0, 0.0, -2.0, 4.0, -7.0, -7.0, 1.0, -3.0, -8.0, -5.0, -3.0, -2.0, -2.0, -7.0, -1.0, -3.0, -7.0, -9.0, -6.0, -7.0, -10.0, 1.0, -2.0, 0.0, 1.0, -6.0, -2.0, -2.0, 2.0, -4.0, -5.0, 1.0, -4.0]
test-plate-2  :  96
[52.0, 100.0, 52.0, 81.0, 57.0, 79.0, 47.0, 49.0, 43.0, 39.0, 44.0, 42.0, 45.0, 48.0, 49.0, 70.0, 45.0, 44.0, 48.0, 46.0, 201.0, 39.0, 71.0, 60.0, 64.0, 67.0, 64.0, 78.0, 69.0, 55.0, 56.0, 46.0, 79.0, 69.0, 43.0, 59.0, 774.0, 94.0, 90.0, 48.0, 76.0, 51.0, 93.0, 49.0, 83.0, 63.0, 54.0, 42.0, 59.0, 100.0, 42.0, 50.0, 46.0, 38.0, 70.0, 42.0, 38.0, 41.0, 40.0, 43.0, 5.0, 0.0, 9.0, 7.0, 2.0, -6.0, 3.0, -2.0, -1.0, 3.0, 3.0, 2.0, 2.0, 5.0, 7.0, 5.0, 1.0, -3.0, 2.0, -3.0, 1.0, -3.0, 7.0, 3.0, 5.0, 7.0, 9.0, 5.0, -5.0, -1.0, 4.0, 0.0, -5.0, 2.0, 8.0, 2.0]
test-plate-3  :  96
[51.0, 518.0, 41.0, 872.0, 407.0, 714.0, 68.0, 49.0, 61.0, 52.0, 40.0, 49.0, 40.0, 45.0, 45.0, 472.0, 50.0, 49.0, 53.0, 41.0, 147.0, 55.0, 74.0, 61.0, 53.0, 70.0, 53.0, 71.0, 66.0, 59.0, 55.0, 45.0, 83.0, 68.0, 51.0, 55.0, 673.0, 85.0, 79.0, 40.0, 84.0, 50.0, 89.0, 47.0, 92.0, 66.0, 57.0, 42.0, 58.0, 104.0, 39.0, 48.0, 57.0, 52.0, 31179.0, 52.0, 43.0, 52.0, 44.0, 45.0, 2.0, 2.0, 6.0, -1.0, 9.0, 14.0, 8.0, 3.0, 2.0, 9.0, 9.0, 0.0, 0.0, 6.0, 1.0, -2.0, 5.0, 9.0, 9.0, 3.0, 3.0, 9.0, 7.0, 14.0, 2.0, 2.0, 3.0, -2.0, 6.0, 6.0, 4.0, 7.0, 4.0, 3.0, 6.0, 3.0]