In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
from small_script.myFunctions import *
from collections import defaultdict
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180) #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2
In [3]:
data = pd.read_csv("/Users/weilu/Research/Build/netsurfp-2.0.Any/test/test.csv")
In [12]:
a = pd.read_csv("/Users/weilu/Research/Build/netsurfp-2.0.Any/test/ssweight.stride_clean", sep="\s+", names=["a", "b", "seq", "i", "id", "q3Real", "c", "d", "e", "f", "g"])
In [26]:
b = a[["b", "q3Real"]]
In [29]:
b
Out[29]:
b
q3Real
0
LYS
C
1
GLU
C
2
THR
C
3
ALA
H
4
ALA
H
5
ALA
H
6
LYS
H
7
PHE
H
8
GLU
H
9
ARG
H
10
GLN
H
11
HIS
H
12
MET
C
13
ASP
T
14
SER
T
15
SER
T
16
THR
T
17
SER
T
18
ALA
C
19
ALA
C
20
SER
C
21
SER
C
22
SER
C
23
ASN
H
24
TYR
H
25
CYS
H
26
ASN
H
27
GLN
H
28
MET
H
29
MET
H
...
...
...
94
CYS
T
95
ALA
C
96
TYR
E
97
LYS
E
98
THR
E
99
THR
C
100
GLN
C
101
ALA
E
102
ASN
E
103
LYS
C
104
HIS
C
105
ILE
C
106
ILE
E
107
VAL
E
108
ALA
E
109
CYS
E
110
GLU
E
111
GLY
T
112
ASN
T
113
PRO
T
114
TYR
T
115
VAL
E
116
PRO
E
117
VAL
E
118
HIS
B
119
PHE
C
120
ASP
C
121
ALA
C
122
SER
C
123
VAL
C
124 rows × 2 columns
In [38]:
count = 1
for i, line in d.iterrows():
if line["q3Real"] == line["q3"]:
count += 1
else:
print(line)
b GLN
q3Real H
seq Q
q3 C
Name: 10, dtype: object
b HIS
q3Real H
seq H
q3 C
Name: 11, dtype: object
b ASP
q3Real T
seq D
q3 C
Name: 13, dtype: object
b SER
q3Real T
seq S
q3 C
Name: 14, dtype: object
b SER
q3Real T
seq S
q3 C
Name: 15, dtype: object
b THR
q3Real T
seq T
q3 C
Name: 16, dtype: object
b SER
q3Real T
seq S
q3 C
Name: 17, dtype: object
b ASN
q3Real H
seq N
q3 C
Name: 23, dtype: object
b ASN
q3Real T
seq N
q3 C
Name: 33, dtype: object
b LEU
q3Real T
seq L
q3 C
Name: 34, dtype: object
b THR
q3Real T
seq T
q3 C
Name: 35, dtype: object
b LYS
q3Real T
seq K
q3 C
Name: 36, dtype: object
b ASP
q3Real T
seq D
q3 C
Name: 37, dtype: object
b ARG
q3Real T
seq R
q3 C
Name: 38, dtype: object
b ASN
q3Real E
seq N
q3 C
Name: 43, dtype: object
b VAL
q3Real G
seq V
q3 H
Name: 56, dtype: object
b CYS
q3Real G
seq C
q3 H
Name: 57, dtype: object
b SER
q3Real G
seq S
q3 C
Name: 58, dtype: object
b LYS
q3Real E
seq K
q3 C
Name: 60, dtype: object
b ASN
q3Real E
seq N
q3 C
Name: 61, dtype: object
b VAL
q3Real E
seq V
q3 C
Name: 62, dtype: object
b CYS
q3Real E
seq C
q3 C
Name: 64, dtype: object
b LYS
q3Real T
seq K
q3 C
Name: 65, dtype: object
b ASN
q3Real T
seq N
q3 C
Name: 66, dtype: object
b GLY
q3Real T
seq G
q3 C
Name: 67, dtype: object
b GLN
q3Real E
seq Q
q3 C
Name: 68, dtype: object
b ASN
q3Real E
seq N
q3 C
Name: 70, dtype: object
b SER
q3Real T
seq S
q3 C
Name: 74, dtype: object
b TYR
q3Real T
seq Y
q3 C
Name: 75, dtype: object
b SER
q3Real T
seq S
q3 C
Name: 76, dtype: object
b THR
q3Real T
seq T
q3 C
Name: 77, dtype: object
b MET
q3Real C
seq M
q3 E
Name: 78, dtype: object
b THR
q3Real T
seq T
q3 C
Name: 86, dtype: object
b GLY
q3Real T
seq G
q3 C
Name: 87, dtype: object
b SER
q3Real T
seq S
q3 C
Name: 88, dtype: object
b SER
q3Real T
seq S
q3 C
Name: 89, dtype: object
b LYS
q3Real T
seq K
q3 C
Name: 90, dtype: object
b TYR
q3Real T
seq Y
q3 C
Name: 91, dtype: object
b PRO
q3Real T
seq P
q3 C
Name: 92, dtype: object
b ASN
q3Real T
seq N
q3 C
Name: 93, dtype: object
b CYS
q3Real T
seq C
q3 C
Name: 94, dtype: object
b ALA
q3Real C
seq A
q3 E
Name: 95, dtype: object
b THR
q3Real C
seq T
q3 E
Name: 99, dtype: object
b GLN
q3Real C
seq Q
q3 E
Name: 100, dtype: object
b ASN
q3Real E
seq N
q3 C
Name: 102, dtype: object
b HIS
q3Real C
seq H
q3 E
Name: 104, dtype: object
b ILE
q3Real C
seq I
q3 E
Name: 105, dtype: object
b GLU
q3Real E
seq E
q3 C
Name: 110, dtype: object
b GLY
q3Real T
seq G
q3 C
Name: 111, dtype: object
b ASN
q3Real T
seq N
q3 C
Name: 112, dtype: object
b PRO
q3Real T
seq P
q3 C
Name: 113, dtype: object
b TYR
q3Real T
seq Y
q3 C
Name: 114, dtype: object
b HIS
q3Real B
seq H
q3 E
Name: 118, dtype: object
In [41]:
count
Out[41]:
72
In [40]:
count/124
Out[40]:
0.5806451612903226
In [37]:
d
Out[37]:
b
q3Real
seq
q3
0
LYS
C
K
C
1
GLU
C
E
C
2
THR
C
T
C
3
ALA
H
A
H
4
ALA
H
A
H
5
ALA
H
A
H
6
LYS
H
K
H
7
PHE
H
F
H
8
GLU
H
E
H
9
ARG
H
R
H
10
GLN
H
Q
C
11
HIS
H
H
C
12
MET
C
M
C
13
ASP
T
D
C
14
SER
T
S
C
15
SER
T
S
C
16
THR
T
T
C
17
SER
T
S
C
18
ALA
C
A
C
19
ALA
C
A
C
20
SER
C
S
C
21
SER
C
S
C
22
SER
C
S
C
23
ASN
H
N
C
24
TYR
H
Y
H
25
CYS
H
C
H
26
ASN
H
N
H
27
GLN
H
Q
H
28
MET
H
M
H
29
MET
H
M
H
...
...
...
...
...
94
CYS
T
C
C
95
ALA
C
A
E
96
TYR
E
Y
E
97
LYS
E
K
E
98
THR
E
T
E
99
THR
C
T
E
100
GLN
C
Q
E
101
ALA
E
A
E
102
ASN
E
N
C
103
LYS
C
K
C
104
HIS
C
H
E
105
ILE
C
I
E
106
ILE
E
I
E
107
VAL
E
V
E
108
ALA
E
A
E
109
CYS
E
C
E
110
GLU
E
E
C
111
GLY
T
G
C
112
ASN
T
N
C
113
PRO
T
P
C
114
TYR
T
Y
C
115
VAL
E
V
E
116
PRO
E
P
E
117
VAL
E
V
E
118
HIS
B
H
E
119
PHE
C
F
C
120
ASP
C
D
C
121
ALA
C
A
C
122
SER
C
S
C
123
VAL
C
V
C
124 rows × 4 columns
In [36]:
d = pd.concat([b,c], axis=1)
In [22]:
c = data[["seq", "q3"]]
In [23]:
c
Out[23]:
seq
q3
0
K
C
1
E
C
2
T
C
3
A
H
4
A
H
5
A
H
6
K
H
7
F
H
8
E
H
9
R
H
10
Q
C
11
H
C
12
M
C
13
D
C
14
S
C
15
S
C
16
T
C
17
S
C
18
A
C
19
A
C
20
S
C
21
S
C
22
S
C
23
N
C
24
Y
H
25
C
H
26
N
H
27
Q
H
28
M
H
29
M
H
...
...
...
94
C
C
95
A
E
96
Y
E
97
K
E
98
T
E
99
T
E
100
Q
E
101
A
E
102
N
C
103
K
C
104
H
E
105
I
E
106
I
E
107
V
E
108
A
E
109
C
E
110
E
C
111
G
C
112
N
C
113
P
C
114
Y
C
115
V
E
116
P
E
117
V
E
118
H
E
119
F
C
120
D
C
121
A
C
122
S
C
123
V
C
124 rows × 2 columns
In [ ]:
Content source: luwei0917/awsemmd_script
Similar notebooks: