In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import subprocess
%matplotlib inline
In [2]:
GlpG_seq = "ERAGPVTWVMMIACVVVFIAMQILGDQEVMLWLAWPFDPTLKFEFWRYFTHALMHFSLMHILFNLLWWWYLGGAVEKRLGSGKLIVITLISALLSGYVQQKFSGPWFGGLSGVVYALMGYVWLRGERDPQSGIYLQRGLIIFALIWIVAGWFDLFGMSMANGAHIAGLAVGLAMAFVDSLN"
In [1]:
t = "MTRTEIIRELERSLRLQLVLAIFLMALLIVLLWLQQNGSSNNNVNYLLIVILVLVLVIVALAVIQKYLVEQLKRQADPTDDSRTEIIRELERSLRLQLVLAIFLMALLIVLLWLQQNGSSNNNVNYLLIVILVLVLVIVALAVTQKYLVEQLKRQD"
In [2]:
scTMHC2_seq = "MTRTEIIRELERSLRLQLVLAIFLMALLIVLLWLQQNGSSNNNVNYLLIVILVLVLVIVALAVIQKYLVEQLKRQADPTDDSRTEIIRELERSLRLQLVLAIFLMALLIVLLWLQQNGSSNNNVNYLLIVILVLVLVIVALAVTQKYLVEQLKRQD"
In [2]:
def getFromTerminal(CMD):
return subprocess.Popen(CMD,stdout=subprocess.PIPE,shell=True).communicate()[0].decode()
In [3]:
len(scTMHC2_seq)
In [ ]:
In [29]:
seq = scTMHC2_seq
In [4]:
seq = GlpG_seq
len(seq)
Out[4]:
In [5]:
seq_dataFrame = pd.DataFrame({"oneLetterCode":list(seq)})
HFscales = pd.read_table("/Users/weilu/opt/small_script/Whole_residue_HFscales.txt")
# Octanol Scale
# code = {"GLY" : "G", "ALA" : "A", "LEU" : "L", "ILE" : "I",
# "ARG+" : "R", "LYS+" : "K", "MET" : "M", "CYS" : "C",
# "TYR" : "Y", "THR" : "T", "PRO" : "P", "SER" : "S",
# "TRP" : "W", "ASP-" : "D", "GLU-" : "E", "ASN" : "N",
# "GLN" : "Q", "PHE" : "F", "HIS+" : "H", "VAL" : "V",
# "M3L" : "K", "MSE" : "M", "CAS" : "C" }
code = {"GLY" : "G", "ALA" : "A", "LEU" : "L", "ILE" : "I",
"ARG+" : "R", "LYS+" : "K", "MET" : "M", "CYS" : "C",
"TYR" : "Y", "THR" : "T", "PRO" : "P", "SER" : "S",
"TRP" : "W", "ASP-" : "D", "GLU-" : "E", "ASN" : "N",
"GLN" : "Q", "PHE" : "F", "HIS0" : "H", "VAL" : "V",
"M3L" : "K", "MSE" : "M", "CAS" : "C" }
HFscales_with_oneLetterCode = HFscales.assign(oneLetterCode = HFscales.AA.str.upper().map(code)).dropna()
data = seq_dataFrame.merge(HFscales_with_oneLetterCode, on="oneLetterCode", how="left")
In [19]:
print(HFscales.query("AA !='His+' and AA != 'Glu0'")[["AA", "DGwoct"]].to_latex())
In [8]:
HFscales.to_latex()
Out[8]:
In [6]:
data.plot("oneLetterCode", "DGwoct", kind="bar")
plt.show()
In [10]:
data
Out[10]:
In [9]:
data[24:57].sum()
Out[9]:
In [6]:
data.query("oneLetterCode == 'H'")
Out[6]:
In [31]:
HFscales
Out[31]:
In [6]:
x = HFscales_with_oneLetterCode["DGwif"].values
y = HFscales_with_oneLetterCode["DGwoct"].values
In [7]:
z = data["DGwoct"].values
np.savetxt("/Users/weilu/Research/server/apr_2018/rg_0.15_lipid_1.0_mem_1_go_0.8/simulation/dis_30.0/1/original_9/test", z, fmt="%.2f")
In [23]:
z = data["Oct-IF"].values
np.savetxt("/Users/weilu/Research/server/apr_2018/zim_oct_if", z, fmt="%.2f")
In [8]:
data.query("oneLetterCode == 'H'")
Out[8]:
In [10]:
import scipy
from scipy import stats
In [11]:
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(x,y)
In [12]:
r_value
Out[12]:
In [13]:
intercept
Out[13]:
In [11]:
HFscales_with_oneLetterCode.plot("Oct-IF", "DGwoct", kind="scatter")
Out[11]:
In [ ]:
In [14]:
HFscales_with_oneLetterCode.plot("DGwif", "DGwoct", kind="scatter")
Out[14]:
In [15]:
HFscales_with_oneLetterCode.plot("oneLetterCode", "DGwoct", kind="bar")
Out[15]:
In [12]:
HFscales_with_oneLetterCode.plot("oneLetterCode", "Oct-IF", kind="bar")
Out[12]:
In [22]:
def isHelix(id):
helices_list = [(94,114), (147,168), (171, 192), (200, 217), (226, 241), (250, 269)]
for count, (i, j) in enumerate(helices_list):
if id >= i and id <= j:
return 1+count
return 0
data["resId"] = 91 + data.index
data["isHelix"] = data["resId"].apply(isHelix)
In [23]:
def isHelix2(id):
helices_list = [(95,114), (148,169), (171, 193), (201, 217), (227, 241), (251, 268)]
for count, (i, j) in enumerate(helices_list):
if id >= i and id <= j:
return 1+count
return 0
data["resId"] = 91 + data.index
data["isHelix2"] = data["resId"].apply(isHelix2)
In [24]:
def isHelix3(id):
helices_list = [(91,114), (115,169), (171, 192), (193, 217), (226, 241), (242, 268)]
for count, (i, j) in enumerate(helices_list):
if id >= i and id <= j:
return 1+count
return 0
data["resId"] = 91 + data.index
data["isHelix3"] = data["resId"].apply(isHelix3)
In [25]:
def isHelix4(id):
helices_list = [(91,114), (115,170), (174, 192), (193, 213), (229, 241), (242, 267)]
for count, (i, j) in enumerate(helices_list):
if id >= i and id <= j:
return 1+count
return 0
data["resId"] = 91 + data.index
data["isHelix4"] = data["resId"].apply(isHelix4)
In [26]:
data.groupby("isHelix").sum()
Out[26]:
In [9]:
data.groupby("isHelix4").sum()
Out[9]:
In [12]:
data.groupby("isHelix4").sum().sum(axis=0)
Out[12]:
In [33]:
a = data.groupby("isHelix3").sum().drop(["isHelix","isHelix2", "isHelix4"], axis=1)[1:].reset_index()
a["group"] = (a["isHelix3"]+1) // 2
b = a.groupby("group").sum()
b.drop(["resId", "isHelix3"],axis=1).plot()
Out[33]:
In [32]:
a = data.groupby("isHelix2").sum().drop(["isHelix","isHelix3", "isHelix4"], axis=1)[1:].reset_index()
a["group"] = (a["isHelix2"]+1) // 2
b = a.groupby("group").sum()
b.drop(["resId", "isHelix2"],axis=1).plot()
Out[32]:
In [31]:
a = data.groupby("isHelix4").sum()[1:].reset_index()
a["group"] = (a["isHelix4"]+1) // 2
b = a.groupby("group").sum()
b.drop(["resId", "isHelix4"],axis=1).plot()
Out[31]:
In [59]:
data[110:]
Out[59]:
In [ ]:
b = a.groupby("group").sum()
In [ ]:
In [18]:
data.groupby("isHelix").sum()
Out[18]:
In [20]:
data.groupby("isHelix2").sum()
Out[20]:
In [22]:
data.groupby("isHelix3").sum()
Out[22]:
In [40]:
# pd.options.display.max_rows = 999
# data
In [ ]:
data.groupby("isHelix").sum()
In [41]:
data.groupby("isHelix").sum()
Out[41]:
In [32]:
a = data.groupby("isHelix").sum()[1:].reset_index()
a["group"] = (a["isHelix"]+1) // 2
b = a.groupby("group").sum()
b.drop(["resId", "isHelix"],axis=1).plot()
In [33]:
In [34]:
In [35]:
b
Out[35]:
In [21]:
b
Out[21]:
In [22]:
-4.59*1.6
Out[22]:
In [23]:
a.reset_index(drop=True)
Out[23]:
In [24]:
b.drop(["resId", "isHelix"],axis=1).plot()
Out[24]:
In [25]:
6*0.6
Out[25]:
In [26]:
b.drop(["resId", "isHelix", "Oct-IF"],axis=1).plot()
Out[26]:
In [30]:
data = pd.read_feather("/Users/weilu/Research/data/pulling/GlpG_Hydrophobicity.feather")
In [ ]:
# data.join(zim123).to_feather("/Users/weilu/Research/data/pulling/GlpG_Hydrophobicity.feather")
In [31]:
data.plot("oneLetterCode", "DGwoct", kind="bar")
plt.show()
In [34]:
def V_membrane(z, kbin=0.2, memb=14.4):
return 0.5*(np.tanh(kbin*(z+memb)) + np.tanh(kbin*(memb-z)))
kbin = 0.2
memb = 14.4
z = np.linspace(-30,30,100)
v = V_membrane(z, kbin, memb)
plt.plot(z,v)
Out[34]:
In [35]:
def V_membrane(z, kbin=0.2, memb=14.4):
return 0.5*(np.tanh(kbin*(z+memb)) + np.tanh(kbin*(memb-z)))
kbin = 0.4
memb = 14.4
z = np.linspace(-30,30,100)
v = V_membrane(z, kbin, memb)
plt.plot(z,v)
Out[35]:
In [33]:
V_membrane(-20)/V_membrane(0)
Out[33]:
In [34]:
V_membrane(-10)/V_membrane(0)
Out[34]:
In [ ]:
, (174, 192), (193, 213), (229, 241), (242, 267)
In [60]:
def isHelix6(id):
helices_list = [(93,113), (132,163), (174, 190), (197, 213), (229, 241), (242, 267)]
for count, (i, j) in enumerate(helices_list):
if id >= i and id <= j:
return 1+count
return 0
data["resId"] = 91 + data.index
data["isHelix6"] = data["resId"].apply(isHelix6)
In [61]:
data.groupby("isHelix6").sum()
Out[61]:
In [65]:
a = data.groupby("isHelix6").sum()[1:].reset_index()
a["group"] = (a["isHelix6"]+1) // 2
b = a.groupby("group").sum()
# b.drop(["resId", "isHelix"],axis=1).plot()
b[["DGwif", "DGwoct", "Oct-IF"]].plot()
Out[65]:
In [42]:
with open("/Users/weilu/Research/2xov_ca.dat", "r") as f:
for line in f:
# print(line)
a = float(line.split()[-4])
# print(a)
i = int(line.split()[-7])
if a > 14.4 or a < -14.4:
c = 1
else:
c = 0
print(i,a, c)
In [ ]: