In [115]:
import matplotlib.pyplot as plt  # to start
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('classic')
import numpy as np

"""version when the same protein gets SMD2_2 and SMD2_1"""

# Spreadsheet that maps each spliceosome component to its chain id(s) in every
# structure.  The generator cells read the columns 'protein', 'color', 'chain'
# and one column per structure (named by the first item of each `strucs` entry).
f = "/Users/magnus/work-src/rna-tools/rna_tools/tools/PyMOL4RNA/Spliceosome_PyMOL.xlsx"
df = pd.read_excel(f)

# One entry per structure to process:
#   [spreadsheet column with the chain ids, PDB id, suffix appended to object names]
# Commented-out entries are structures that are currently switched off.
strucs = [
    # ['S1_B_5zwo', '5zwo', '_B5zwo'],
    # ['S2_Bact_5gm6', '5gm6', '_Ba5gm6'],
    # ['S3_C_5lj3', '5lj3', '_C5lj3'],
    # ['S4_Cstar_5mps', '5mps', '_Cs5mps'],
    # ['S5_P_6exn', '6exn', '_P6exn'],
    # ['S5_P_5ylz', '5ylz', '_P5ylz'],
    # ['S6_ILS_5y88', '5y88', '_I5y88'],
    # ['SX_pX_3jb9', '3jb9', '_3jb9'],
    # ['S6_hP_6icz', '6icz', '_hP_6icz'],
    ['hBa_6ff7', '6ff7', '_hBa_6ff7'],
    ['hC_5yzg', '5yzg', '_hC_5yzg'],
    ['hX_5xjc', '5xjc', '_hX_5xjc'],
]

# Textual form of the PDB-id list, e.g. "['6ff7', '5yzg', '5xjc']"; it is
# spliced verbatim into the source of the generated PyMOL scripts.
strs = str([entry[1] for entry in strucs])

In [117]:
# Build and save code_for_spl.py: a PyMOL script defining spl_extract(), which
# extracts every annotated chain of each structure in `strucs` into its own
# named object.  Relies on `df`, `strucs` and `strs` from the setup cell.
#strucs = [['S3_C_5lj3', '5lj3']] # for debugging

# Script header.  The backslash in the progress message is written as four
# backslashes here so that the generated file contains a doubled backslash
# (a valid escape that still prints one backslash) instead of the
# unrecognised escape sequence backslash-space.
txt = """
try:
    from pymol import cmd
except ImportError:
    print("PyMOL Python lib is missing")
    # sys.exit(0)

def spl_extract():
 for name in cmd.get_names("all"):
    #if name in """ + strs + """: # this should be auto
    print(" \\\\ Extracting mode for %s" % name)
"""

for struc_col, pdb_id, complex_code in strucs:
    # One `if` branch per structure, matched on the PDB id inside the object name.
    txt += "    if '" + pdb_id + "' in name.lower():\n"
    for _, row in df.iterrows():
        chains = str(row[struc_col]).strip()
        # 'nan' (empty cell) and '-' both mean "component absent in this structure".
        if chains in ('nan', '-'):
            continue
        # todo fix  cmd.extract("CDC40 (PRP17, SLU4, XRS2)", "chain n")
        # Only the first word of the protein label is used as the object name.
        # Kept in a local: rows yielded by iterrows() must not be written to.
        protein = row['protein'].split()[0]
        if ',' in chains:
            # Several chains for one protein (e.g. "a,P,h"): emit numbered
            # objects <protein>_1<suffix>, <protein>_2<suffix>, ...
            # cmd.extract(name, selection, source_state, target_state)
            for number, chain in enumerate(chains.split(','), start=1):
                txt += '        cmd.extract("{}_{}{}", "chain {} and {}")\n'.format(
                    protein, number, complex_code, chain.strip(), pdb_id)
        else:
            txt += '        cmd.extract("{}{}", "chain {} and {}")\n'.format(
                protein, complex_code, chains, pdb_id)
    # Whatever is left in the original object is renamed "unknown_other<suffix>",
    # then everything carrying the suffix is grouped and the object list ordered.
    txt += '        cmd.set_name("' + pdb_id + '", "unknown_other' + complex_code + '")\n'
    txt += '        cmd.group("' + complex_code.replace('_', '') + '", "*' + complex_code + '")\n'
    txt += '        cmd.do("order *, yes")\n'
print(txt)

# NOTE(review): the spreadsheet is read from /Users/magnus/... while the script
# is written under /home/magnus/...; confirm both locations exist on this machine.
# A dedicated handle name is used so the global `f` (Excel path) is not rebound.
with open('/home/magnus/work-src/rna-tools/rna_tools/tools/PyMOL4RNA/code_for_spl.py', 'w') as out_file:
    out_file.write(txt)


try:
    from pymol import cmd
except ImportError:
    print("PyMOL Python lib is missing")
    # sys.exit(0)

# NOTE(review): this text appears to be the printed output of the generator
# cell above (it matches that cell's template and loop); regenerate it rather
# than editing it by hand.
#
# spl_extract: for every name returned by cmd.get_names("all") whose lower-cased
# form contains one of the PDB ids below, call cmd.extract(new_object, selection)
# for each listed chain, rename the object with the bare PDB id to
# "unknown_other<suffix>", group the names matching "*<suffix>" and finally
# send "order *, yes" to PyMOL.
def spl_extract():
 for name in cmd.get_names("all"):
    #if name in ['6ff7', '5yzg', '5xjc']: # this should be auto
    # NOTE(review): backslash-space is not a recognised escape sequence;
    # newer Python versions warn about it.
    print(" \ Extracting mode for %s" % name)
    # --- 6ff7, object suffix _hBa_6ff7 ---
    if '6ff7' in name.lower():
        cmd.extract("CWC15_hBa_6ff7", "chain R and 6ff7")
        cmd.extract("CWC2_hRBM22_hBa_6ff7", "chain P and 6ff7")
        cmd.extract("U2_hBa_6ff7", "chain 2 and 6ff7")
        cmd.extract("U5_hBa_6ff7", "chain 5 and 6ff7")
        cmd.extract("U6_hBa_6ff7", "chain 6 and 6ff7")
        cmd.extract("Intron_hBa_6ff7", "chain Z and 6ff7")
        # Remaining, un-extracted part of the structure.
        cmd.set_name("6ff7", "unknown_other_hBa_6ff7")
        cmd.group("hBa6ff7", "*_hBa_6ff7")
        cmd.do("order *, yes")
    # --- 5yzg, object suffix _hC_5yzg ---
    if '5yzg' in name.lower():
        cmd.extract("CWC15_hC_5yzg", "chain P and 5yzg")
        cmd.extract("CWC2_hRBM22_hC_5yzg", "chain O and 5yzg")
        cmd.extract("CWC25_hC_5yzg", "chain X and 5yzg")
        cmd.extract("PRP16_hDHX38_hC_5yzg", "chain Z and 5yzg")
        cmd.extract("U2_hC_5yzg", "chain H and 5yzg")
        cmd.extract("U5_hC_5yzg", "chain B and 5yzg")
        cmd.extract("U6_hC_5yzg", "chain F and 5yzg")
        cmd.extract("Intron_hC_5yzg", "chain G and 5yzg")
        # Remaining, un-extracted part of the structure.
        cmd.set_name("5yzg", "unknown_other_hC_5yzg")
        cmd.group("hC5yzg", "*_hC_5yzg")
        cmd.do("order *, yes")
    # --- 5xjc, object suffix _hX_5xjc ---
    if '5xjc' in name.lower():
        cmd.extract("CWC15_hX_5xjc", "chain P and 5xjc")
        cmd.extract("CWC25_hX_5xjc", "chain X and 5xjc")
        cmd.extract("U2_hX_5xjc", "chain H and 5xjc")
        cmd.extract("U5_hX_5xjc", "chain B and 5xjc")
        cmd.extract("U6_hX_5xjc", "chain F and 5xjc")
        cmd.extract("Intron_hX_5xjc", "chain G and 5xjc")
        # Remaining, un-extracted part of the structure.
        cmd.set_name("5xjc", "unknown_other_hX_5xjc")
        cmd.group("hX5xjc", "*_hX_5xjc")
        cmd.do("order *, yes")


In [113]:
# Build and save code_for_color_spl.py: a PyMOL script defining spl_color(),
# which colours every annotated chain (and, for single-chain components, the
# object named <protein><suffix>) of each structure in `strucs`.
# Relies on `df`, `strucs` and `strs` from the setup cell.

# Script header.  The backslash in the progress message is written as four
# backslashes here so that the generated file contains a doubled backslash
# (a valid escape that still prints one backslash) instead of the
# unrecognised escape sequence backslash-space.
txt = """
try:
    from pymol import cmd
except ImportError:
    print("PyMOL Python lib is missing")
    # sys.exit(0)

def spl_color():
 for name in cmd.get_names("all"):
  # cmd.do('color grey50') # off gray
  if name in """ + strs + """: # this should be auto
    print(" \\\\ Extracting mode for %s" % name)
"""

for struc_col, pdb_id, complex_code in strucs:
    # One `if` branch per structure, matched on the PDB id inside the object name.
    txt += "    if '" + pdb_id + "' in name.lower():\n"
    branch_lines = []
    for _, row in df.iterrows():
        chains = str(row[struc_col]).strip()
        # 'nan' (empty cell) and '-' both mean "component absent in this structure".
        if chains in ('nan', '-'):
            continue
        # Only the first word of the protein label is used as the object name.
        # Kept in a local: rows yielded by iterrows() must not be written to.
        protein = row['protein'].split()[0]
        # Plain concatenation on purpose: a missing (non-string) colour still
        # fails loudly instead of being written out as "nan".
        color_cmd = '        cmd.do("color ' + row['color'] + ', '
        if ',' in chains:
            # Several chains for one protein (e.g. "a,P,h"): colour each chain.
            for chain in chains.split(','):
                branch_lines.append(
                    color_cmd + 'chain ' + chain.strip() + ' and ' + pdb_id + '")\n')
        else:
            branch_lines.append('        #' + protein + '\n')
            # colour the chain selection inside the structure ...
            branch_lines.append(color_cmd + 'chain ' + chains + ' and ' + pdb_id + '")\n')
            # ... and the object of that name: color <object>
            branch_lines.append(color_cmd + protein + complex_code + '")\n')
    if not branch_lines:
        # Keep the generated `if` syntactically valid when nothing is annotated.
        branch_lines.append('        pass\n')
    txt += ''.join(branch_lines)
print(txt)

# A dedicated handle name is used so the global `f` (Excel path) is not rebound.
with open('/home/magnus/work-src/rna-tools/rna_tools/tools/PyMOL4RNA/code_for_color_spl.py', 'w') as out_file:
    out_file.write(txt)


try:
    from pymol import cmd
except ImportError:
    print("PyMOL Python lib is missing")
    # sys.exit(0)

# NOTE(review): this text appears to be the printed output of the generator
# cell above (it matches that cell's template and loop); regenerate it rather
# than editing it by hand.
#
# spl_color: for every name returned by cmd.get_names("all") that is exactly one
# of the listed PDB ids, print a progress line and send "color <colour>, <target>"
# commands through cmd.do -- once for the chain selection within the structure
# and once for the object named <component><suffix>.
def spl_color():
 for name in cmd.get_names("all"):
  # cmd.do('color grey50') # off gray
  if name in ['6ff7', '5yzg', '5xjc']: # this should be auto
    # NOTE(review): backslash-space is not a recognised escape sequence;
    # newer Python versions warn about it.
    print(" \ Extracting mode for %s" % name)
    # --- 6ff7, object suffix _hBa_6ff7 ---
    if '6ff7' in name.lower():
        #CWC15
        cmd.do("color orange, chain R and 6ff7")
        cmd.do("color orange, CWC15_hBa_6ff7")
        #CWC2_hRBM22
        cmd.do("color ruby, chain P and 6ff7")
        cmd.do("color ruby, CWC2_hRBM22_hBa_6ff7")
        #U2
        cmd.do("color forest, chain 2 and 6ff7")
        cmd.do("color forest, U2_hBa_6ff7")
        #U5
        cmd.do("color density, chain 5 and 6ff7")
        cmd.do("color density, U5_hBa_6ff7")
        #U6
        cmd.do("color firebrick, chain 6 and 6ff7")
        cmd.do("color firebrick, U6_hBa_6ff7")
        #Intron
        cmd.do("color black, chain Z and 6ff7")
        cmd.do("color black, Intron_hBa_6ff7")
    # --- 5yzg, object suffix _hC_5yzg ---
    if '5yzg' in name.lower():
        #CWC15
        cmd.do("color orange, chain P and 5yzg")
        cmd.do("color orange, CWC15_hC_5yzg")
        #CWC2_hRBM22
        cmd.do("color ruby, chain O and 5yzg")
        cmd.do("color ruby, CWC2_hRBM22_hC_5yzg")
        #CWC25
        cmd.do("color deepteal, chain X and 5yzg")
        cmd.do("color deepteal, CWC25_hC_5yzg")
        #PRP16_hDHX38
        cmd.do("color smudge, chain Z and 5yzg")
        cmd.do("color smudge, PRP16_hDHX38_hC_5yzg")
        #U2
        cmd.do("color forest, chain H and 5yzg")
        cmd.do("color forest, U2_hC_5yzg")
        #U5
        cmd.do("color density, chain B and 5yzg")
        cmd.do("color density, U5_hC_5yzg")
        #U6
        cmd.do("color firebrick, chain F and 5yzg")
        cmd.do("color firebrick, U6_hC_5yzg")
        #Intron
        cmd.do("color black, chain G and 5yzg")
        cmd.do("color black, Intron_hC_5yzg")
    # --- 5xjc, object suffix _hX_5xjc ---
    if '5xjc' in name.lower():
        #CWC15
        cmd.do("color orange, chain P and 5xjc")
        cmd.do("color orange, CWC15_hX_5xjc")
        #CWC25
        cmd.do("color deepteal, chain X and 5xjc")
        cmd.do("color deepteal, CWC25_hX_5xjc")
        #U2
        cmd.do("color forest, chain H and 5xjc")
        cmd.do("color forest, U2_hX_5xjc")
        #U5
        cmd.do("color density, chain B and 5xjc")
        cmd.do("color density, U5_hX_5xjc")
        #U6
        cmd.do("color firebrick, chain F and 5xjc")
        cmd.do("color firebrick, U6_hX_5xjc")
        #Intron
        cmd.do("color black, chain G and 5xjc")
        cmd.do("color black, Intron_hX_5xjc")


In [114]:
# Build and save code_for_color_spl_objects.py: a PyMOL script defining
# spl_color(), which colours the objects named <protein><suffix> (or
# <protein>_<n><suffix> for multi-chain components) of each structure in `strucs`.
# Relies on `df` and `strucs` from the setup cell.
#strucs = [['S3_C_5lj3', '5lj3']] # for debugging
txt = """
try:
    from pymol import cmd
except ImportError:
    print("PyMOL Python lib is missing")
    # sys.exit(0)

def spl_color():
  #cmd.do('color grey50') # PRP8
"""

for struc_col, _pdb_id, complex_code in strucs:
    # The generated commands are unconditional; `if True:` only provides the
    # indented block each structure's commands live in.
    txt += '  if True: # fake if, just a quick hack\n'
    branch_lines = []
    for _, row in df.iterrows():
        chains = str(row[struc_col]).strip()
        # 'nan' (empty cell) and '-' both mean "component absent in this structure".
        if chains in ('nan', '-'):
            continue
        # Only the first word of the protein label is used as the object name.
        # Kept in a local: rows yielded by iterrows() must not be written to.
        protein = row['protein'].split()[0]
        # Plain concatenation on purpose: a missing (non-string) colour still
        # fails loudly instead of being written out as "nan".
        color_cmd = '        cmd.do("color ' + row['color'] + ', ' + protein
        if ',' in chains:
            # Several chains for one protein (e.g. "a,P,h"): the extracted
            # objects are numbered <protein>_1<suffix>, <protein>_2<suffix>, ...
            for number in range(1, len(chains.split(',')) + 1):
                branch_lines.append(color_cmd + '_' + str(number) + complex_code + '")\n')
        else:
            branch_lines.append(color_cmd + complex_code + '")\n')
    if not branch_lines:
        # Keep the generated `if` syntactically valid when nothing is annotated.
        branch_lines.append('        pass\n')
    txt += ''.join(branch_lines)
print(txt)

# A dedicated handle name is used so the global `f` (Excel path) is not rebound.
with open('/home/magnus/work-src/rna-tools/rna_tools/tools/PyMOL4RNA/code_for_color_spl_objects.py', 'w') as out_file:
    out_file.write(txt)


try:
    from pymol import cmd
except ImportError:
    print("PyMOL Python lib is missing")
    # sys.exit(0)

# NOTE(review): this text appears to be the printed output of the generator
# cell above (it matches that cell's template and loop); regenerate it rather
# than editing it by hand.
# NOTE(review): an earlier generated block in this file also defines a function
# called spl_color; the two are written to different files, but would shadow
# each other if both were loaded into the same session -- confirm intended use.
#
# spl_color: unconditionally send "color <colour>, <object>" through cmd.do for
# each listed object name, one `if True:` group per structure suffix.
def spl_color():
  #cmd.do('color grey50') # PRP8
  # --- objects with suffix _hBa_6ff7 ---
  if True: # fake if, just a quick hack
        cmd.do("color orange, CWC15_hBa_6ff7")
        cmd.do("color ruby, CWC2_hRBM22_hBa_6ff7")
        cmd.do("color forest, U2_hBa_6ff7")
        cmd.do("color density, U5_hBa_6ff7")
        cmd.do("color firebrick, U6_hBa_6ff7")
        cmd.do("color black, Intron_hBa_6ff7")
  # --- objects with suffix _hC_5yzg ---
  if True: # fake if, just a quick hack
        cmd.do("color orange, CWC15_hC_5yzg")
        cmd.do("color ruby, CWC2_hRBM22_hC_5yzg")
        cmd.do("color deepteal, CWC25_hC_5yzg")
        cmd.do("color smudge, PRP16_hDHX38_hC_5yzg")
        cmd.do("color forest, U2_hC_5yzg")
        cmd.do("color density, U5_hC_5yzg")
        cmd.do("color firebrick, U6_hC_5yzg")
        cmd.do("color black, Intron_hC_5yzg")
  # --- objects with suffix _hX_5xjc ---
  if True: # fake if, just a quick hack
        cmd.do("color orange, CWC15_hX_5xjc")
        cmd.do("color deepteal, CWC25_hX_5xjc")
        cmd.do("color forest, U2_hX_5xjc")
        cmd.do("color density, U5_hX_5xjc")
        cmd.do("color firebrick, U6_hX_5xjc")
        cmd.do("color black, Intron_hX_5xjc")


In [ ]:


In [ ]:


In [ ]:


In [ ]: