In [1]:
import numpy  as np
import pandas as pd

In [2]:
# Read the HTML listing, one entry per line.
# Plain file iteration is used instead of np.loadtxt because loadtxt treats '#'
# as a comment marker by default (cutting short any line that contains '#'),
# and recent NumPy releases refuse a newline as the delimiter.
HTML_LIST_PATH = '../../../../Downloads/listahtml.txt'
with open(HTML_LIST_PATH) as html_file:
    # Blank lines are skipped; only the line terminator is stripped from each entry.
    # The result is always a 1-D string array, even for a single-line file,
    # so html_list[0] and html_list.size keep working downstream.
    html_list = np.array(
        [line.rstrip('\r\n') for line in html_file if line.strip()],
        dtype=str,
    )

In [3]:
# Scratch expression for pulling the quoted file name out of the first line: html_list[0].split('=')[-1].split('>')[-3]

In [4]:
# Trial run of the extraction on the first entry only:
# keep the text after the last '=', then take the third-from-last piece
# once that text is split on '>'.
first_entry = html_list[0]
after_last_equals = first_entry.split('=')[-1]
teste = after_last_equals.split('>')[-3]

In [5]:
# Preview the trial value with its first and last characters dropped
# (the surrounding double quotes in this sample).
teste[1:-1]


Out[5]:
'G02_Y3_001_001.fit'

In [6]:
def _filename_from_html(html_line):
    """Return the file name embedded in one line of the HTML listing.

    The text after the last '=' is split on '>', the third-from-last piece
    is taken, and its first and last characters (the surrounding double
    quotes) are dropped.

    Raises IndexError if the line has fewer than three '>'-separated pieces
    after its last '='.
    """
    quoted_name = html_line.split('=')[-1].split('>')[-3]
    return quoted_name[1:-1]                             # removing " "


# np.atleast_1d guards against a 0-d html_list (which np.loadtxt returns for a
# single-line input), so iteration works for any number of entries.
files = np.array([_filename_from_html(line) for line in np.atleast_1d(html_list)])
# print() with a single argument behaves the same on Python 2 and Python 3;
# the former `print files` statement is a SyntaxError on Python 3.
print(files)


['G02_Y3_001_001.fit' 'G02_Y3_001_002.fit' 'G02_Y3_001_003.fit' ...
 'G15_Y6_108_397.fit' 'G15_Y6_108_398.fit' 'G15_Y6_108_399.fit']

In [7]:
# Wrap the array of file names in a single-column DataFrame so it can be exported.
files_df = pd.DataFrame(data=files)

In [10]:
# Write the file names to disk, one per line, without the index column or a header row.
output_path = './../../../GAMADR3_SPECTRA/GAMADR3_ALL.txt'
files_df.to_csv(output_path, header=False, index=False)

In [ ]: