In [ ]:
hightemp.txtは,日本の最高気温の記録を「都道府県」「地点」「℃」「日」のタブ区切り形式で格納したファイルである.以下の処理を行うプログラムを作成し,hightemp.txtを入力ファイルとして実行せよ.さらに,同様の処理をUNIXコマンドでも実行し,プログラムの実行結果を確認せよ.
In [4]:
# Count the lines of hightemp.txt by consuming the file one line at a time.
with open("hightemp.txt") as f:
    count = sum(1 for _line in f)
print(count)
In [5]:
%%bash
# Shell counterpart of the Python line count: report the number of lines in hightemp.txt.
wc -l hightemp.txt
In [8]:
def replace_tab2space(file):
    """Print the contents of ``file`` with every tab replaced by one space.

    Args:
        file: Path of the text file to read.

    Each line is printed without its line terminator; ``print`` supplies
    the line break. Nothing is returned.
    """
    with open(file) as src:
        for row in src:
            print(row.rstrip('\n').replace('\t', ' '))
# Show hightemp.txt with each tab turned into a single space.
replace_tab2space('hightemp.txt')
In [7]:
%%bash
# Shell counterpart: expand with 1-column tab stops turns every tab into one space.
expand -t 1 hightemp.txt
In [17]:
def write_col(col):
    """Extract one tab-separated column of hightemp.txt into ``col<col>.txt``.

    Args:
        col: 1-based number of the column to extract.

    The values are written one per line to ``col{col}.txt`` in the current
    working directory. Nothing is returned.
    """
    with open("hightemp.txt", 'r') as src:
        # Remove the line terminator before splitting. Otherwise the last
        # column keeps its own "\n" and every value written below would be
        # followed by an extra blank line.
        values = [row.rstrip('\n').split('\t')[col - 1] for row in src]
    with open('col{}.txt'.format(col), 'w') as dst:
        dst.writelines(value + "\n" for value in values)
# Save the first and second columns of hightemp.txt as col1.txt and col2.txt.
write_col(1)
write_col(2)
テキスト・ファイルの各行から一部分を取り出す
In [20]:
%%bash
# Shell counterpart: cut extracts tab-delimited field 1 and field 2 into separate files.
cut -f 1 hightemp.txt > cut_col1.txt
cut -f 2 hightemp.txt > cut_col2.txt
In [23]:
from itertools import zip_longest

# Merge col1.txt and col2.txt line by line into marge.txt, joining each pair
# of values with a tab (Python counterpart of `paste col1.txt col2.txt`).
with open('col1.txt', 'r') as f1, open('col2.txt', 'r') as f2:
    col1 = [i.rstrip('\n') for i in f1]
    col2 = [i.rstrip('\n') for i in f2]

# zip_longest pads the shorter input with empty fields, so inputs of
# different length neither raise IndexError nor silently lose trailing lines.
# join builds the output in one pass instead of repeated string concatenation.
writing = "".join(
    left + '\t' + right + '\n'
    for left, right in zip_longest(col1, col2, fillvalue='')
)
with open('marge.txt', 'w') as f:
    f.write(writing)
In [25]:
%%bash
# Shell counterpart: paste joins corresponding lines of the two files with a tab.
paste col1.txt col2.txt > paste_marge.txt
In [30]:
def head(N):
    """Return the first ``N`` lines of marge.txt as a single string.

    Args:
        N: Number of leading lines to return. Values <= 0 yield "".

    Returns:
        The selected lines concatenated, line terminators included. If the
        file has fewer than ``N`` lines, the whole file is returned.
    """
    from itertools import islice

    if N <= 0:
        return ""
    with open('marge.txt') as src:
        # islice stops after exactly N lines (the previous [:N+1] slice
        # returned one line too many) and avoids loading the whole file.
        return "".join(islice(src, N))
# Display the leading lines of marge.txt returned by head(3).
print(head(3))
ファイルの先頭部分を表示する
In [39]:
%%bash
# Shell counterpart: print the first 3 lines of marge.txt.
head -n 3 marge.txt
In [47]:
def tail(N):
    """Return the last ``N`` lines of marge.txt as a single string.

    Args:
        N: Number of trailing lines to return. Values <= 0 yield "".

    Returns:
        The selected lines in file order, concatenated with their line
        terminators. If the file has fewer than ``N`` lines, the whole file
        is returned.
    """
    from collections import deque

    if N <= 0:
        return ""
    with open('marge.txt') as src:
        # A bounded deque retains only the final N lines, in file order.
        # (The previous [-1:-N:-1] slice produced N-1 lines, reversed.)
        return "".join(deque(src, maxlen=N))
# Display the trailing lines of marge.txt returned by tail(3).
print(tail(3))
In [55]:
def split_flie(name, N):
    """Return the first ``N`` lines of the file ``name`` as one string.

    Args:
        name: Path of the text file to read.
        N: Slice bound applied to the list of lines (``lines[:N]``).

    Returns:
        The selected lines concatenated, line terminators included.

    NOTE(review): only the leading chunk is returned; the remaining lines
    are not emitted as further pieces. Confirm whether a full N-line split
    (as `split -l N` does) was intended.
    """
    with open(name, 'r') as src:
        rows = src.readlines()
    return "".join(rows[:N])
# Display the first 3 lines of marge.txt as returned by split_flie.
print(split_flie("marge.txt", 3))
ファイルを分割する
In [61]:
%%bash
# Split marge.txt into 3-line pieces; output files are named with the prefix "split_marge.txt".
split -l 3 marge.txt split_marge.txt
In [5]:
def kinds_col(file_name, N=0):
    """Return the set of distinct lines found in ``file_name``.

    Args:
        file_name: Path of the text file to read.
        N: Not used by the function body; kept so existing calls still work.

    Returns:
        A set of the file's lines with newline characters stripped from
        both ends.
    """
    with open(file_name, 'r') as src:
        return {row.strip('\n') for row in src}
# Show the distinct values stored in col1.txt.
print(kinds_col('col1.txt'))
In [15]:
def sorted_list(filename, col):
    """Return the rows of a tab-separated file sorted by one column, descending.

    Args:
        filename: Path of the tab-separated text file.
        col: 0-based index of the column to sort by.

    Returns:
        A list of rows, each a list of string fields. Cells that parse as
        numbers are ordered by numeric value and come first; cells that do
        not parse keep plain string ordering after them.

    Raises:
        IndexError: If a row has no field at index ``col``.
    """
    def sort_key(row):
        # Compare numerically when possible: as strings "9.5" would sort
        # above "41". The leading flag keeps numeric and text cells in
        # separate groups so the two kinds are never compared directly.
        cell = row[col]
        try:
            number = float(cell)
        except ValueError:
            return (0, 0.0, cell)
        if number != number:  # NaN has no ordering; treat it as text
            return (0, 0.0, cell)
        return (1, number, "")

    with open(filename, 'r') as src:
        rows = [line.strip("\n").split('\t') for line in src]
    return sorted(rows, key=sort_key, reverse=True)
# Show the rows of hightemp.txt sorted in descending order of the field at index 2.
print(sorted_list("hightemp.txt", 2))
In [19]:
def frequency_sort(filename, col):
    """Return the distinct values of one column, most frequent first.

    Args:
        filename: Path of the tab-separated text file.
        col: 1-based number of the column to tally.

    Returns:
        A list of the column's distinct values ordered by descending count,
        as produced by ``Counter.most_common()``. The counts themselves are
        not returned.
    """
    from collections import Counter

    tally = Counter()
    with open(filename, 'r') as src:
        for line in src:
            fields = line.strip("\n").split('\t')
            tally[fields[col - 1]] += 1
    return [value for value, _count in tally.most_common()]
# Show the first-column values of hightemp.txt, most frequent first.
print(frequency_sort("hightemp.txt", 1))
In [ ]: