In [22]:
import os, sys, string
In [14]:
# Directory containing the data files inspected by the cells below
# (relative path, resolved against the current working directory).
data_dir = "../test_data"
# os.listdir returns entries in arbitrary order; sort them so every run
# processes (and prints) the files in the same, reproducible order.
data_files = sorted(os.listdir(data_dir))
data_files
Out[14]:
In [18]:
# Count the lines of every data file, print each count, then print the mean.
all_line_numbers = []
for file_name in data_files:
    print("file:", file_name)
    file_path = os.path.join(data_dir, file_name)
    with open(file_path) as file:
        # Iterating the handle yields one line at a time, so the file is
        # streamed rather than loaded into memory in one piece.
        n_lines = sum(1 for _ in file)
    print("lines:", n_lines)
    all_line_numbers.append(n_lines)

# Guard the mean: with no files the division would raise ZeroDivisionError.
if all_line_numbers:
    print("AVERAGE LINE NUMBER:", sum(all_line_numbers) / len(all_line_numbers))
else:
    print("AVERAGE LINE NUMBER:", "n/a (no files found)")
In [19]:
# Collect every ASCII punctuation character (string.punctuation) that occurs
# anywhere in the data files.
found_symbols = set()
all_symbols = set(string.punctuation)
for file_name in data_files:
    file_path = os.path.join(data_dir, file_name)
    with open(file_path) as file:
        for line in file:
            # Intersecting with the characters of the line keeps only the
            # punctuation present in it; update() ignores duplicates, so no
            # explicit "already seen" check is needed.
            found_symbols.update(all_symbols.intersection(line))
print("ALL SYMBOLS FOUND:", found_symbols)
In [23]:
# Length statistics (in characters) over every line of every data file; each
# line is treated as one "sentence".
# NOTE(review): len(line) counts the trailing newline character when the line
# has one, so reported lengths include it - confirm that this is intended.
total_sentence_length = 0
total_sentences = 0
max_sentence_length = 0
min_sentence_length = sys.maxsize  # sentinel: any real length is smaller
for file_name in data_files:
    file_path = os.path.join(data_dir, file_name)
    with open(file_path) as file:
        for line in file:
            sentence_length = len(line)
            total_sentences += 1
            total_sentence_length += sentence_length
            max_sentence_length = max(max_sentence_length, sentence_length)
            min_sentence_length = min(min_sentence_length, sentence_length)

# Without any lines the average would divide by zero and the minimum would
# still hold the sys.maxsize sentinel, so only report when data was seen.
if total_sentences:
    print("Average Sentence Length:", total_sentence_length / total_sentences)
    print("Max sentence length:", max_sentence_length)
    print("Min sentence length:", min_sentence_length)
else:
    print("No sentences found in", data_dir)
In [ ]: