In [1]:
import collections
import sqlite3
import os
import pickle
from tqdm import tqdm_notebook  # deprecated in newer tqdm; tqdm.notebook.tqdm is the modern equivalent
In [2]:
files_q = []
for root, dirs, files in os.walk("results"):
    if files:
        files_q += files
print(files_q)
In [3]:
train_list = ['100000_dataset.pkl', '10000_dataset.pkl', '105000_dataset.pkl', '110000_dataset.pkl', '115000_dataset.pkl', '120000_dataset.pkl', '125000_dataset.pkl', '130000_dataset.pkl', '135000_dataset.pkl', '140000_dataset.pkl', '15000_dataset.pkl', '20000_dataset.pkl', '25000_dataset.pkl', '30000_dataset.pkl', '35000_dataset.pkl', '40000_dataset.pkl', '45000_dataset.pkl', '50000_dataset.pkl', '5000_dataset.pkl', '55000_dataset.pkl']
test_list = ['60000_dataset.pkl', '65000_dataset.pkl', '70000_dataset.pkl', '75000_dataset.pkl', '80000_dataset.pkl', '85000_dataset.pkl', '90000_dataset.pkl', '95000_dataset.pkl', 'dataset.pkl']
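The two lists above partition the 29 files by hand. Assuming files_q from the walk above contains exactly these filenames, the same 20/9 split could be derived instead of hardcoded (the cut point is read off the lists, not stated in the original):

files_sorted = sorted(files_q)  # lexicographic order matches the hardcoded lists
train_list = files_sorted[:20]  # assumption: the first 20 sorted names are the training files
test_list = files_sorted[20:]   # remaining 9, ending with dataset.pkl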
In [ ]:
dataset_train = collections.Counter()
# Merge the per-file Counters into a single training Counter
for filename in tqdm_notebook(train_list, desc="Files"):
    with open("results/{}".format(filename), "rb") as f:
        data = pickle.load(f)
    dataset_train.update(data)
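Counter.update adds counts for keys seen before rather than overwriting them, so pairs that occur in several pickle files accumulate their totals; a tiny illustration:

c = collections.Counter({("a", "b"): 2})
c.update({("a", "b"): 3})
print(c[("a", "b")])  # 5: counts from both updates are summed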
In [ ]:
# Flatten the Counter into (key, value, count) rows for executemany
inputs_train = []
for (k, v), c in tqdm_notebook(dataset_train.items(), desc="Data"):
    inputs_train.append((str(k), str(v), c))
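The INSERT statements below assume dataset.sqlite already holds train_data and test_data tables; their creation is not shown in this notebook. A minimal schema sketch consistent with the three-element (str(k), str(v), c) rows (column names and types are assumptions):

conn = sqlite3.connect("dataset.sqlite")
cursor = conn.cursor()
# Hypothetical schema; the original column names are unknown
cursor.execute("CREATE TABLE IF NOT EXISTS train_data (k TEXT, v TEXT, c INTEGER)")
cursor.execute("CREATE TABLE IF NOT EXISTS test_data (k TEXT, v TEXT, c INTEGER)")
conn.commit()
conn.close()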
In [ ]:
conn = sqlite3.connect("dataset.sqlite")
cursor = conn.cursor()
cursor.executemany("INSERT INTO train_data VALUES(?, ?, ?)", inputs_train)
conn.commit()
conn.close()
In [4]:
dataset_test = collections.Counter()
for filename in tqdm_notebook(test_list, desc="Files"):
    with open("results/{}".format(filename), "rb") as f:
        data = pickle.load(f)
    dataset_test.update(data)
In [5]:
inputs_test = []
for (k, v), c in tqdm_notebook(dataset_test.items(), desc="Data"):
    inputs_test.append((str(k), str(v), c))
In [6]:
conn = sqlite3.connect("dataset.sqlite")
cursor = conn.cursor()
cursor.executemany("INSERT INTO test_data VALUES(?, ?, ?)", inputs_test)
conn.commit()
conn.close()
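After both loads, a quick sanity check would compare row counts against the in-memory lists (a small sketch, not part of the original run):

conn = sqlite3.connect("dataset.sqlite")
cursor = conn.cursor()
# Row counts should equal len(inputs_train) and len(inputs_test)
print(cursor.execute("SELECT COUNT(*) FROM train_data").fetchone()[0])
print(cursor.execute("SELECT COUNT(*) FROM test_data").fetchone()[0])
conn.close()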