In [1]:
# Standard library first, then third-party.
import collections
import os
import pickle
import sqlite3

# `from tqdm import tqdm_notebook` is deprecated; tqdm.auto selects the
# right frontend (notebook widget vs. console) automatically. The alias
# keeps every cell below working unchanged.
from tqdm.auto import tqdm as tqdm_notebook

In [2]:
# Collect every filename found anywhere under results/ (flattened,
# directory structure discarded).
files_q = []
for _root, _dirs, filenames in os.walk("results"):
    files_q.extend(filenames)  # extend of an empty list is a no-op
print(files_q)


['100000_dataset.pkl', '10000_dataset.pkl', '105000_dataset.pkl', '110000_dataset.pkl', '115000_dataset.pkl', '120000_dataset.pkl', '125000_dataset.pkl', '130000_dataset.pkl', '135000_dataset.pkl', '140000_dataset.pkl', '15000_dataset.pkl', '20000_dataset.pkl', '25000_dataset.pkl', '30000_dataset.pkl', '35000_dataset.pkl', '40000_dataset.pkl', '45000_dataset.pkl', '50000_dataset.pkl', '5000_dataset.pkl', '55000_dataset.pkl', '60000_dataset.pkl', '65000_dataset.pkl', '70000_dataset.pkl', '75000_dataset.pkl', '80000_dataset.pkl', '85000_dataset.pkl', '90000_dataset.pkl', '95000_dataset.pkl', 'dataset.pkl']

In [3]:
# Manual train/test split of the pickled result shards listed above:
# the first 20 shards feed the train_data table, the remaining 9 the
# test_data table.
train_list = [
    '100000_dataset.pkl', '10000_dataset.pkl', '105000_dataset.pkl',
    '110000_dataset.pkl', '115000_dataset.pkl', '120000_dataset.pkl',
    '125000_dataset.pkl', '130000_dataset.pkl', '135000_dataset.pkl',
    '140000_dataset.pkl', '15000_dataset.pkl', '20000_dataset.pkl',
    '25000_dataset.pkl', '30000_dataset.pkl', '35000_dataset.pkl',
    '40000_dataset.pkl', '45000_dataset.pkl', '50000_dataset.pkl',
    '5000_dataset.pkl', '55000_dataset.pkl',
]
test_list = [
    '60000_dataset.pkl', '65000_dataset.pkl', '70000_dataset.pkl',
    '75000_dataset.pkl', '80000_dataset.pkl', '85000_dataset.pkl',
    '90000_dataset.pkl', '95000_dataset.pkl', 'dataset.pkl',
]

Train Data — merge the 20 training shards into one Counter and load the rows into the `train_data` table.


In [ ]:
# Merge every training shard into one Counter keyed by (k, v) pairs;
# Counter.update sums the per-shard counts.
# NOTE(review): pickle.load executes arbitrary code if a shard is
# untrusted -- acceptable only because these files are self-produced.
dataset_train = collections.Counter()
for filename in tqdm_notebook(train_list, desc="Files"):
    with open(os.path.join("results", filename), "rb") as f:
        dataset_train.update(pickle.load(f))

In [ ]:
# Flatten the Counter into (key, value, count) rows for SQLite;
# keys/values are stringified, counts stay integers.
# Assumes each Counter key is a 2-tuple (k, v) -- true for data built
# by the loading cell above.
inputs_train = [
    (str(k), str(v), c)
    for (k, v), c in tqdm_notebook(dataset_train.items(), desc="Data")
]

In [ ]:
# Write the training rows to dataset.sqlite.
# Fixes two defects in the original cell:
#  - the bare INSERT fails with "no such table: train_data" against a
#    fresh database file, so create the table if it is missing
#    (IF NOT EXISTS is a no-op when the table already exists);
#  - the connection leaked if executemany raised, so close it in a
#    finally block.
conn = sqlite3.connect("dataset.sqlite")
try:
    conn.execute(
        "CREATE TABLE IF NOT EXISTS train_data (k TEXT, v TEXT, count INTEGER)"
    )
    conn.executemany("INSERT INTO train_data VALUES(?, ?, ?)", inputs_train)
    conn.commit()
finally:
    conn.close()

Test Data — merge the 9 held-out shards into one Counter and load the rows into the `test_data` table.


In [4]:
# Merge the held-out shards into a single Counter, mirroring the
# training-data cell.
# NOTE(review): pickle.load executes arbitrary code if a shard is
# untrusted -- acceptable only because these files are self-produced.
dataset_test = collections.Counter()
for filename in tqdm_notebook(test_list, desc="Files"):
    with open(os.path.join("results", filename), "rb") as f:
        dataset_test.update(pickle.load(f))




In [5]:
# Flatten the test Counter into (key, value, count) rows for SQLite,
# same shape as inputs_train.
inputs_test = [
    (str(k), str(v), c)
    for (k, v), c in tqdm_notebook(dataset_test.items(), desc="Data")
]




In [6]:
# Write the test rows to dataset.sqlite.
# Fixes two defects in the original cell:
#  - the bare INSERT fails with "no such table: test_data" against a
#    fresh database file, so create the table if it is missing
#    (IF NOT EXISTS is a no-op when the table already exists);
#  - the connection leaked if executemany raised, so close it in a
#    finally block.
conn = sqlite3.connect("dataset.sqlite")
try:
    conn.execute(
        "CREATE TABLE IF NOT EXISTS test_data (k TEXT, v TEXT, count INTEGER)"
    )
    conn.executemany("INSERT INTO test_data VALUES(?, ?, ?)", inputs_test)
    conn.commit()
finally:
    conn.close()

In [ ]: