In [56]:
import collections
import copy
import csv
import os
import shutil
import sys

CSV export file manipulations


In [57]:
# INPUT
# `folder` is the directory that holds the exported Anki file; the generated
# CSV files are written into the same directory.
# `export_filename` is the name of the exported file inside that directory.
#
# For a trial run, create a 'sample' folder, place the exported file in it and use:
#   folder = 'sample'
#   export_filename = 'English_Vocabulary_Sample.csv'
export_filename = 'English_Vocabulary3.csv'
folder = 'all'

In [58]:
"""
Generate a list of dictionaries (one per word) from the Anki export file.
Note:
Only GRE words get a proper Serial No. All other words get 99999 as their Serial No.
"""

def _extract_between(text, start_char, end_char):
    """Return the text that follows every start_char, cut at the next end_char.

    One entry is produced for each occurrence of start_char in text. An entry
    runs up to (not including) the first end_char found after that occurrence,
    or to the end of text when there is none, and has leading/trailing spaces
    stripped.
    """
    pieces = []
    for pos, char in enumerate(text):
        if char != start_char:
            continue
        stop = text.find(end_char, pos + 1)
        if stop == -1:
            stop = len(text)  # no terminator: take everything that is left
        pieces.append(text[pos + 1:stop].strip(' '))
    return pieces


word_db = []

with open(folder + os.sep + export_filename, mode='r') as infile:
    reader = csv.reader(infile)
    Serial_No = 1  # running number, handed out to GRE words only
    for line in reader:
        if not line:
            # csv.reader returns an empty list for a blank line; no fields to read.
            continue

        # Keys are inserted in a fixed order because later cells write
        # row.keys() / row.values() straight to CSV.
        row = {}
        row['Word'] = line[0]
        row['Word audio'] = line[1]
        # list of Word audio files: text between each ':' and the next ']'
        row['Word audio2'] = _extract_between(row['Word audio'], ':', ']')

        row['POS'] = line[2]
        row['Other form'] = line[3]
        row['Meaning'] = line[4]
        row['Example'] = line[5]
        row['Picture'] = line[6]
        # list of images files: text between each '=' and the next '/'
        row['Picture2'] = _extract_between(row['Picture'], '=', '/')

        row['Mnemonic'] = line[7]
        row['Synonym'] = line[8]
        row['Antonym'] = line[9]
        row['Note'] = line[10]
        # list of images files in Note
        row['Note Image'] = _extract_between(row['Note'], '=', '/')

        row['Multimedia Example'] = line[11]
        # list of Multimedia Example files
        row['Multimedia Example2'] = _extract_between(row['Multimedia Example'], ':', ']')

        row['Tags'] = line[12]
        row['Tags2'] = row['Tags'].split(' ')  # list of tags

        if 'EV_GRE' in row['Tags2']:
            row['Word No'] = Serial_No
            Serial_No += 1
        else:
            row['Word No'] = 99999  # Serial No for non GRE words

        word_db.append(row)

for entry in word_db:
    print(entry)

In [59]:
"""
Writes out the generated word_db
"""

# csv.writer needs a binary file on Python 2 but a text file opened with
# newline='' on Python 3, so choose the open() arguments for the running version.
if sys.version_info[0] < 3:
    csv_write_kwargs = {'mode': 'wb'}
else:
    csv_write_kwargs = {'mode': 'w', 'newline': ''}

with open(folder + os.sep + 'word_db.csv', **csv_write_kwargs) as f:
    w = csv.writer(f)
    w.writerow(word_db[0].keys()) # keys in top row
    for i in word_db:
        w.writerow(i.values())

Copying media files to folders

Google Drive has problems generating shared links when a folder contains more than 3500 files.
So, I am copying the media files into separate folders.

path = r'C:\Users\Gyan Tatiya\AppData\Roaming\Anki2\User 2\collection.media'
media_fields = ('Word audio2', 'Picture2', 'Note Image', 'Multimedia Example2')

# Make sure a sub-folder named after each media field exists inside `path`.
for field in media_fields:
    target_dir = path + os.sep + field
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

# Copy every media file listed for a word into the sub-folder of its field.
for entry in word_db:
    for field in media_fields:
        for IMA_file in entry[field]:
            shutil.copy(path + os.sep + IMA_file, path + os.sep + field)

Upload files

Go to Google Drive and upload the 4 generated folders.

Input files:

Generate CSV files that have the shared links, and rename them to: FlashVocab_sheet - Multimedia Example2.csv, FlashVocab_sheet - Note Image.csv, FlashVocab_sheet - Picture2.csv, FlashVocab_sheet - Word audio2.csv

def _read_link_table(csv_name):
    """Return {row[1]: row[3]} for every row of the CSV file `csv_name` in `folder`.

    The notebook later looks these tables up by media file name and stores the
    result as a shared link, so column 1 is used as the file name and column 3
    as its link.
    """
    with open(folder + os.sep + csv_name, mode='r') as infile:
        return {fields[1]: fields[3] for fields in csv.reader(infile)}


# One lookup table per uploaded media folder.
mul_dict = _read_link_table('FlashVocab_sheet - Multimedia Example2.csv')
not_dict = _read_link_table('FlashVocab_sheet - Note Image.csv')
pic_dict = _read_link_table('FlashVocab_sheet - Picture2.csv')
aud_dict = _read_link_table('FlashVocab_sheet - Word audio2.csv')
"""
Updating word_db by inserting shared links
"""

# (media field, lookup table) pairs, in the order the new columns are added.
link_tables = (
    ('Word audio2', aud_dict),
    ('Picture2', pic_dict),
    ('Note Image', not_dict),
    ('Multimedia Example2', mul_dict),
)

for entry in word_db:
    for field, table in link_tables:
        # A file name missing from its table raises KeyError.
        entry[field + ' links'] = [table[IMA_file] for IMA_file in entry[field]]
"""
Writes out the generated word_db
"""

# csv.writer needs a binary file on Python 2 but a text file opened with
# newline='' on Python 3, so choose the open() arguments for the running version.
if sys.version_info[0] < 3:
    csv_write_kwargs = {'mode': 'wb'}
else:
    csv_write_kwargs = {'mode': 'w', 'newline': ''}

with open(folder + os.sep + 'word_db_links.csv', **csv_write_kwargs) as f:
    w = csv.writer(f)
    w.writerow(word_db[0].keys()) # keys in top row
    for i in word_db:
        w.writerow(i.values())
"""
Updating word_db by inserting export shared links
https://drive.google.com/uc?export=view&id={fileId}
"""

for entry in word_db:
    for field in ('Word audio2', 'Picture2', 'Note Image', 'Multimedia Example2'):
        # The file id is taken as the sixth '/'-separated piece of the shared link.
        entry[field + ' export links'] = [
            'https://drive.google.com/uc?export=view&id=' + shared_link.split('/')[5]
            for shared_link in entry[field + ' links']
        ]
"""
Writes out the generated word_db
"""

# csv.writer needs a binary file on Python 2 but a text file opened with
# newline='' on Python 3, so choose the open() arguments for the running version.
if sys.version_info[0] < 3:
    csv_write_kwargs = {'mode': 'wb'}
else:
    csv_write_kwargs = {'mode': 'w', 'newline': ''}

with open(folder + os.sep + 'word_db_links_export.csv', **csv_write_kwargs) as f:
    w = csv.writer(f)
    w.writerow(word_db[0].keys()) # keys in top row
    for i in word_db:
        w.writerow(i.values())

In [60]:
"""
Updating word_db by Google Cloud Storage links
https://storage.googleapis.com/staging.my-first-cloud-app-gtatiya.appspot.com/FlashVocab/{filename}
"""

# Every picture file name is appended to this fixed bucket URL.
gcs_prefix = "https://storage.googleapis.com/staging.my-first-cloud-app-gtatiya.appspot.com/FlashVocab/"

for entry in word_db:
    # One link per file listed in 'Picture2', in the same order.
    entry['Picture2 GCS links'] = [gcs_prefix + IMA_file for IMA_file in entry['Picture2']]

In [61]:
"""
Writes out the generated word_db
"""

# csv.writer needs a binary file on Python 2 but a text file opened with
# newline='' on Python 3, so choose the open() arguments for the running version.
if sys.version_info[0] < 3:
    csv_write_kwargs = {'mode': 'wb'}
else:
    csv_write_kwargs = {'mode': 'w', 'newline': ''}

with open(folder + os.sep + 'word_db_links_GCS.csv', **csv_write_kwargs) as f:
    w = csv.writer(f)
    w.writerow(word_db[0].keys()) # keys in top row
    for i in word_db:
        w.writerow(i.values())

Sort the columns


In [62]:
"""
This function sorts the dictionary based on keys and writes a CSV file
"""

def sort_dict_column(filename, dict_db):
    """Write dict_db to a CSV file with the columns ordered by key.

    Parameters:
        filename: path of the CSV file to create; an existing file is overwritten.
        dict_db: list of dictionaries, one per output row. The header row is the
            sorted keys of the first dictionary. Each following row holds one
            dictionary's values ordered by that dictionary's own sorted keys,
            so all dictionaries are expected to share the same keys.

    An empty dict_db produces an empty file instead of raising IndexError.
    """
    # csv.writer needs a binary file on Python 2 but a text file opened with
    # newline='' on Python 3.
    if sys.version_info[0] < 3:
        open_kwargs = {'mode': 'wb'}
    else:
        open_kwargs = {'mode': 'w', 'newline': ''}

    with open(filename, **open_kwargs) as f:
        if not dict_db:
            return  # nothing to write, not even a header
        w = csv.writer(f)
        w.writerow(sorted(dict_db[0].keys()))
        for record in dict_db:
            w.writerow([record[key] for key in sorted(record)])

#sort_dict_column(folder + os.sep + 'word_db_links_export_sorted.csv', word_db)

GRE words only


In [63]:
"""
Generate a list (word_gre_db) of the word_db entries that are tagged as GRE words
"""

# Keep the entries whose tag list contains 'EV_GRE'. The dictionaries are the
# same objects as in word_db, not copies.
word_gre_db = [entry for entry in word_db if 'EV_GRE' in entry['Tags2']]

In [64]:
"""
Writes out the generated word_gre_db
word_db_links_export_gre.csv
word_db_links_GCP_gre.csv
"""

# csv.writer needs a binary file on Python 2 but a text file opened with
# newline='' on Python 3, so choose the open() arguments for the running version.
if sys.version_info[0] < 3:
    csv_write_kwargs = {'mode': 'wb'}
else:
    csv_write_kwargs = {'mode': 'w', 'newline': ''}

with open(folder + os.sep + 'word_db_links_GCP_gre.csv', **csv_write_kwargs) as f:
    w = csv.writer(f)
    w.writerow(word_gre_db[0].keys()) # keys in top row
    for i in word_gre_db:
        w.writerow(i.values())
# Also write the GRE-only entries to CSV with the columns sorted by key.
sort_dict_column(folder + os.sep + 'word_db_links_export_gre_sorted.csv', word_gre_db)
"""
This is to be used in Words.java in MyWordDataDisplay project
"""

# Work on a deep copy: removing keys from the word_gre_db dictionaries
# themselves would strip those columns for every later cell that reads them.
words_tem = copy.deepcopy(word_gre_db)
#words_tem = copy.deepcopy(word_db)

# Columns that are not written to the Java file.
unused_keys = (
    'Mnemonic',  # it has '"' character
    'Note',
    'Note Image export links',
    'Other form',
    'Picture',
    'Word audio2',
    'Tags',
    'Tags2',
    'Picture2',
    'Multimedia Example',
    'Multimedia Example2 links',
    'Note Image links',
    'Word audio2 export links',
    'Picture2 links',
    'Multimedia Example2 export links',
    'Note Image',
    'Word audio',
    'Word audio2 links',
    'Multimedia Example2',
)

# Opening once in 'w' mode starts from an empty file; 'with' closes the file
# even if a write fails.
with open('java_commands.txt', 'w') as java_commands:
    for n, i in enumerate(words_tem):
        for key in unused_keys:
            # A link column is absent when the cell that adds it was not run.
            i.pop(key, None)

        # One HashMap per word, filled with the remaining columns.
        java_commands.write("Map map"+str(n)+ " = new HashMap();" + '\n')
        for k, v in i.items():
            java_commands.write("map"+str(n)+ ".put(\""+str(k)+"\""+", \""+str(v)+"\""+");" + '\n')
        java_commands.write("maps.add(map"+str(n)+ ");" + '\n')
"""
This is to be used in word_db.txt in MyWordDataDisplay project
"""

# Work on a deep copy: removing keys from the word_gre_db dictionaries
# themselves would strip those columns for every later cell that reads them.
words_tem = copy.deepcopy(word_gre_db)
#words_tem = copy.deepcopy(word_db)

# Columns that are not written to the output file.
unused_keys = (
    'Mnemonic',  # it has '"' character
    'Note',
    'Note Image export links',
    'Other form',
    'Picture',
    'Word audio2',
    'Tags',
    'Tags2',
    'Picture2',
    'Multimedia Example',
    'Multimedia Example2 links',
    'Note Image links',
    'Word audio2 export links',
    'Picture2 links',
    'Multimedia Example2 export links',
    'Note Image',
    'Word audio',
    'Word audio2 links',
    'Multimedia Example2',
)

# Opening once in 'w' mode starts from an empty file; 'with' closes the file
# even if a write fails.
with open('java_commands.txt', 'w') as java_commands:
    for i in words_tem:
        for key in unused_keys:
            # A link column is absent when the cell that adds it was not run.
            i.pop(key, None)

        # One remaining value per line.
        for k, v in i.items():
            java_commands.write(str(v)+'\n')

In [65]:
"""
Getting word_db ready for FlashVocab SQLite export

https://stackoverflow.com/questions/46835197/python-list-of-dictionary-stores-only-last-appended-value-in-every-iteration
"""

# Deep copy, so that removing columns below leaves word_gre_db untouched.
words_tem = copy.deepcopy(word_gre_db)
#words_tem = copy.deepcopy(word_db)

# Columns removed before the export; every other column is kept.
dropped_columns = (
    'Mnemonic',  # it has '"' character
    'Note',
    'Other form',
    'Picture',
    'Word audio2',
    'Tags',
    'Tags2',
    'Picture2',
    'Multimedia Example',
    'Note Image',
    'Word audio',
    'Multimedia Example2',
)

word_db2 = []

card_key = 1
for word in words_tem:
    for column in dropped_columns:
        del word[column]

    # Three cards per word ('Type 1' .. 'Type 3'), each an independent copy
    # with its own running 'Card Key'.
    for type_no in range(1, 4):
        card = copy.deepcopy(word)
        card['Card Type'] = 'Type '+str(type_no)
        card['Card Key'] = card_key
        card['Schedule Score'] = 0
        word_db2.append(card)
        card_key += 1

In [66]:
"""
Writes out the generated word_db2

word_db2_links_GCS_sorted.csv
word_db2_links_GCS_gre_sorted.csv
"""

# Write the per-card entries of word_db2 to CSV with the columns sorted by key.
sort_dict_column(folder + os.sep + 'word_db2_links_GCS_gre_sorted.csv', word_db2)
# NOTE(review): the commented-out call below is identical to the active one; it was
# presumably meant to target 'word_db2_links_GCS_sorted.csv' for a full word_db run - confirm.
#sort_dict_column(folder + os.sep + 'word_db2_links_GCS_gre_sorted.csv', word_db2)

In [67]:
"""
This is to be used in word_db.txt in MyWordDataDisplay project
"""

words_tem = copy.deepcopy(word_db2)

# Opening once in 'w' mode starts from an empty file; 'with' closes the file
# even if a write fails (previously it was reopened and closed for every entry).
with open('java_commands.txt', 'w') as java_commands:
    for i in words_tem:
        # One value per line, in sorted-key order.
        for k, v in sorted(i.items()):
            java_commands.write(str(v)+'\n')

In [ ]:


In [ ]: