In [2]:
from glob import glob
import os
import sh

In [3]:
def get_target():
    zips_1 = glob('RawDictionary/StarDict/*/*.zip')
    zips_2 = glob('RawDictionary/StarDict/*/*.ZIP')
    zips = zips_2+zips_1
    return zips

def cmd_ops(zip_stardict):
    sh.unzip(zip_stardict)

In [4]:
zips = get_target()

In [5]:
zips


Out[5]:
['RawDictionary/StarDict/en-th/VolubilisEnTh-2.4.2.zip.ZIP',
 'RawDictionary/StarDict/en-th/VolubilisThEn-2.4.2.zip.ZIP',
 'RawDictionary/StarDict/fr-th/VolubilisThFr-2.4.2.zip.ZIP',
 'RawDictionary/StarDict/en-th/LexitronEnTh-2.4.2.zip',
 'RawDictionary/StarDict/en-th/LexitronThEn-2.4.2.zip',
 'RawDictionary/StarDict/en-th/NontriEnTh-2.4.2.zip',
 'RawDictionary/StarDict/en-th/PlantsEnTh-2.4.2.zip',
 'RawDictionary/StarDict/en-th/PlantsThEn-2.4.2.zip',
 'RawDictionary/StarDict/en-th/SEASiteThEn-2.4.2.zip',
 'RawDictionary/StarDict/en-th/TumCivilEnTh-2.4.2.zip',
 'RawDictionary/StarDict/fr-th/DicoThaiFrTh-2.4.2.zip',
 'RawDictionary/StarDict/fr-th/DicoThaiThFr-2.4.2.zip',
 'RawDictionary/StarDict/other/BuddhistTermsPiTh-2.4.2.zip',
 'RawDictionary/StarDict/other/DeutshThai-2.4.2.zip',
 'RawDictionary/StarDict/other/Dict2UThZh-2.4.2.zip',
 'RawDictionary/StarDict/other/Dict2UZhTh-2.4.2.zip',
 'RawDictionary/StarDict/other/English2Lao-2.4.2.zip',
 'RawDictionary/StarDict/other/JTDicJaTh-2.4.2.zip',
 'RawDictionary/StarDict/other/JTDicThJa-2.4.2.zip',
 'RawDictionary/StarDict/other/KDTKoTh-2.4.2.zip',
 'RawDictionary/StarDict/other/KDTThKo-2.4.2.zip',
 'RawDictionary/StarDict/other/Lao2English-2.4.2.zip',
 'RawDictionary/StarDict/other/MahaSilaLoLo-2.4.2.zip',
 'RawDictionary/StarDict/ru-th/PhoneticThai-2.4.2.zip',
 'RawDictionary/StarDict/ru-th/RussianThai-2.4.2.zip',
 'RawDictionary/StarDict/ru-th/ThaiRussian-2.4.2.zip',
 'RawDictionary/StarDict/th-th/AbbrevThTh-2.4.2.zip',
 'RawDictionary/StarDict/th-th/NickNamesThTh-2.4.2.zip',
 'RawDictionary/StarDict/th-th/RoyalInstituteThTh-2.4.2.zip']

In [7]:
log = sh.unzip(zips[0])

In [8]:
log


Out[8]:
Archive:  RawDictionary/StarDict/en-th/VolubilisEnTh-2.4.2.zip.ZIP
   creating: VolubilisEnTh-2.4.2/
  inflating: VolubilisEnTh-2.4.2/VolubilisEnTh.bmp  
  inflating: VolubilisEnTh-2.4.2/VolubilisEnTh.dict.dz  
  inflating: VolubilisEnTh-2.4.2/VolubilisEnTh.idx.gz  
  inflating: VolubilisEnTh-2.4.2/VolubilisEnTh.ifo  

In [9]:
from zipfile import ZipFile

folder, zipfile = os.path.split(zips[0])

zip_ref = ZipFile(zips[0], 'r')
zip_ref.extractall(folder)
zip_ref.close()

In [10]:
folder,zipfile


Out[10]:
('RawDictionary/StarDict/en-th', 'VolubilisEnTh-2.4.2.zip.ZIP')

In [43]:
import re

In [48]:
re.split(r'.zip',zipfile, maxsplit=10,flags=re.IGNORECASE)


Out[48]:
['VolubilisEnTh-2.4.2', '', '']

In [42]:
c=[x for x in glob(folder+'/*') if os.path.isdir(x) and not 'not use' in x];c


Out[42]:
['RawDictionary/StarDict/en-th/VolubilisEnTh-2.4.2']

In [23]:
sample = glob(_[0]+'/*.ifo') + glob(_[0]+'/*.IFO')

In [24]:
sh.python('pyglossary/pyglossary.pyw',sample[0],'xml')


Out[24]:
no `Reader` class found in Stardict plugin, falling back to indirect mode

Writing to file "/Users/heim/Developer/Potchana/RawDictionary/StarDict/en-th/VolubilisEnTh-2.4.2/VolubilisEnTh.xml"

Running time of convert: 402.1 seconds
Writing file "xml" done.

In [31]:
d=[x for x in glob(c[0]+'/*') if os.path.isdir(x)]
d


Out[31]:
['RawDictionary/StarDict/en-th/VolubilisEnTh-2.4.2/VolubilisEnTh']

In [32]:
sh.make('-C',d[0])


Out[32]:
"""/Developer/Extras/Dictionary Development Kit"/bin"/build_dict.sh"  "VolubilisEnTh" "VolubilisEnTh.xml" "VolubilisEnTh.css" "VolubilisEnTh.plist"
- Building VolubilisEnTh.dictionary.
- Checking source.
- Cleaning objects directory.
- Preparing dictionary template.
- Preprocessing dictionary sources.
- Extracting index data.
- Preparing dictionary bundle.
- Adding body data.
- Preparing index data.
- Building key_text index.
- Building reference index.
- Fixing dictionary property.
- Copying CSS.
- Finished building ./objects/VolubilisEnTh.dictionary.
echo "Done."
Done.

In [40]:
a = glob('**/*.dictionary',recursive=True);a


Out[40]:
['RawDictionary/StarDict/en-th/VolubilisEnTh-2.4.2/VolubilisEnTh/objects/VolubilisEnTh.dictionary']

In [38]:
import shutil

In [41]:
shutil.move(a[0],'CompiledDictionary')


Out[41]:
'CompiledDictionary/VolubilisEnTh.dictionary'

In [51]:
tempfile.gettempdir()


Out[51]:
'/var/folders/89/0z3my29s2j5039v_x3nbl5140000gn/T'

In [50]:
import tempfile

In [54]:
def cmd_ops(zip_stardict):
    folder, zipfile = os.path.split(zip_stardict)

    folder_unzip_to = tempfile.gettempdir()

    zip_ref = ZipFile(zip_stardict, 'r')
    zip_ref.extractall(folder_unzip_to)
    zip_ref.close()

    # VolubilisEnTh-2.4.2.zip.ZIP -> VolubilisEnTh-2.4.2
    folder_unzip = re.split(r'.zip', zipfile, maxsplit=10, flags=re.IGNORECASE)[0]

    folder_in_extracted = os.path.join(folder_unzip_to, folder_unzip)
    if os.path.exists(folder_in_extracted):
        raise Exception('unzip folder not found')

    file_ifo = glob(folder_in_extracted + '/*.ifo') + glob(folder_in_extracted + '/*.IFO')
    if len(file_ifo) > 1:
        raise Exception('found more than 1 ifo')
    elif len(file_ifo) == 0:
        raise Exception('no ifp file found')

    # convert using pyglossary
    sh.python('pyglossary/pyglossary.pyw', file_ifo, 'xml')

    # VolubilisEnTh-2.4.2 -> VolubilisEnTh
    folder_converted = re.split(r'-',folder_unzip,maxsplit=10)[0]
    path_make = os.path.join(folder_in_extracted,folder_converted)

    sh.make('-C', path_make)

    compiledDict = glob(folder_unzip_to + '/**/*.dictionary', recursive=True)
    shutil.move(compiledDict, 'CompiledDictionary')

In [55]:
cmd_ops(zips[6])


---------------------------------------------------------------------------
Exception                                 Traceback (most recent call last)
<ipython-input-55-0f9581f71b4a> in <module>()
----> 1 cmd_ops(zips[6])

<ipython-input-54-d3457be7e25a> in cmd_ops(zip_stardict)
     13     folder_in_extracted = os.path.join(folder_unzip_to, folder_unzip)
     14     if os.path.exists(folder_in_extracted):
---> 15         raise Exception('unzip folder not found')
     16 
     17     file_ifo = glob(folder_in_extracted + '/*.ifo') + glob(folder_in_extracted + '/*.IFO')

Exception: unzip folder not found

In [56]:
folder_in_extracted = os.path.join(folder_unzip_to, folder_unzip)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-56-e3ce7059e23e> in <module>()
----> 1 folder_in_extracted = os.path.join(folder_unzip_to, folder_unzip)

NameError: name 'folder_unzip_to' is not defined

In [58]:
sh.python('pyglossary/pyglossary.pyw', r'/private/var/folders/89/0z3my29s2j5039v_x3nbl5140000gn/T/PlantsThEn-2.4.2/PlantsThEn.ifo', 'xml')


Out[58]:
no `Reader` class found in Stardict plugin, falling back to indirect mode

Writing to file "/private/var/folders/89/0z3my29s2j5039v_x3nbl5140000gn/T/PlantsThEn-2.4.2/PlantsThEn.xml"

Running time of convert: 0.1 seconds
Writing file "xml" done.

In [59]:
_


Out[59]:
no `Reader` class found in Stardict plugin, falling back to indirect mode

Writing to file "/private/var/folders/89/0z3my29s2j5039v_x3nbl5140000gn/T/PlantsThEn-2.4.2/PlantsThEn.xml"

Running time of convert: 0.1 seconds
Writing file "xml" done.

In [61]:
sh.which('python')


Out[61]:
'/Users/heim/anaconda3/envs/35/bin/python'

In [66]:
plist_info = glob('CompiledDictionary/**/Info.plist',recursive=True)

In [65]:
import plistlib as plist

In [73]:
with open(plist_info[0],mode='rb') as file:
    a = plist.load(file)

In [74]:
a


Out[74]:
{'CFBundleDevelopmentRegion': 'English',
 'CFBundleDisplayName': 'Plant Names (En-Th)',
 'CFBundleIdentifier': 'PlantsEnTh',
 'CFBundleInfoDictionaryVersion': '6.0',
 'CFBundleName': 'PlantsEnTh',
 'CFBundleShortVersionString': '1.0',
 'DCSBuildToolVersion': 3,
 'DCSDictionaryCSS': 'DefaultStyle.css',
 'DCSDictionaryCopyright': '.',
 'DCSDictionaryDefaultPrefs': {},
 'DCSDictionaryManufacturerName': '(c) V-I-C, 2010-2011.',
 'DCSDictionaryPrefsHTML': '',
 'DCSDictionaryXSL': '',
 'IDXDictionaryIndexes': [{'IDXIndexAccessMethod': 'com.apple.TrieAccessMethod',
   'IDXIndexBigEndian': False,
   'IDXIndexDataFields': {'IDXExternalDataFields': [{'IDXDataFieldName': 'DCSExternalBodyID',
      'IDXDataSize': 4,
      'IDXIndexName': 'DCSBodyDataIndex'}],
    'IDXFixedDataFields': [{'IDXDataFieldName': 'DCSPrivateFlag',
      'IDXDataSize': 2}],
    'IDXVariableDataFields': [{'IDXDataFieldName': 'DCSKeyword',
      'IDXDataSizeLength': 2},
     {'IDXDataFieldName': 'DCSHeadword', 'IDXDataSizeLength': 2},
     {'IDXDataFieldName': 'DCSEntryTitle', 'IDXDataSizeLength': 2},
     {'IDXDataFieldName': 'DCSAnchor', 'IDXDataSizeLength': 2},
     {'IDXDataFieldName': 'DCSYomiWord', 'IDXDataSizeLength': 2}]},
   'IDXIndexDataSizeLength': 2,
   'IDXIndexKeyMatchingMethods': ['IDXExactMatch',
    'IDXPrefixMatch',
    'IDXCommonPrefixMatch',
    'IDXWildcardMatch',
    'IDXAllMatch'],
   'IDXIndexName': 'DCSKeywordIndex',
   'IDXIndexPath': 'KeyText.index',
   'IDXIndexSupportDataID': False,
   'IDXIndexWritable': False,
   'TrieAuxiliaryDataOptions': {'IDXIndexPath': 'KeyText.data'}},
  {'IDXIndexAccessMethod': 'com.apple.TrieAccessMethod',
   'IDXIndexBigEndian': False,
   'IDXIndexDataFields': {'IDXExternalDataFields': [{'IDXDataFieldName': 'DCSExternalBodyID',
      'IDXDataSize': 4,
      'IDXIndexName': 'DCSBodyDataIndex'}]},
   'IDXIndexDataSizeLength': 2,
   'IDXIndexKeyMatchingMethods': ['IDXExactMatch'],
   'IDXIndexName': 'DCSReferenceIndex',
   'IDXIndexPath': 'EntryID.index',
   'IDXIndexSupportDataID': False,
   'IDXIndexWritable': False,
   'TrieAuxiliaryDataOptions': {'IDXIndexPath': 'EntryID.data'}},
  {'HeapDataCompressionType': 1,
   'IDXIndexAccessMethod': 'com.apple.HeapAccessMethod',
   'IDXIndexBigEndian': False,
   'IDXIndexDataFields': {'IDXVariableDataFields': [{'IDXDataFieldName': 'DCSBodyData',
      'IDXDataSizeLength': 4}]},
   'IDXIndexName': 'DCSBodyDataIndex',
   'IDXIndexPath': 'Body.data',
   'IDXIndexSupportDataID': True,
   'IDXIndexWritable': False}],
 'IDXDictionaryVersion': 1}

In [79]:
a['DCSDictionaryManufacturerName'] = 'Codustry Laboratory'

In [84]:
ORIGIN_URL = 'https://sites.google.com/site/thaidictproject/'
a['DCSDictionaryCopyright'] = a['CFBundleDisplayName'] + "<br>this original dictionary is from {}<br>Developed Jun 2017 by {}<br>".format(
    ORIGIN_URL,
    a['DCSDictionaryManufacturerName'] + ': Potchana Project.'
)
a['DCSDictionaryCopyright']


Out[84]:
'Plant Names (En-Th)<br>this original dictionary is from https://sites.google.com/site/thaidictproject/<br>Developed Jun 2017 by Codustry Laboratory: Potchana Project.<br>'

In [ ]: