In [2]:
import sys
sys.path.append('/Users/spacecoffin/Development')

import GravelKicker as gk
import os
import pandas as pd

from datetime import datetime
from supriya.tools import nonrealtimetools

In [3]:
this_dir = '/Users/spacecoffin/Development/GravelKicker/__gen_files'

Batch processing

  1. Decide on a size limit for AIFF files (a 10-second file is ~882 KB)
  2. Generate files up to that limit (see the size-check sketch below)
  3. Process those files and append the results to the DataFrame
  4. Remove those files
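
A minimal sketch of the size check for steps 1 and 2, assuming the rendered AIFFs land in this_dir; SIZE_LIMIT, the per-file estimate, and the .aiff suffix are assumptions, not GravelKicker API:

import os

SIZE_LIMIT = 1073741824  # 1 GB budget; see the estimate below

def aiff_bytes(path):
    """Total size in bytes of the .aiff files in path."""
    return sum(
        os.path.getsize(os.path.join(path, f))
        for f in os.listdir(path)
        if f.endswith(".aiff")
    )

def remaining_files(path, per_file=882102):
    """How many more ~882 KB renders fit in the budget."""
    return max(0, (SIZE_LIMIT - aiff_bytes(path)) // per_file)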

In [1]:
import hurry.filesize

In [38]:
hurry.filesize.size(903168, system=hurry.filesize.alternative)


Out[38]:
'882 KB'

In [40]:
hurry.filesize.size(882102, system=hurry.filesize.si)


Out[40]:
'882K'

In [7]:
hurry.filesize.size(882000, system=hurry.filesize.si)


Out[7]:
'882K'

In [43]:
hurry.filesize.size(1073741824, system=hurry.filesize.alternative)


Out[43]:
'1 GB'

In [42]:
hurry.filesize.size(1073741824, system=hurry.filesize.si)


Out[42]:
'1G'

In [44]:
1073741824 / 882102


Out[44]:
1217.2535874536052
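
In other words, roughly 1,217 of these ~882 KB ten-second files fit in 1 GB (2^30 bytes), which puts a concrete number on the size limit in step 1 above.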

Loading


In [7]:
dir_list = os.listdir(path=this_dir)

# Load the previously pickled parameter DataFrame, if one exists,
# so that new results can be appended to it later.
if "df.p" in dir_list:
    _pickle_path = os.path.join(this_dir, "df.p")
    _old_df = pd.read_pickle(_pickle_path)

In [9]:
_old_df["hash"]


Out[9]:
0     62d1647a3f349aa6f232ea9001fa8d93
1     9f4d97cc9b5cfa0baf92601facffacb9
2     21e57a94b2c037258bcf1fad573fe2fb
3     6ba325edba9f428c62d46a952d10e776
4     1abd6ebc0e59bb9e56e7b153121db105
5     fed6d9c28a803eec577e8484f55fd46a
6     755812ae03e22cf0d4b1429cdbf0d7d4
7     f7dd58a8829ffda9c60da001639493ae
8     8aae91b8dd88f68b073b7e327b93449c
9     4c3a940fe9f5fa062791891292b3bd14
10    1809ca586e295df92d949a37dbada5b2
11    005c1e45724d2af61b84def5b441bec9
12    a2f46fa9db3c589db004ed4b4856adf4
13    989e9f197eb34b3a92503f468becafb2
14    bcecc01fb3503dc246dcbe36544d9768
15    0785bb918fa5f111a51b46bb098ef022
16    9243eb9b68203c383d283a48728e2d11
17    cb1a6248f10147bf29ed65969c41d516
18    014e14b31011d8477cd2c2a4963553d5
19    12aea483698ea62932324887b9067444
Name: hash, dtype: object

In [11]:
_old_df.dtypes


Out[11]:
adparam         float64
ampscale        float64
ddparam         float64
durscale        float64
ampdist         float64
durdist         float64
knum            float64
minfrequency    float64
maxfrequency    float64
init_cps        float64
hash             object
dtype: object

In [15]:
pmtx = gk.generator.gendy1.gen_params(rows=20)
df = gk.generator.gendy1.format_params(pmtx)
df.sort_values(["hash"])


Out[15]:
adparam ampscale ddparam durscale ampdist durdist knum minfrequency maxfrequency init_cps hash
11 0.174762 0.221599 0.073717 0.397338 5.0 3.0 13.0 46.249303 51.913087 16.0 005c1e45724d2af61b84def5b441bec9
18 0.524953 0.459376 0.614547 0.765223 3.0 1.0 14.0 783.990872 1661.218790 16.0 014e14b31011d8477cd2c2a4963553d5
15 0.743336 0.924925 0.801979 0.144148 0.0 5.0 15.0 9.722718 466.163762 16.0 0785bb918fa5f111a51b46bb098ef022
19 0.435284 0.421904 0.600644 0.241138 5.0 3.0 13.0 261.625565 554.365262 16.0 12aea483698ea62932324887b9067444
10 0.229079 0.403972 0.678791 0.254824 0.0 0.0 11.0 20.601722 184.997211 16.0 1809ca586e295df92d949a37dbada5b2
4 0.695884 0.897465 0.222027 0.000964 1.0 1.0 7.0 554.365262 659.255114 16.0 1abd6ebc0e59bb9e56e7b153121db105
2 0.264392 0.046060 0.569097 0.639831 5.0 3.0 10.0 155.563492 554.365262 16.0 21e57a94b2c037258bcf1fad573fe2fb
9 0.399641 0.051536 0.632386 0.369661 3.0 0.0 10.0 38.890873 155.563492 16.0 4c3a940fe9f5fa062791891292b3bd14
0 0.769618 0.196847 0.688058 0.069917 5.0 2.0 12.0 10.300861 123.470825 16.0 62d1647a3f349aa6f232ea9001fa8d93
3 0.166905 0.426392 0.067223 0.950235 3.0 4.0 10.0 2637.020455 2793.825851 16.0 6ba325edba9f428c62d46a952d10e776
6 0.211677 0.716012 0.776628 0.249939 2.0 0.0 10.0 440.000000 3951.066410 16.0 755812ae03e22cf0d4b1429cdbf0d7d4
8 0.999191 0.380020 0.203953 0.248066 1.0 3.0 11.0 880.000000 3729.310092 16.0 8aae91b8dd88f68b073b7e327b93449c
16 0.406652 0.434271 0.017588 0.599756 1.0 3.0 12.0 1396.912926 4186.009045 16.0 9243eb9b68203c383d283a48728e2d11
13 0.637114 0.941036 0.800288 0.038992 4.0 3.0 12.0 69.295658 195.997718 16.0 989e9f197eb34b3a92503f468becafb2
1 0.652324 0.047935 0.618170 0.248597 1.0 0.0 12.0 880.000000 1046.502261 16.0 9f4d97cc9b5cfa0baf92601facffacb9
12 0.566293 0.635214 0.189112 0.301430 5.0 1.0 11.0 1760.000000 2959.955382 16.0 a2f46fa9db3c589db004ed4b4856adf4
14 0.048555 0.682584 0.721583 0.334873 4.0 1.0 15.0 233.081881 987.766603 16.0 bcecc01fb3503dc246dcbe36544d9768
17 0.127376 0.162602 0.285705 0.972035 4.0 3.0 14.0 103.826174 880.000000 16.0 cb1a6248f10147bf29ed65969c41d516
7 0.584381 0.581177 0.563843 0.343797 5.0 2.0 14.0 783.990872 1046.502261 16.0 f7dd58a8829ffda9c60da001639493ae
5 0.536318 0.878002 0.918937 0.308756 4.0 3.0 9.0 783.990872 1396.912926 16.0 fed6d9c28a803eec577e8484f55fd46a

In [20]:
# For each parameter row, build a Gendy1 SynthDef and render a
# 10-second non-realtime session named after the row's hash.
for i, row in df.iterrows():
    session = nonrealtimetools.Session()

    builder = gk.generator.gendy1.make_builder(row)
    out = gk.generator.gendy1.build_out(builder)
    synthdef = builder.build()

    with session.at(0):
        synth_a = session.add_synth(duration=10, synthdef=synthdef)

    gk.util.render_session(session, this_dir, row["hash"])
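
Step 4 of the plan (removing the processed files) is not shown in the notebook. A minimal sketch, assuming render_session writes each file as <hash>.aiff inside this_dir (the naming and extension are guesses):

for h in df["hash"]:
    rendered = os.path.join(this_dir, "{0}.aiff".format(h))
    if os.path.exists(rendered):
        os.remove(rendered)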

In [47]:
dt = datetime.now().strftime("%Y_%m_%d")  # append "_%H-%M-%S" for per-run granularity
identifier = '{0}-len{1}'.format(dt, str(df.shape[0]))

In [53]:
df.to_pickle("{0}/df-{1}.p".format(this_dir, dt))

In [54]:
df.to_pickle("{0}/df.p".format(this_dir))
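
Note that this second call overwrites df.p with only the new batch. To append to the DataFrame loaded earlier (step 3 of the plan), something like the following would be needed; this is a sketch that assumes hash uniquely identifies a parameter set:

if "_old_df" in dir():
    combined = pd.concat([_old_df, df], ignore_index=True)
    combined = combined.drop_duplicates(subset="hash")
    combined.to_pickle("{0}/df.p".format(this_dir))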

Next, we need to:

  1. Extract features from each file
  2. Join the features with the parameter DataFrame (index on hash?)
  3. Get the joined data into a regressor (sketched below)
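
A rough sketch of that pipeline, assuming librosa for feature extraction and scikit-learn for the regressor; neither library appears in this notebook, and the chosen features, file naming, and regression direction are all illustrative:

import librosa
from sklearn.ensemble import RandomForestRegressor

rows = []
for h in df["hash"]:
    path = os.path.join(this_dir, "{0}.aiff".format(h))
    y, sr = librosa.load(path, sr=None)
    rows.append({
        "hash": h,
        "centroid": librosa.feature.spectral_centroid(y=y, sr=sr).mean(),
        "rms": librosa.feature.rms(y=y).mean(),
    })
features = pd.DataFrame(rows)

# Join extracted features to the parameter DataFrame on hash, then
# regress an audio feature on the synthesis parameters (the mapping
# could equally be learned in the other direction).
joined = df.merge(features, on="hash")
X = joined[["adparam", "ampscale", "ddparam", "durscale"]]
reg = RandomForestRegressor().fit(X, joined["centroid"])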

In [ ]:
import pickle
import pyspark

def pickle_data(file_pickle, path):
    """Save a pickle file according to the object's type."""
    if isinstance(file_pickle, pd.DataFrame):
        file_pickle.to_pickle(path)
    elif isinstance(file_pickle, pyspark.RDD):
        file_pickle.saveAsPickleFile(path)
    else:
        with open(path, 'wb') as p_file:
            pickle.dump(file_pickle, p_file)

# NOTE: data_pandas and dat_path are not defined in this notebook.
pickle_data(data_pandas, path='{0}/pandas-df-{1}.p'.format(dat_path, identifier))