ConvertNPYData



In [1]:
import os
from tqdm import tqdm
import numpy as np
import tensorflow as tf

Read the npy files and convert them to the TensorFlow TFRecords format


In [2]:
user = "bdonnot"
nnodes = 118  # number of nodes of the power grid (IEEE case118)
size = 5000   # number of examples per dataset split

# Alternative local setup:
# user = "benjamin"
# nnodes = 30
# size = 10000

# Input: raw numpy arrays; output: the converted tfrecords files.
path_data_in = os.path.join("/home",user,"Documents","PyHades2","ampsdatareal_withreact_{}_{}".format(nnodes,size))
path_data_out = os.path.join("/home",user,"Documents","PyHades2","tfrecords_{}_{}".format(nnodes,size))
# Number of quadripoles (power lines) -- presumably 186 for case118, 41 for case30.
nquads = 186 if nnodes == 118 else 41
if not os.path.exists(path_data_out):
    print("Creating the repository {}".format(path_data_out))
    # fix: makedirs (instead of mkdir) also creates missing parent directories
    os.makedirs(path_data_out)

In [3]:
def _int64_feature(value):
    """Wrap a single integer into a tf.train.Feature holding an Int64List."""
    int_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int_list)
def _bytes_feature(value):
    """Wrap `value` (assumed an iterable of bytes -- note: NOT wrapped in a
    list, unlike _int64_feature) into a tf.train.Feature holding a BytesList."""
    bytes_list = tf.train.BytesList(value=value)
    return tf.train.Feature(bytes_list=bytes_list)
def _floats_feature(value):
    """Wrap `value` (an iterable of floats) into a tf.train.Feature holding a FloatList."""
    float_list = tf.train.FloatList(value=value)
    return tf.train.Feature(float_list=float_list)

In [4]:
# Names of the numpy variables to convert. The order matters: the first
# entry ("prod_q") is used downstream to infer the number of examples of
# a split (dict_data[vars[0]].shape[0]).
# NOTE(review): `vars` shadows the `vars` builtin; kept as-is because the
# conversion cells below reference it by this name.
vars = ["prod_q", "flows_a","flows_MW", "loads_p", "loads_q", "loads_v", "prod_p", "prod_v",
        "prod_p_target", "flowsext_a", "flowsext_MW"]
# default split; overwritten by the conversion loops below
ds = "train"

Save the results for the base case


In [21]:
for ds in ["train","val","test"]:
    # read the data (numpy) for this split
    dict_data = {}
    for var in vars:
        dict_data[var] = np.load(os.path.join(path_data_in,"{}_{}.npy".format(ds,var)))
    # open the proper connections
    writer = tf.python_io.TFRecordWriter(os.path.join(path_data_out,"{}.tfrecord".format(ds)))
    writer_small = tf.python_io.TFRecordWriter(os.path.join(path_data_out,"{}_small.tfrecord".format(ds)))
    try:
        # write every example of this split to the tfrecord files
        for idx in tqdm(range(dict_data[vars[0]].shape[0])):
            d_feature = {}
            for var in vars:
                d_feature[var] = _floats_feature(dict_data[var][idx])
            # base case: no line disconnected -> all-zero encoding
            # (length taken from flows_a; presumably equal to nquads -- TODO confirm)
            d_feature["deco_enco"] = _floats_feature([0. for _ in range(dict_data["flows_a"].shape[1])])
            features = tf.train.Features(feature=d_feature)
            example = tf.train.Example(features=features)
            serialized = example.SerializeToString()
            writer.write(serialized)
            if idx < 100:
                # also keep a small (first 100 examples) version for quick experiments
                writer_small.write(serialized)
    finally:
        # fix: the writers were never closed, so records could stay unflushed
        writer.close()
        writer_small.close()


100%|██████████| 10000/10000 [00:01<00:00, 8386.65it/s]
100%|██████████| 5000/5000 [00:00<00:00, 8450.79it/s]
100%|██████████| 5000/5000 [00:00<00:00, 8439.69it/s]

Save the results for n-1


In [37]:
import re

def quadnamefromfilename(fn):
    """Extract the quadripole (line) name from a npy file name.

    File names look like "<split>_<quadname>_<var>.npy" where <split> is
    one of train/val/test and <var> one of the known variable names; both
    the split prefix and the variable suffix are stripped.  A name that
    matches neither (e.g. "computation_infos.json") is returned unchanged.

    Fixes over the previous version: the dots in ".npy" are escaped (they
    previously matched any character) and the stray trailing "|" (empty
    alternative) is removed from the prefix pattern.
    """
    tmp = re.sub(r"^(test|val|train)_", "", fn)
    # At most one suffix can match: every pattern is anchored on ".npy$"
    # and the first successful substitution removes it, so order is irrelevant.
    for var in ("loads_p", "loads_q", "loads_v", "prod_p", "prod_p_target",
                "prod_q", "prod_v", "flows_a", "flows_MW",
                "flowsext_MW", "flowsext_a"):
        tmp = re.sub("_" + var + r"\.npy$", "", tmp)
    return tmp

In [38]:
# Collect the unique quadripole names appearing in the files of the N1 subfolder.
path_data_in_n1 = os.path.join(path_data_in, "N1")
unique_names = {
    quadnamefromfilename(fn)
    for fn in os.listdir(path_data_in_n1)
    if os.path.isfile(os.path.join(path_data_in_n1, fn))
}
qnames = np.sort(list(unique_names))

In [39]:
import copy  # kept: a later cell still relies on the `copy` module being imported

# Map each quadripole name to its one-hot encoding (length nquads):
# a 1. at the quadripole's index, 0. everywhere else.
id_q = {}
for idx, qn in enumerate(qnames):
    onehot = [0.] * nquads
    onehot[idx] = 1.
    id_q[qn] = onehot

In [40]:
# name of the subfolder holding the "n-1" data (one line disconnected)
dataset = "N1"

In [42]:
path_data_in_dataset = os.path.join(path_data_in, dataset)
for ds in ["train","val","test"]:
# for ds in ["test"]:
    # open the proper connection
    writer = tf.python_io.TFRecordWriter(os.path.join(path_data_out,"{}-{}.tfrecord".format(dataset, ds)))
    writer_small = tf.python_io.TFRecordWriter(os.path.join(path_data_out,"{}-{}_small.tfrecord".format(dataset, ds)))
    for qn in tqdm(qnames):
        # read the data (numpy)
        dict_data = {}
        for var in vars:
            dict_data[var] = np.load(os.path.join(path_data_in_dataset,"{}_{}_{}.npy".format(ds,qn,var)))
        #wirte it to tensorboard
        for idx in range(dict_data[vars[0]].shape[0]):
            #write the whole lines for a specific dataset
            d_feature = {}
            for var in vars:
                x = dict_data[var][idx]
                d_feature[var] = _floats_feature(x)
            d_feature["deco_enco"] = _floats_feature(id_q[qn])
            features = tf.train.Features(feature=d_feature)
            example = tf.train.Example(features=features)
            serialized = example.SerializeToString()
            writer.write(serialized)
            if idx < 100:
                writer_small.write(serialized)


  0%|          | 0/41 [00:00<?, ?it/s]
  2%|▏         | 1/41 [00:01<00:45,  1.14s/it]
  5%|▍         | 2/41 [00:02<00:44,  1.14s/it]
  7%|▋         | 3/41 [00:03<00:43,  1.14s/it]
 10%|▉         | 4/41 [00:04<00:42,  1.14s/it]
 12%|█▏        | 5/41 [00:05<00:41,  1.15s/it]
 15%|█▍        | 6/41 [00:06<00:39,  1.14s/it]
100%|██████████| 41/41 [00:48<00:00,  1.32s/it]
100%|██████████| 41/41 [00:30<00:00,  1.48it/s]
100%|██████████| 41/41 [00:30<00:00,  1.48it/s]

For n-2 data


In [32]:
datasets_ = ["neighbours","random"]
datasets_ = ["random"]
datasets_ = ["neighbours"]
datasets_ = ["two_changes"]
for dataset in datasets_:
    path_data_in_dataset = os.path.join(path_data_in, dataset)
    
    qnames = set([quadnamefromfilename(el) for el in os.listdir(path_data_in_dataset)
                  if os.path.isfile(os.path.join(path_data_in_dataset, el))])
    qnames = np.sort(list(qnames))
    qnames = [q for q in qnames if (q != "computation_infos.json" and q != 'computation_infos_tmp.json')]
    for ds in  ["train","val","test"]:
        # open the proper connection
        writer = tf.python_io.TFRecordWriter(os.path.join(path_data_out,"{}-{}.tfrecord".format(dataset, ds)))
        writer_small = tf.python_io.TFRecordWriter(os.path.join(path_data_out,"{}-{}_small.tfrecord".format(dataset, ds)))
        for qn in tqdm(qnames):
            # read the data (numpy)
            dict_data = {}
            for var in vars:
                dict_data[var] = np.load(os.path.join(path_data_in_dataset,"{}_{}_{}.npy".format(ds,qn,var)))
            #wirte it to tensorboard
            for idx in range(dict_data[vars[0]].shape[0]):
                #write the whole set for a specific dataset
                d_feature = {}
                for var in vars:
                    x = dict_data[var][idx]
                    d_feature[var] = _floats_feature(x)
                
                qn1, qn2 = qn.split("@")
                tmp = copy.deepcopy(id_q[qn1])
                for id_, el in enumerate(id_q[qn2]):
                    if el:
                        tmp[id_] = el
                d_feature["deco_enco"] = _floats_feature(tmp)
                
                features = tf.train.Features(feature=d_feature)
                example = tf.train.Example(features=features)
                serialized = example.SerializeToString()
                writer.write(serialized)
                if idx < 100:
                    writer_small.write(serialized)


100%|██████████| 819/819 [23:31<00:00,  1.64s/it]
100%|██████████| 819/819 [12:25<00:00,  1.22it/s]
100%|██████████| 819/819 [12:26<00:00,  1.10it/s]

Check that the written data can be read back


In [46]:
tf.reset_default_graph()
filenames = [os.path.join(path_data_out,"{}.tfrecord".format(ds))]
var = "deco_enco"
print(var)

def _parse_function(example_proto, var, size):
    features = {var: tf.FixedLenFeature((size,), tf.float32, default_value=[0.0 for _ in range(size)]) }
    parsed_features = tf.parse_single_example(example_proto, features)
    return parsed_features[var]

dataset = tf.contrib.data.TFRecordDataset(filenames)
dataset = dataset.map(lambda x : _parse_function(x,var,dict_data[var].shape[1]))  # Parse the record into tensors.
dataset = dataset.repeat()  # Repeat the input indefinitely.
dataset = dataset.batch(1)
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()


deco_enco
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-46-e74cfde73114> in <module>()
     10 
     11 dataset = tf.contrib.data.TFRecordDataset(filenames)
---> 12 dataset = dataset.map(lambda x : _parse_function(x,var,dict_data[var].shape[1]))  # Parse the record into tensors.
     13 dataset = dataset.repeat()  # Repeat the input indefinitely.
     14 dataset = dataset.batch(1)

/usr/local/lib/python3.4/dist-packages/tensorflow/python/util/deprecation.py in new_func(*args, **kwargs)
    314                 'in a future version' if date is None else ('after %s' % date),
    315                 instructions)
--> 316       return func(*args, **kwargs)
    317     return tf_decorator.make_decorator(func, new_func, 'deprecated',
    318                                        _add_deprecated_arg_notice_to_docstring(

/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/data/python/ops/dataset_ops.py in map(self, map_func, num_threads, output_buffer_size, num_parallel_calls)
    499     """
    500     if num_threads is None and num_parallel_calls is None:
--> 501       ret = Dataset(dataset_ops.MapDataset(self._dataset, map_func))
    502     else:
    503       if num_threads is None:

/usr/local/lib/python3.4/dist-packages/tensorflow/python/data/ops/dataset_ops.py in __init__(self, input_dataset, map_func)
   1383 
   1384     self._map_func = tf_map_func
-> 1385     self._map_func.add_to_graph(ops.get_default_graph())
   1386 
   1387   def _as_variant_tensor(self):

/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/function.py in add_to_graph(self, g)
    484   def add_to_graph(self, g):
    485     """Adds this function into the graph g."""
--> 486     self._create_definition_if_needed()
    487 
    488     # Adds this function into 'g'.

/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/function.py in _create_definition_if_needed(self)
    319     """Creates the function definition if it's not created yet."""
    320     with context.graph_mode():
--> 321       self._create_definition_if_needed_impl()
    322 
    323   def _create_definition_if_needed_impl(self):

/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/function.py in _create_definition_if_needed_impl(self)
    336       # Call func and gather the output tensors.
    337       with vs.variable_scope("", custom_getter=temp_graph.getvar):
--> 338         outputs = self._func(*inputs)
    339 
    340       # There is no way of distinguishing between a function not returning

/usr/local/lib/python3.4/dist-packages/tensorflow/python/data/ops/dataset_ops.py in tf_map_func(*args)
   1358         ret = map_func(*nested_args)
   1359       else:
-> 1360         ret = map_func(nested_args)
   1361 
   1362       # If `map_func` returns a list of tensors, `nest.flatten()` and

<ipython-input-46-e74cfde73114> in <lambda>(x)
     10 
     11 dataset = tf.contrib.data.TFRecordDataset(filenames)
---> 12 dataset = dataset.map(lambda x : _parse_function(x,var,dict_data[var].shape[1]))  # Parse the record into tensors.
     13 dataset = dataset.repeat()  # Repeat the input indefinitely.
     14 dataset = dataset.batch(1)

KeyError: 'deco_enco'

In [31]:
sess = tf.InteractiveSession()
sess.run(iterator.initializer)

# Start populating the filename queue.
# coord = tf.train.Coordinator()
# threads = tf.train.start_queue_runners(coord=coord)

for i in range(20):
    # Retrieve a single instance:
    x_ = sess.run(next_element)
    print("x_ : {}".format(x_))
#     print("{} \n{}\n\n_______________________".format(x_,dataset_npy[i]))
sess.close()


x_ : [[ 132.57000732   -1.          132.16499329  140.64440918  138.50999451
   137.16000366]]
x_ : [[ 132.57000732  144.31500244  132.16499329  140.64440918  138.50999451
   137.16000366]]
x_ : [[ 132.57000732  144.31500244  132.16499329  140.64440918  138.50999451
   137.16000366]]
x_ : [[ 132.57000732  144.31500244  132.16499329  140.64440918  138.50999451
   137.16000366]]
x_ : [[ 132.57000732  144.31500244  132.16499329  140.64440918   -1.
   137.16000366]]
x_ : [[ 132.57000732  144.31500244  132.16499329  140.64440918  138.50999451
   137.16000366]]
x_ : [[ 132.57000732  144.31500244  132.16499329  140.64440918  138.50999451
   137.16000366]]
x_ : [[ 132.57000732  144.31500244  132.16499329  140.64440918  138.50999451
   137.16000366]]
x_ : [[  -1.          144.31500244  132.16499329  140.64440918  138.50999451
   137.16000366]]
x_ : [[ 132.57000732  144.31500244  132.16499329  140.64440918  138.50999451
   137.16000366]]
x_ : [[ 132.57000732  144.31500244   -1.          140.64440918  138.50999451
   137.16000366]]
x_ : [[ 132.57000732  144.31500244  132.16499329  140.64440918  138.50999451
   137.16000366]]
x_ : [[ 132.57000732  144.31500244  132.16499329  140.64440918  138.50999451
   137.16000366]]
x_ : [[ 132.57000732  144.31500244  132.16499329   -1.          138.50999451
   137.16000366]]
x_ : [[ 132.57000732  144.31500244  132.16499329  140.64440918  138.50999451
   137.16000366]]
x_ : [[ 132.57000732  144.31500244   -1.          140.64440918  138.50999451
   137.16000366]]
x_ : [[ 132.57000732  144.31500244  132.16499329  140.64440918  138.50999451
   137.16000366]]
x_ : [[ 132.57000732  144.31500244  132.16499329  140.64440918   -1.           -1.        ]]
x_ : [[ 132.57000732  144.31500244  132.16499329  140.64440918  138.50999451
   137.16000366]]
x_ : [[ 132.57000732  144.31500244  132.16499329  140.64440918  138.50999451
   137.16000366]]

In [ ]: