In [0]:
# You need TF 1.13.1 to deploy this on AI Platform
!pip install tensorflow==1.13.1
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.utils import shuffle
In [0]:
# Authenticate to your cloud account
from google.colab import auth
auth.authenticate_user()
In [0]:
# Download the Stack Overflow data (or replace with your own text data)
!gsutil cp 'gs://cloudml-demo-lcm/SO_ml_tags_avocado_188k_v2.csv' ./
In [0]:
data = pd.read_csv('SO_ml_tags_avocado_188k_v2.csv', names=['tags', 'original_tags', 'text'], header=0)
data = data.drop(columns=['original_tags'])
data = data.dropna()
data = shuffle(data, random_state=22)
data.head()
Out[0]:
In [ ]:
# Encode top tags to multi-hot
tags_split = [tags.split(',') for tags in data['tags'].values]
print(tags_split[0])  # preview the tag list for the first question
In [ ]:
tag_encoder = MultiLabelBinarizer()
tags_encoded = tag_encoder.fit_transform(tags_split)
num_tags = len(tags_encoded[0])
print(data['text'].values[0])
print(tag_encoder.classes_)
print(tags_encoded[0])
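In [0]:
# A minimal, illustrative sketch (not part of the pipeline): how MultiLabelBinarizer
# turns tag lists into multi-hot vectors, using made-up toy tags
toy_tags = [['pandas'], ['keras', 'tensorflow'], ['matplotlib', 'pandas']]
toy_encoder = MultiLabelBinarizer()
print(toy_encoder.fit_transform(toy_tags))  # one row per example, one column per tag
print(toy_encoder.classes_)  # column order: tag names, sorted alphabetically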
In [0]:
# Split our data into train and test sets
train_size = int(len(data) * .8)
print ("Train size: %d" % train_size)
print ("Test size: %d" % (len(data) - train_size))
In [0]:
# Split our labels into train and test sets
train_tags = tags_encoded[:train_size]
test_tags = tags_encoded[train_size:]
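In [0]:
# Optional sanity check: the label splits should line up with the 80/20 text split
print(train_tags.shape, test_tags.shape)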
In [0]:
%%writefile preprocess.py
# Pre-processing: a tokenizer wrapper class we can pickle alongside the model
from tensorflow.keras.preprocessing import text


class TextPreprocessor(object):
    def __init__(self, vocab_size):
        self._vocab_size = vocab_size
        self._tokenizer = None

    def create_tokenizer(self, text_list):
        # Build the vocabulary from the training corpus
        tokenizer = text.Tokenizer(num_words=self._vocab_size)
        tokenizer.fit_on_texts(text_list)
        self._tokenizer = tokenizer

    def transform_text(self, text_list):
        # Convert raw text to a (num_examples, vocab_size) bag-of-words matrix
        text_matrix = self._tokenizer.texts_to_matrix(text_list)
        return text_matrix
In [0]:
# Create vocab from training corpus
from preprocess import TextPreprocessor
VOCAB_SIZE = 400  # This is a hyperparameter; try different values for your dataset
train_qs = data['text'].values[:train_size]
test_qs = data['text'].values[train_size:]
processor = TextPreprocessor(VOCAB_SIZE)
processor.create_tokenizer(train_qs)
body_train = processor.transform_text(train_qs)
body_test = processor.transform_text(test_qs)
In [0]:
# Preview the first input from our training data
print(len(body_train[0]))
print(body_train[0])
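In [0]:
# Illustrative only: texts_to_matrix defaults to 'binary' mode, so each row is a
# VOCAB_SIZE-length vector with a 1 wherever a vocabulary word appears in the text
toy_vec = processor.transform_text(['keras model layer'])
print(toy_vec.shape)  # (1, VOCAB_SIZE)
print(int(toy_vec.sum()))  # number of distinct vocab words matched in the toy string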
In [0]:
# Save the processor state of the tokenizer
import pickle
with open('./processor_state.pkl', 'wb') as f:
    pickle.dump(processor, f)
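In [0]:
# Optional sanity check (a minimal sketch, not required for deployment): reload the
# pickled processor and confirm it transforms text identically to the in-memory one
with open('./processor_state.pkl', 'rb') as f:
    restored_processor = pickle.load(f)
print(np.array_equal(restored_processor.transform_text(train_qs[:1]), body_train[:1]))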
In [0]:
def create_model(vocab_size, num_tags):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(50, input_shape=(vocab_size,), activation='relu'))
    model.add(tf.keras.layers.Dense(25, activation='relu'))
    # Sigmoid output + binary cross-entropy: each tag is an independent yes/no decision
    model.add(tf.keras.layers.Dense(num_tags, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
In [0]:
model = create_model(VOCAB_SIZE, num_tags)
model.summary()
# Train and evaluate the model
model.fit(body_train, train_tags, epochs=3, batch_size=128, validation_split=0.1)
print('Eval loss/accuracy: {}'.format(
    model.evaluate(body_test, test_tags, batch_size=128)))
# Export the model to a file
model.save('keras_saved_model.h5')
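In [0]:
# Quick check (illustrative): reload the exported .h5 file and confirm it reproduces
# the in-memory model's predictions before uploading it anywhere
reloaded_model = tf.keras.models.load_model('keras_saved_model.h5')
print(np.allclose(reloaded_model.predict(body_test[:3]), model.predict(body_test[:3])))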
In [0]:
%%writefile model_prediction.py
# Custom prediction class: bundles the Keras model with the pickled tokenizer
# so that raw text can be preprocessed on AI Platform at serving time
import os
import pickle


class CustomModelPrediction(object):
    def __init__(self, model, processor):
        self._model = model
        self._processor = processor

    def predict(self, instances, **kwargs):
        # Preprocess the raw question text, then run the model on the result
        preprocessed_data = self._processor.transform_text(instances)
        predictions = self._model.predict(preprocessed_data)
        return predictions.tolist()

    @classmethod
    def from_path(cls, model_dir):
        import tensorflow.keras as keras
        model = keras.models.load_model(
            os.path.join(model_dir, 'keras_saved_model.h5'))
        with open(os.path.join(model_dir, 'processor_state.pkl'), 'rb') as f:
            processor = pickle.load(f)
        return cls(model, processor)
In [0]:
test_requests = [
"How to preprocess strings in Keras models Lambda layer? I have the problem that the value passed on to the Lambda layer (at compile time) is a placeholder generated by keras (without values). When the model is compiled, the .eval () method throws the error: You must feed a value for placeholder tensor 'input_1' with dtype string and shape [?, 1] def text_preprocess(x): strings = tf.keras.backend.eval(x) vectors = [] for string in strings: vector = string_to_one_hot(string.decode('utf-8')) vectors.append(vector) vectorTensor = tf.constant(np.array(vectors),dtype=tf.float32) return vectorTensor input_text = Input(shape=(1,), dtype=tf.string) embedding = Lambda(text_preprocess)(input_text) dense = Dense(256, activation='relu')(embedding) outputs = Dense(2, activation='softmax')(dense) model = Model(inputs=[input_text], outputs=outputs) model.compile(loss='categorical_crossentropy',optimizer='adam', metrics=['accuracy']) model.summary() model.save('test.h5') If I pass a string array into the input layer statically, I can compile the model, but I get the same error if I want to convert the model to tflite. #I replaced this line: input_text = Input(shape=(1,), dtype=tf.string) #by this lines: test = tf.constant(['Hello', 'World']) input_text = Input(shape=(1,), dtype=tf.string, tensor=test) #but calling this ... converter = TFLiteConverter.from_keras_model_file('string_test.h5') tfmodel = converter.convert() #... still leads to this error: InvalidArgumentError: You must feed a value for placeholder tensor 'input_3' with dtype string and shape [2] [[{{node input_3}}]] ",
"Change the bar item name in Pandas I have a test excel file like: df = pd.DataFrame({'name':list('abcdefg'), 'age':[10,20,5,23,58,4,6]}) print (df) name age 0 a 10 1 b 20 2 c 5 3 d 23 4 e 58 5 f 4 6 g 6 I use Pandas and matplotlib to read and plot it: import pandas as pd import numpy as np import matplotlib.pyplot as plt import os excel_file = 'test.xlsx' df = pd.read_excel(excel_file, sheet_name=0) df.plot(kind='bar') plt.show() the result shows: enter image description here it use index number as item name, how can I change it to the name, which stored in column name?"
]
In [0]:
from model_prediction import CustomModelPrediction
classifier = CustomModelPrediction.from_path('.')
results = classifier.predict(test_requests)
print(results)
for i in range(len(results)):
    print('Predicted labels:')
    for idx, val in enumerate(results[i]):
        if val > 0.7:
            print(tag_encoder.classes_[idx])
    print('\n')
In [0]:
%%writefile setup.py
from setuptools import setup

setup(
    name="so_predict",
    version="0.1",
    include_package_data=True,
    scripts=["preprocess.py", "model_prediction.py"]
)
In [0]:
# Replace your_gcs_bucket below with the name of your Cloud Storage bucket
!gsutil cp keras_saved_model.h5 gs://your_gcs_bucket/
!gsutil cp processor_state.pkl gs://your_gcs_bucket/
In [0]:
# Replace with your bucket name below
!python setup.py sdist
!gsutil cp ./dist/so_predict-0.1.tar.gz gs://your_gcs_bucket/packages/so_predict-0.1.tar.gz
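In [0]:
# Optional check: list the files packaged in the sdist to confirm both modules made it in
!tar -tzf ./dist/so_predict-0.1.tar.gz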
In [0]:
# Replace with your Cloud project name
!gcloud config set project your-cloud-project
In [0]:
# Create model if it hasn't been created yet
!gcloud ml-engine models create your_model_name
In [0]:
# To use this custom code feature, fill out this form: bit.ly/cmle-custom-code-signup
!gcloud alpha ml-engine versions create v1 --model your_model_name \
--origin=gs://your_gcs_bucket/ \
--python-version=3.5 \
--runtime-version=1.13 \
--framework='TENSORFLOW' \
--package-uris=gs://your_gcs_bucket/packages/so_predict-0.1.tar.gz \
--model-class=model_prediction.CustomModelPrediction
In [0]:
%%writefile predictions.txt
"How to preprocess strings in Keras models Lambda layer? I have the problem that the value passed on to the Lambda layer (at compile time) is a placeholder generated by keras (without values). When the model is compiled, the .eval () method throws the error: You must feed a value for placeholder tensor 'input_1' with dtype string and shape [?, 1] def text_preprocess(x): strings = tf.keras.backend.eval(x) vectors = [] for string in strings: vector = string_to_one_hot(string.decode('utf-8')) vectors.append(vector) vectorTensor = tf.constant(np.array(vectors),dtype=tf.float32) return vectorTensor input_text = Input(shape=(1,), dtype=tf.string) embedding = Lambda(text_preprocess)(input_text) dense = Dense(256, activation='relu')(embedding) outputs = Dense(2, activation='softmax')(dense) model = Model(inputs=[input_text], outputs=outputs) model.compile(loss='categorical_crossentropy',optimizer='adam', metrics=['accuracy']) model.summary() model.save('test.h5') If I pass a string array into the input layer statically, I can compile the model, but I get the same error if I want to convert the model to tflite. #I replaced this line: input_text = Input(shape=(1,), dtype=tf.string) #by this lines: test = tf.constant(["Hello","World"]) input_text = Input(shape=(1,), dtype=tf.string, tensor=test) #but calling this ... converter = TFLiteConverter.from_keras_model_file('string_test.h5') tfmodel = converter.convert() #... still leads to this error: InvalidArgumentError: You must feed a value for placeholder tensor 'input_3' with dtype string and shape [2] [[{{node input_3}}]] "
"Change the bar item name in Pandas I have a test excel file like: df = pd.DataFrame({'name':list('abcdefg'), 'age':[10,20,5,23,58,4,6]}) print (df) name age 0 a 10 1 b 20 2 c 5 3 d 23 4 e 58 5 f 4 6 g 6 I use Pandas and matplotlib to read and plot it: import pandas as pd import numpy as np import matplotlib.pyplot as plt import os excel_file = 'test.xlsx' df = pd.read_excel(excel_file, sheet_name=0) df.plot(kind="bar") plt.show() the result shows: enter image description here it use index number as item name, how can I change it to the name, which stored in column name? "
In [0]:
# Get predictions from our trained model. The test questions above are from:
# https://stackoverflow.com/questions/55517871/how-to-preprocess-strings-in-keras-models-lambda-layer
# https://stackoverflow.com/questions/55508547/plot-histogram-for-feature-of-array-with-known-and-limited-values
predictions = !gcloud ml-engine predict --model='your_model_name' --text-instances=predictions.txt --version=v1
print(predictions)
In [0]:
print(tag_encoder.classes_, '\n')
for sigmoid_arr in eval(predictions[0]):
    print(sigmoid_arr)
    for idx, probability in enumerate(sigmoid_arr):
        if probability > 0.7:
            print(tag_encoder.classes_[idx])
    print('\n')
In [0]:
!pip install shap
!pip install colored
In [0]:
import shap
attrib_data = body_train[:200]
explainer = shap.DeepExplainer(model, attrib_data)
num_explanations = 25
shap_vals = explainer.shap_values(body_test[:num_explanations])
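In [0]:
# A quick look at what DeepExplainer returned (illustrative): a list with one
# attribution array per tag class, each of shape (num_explanations, VOCAB_SIZE)
print(len(shap_vals), shap_vals[0].shape)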
In [0]:
words = processor._tokenizer.word_index
In [0]:
# The tokenizer's word_index is 1-based, so pad index 0 with an empty string
word_lookup = [''] + list(words.keys())
print(word_lookup[:100])
In [0]:
shap.summary_plot(shap_vals, feature_names=word_lookup, class_names=tag_encoder.classes_)
In [0]:
import colored
import re
def colorprint(question, pos, neg):
    # Split the question on whitespace and common punctuation
    q_arr = []
    q_filtered = filter(None, re.split("[, .()]+", question))
    for i in q_filtered:
        q_arr.append(i)
    color_str = []
    for idx, word in enumerate(q_arr):
        if word in pos:
            color_str.append(colored.fg("blue") + word)
        elif word in neg:
            color_str.append(colored.fg("light_red") + word)
        else:
            color_str.append(colored.fg('black') + word)
        # Wrap long questions every 15 words
        if idx % 15 == 0 and idx > 0:
            color_str.append('\n')
    print(' '.join(color_str) + colored.fg('black') + " ")
In [0]:
# Print highlighted signal words for a few questions
examples_to_print = [0,7,20,22,24]
for i in range(len(examples_to_print)):
    # Print the actual labels
    actual = test_tags[examples_to_print[i]]
    num_labels = int(np.sum(actual))
    actual_labels = np.argpartition(actual, -num_labels)[-num_labels:]
    print('Actual labels:')
    for label_idx in actual_labels:
        print(tag_encoder.classes_[label_idx])
    # Print the predicted labels
    print('\nPredicted labels:')
    pred_tag = model.predict(np.array([body_test[examples_to_print[i]]]))
    for idx, tagprob in enumerate(pred_tag[0]):
        if tagprob > 0.8:
            print(tag_encoder.classes_[idx])
    print('\n')
    # Get the highest and lowest signaling words for each confidently predicted tag
    for idx, tag in enumerate(pred_tag[0]):
        if tag > 0.7:
            attributions = shap_vals[idx][examples_to_print[i]]
            top_signal_words = np.argpartition(attributions, -5)[-5:]
            pos_words = []
            for word_idx in top_signal_words:
                pos_words.append(word_lookup[word_idx])
            negative_signal_words = np.argpartition(attributions, 5)[:5]
            neg_words = []
            for word_idx in negative_signal_words:
                neg_words.append(word_lookup[word_idx])
            colorprint(test_qs[examples_to_print[i]], pos_words, neg_words)
    print('\n')
In [0]: