keras mask not propagated - tensorflow

Keras (TensorFlow 2.6 backend) masks are supposed to propagate through the network, as mentioned in the docs:
When using the Functional API or the Sequential API, a mask generated by an Embedding or Masking layer will be propagated through the network for any layer that is capable of using them.
Both Conv1D and GlobalMaxPool1D support masks, but the mask is not propagated, as demonstrated by the following example:
model1 applies mask and GlobalMaxPool1D
model2 applies mask and Conv1D
model3 applies mask and Conv1D followed by GlobalMaxPool1D
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import initializers
import numpy as np
mask_val = 10.
inp = layers.Input(shape = (4,3))
masked = layers.Masking(mask_value = mask_val)(inp)
max_pool = layers.GlobalMaxPool1D()(masked)
model1 = models.Model(inputs = inp, outputs = max_pool)
#--- initialize the conv kernel to -1. so it's easy to interpret output
conv = layers.Conv1D(1, 2, padding = 'valid', kernel_initializer = initializers.Constant(-1.))(masked)
model2 = models.Model(inputs = inp, outputs = conv)
out = layers.GlobalMaxPool1D()(conv)
model3 = models.Model(inputs = inp, outputs = out)
Now test these 3 models on a simple input: a single batch with 2 timesteps of dimension 3, followed by 2 masked timesteps:
x0 = np.concatenate((np.ones((1,2,3)), mask_val * np.ones((1,2,3))), axis = 1)
model1(x0) # outputs [1,1,1], as expected
model2(x0) # outputs [-6, -3, 0] as expected
model3(x0) # outputs [0], but should output [-3] as the 0 value should be masked
Am I missing something?

So, I was wrong in thinking that Conv1D and GlobalMaxPool1D support masks. It turns out that:
layers.GlobalMaxPooling1D().supports_masking #--- this property is False
layers.Conv1D(1,1).supports_masking #--- this property is False
They seemed to support the mask because the Masking layer replaces the masked values with 0s, and in the example above the outputs of both the Conv1D layer and the GlobalMaxPooling1D layer happened not to be affected by these 0s.
A different input (with -1 instead of 1 as the un-masked values) shows it:
x0 = np.concatenate((-np.ones((1,2,3)), mask_val * np.ones((1,2,3))), axis = 1)
l_masked = layers.Masking(mask_value = mask_val)
l_max_pool = layers.GlobalMaxPool1D()
l_max_pool(l_masked(x0)) #--- returns [0,0,0] - ignoring the mask :(
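One possible workaround (a sketch I am adding here, not part of the original question or answer): build the boolean mask yourself and push masked timesteps to a very negative value before pooling, so GlobalMaxPool1D can never pick them.
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
mask_val = 10.
x0 = np.concatenate((-np.ones((1, 2, 3)), mask_val * np.ones((1, 2, 3))), axis=1).astype('float32')
#--- True where at least one feature differs from the mask value (the same rule the Masking layer uses)
mask = tf.reduce_any(tf.not_equal(x0, mask_val), axis=-1, keepdims=True)  # shape (1, 4, 1)
#--- replace masked timesteps with the most negative float so the max pool ignores them
x_for_max = tf.where(mask, x0, tf.float32.min)
layers.GlobalMaxPool1D()(x_for_max)  #--- returns [-1, -1, -1] instead of [0, 0, 0]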

Related

Modify Tensorflow model at runtime

I am trying to modify the data flow of a TensorFlow model at runtime. Consider a 3-layer FC neural network, where I want to define 2 different options for the middle layer.
Let's say,
1st option: 64 neuron layer
2nd option: 128 neuron layer.
Then during predict function, I want to give an input alongside the input data like;
model.predict([x_test, decider])
Then if decider is 0, I want my model to execute the 64-neuron layer as the middle layer. Otherwise, I want it to execute the 128-neuron layer as the middle layer.
If I choose one of them, I don't want the other option to be executed for performance reasons.
Note: I do not care for training.
Is there a way to do that? So far, I have been trying to use tf.cond() but could not make it work.
I think you could achieve the same thing by recombining independent models:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
# First model
input_shape = (16, )
inputs_0 = layers.Input(shape=input_shape)
outputs_0 = layers.Dense(256, 'relu')(inputs_0)
fc_0 = models.Model(inputs_0, outputs_0)
# Middle model 0 (Dense 64 -> Dense 128)
inputs_1_0 = layers.Input(shape=(256, ))
outputs_1_0 = layers.Dense(64, 'relu')(inputs_1_0)
outputs_1_0 = layers.Dense(128, 'relu')(outputs_1_0)
fc_1_0 = models.Model(inputs_1_0, outputs_1_0)
# Middle model 1 (Dense 128 -> Dense 128)
inputs_1_1 = layers.Input(shape=(256, ))
outputs_1_1 = layers.Dense(128, 'relu')(inputs_1_1)
outputs_1_1 = layers.Dense(128, 'relu')(outputs_1_1)
fc_1_1 = models.Model(inputs_1_1, outputs_1_1)
# Last model
inputs_2 = layers.Input(shape=(128, ))
outputs_2 = layers.Dense(1, 'sigmoid')(inputs_2)
fc_2 = models.Model(inputs_2, outputs_2)
def custom_model(x, d):
    h = fc_0(x)
    if d == 0:
        h = fc_1_0(h)  # 64-neuron middle option
    else:
        h = fc_1_1(h)  # 128-neuron middle option
    return fc_2(h)
x = np.random.rand(1, input_shape[0])
decider = 0  # Middle model 0 or 1
y = custom_model(x, decider)
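If you do want the decider to be a tensor input to a single callable (the question mentions tf.cond), here is a minimal sketch of that variant; it is my addition, not part of the original answer, and assumes TF 2.x plus the fc_0 / fc_1_0 / fc_1_1 / fc_2 models above. Inside a tf.function, tf.cond traces both branches once but executes only the selected one per call.
@tf.function
def custom_model_graph(x, d):
    h = fc_0(x)
    h = tf.cond(tf.equal(d, 0),
                lambda: fc_1_0(h),  # 64-neuron middle option
                lambda: fc_1_1(h))  # 128-neuron middle option
    return fc_2(h)
y = custom_model_graph(tf.constant(x, dtype=tf.float32), tf.constant(0))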

how to initialize a Variable tensor for the weight matrix in a keras model?

I am trying to use tensor Variables as weights in a Keras layer.
I know that I can use numpy arrays instead, but the reason I want to feed tensors is that I want my weight matrices to be of type SparseTensor.
This is a small example that I have coded so far:
import tensorflow as tf
from tensorflow import keras, nn
def model_keras(seed, new_hidden_size_list=None):
    number_of_layers = 1
    hidden_size = 512
    hidden_size_list = [hidden_size] * number_of_layers
    input_size = 784
    output_size = 10
    if new_hidden_size_list is not None:
        hidden_size_list = new_hidden_size_list
    weight_input = tf.Variable(tf.random.normal([784, 512], mean=0.0, stddev=1.0))
    bias_input = tf.Variable(tf.random.normal([512], mean=0.0, stddev=1.0))
    weight_output = tf.Variable(tf.random.normal([512, 10], mean=0.0, stddev=1.0))
    # This gives me an error when trying to use it as kernel_initializer and bias_initializer in the keras model
    weight_initializer_input = tf.initializers.variables([weight_input])
    bias_initializer_input = tf.initializers.variables([bias_input])
    weight_initializer_output = tf.initializers.variables([weight_output])
    # This works fine
    #weight_initializer_input = tf.initializers.lecun_uniform(seed=None)
    #bias_initializer_input = tf.initializers.lecun_uniform(seed=None)
    #weight_initializer_output = tf.initializers.lecun_uniform(seed=None)
    print(weight_initializer_input, bias_initializer_input, weight_initializer_output)
    model = keras.models.Sequential()
    for index in range(number_of_layers):
        if index == 0:
            # input layer
            model.add(keras.layers.Dense(hidden_size_list[index], activation=nn.selu, use_bias=True,
                                         kernel_initializer=weight_initializer_input,
                                         bias_initializer=bias_initializer_input,
                                         input_shape=(input_size,)))
        else:
            model.add(keras.layers.Dense(hidden_size_list[index], activation=nn.selu, use_bias=True,
                                         kernel_initializer=weight_initializer_hidden,
                                         bias_initializer=bias_initializer_hidden))
    # output layer
    model.add(keras.layers.Dense(output_size, use_bias=False, kernel_initializer=weight_initializer_output))
    model.add(keras.layers.Activation(nn.softmax))
    return model
I am using tensorflow 1.15.
Any idea how one can use custom (user-defined) tensor Variables as initializers instead of preset schemes (e.g. Glorot, truncated normal, etc.)? Another approach I could take is to explicitly define the computations instead of using keras.Layer.
Many thanks
Your code works after enabling eager execution.
import tensorflow as tf
tf.compat.v1.enable_eager_execution()
Add this at the top of your file.
See this for working code.
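An alternative sketch (my addition, not from the answer above): Keras accepts any callable with the signature (shape, dtype) as an initializer, so a pre-built Variable or tensor can simply be returned from such a callable instead of going through tf.initializers.variables.
import tensorflow as tf
tf.compat.v1.enable_eager_execution()
from tensorflow import keras
weight_input = tf.Variable(tf.random.normal([784, 512], mean=0.0, stddev=1.0))
def weight_init(shape, dtype=None):
    # shape is ignored here; the layer is expected to match (784, 512)
    return weight_input
layer = keras.layers.Dense(512, kernel_initializer=weight_init, input_shape=(784,))
out = layer(tf.zeros([1, 784]))  # builds the layer; its kernel starts from weight_input's value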

Prediction in non classified answer

I have created a neural network in Keras. The program runs, but there is a problem with the result: it is a Forex forecast network, and the forecast should return 0 or 1 (as provided in the training dataset), but the result is a float between 0 and 1, like "[[0.47342286]]".
I have tried to use numpy athmax, but it only results in one answer.
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime
from sklearn.preprocessing import MinMaxScaler
from ta import *
dataset = pd.read_csv('C:/Users/SIGMA COM/PycharmProjects/deep/GBP_JPY Historical Data.csv',index_col="Date",parse_dates=True)
dataset = dataset[::-1]
print(dataset.head())
print(dataset.isna().any())
print(dataset.info())
dataset['Open'].plot(figsize=(16,6))
# initial value
step_size = 4
batch_sizes = 1
dataset['Diff'] = dataset['Open'] - dataset['Price']
dataset['Range'] = dataset['High'] - dataset['Low']
dataset['Rsi'] = rsi(close=dataset['Price'],n=4,fillna=True)
dataset['Macd'] = macd(close=dataset['Price'],n_fast=12,n_slow=26,fillna=True)
dataset['Cci'] = cci(high=dataset['High'],low=dataset['Low'],close=dataset['Price'],n=20,fillna=True)
# dataset['Rsi'] = dataset['Rsi'] /100.0
# # dataset['Macd'] = dataset['Macd'] /2.0
# dataset['Cci'] = dataset['Cci'] / 500.0
training_set = dataset[['Rsi','Macd','Cci','Price','Low','High','Open','Signal']]
sc = MinMaxScaler()
training_set_scaled = sc.fit_transform(training_set)
# Creating a data structure with 60 timesteps and 1 output
X_train = []
y_train = []
for i in range(60, 1258):
    X_train.append(training_set_scaled[i-60:i, 0])
    y_train.append(training_set_scaled[i, -1:])
X_train, y_train = np.array(X_train), np.array(y_train)
# Reshaping
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
print(X_train.shape)
print(X_train)
plt.show()
# Part 2 - Building the RNN
# Importing the Keras libraries and packages
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
print((X_train.shape[1], 1))
print(X_train.shape)
# Initialising the RNN
regressor = Sequential()
# Adding the first LSTM layer and some Dropout regularisation
regressor.add(LSTM(units = 50, return_sequences = True, input_shape = (X_train.shape[1], 1)))
regressor.add(Dropout(0.2))
# Adding a second LSTM layer and some Dropout regularisation
regressor.add(LSTM(units = 50, return_sequences = True))
regressor.add(Dropout(0.2))
# Adding a third LSTM layer and some Dropout regularisation
regressor.add(LSTM(units = 50, return_sequences = True))
regressor.add(Dropout(0.2))
# Adding a fourth LSTM layer and some Dropout regularisation
regressor.add(LSTM(units = 50))
regressor.add(Dropout(0.2))
# Adding the output layer
regressor.add(Dense(units = 1,activation='sigmoid'))
# Compiling the RNN
regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')
# Fitting the RNN to the Training set
regressor.fit(X_train, y_train, epochs = 10, batch_size = 32)
result = regressor.predict(np.reshape(X_train[100],(1,60,1)))
print(result)
I want the model to make predictions in class 0 or 1.
This behavior is expected, because the sigmoid function always returns a number strictly between zero and one: sigmoid(x) = 1 / (1 + e^(-x)).
So if your class labels are either 0 or 1, which seems to be the case here, for a binary classification problem you can just round the resulting output to get your class prediction. Let's make a distinction between a classification and a regression problem here: regression is like finding the "line of best fit"; that is, the model is being trained to approximate the data. This appears to be what you're doing here: you're minimizing the mean squared error and searching for the model that best approximates your data, but that doesn't by itself produce a class prediction.
If you want to actually make a classification, you can just round all elements of the result of regressor.predict to 0 or 1, and then compare your predictions with the true labels. This can be done easily in numpy like so: numpy.around(your_predictions, decimals=0). Note that the decimals argument is not strictly required since it defaults to 0, but it's nice for clarity.
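For example, with hypothetical prediction values (not the asker's actual output):
import numpy as np
predictions = np.array([[0.47342286], [0.81], [0.12]])  # made-up sigmoid outputs
class_predictions = np.around(predictions, decimals=0).astype(int)
print(class_predictions)  # [[0], [1], [0]]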
As for using numpy.argmax (I'm going to assume that's what you meant by athmax, since I can't find a function with that spelling), it will give you the same label for everything because it returns the index of the largest element in an array. Since your output array has length one (it's simply a single neuron computing the logistic function), it will always return index zero! However, you're sort of on the right track: if your last layer were instead Dense(units=n_classes, activation='softmax'), the output would be a probability distribution over the labels for each row of data, and in that case numpy.argmax is the right tool.
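For instance, again with made-up numbers and a hypothetical 3-class softmax output:
import numpy as np
softmax_output = np.array([[0.7, 0.2, 0.1],
                           [0.1, 0.3, 0.6]])  # each row sums to 1
print(np.argmax(softmax_output, axis=1))  # [0 2]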
Here's a Tensorflow tutorial on classification that I found super helpful when I was just learning it myself. It uses softmax instead of sigmoid like you, but I think it's fairly adaptable to your needs: https://www.tensorflow.org/tutorials/keras/basic_classification
Hope this helps!

How to transfer the following Embedding code in TensorFlow to PyTorch?

I have Embedding code in TensorFlow as follows:
self.input_u = tf.placeholder(tf.int32, [None, user_length], name="input_u")
with tf.name_scope("user_embedding"):
    self.W1 = tf.Variable(
        tf.random_uniform([user_vocab_size, embedding_size], -1.0, 1.0),
        name="W")
    self.embedded_user = tf.nn.embedding_lookup(self.W1, self.input_u)
    self.embedded_users = tf.expand_dims(self.embedded_user, -1)
And I want to re-write it in PyTorch. How can I do that?
Method 1: Use Embedding layer and freeze the weight to act as lookup table
import numpy as np
import torch
user_vocab_size = 10
embedding_size = 5
user_length = 5
batch_size = 4
# W1: uniform(-1, 1) weights of shape (user_vocab_size, embedding_size)
W1 = torch.FloatTensor(np.random.uniform(-1, 1, size=(user_vocab_size, embedding_size)))
# Embedding layer initialised with W1; freezing the weight makes it a pure lookup table
embedded_user = torch.nn.Embedding(user_vocab_size, embedding_size, _weight=W1)
embedded_user.weight.requires_grad = False
input = torch.LongTensor(np.random.randint(0, user_vocab_size, (batch_size, user_length)))
embb = embedded_user(input)
embedded_users = torch.unsqueeze(embb, -1)  # equivalent of tf.expand_dims(..., -1)
You can change the dimensions of the embb tensor to your needs using torch.unsqueeze.
W1: a tensor drawn from a uniform distribution on (-1, 1), of size (user_vocab_size, embedding_size)
embedded_user: an Embedding layer which uses W1 as its embedding vectors
Method 2: Use Embedding functional api
input_u = torch.LongTensor(np.random.randint(0,user_vocab_size,(batch_size,user_length)))
embedded_user = torch.nn.functional.embedding(input_u,W1)
embedded_users = torch.unsqueeze(embedded_user, -1)

MXNET custom loss function and eval_metric

How do I create a custom loss function in MXNET? For example, instead of computing cross-entropy loss for one label (using standard mx.sym.SoftmaxOutput layer which computes cross-entropy loss and returns a symbol that can be passed as a loss symbol to the fit function), I want to compute weighted cross-entropy loss for each possible label. The MXNET tutorials mention using
mx.symbol.MakeLoss(scalar_loss_symbol, normalization='batch')
However, when I use the MakeLoss function, the standard eval_metric "acc" does not work (obviously, as the model doesn't know what my predicted probability vector is). Therefore I need to write my own eval_metric.
Further, at the time of prediction, I need to predict the probability vector as well, which cannot be accessed unless I group the final probability vector with the loss symbol and block_grad on it.
The code below is a modification of the MXNET tutorial http://mxnet.io/tutorials/python/mnist.html where the standard SoftmaxOutput loss function is replaced with a custom weighted loss function and the required custom eval_metric is written.
import logging
logging.getLogger().setLevel(logging.DEBUG)
import mxnet as mx
import numpy as np
mnist = mx.test_utils.get_mnist()
batch_size = 100
weighted_train_labels = np.zeros((mnist['train_label'].shape[0], np.max(mnist['train_label']) + 1))
weighted_train_labels[np.arange(mnist['train_label'].shape[0]), mnist['train_label']] = 1
train_iter = mx.io.NDArrayIter(mnist['train_data'], {'label':weighted_train_labels}, batch_size, shuffle=True)
weighted_test_labels = np.zeros((mnist['test_label'].shape[0],np.max(mnist['test_label'])+ 1))
weighted_test_labels[np.arange(mnist['test_label'].shape[0]),mnist['test_label']] = 1
val_iter = mx.io.NDArrayIter(mnist['test_data'], {'label':weighted_test_labels}, batch_size)
data = mx.sym.var('data')
# first conv layer
conv1 = mx.sym.Convolution(data=data, kernel=(5,5), num_filter=20)
tanh1 = mx.sym.Activation(data=conv1, act_type="tanh")
pool1 = mx.sym.Pooling(data=tanh1, pool_type="max", kernel=(2,2), stride=(2,2))
# second conv layer
conv2 = mx.sym.Convolution(data=pool1, kernel=(5,5), num_filter=50)
tanh2 = mx.sym.Activation(data=conv2, act_type="tanh")
pool2 = mx.sym.Pooling(data=tanh2, pool_type="max", kernel=(2,2), stride=(2,2))
# first fullc layer
flatten = mx.sym.flatten(data=pool2)
fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=500)
tanh3 = mx.sym.Activation(data=fc1, act_type="tanh")
# second fullc
fc2 = mx.sym.FullyConnected(data=tanh3, num_hidden=10)
# softmax loss
#lenet = mx.sym.SoftmaxOutput(data=fc2, name='softmax')
label = mx.sym.var('label')
softmax = mx.sym.log_softmax(data=fc2)
softmax_output = mx.sym.BlockGrad(data = softmax,name = 'softmax')
ce = -mx.sym.sum(mx.sym.sum(mx.sym.broadcast_mul(softmax, label), 1))
lenet = mx.symbol.MakeLoss(ce, normalization='batch')
sym = mx.sym.Group([softmax_output,lenet])
print(sym.list_outputs())
def custom_metric(label, softmax):
    return len(np.where(np.argmax(softmax, 1) == np.argmax(label, 1))[0]) / float(label.shape[0])
eval_metrics = mx.metric.CustomMetric(custom_metric,name='custom-accuracy', output_names=['softmax_output'],label_names=['label'])
lenet_model = mx.mod.Module(symbol=sym, context=mx.gpu(),data_names=['data'], label_names=['label'])
lenet_model.fit(train_iter,
                eval_data=val_iter,
                optimizer='sgd',
                optimizer_params={'learning_rate': 0.1},
                eval_metric=eval_metrics,  # mx.metric.Loss(), #'acc',
                # batch_end_callback=mx.callback.Speedometer(batch_size, 100),
                num_epoch=10)
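At prediction time, grouping the symbols means the module returns one array per output. A minimal sketch of recovering the class predictions (my addition, assuming the Module API above; not part of the original question): the blocked log_softmax output comes first in the group, and its argmax is the predicted class.
outputs = lenet_model.predict(val_iter)
log_probs = outputs[0].asnumpy()            # log-probabilities from softmax_output, shape (N, 10)
predicted_labels = np.argmax(log_probs, 1)  # argmax of log-probabilities equals argmax of probabilities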