How to train parameters of 2 different classes together? - tensorflow

How can I train the parameters of Class1 and Class2 together? That is, the weights of self.linear1 and self.linear2 from Class1 along with the weight of Class2. Since Class1 instantiates Class2 as self.conv1 = Class2(w_in, w_out), the two are interlinked and form a chain during the forward pass. That's why I wish to train them together! What should I write in my training loop when calculating the grads? grads = tape.gradient(loss, ? )
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
class Class1(layers.Layer):
    """Outer layer that owns a ``Class2`` sub-layer and two ``Dense`` layers.

    Args:
        num_channels: Factor used to size the declared input of ``linear1``.
        w_in: Passed to ``Class2`` as ``in_channels``.
        w_out: Passed to ``Class2`` as ``out_channels``; also the unit count
            of ``linear1``.
        num_class: Unit count of ``linear2``.
    """

    def __init__(self, num_channels, w_in, w_out, num_class):
        super(Class1, self).__init__()
        self.num_channels = num_channels
        self.w_in = w_in
        self.w_out = w_out
        # `linear2` below reads `self.num_class`; without this assignment the
        # constructor raises AttributeError.
        self.num_class = num_class
        self.conv1 = Class2(w_in, w_out)
        self.linear1 = tf.keras.layers.Dense(
            self.w_out,
            input_shape=(self.w_out * self.num_channels,),
            activation=None,
        )
        self.linear2 = tf.keras.layers.Dense(
            self.num_class,
            input_shape=(self.w_out,),
            activation=None,
        )

    def call(self, A):
        """Run ``A`` through ``conv1`` and return the result.

        NOTE(review): ``linear1`` and ``linear2`` are not invoked here, so
        they take no part in the forward pass as written — confirm whether
        they should be applied to the output of ``conv1``.
        """
        a = self.conv1(A)
        return a
class Class2(tf.keras.layers.Layer):
    """Layer that reduces axis 1 of its input with softmax-normalised weights.

    One trainable weight of shape ``(out_channels, in_channels, 1, 1)`` is
    created at construction time.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        kernel_shape = (out_channels, in_channels, 1, 1)
        self.weight = self.add_weight(
            shape=kernel_shape,
            initializer="random_normal",
            trainable=True,
        )

    def call(self, A):
        """Scale ``A`` by ``softmax(weight)`` taken over axis 1, then sum axis 1."""
        print(A)  # debug: incoming tensor
        mixing = tf.nn.softmax(self.weight, 1)
        A = tf.reduce_sum(A * mixing, 1)
        print(A)  # debug: reduced tensor
        return A

Related

How to write a custom call function for a Tensorflow LSTM class?

I have defined a custom LSTM Layer as follows:
class LSTMModel(tf.keras.Model):
    """Model skeleton holding a CNN, an LSTM and a softmax ``Dense`` head.

    Args:
        CNN_model: Object stored as ``self.cnn_model``.
        num_classes: Unit count of the final ``Dense`` layer.
    """

    def __init__(self, CNN_model, num_classes):
        super(LSTMModel, self).__init__()
        self.cnn_model = CNN_model
        self.lstm = tf.keras.layers.LSTM(
            units=64,
            return_state=True,
            dropout=0.3,
        )
        self.dense = tf.keras.layers.Dense(
            num_classes,
            activation="softmax",
        )

    def call(self, input):
        """Forward pass placeholder; not implemented yet, returns ``None``."""
        return None
However, I am unclear about what needs to occur in the call function here. I also wrote a generic CNN class like the one below:
class generic_vns_function(tf.keras.Model):
    """Generic CNN: Conv2D + MaxPooling2D pairs, then Flatten and Dense(1024).

    Args:
        input_shape: Forwarded to every ``Conv2D`` as ``input_shape``.
        layers: One entry per convolutional stage; each entry is used as the
            filter count of that stage (assumes a sequence of ints — TODO
            confirm against callers).
        layer_units: Currently unused; kept so existing callers keep working.
    """

    # Where would we use layer_units here?
    def __init__(self, input_shape, layers, layer_units):
        super().__init__()
        self.convolutions = []
        # Dynamically create Convolutional layers and MaxPools.
        # Iterate over the entries themselves: the previous
        # `range(len(layers))` loop passed the loop index as the filter
        # count, so the first convolution was asked for 0 filters.
        for filters in layers:
            self.convolutions.append(
                tf.keras.layers.Conv2D(
                    filters,
                    3,
                    padding="same",
                    input_shape=input_shape,
                    activation="relu",
                )
            )
            # Add MaxPooling layer
            # NOTE(review): placed after every convolution; the source lost
            # its indentation, so confirm pooling was meant to be per stage.
            self.convolutions.append(tf.keras.layers.MaxPooling2D((2, 2)))
        # Flatten
        self.flatten = tf.keras.layers.Flatten()
        # Dense layer
        self.dense1 = tf.keras.layers.Dense(1024, activation="relu")

    def call(self, input):
        """Apply the conv/pool stack in order, then flatten and project."""
        x = input
        for layer in self.convolutions:
            x = layer(x)
        x = self.flatten(x)
        x = self.dense1(x)
        return x
but here the required structure makes a lot more sense to me. I am just initializing all of the layers. What do I need to do to initialize my LSTM layers?
You could write it like this:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras import Model
class LSTMModel(Model):
    """LSTM followed by a softmax ``Dense`` classification head.

    Args:
        num_classes: Unit count of the output ``Dense`` layer.
        num_units: Unit count of the LSTM (default 64).
        drop_prob: Dropout rate passed to the LSTM (default 0.3).
    """

    def __init__(self, num_classes, num_units=64, drop_prob=0.3):
        super(LSTMModel, self).__init__()
        self.num_classes = num_classes
        self.num_units = num_units
        self.drop_prob = drop_prob
        self.lstm = LSTM(units=num_units, return_state=True, dropout=drop_prob)
        self.dense = Dense(num_classes, activation="softmax")

    def call(self, x, training=True):
        """Return class probabilities computed from the LSTM output."""
        # With return_state=True the LSTM returns its output followed by its
        # states; only the output (first element) feeds the dense head.
        lstm_result = self.lstm(x, training=training)
        features = lstm_result[0]
        return self.dense(features)
And then you would use it like:
# Instantiate the model with two output classes; the other constructor
# arguments keep their defaults.
model = LSTMModel(num_classes=2)
# Random input of shape (32, 64, 128); presumably (batch, timesteps,
# features) — confirm against the real data.
time_series = tf.random.normal((32, 64, 128))
# Calling the model runs its call() method on the batch.
x_pred = model(time_series)
# loss and gradients calculations ...
It is a common TensorFlow idiom to instantiate layers when initializing a custom layer/model, and then to execute their call() methods by passing data through them in your custom call implementation.

How does one use keras add_weight() vars with tensorflow probability distributions?

I am creating a new keras layer which accepts a vector of input data and is parameterized by 2 scalars, a mean and a standard deviation. I model the input data as a normal distribution and estimate its mean and variance through gradient descent. However, when I initialize tfp.Normal(mu, sigma), where mu and sigma come from add_weight() calls made during build(), the gradients do not propagate through mu and sigma.
The tensorflow probability documentation states that you can pass in training variables for distribution parameters and backprop through them. How do I get this to work inside of keras?
Below is a minimal working example.
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
tfk = tf.keras
tfkl = tf.keras.layers
tfd = tfp.distributions
tfpl = tfp.layers
# Small positive constant; used below as the lower bound of the std initializer.
EPS = 1e-5
batch_size = 4
N = 100
# Toy data: standard-normal samples of shape (batch_size, N).
x = np.random.randn(batch_size, N)
class NormalLikelihood(tf.keras.layers.Layer):
    """Layer returning the clipped Normal density of its input.

    The mean and standard deviation are scalar-like weights of shape ``[1]``
    created in ``build``.
    """

    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        """Create the ``mean``/``std`` weights and the Normal distribution."""
        mean_init = tf.keras.initializers.RandomNormal(mean=0.0, stddev=1)
        std_init = tf.keras.initializers.RandomUniform(
            minval=EPS, maxval=5.0, seed=None)
        self.mu = self.add_weight(
            "mean", shape=[1], initializer=mean_init, dtype=tf.float32)
        self.sigma = self.add_weight(
            "std",
            shape=[1],
            initializer=std_init,
            constraint=tf.keras.constraints.non_neg(),
            dtype=tf.float32,
        )
        # The distribution is parameterised by element 0 of each weight,
        # sliced here at build time.
        self.distribution = tfp.distributions.Normal(self.mu[0], self.sigma[0])

    def call(self, input):
        """Return ``prob(input)`` clipped to ``[1e-3, 1 - 1e-3]``."""
        density = self.distribution.prob(input)
        return tf.clip_by_value(density, 1e-3, 1-1e-3)
# Symbolic input with 100 features per sample.
input_layer = tf.keras.layers.Input(shape=(100,))
r = NormalLikelihood()(input_layer)
# Negative sum of the log of the clipped densities.
r = -tf.reduce_sum(tf.math.log(r))
model = tf.keras.models.Model(input_layer, r)
# The quantity above is registered as the model's loss, which is why
# compile() gets loss=None and fit() gets no targets.
model.add_loss(r)
model.compile(optimizer='rmsprop', loss=None)
model.fit(x, y=None)
This code results in builtins.ValueError: No gradients provided for any variable: ['normal_likelihood/mean:0', 'normal_likelihood/std:0'] which is not expected. Desired behavior would be that ['normal_likelihood/mean:0', 'normal_likelihood/std:0'] have gradients provided for them.
See the code in google colab: https://colab.research.google.com/drive/1_u4XTCIH-2qwNSgv9zkZiCG_zeCIEZGp?usp=sharing
Change tfp.distributions.Normal(self.mu[0], self.sigma[0]) to tfp.distributions.Normal(self.mu, self.sigma).
The reason this works is that, under the hood of the keras .fit() method, the gradient computation looks for the model's trainable variables. When you index into the weights inside build(), the result is a plain tensor computed once, outside of the gradient computation, so the loss is no longer connected to the trainable variables and the chain rule has nothing to propagate through.
Example:
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
# Small positive constant; lower bound of the std initializer in both layers below.
EPS = 1e-5
class NormalLikelihoodYours(tf.keras.layers.Layer):
    """Variant that builds the distribution from *indexed* weights.

    Kept as the non-working reference: the distribution receives
    ``self.mu[0]`` and ``self.sigma[0]`` rather than the weights themselves.
    """

    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        """Create the ``mean``/``std`` weights and the Normal distribution."""
        mean_initializer = tf.keras.initializers.RandomNormal(
            mean=0.0, stddev=1)
        std_initializer = tf.keras.initializers.RandomUniform(
            minval=EPS, maxval=5.0, seed=None)
        self.mu = self.add_weight(
            "mean", shape=[1], initializer=mean_initializer, dtype=tf.float32)
        self.sigma = self.add_weight(
            "std",
            shape=[1],
            initializer=std_initializer,
            constraint=tf.keras.constraints.non_neg(),
            dtype=tf.float32,
        )
        # Slices of the weights, taken once here in build().
        self.distribution = tfp.distributions.Normal(self.mu[0], self.sigma[0])

    def call(self, input):
        """Return ``prob(input)`` clipped to ``[1e-3, 1 - 1e-3]``."""
        likelihood = self.distribution.prob(input)
        return tf.clip_by_value(likelihood, 1e-3, 1-1e-3)
class NormalLikelihoodMine(tf.keras.layers.Layer):
    """Variant that hands the weights themselves to the distribution.

    Identical to ``NormalLikelihoodYours`` except that ``self.mu`` and
    ``self.sigma`` are passed to ``Normal`` without indexing.
    """

    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        """Create the ``mean``/``std`` weights and the Normal distribution."""
        mean_initializer = tf.keras.initializers.RandomNormal(
            mean=0.0, stddev=1)
        std_initializer = tf.keras.initializers.RandomUniform(
            minval=EPS, maxval=5.0, seed=None)
        self.mu = self.add_weight(
            "mean", shape=[1], initializer=mean_initializer, dtype=tf.float32)
        self.sigma = self.add_weight(
            "std",
            shape=[1],
            initializer=std_initializer,
            constraint=tf.keras.constraints.non_neg(),
            dtype=tf.float32,
        )
        # Pass the weights directly, without slicing them.
        self.distribution = tfp.distributions.Normal(self.mu, self.sigma)

    def call(self, input):
        """Return ``prob(input)`` clipped to ``[1e-3, 1 - 1e-3]``."""
        likelihood = self.distribution.prob(input)
        return tf.clip_by_value(likelihood, 1e-3, 1-1e-3)
# loss function
def calc_loss(logits):
    """Return the negated sum of the element-wise natural log of ``logits``."""
    log_values = tf.math.log(logits)
    return -tf.math.reduce_sum(log_values)
# model input
input_layer = tf.keras.layers.Input(shape=(100,))
x_in = tf.random.normal([4, 100])

# your model
your_output = NormalLikelihoodYours()(input_layer)
# (a stray trailing line-continuation backslash was removed from this line)
your_model = tf.keras.models.Model(input_layer, your_output)

# my model
my_output = NormalLikelihoodMine()(input_layer)
my_model = tf.keras.models.Model(input_layer, my_output)

# yours has no gradients because the network weights are not
# included anywhere in the loss calculation. When you index them
# with `[0]` they go from being trainable variables in the network,
# to just constants.
with tf.GradientTape() as tape:
    y_hat = your_model(x_in)
    loss = calc_loss(y_hat)
print(tape.gradient(loss, your_model.trainable_variables))
# [None, None]

# my model has gradients because `loss` and the weights in
# `trainable_variables` are connected
with tf.GradientTape() as tape:
    y_hat = my_model(x_in)
    loss = calc_loss(y_hat)
print(tape.gradient(loss, my_model.trainable_variables))
# [<tf.Tensor: shape=(1,), numpy=array([43.83749], dtype=float32)>,
# <tf.Tensor: shape=(1,), numpy=array([-37.348656], dtype=float32)>]

ValueError: Dimensions must be equal, but are 2 and 1 in time2vec example

I have 2 inputs and 4 outputs. I want to use the time2vec to predict the outputs. I have used the code in https://towardsdatascience.com/time2vec-for-time-series-features-encoding-a03a4f3f937e, it works for one input and one output. But when I want to use for (2 inputs and four outputs) it gives me the following error:
import numpy as np
import tensorflow as tf
from keras.layers import Dense, Dropout, Activation, Flatten, LSTM, Embedding, Input, concatenate,
Lambda
from sklearn.preprocessing import MinMaxScaler
from keras.callbacks import EarlyStopping
import keras
import random
import os
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.optimizers import *
from tensorflow.keras import backend as K
from kerashypetune import KerasGridSearch
import matplotlib.pyplot as plt
# Window length; reused further down as sequence_length.
w = 5
ts = 10
nt = 10
# NOTE(review): `percent` is not assigned anywhere in the visible snippet,
# so this line appears to raise NameError as written — confirm the
# intended value.
ntest = nt + int(percent*nt)
# Random stand-in data: inputs have 5 timesteps x 2 features, targets have 4 values.
X_train = np.random.rand(90,5,2)
X_test = np.random.rand(5,5,2)
y_train = np.random.rand(90,4)
y_test = np.random.rand(5,4)
### DEFINE T2V LAYER ###
class T2V(Layer):
    """Time2Vec-style layer: concatenates a sine feature block with a linear term.

    Args:
        output_dim: Number of sine features produced.
    """

    def __init__(self, output_dim=None, **kwargs):
        self.output_dim = output_dim
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the sine weights (``W``, ``P``) and linear weights (``w``, ``p``)."""
        # W and P have a single row, so K.dot(x, W) in call() only matches
        # inputs whose last dimension is 1.
        periodic_shape = (1, self.output_dim)
        linear_shape = (1, 1)
        self.W = self.add_weight(
            name='W', shape=periodic_shape, initializer='uniform', trainable=True)
        self.P = self.add_weight(
            name='P', shape=periodic_shape, initializer='uniform', trainable=True)
        self.w = self.add_weight(
            name='w', shape=linear_shape, initializer='uniform', trainable=True)
        self.p = self.add_weight(
            name='p', shape=linear_shape, initializer='uniform', trainable=True)
        super().build(input_shape)

    def call(self, x):
        """Return ``[sin(x·W + P), w*x + p]`` joined along the last axis."""
        linear_part = self.w * x + self.p
        periodic_part = K.sin(K.dot(x, self.W) + self.P)
        return K.concatenate([periodic_part, linear_part], -1)
### CREATE GENERATOR FOR LSTM AND T2V ###
# Alias of the window length `w`; passed as `dim` when the hypermodel is built below.
sequence_length = w
def gen_sequence(id_df, seq_length, seq_cols):
    """Yield consecutive sliding windows of ``seq_length`` rows.

    Args:
        id_df: Table indexed as ``id_df[seq_cols].values``.
        seq_length: Number of rows in each window.
        seq_cols: Column selection applied to ``id_df``.

    Yields:
        ``rows[start:start + seq_length, :]`` for every ``start`` in
        ``range(len(rows) - seq_length)``; the final possible window (the one
        ending on the last row) is not produced.
    """
    rows = id_df[seq_cols].values
    window_count = rows.shape[0] - seq_length
    for begin in range(window_count):
        yield rows[begin:begin + seq_length, :]
def gen_labels(id_df, seq_length, label):
    """Return the label rows from index ``seq_length`` to the end.

    Args:
        id_df: Table indexed as ``id_df[label].values``.
        seq_length: Number of leading rows to drop.
        label: Column selection; the result is sliced on two axes, so it
            must select a 2-D block (e.g. a list of column names).
    """
    targets = id_df[label].values
    return targets[seq_length:, :]
### DEFINE MODEL STRUCTURES ###
def set_seed_TF2(seed):
    """Seed TensorFlow, ``PYTHONHASHSEED``, NumPy and ``random`` with ``seed``."""
    os.environ['PYTHONHASHSEED'] = str(seed)
    for apply_seed in (tf.random.set_seed, np.random.seed, random.seed):
        apply_seed(seed)
def T2V_NN(param, dim):
    """Build and compile the T2V -> LSTM -> Dense(2) model.

    Args:
        param: Mapping read for the keys ``'t2v_dim'``, ``'unit'``,
            ``'act'`` and ``'lr'``.
        dim: Length of the first (non-batch) input axis; the second is fixed at 2.

    Returns:
        The compiled model (MSE loss, Adam optimizer).
    """
    inputs = Input(shape=(dim, 2))
    encoded = T2V(param['t2v_dim'])(inputs)
    hidden = LSTM(param['unit'], activation=param['act'])(encoded)
    outputs = Dense(2)(hidden)
    model = Model(inputs, outputs)
    model.compile(loss='mse', optimizer=Adam(lr=param['lr']))
    return model
def NN(param, dim):
    """Build and compile the plain LSTM -> Dense(2) model (no T2V layer).

    Args:
        param: Mapping read for the keys ``'unit'``, ``'act'`` and ``'lr'``.
        dim: Length of the first (non-batch) input axis; the second is fixed at 2.

    Returns:
        The compiled model (MSE loss, Adam optimizer).
    """
    inputs = Input(shape=(dim, 2))
    hidden = LSTM(param['unit'], activation=param['act'])(inputs)
    outputs = Dense(2)(hidden)
    model = Model(inputs, outputs)
    model.compile(loss='mse', optimizer=Adam(lr=param['lr']))
    return model
### PARAM GRID ###
# Search space handed to KerasGridSearch below; 'epochs' is a single value
# while every other entry lists the candidates to try.
param_grid = {'unit': [64,32],'t2v_dim': [128,64],'lr': [1e-2,1e-3], 'act': ['elu','relu'], 'epochs': 1,'batch_size': [512,1024]}
### FIT T2V + LSTM ###
# Early stopping on val_loss with a patience of 5 epochs, restoring the best weights.
es = EarlyStopping(patience=5, verbose=0, min_delta=0.001, monitor='val_loss', mode='auto',
restore_best_weights=True)
# Model factory: receives one parameter combination and builds T2V_NN for it.
hypermodel = lambda x: T2V_NN(param=x, dim=sequence_length)
kgs_t2v = KerasGridSearch(hypermodel, param_grid, monitor='val_loss', greater_is_better=False,
tuner_verbose=1)
# Register the seeding function with seed 33.
kgs_t2v.set_seed(set_seed_TF2, seed=33)
# Run the search on the training data, holding out 20% for validation.
kgs_t2v.search(X_train, y_train, validation_split=0.2, callbacks=[es], shuffle=False)
But when I run the model, I've got this error :
ValueError: Dimensions must be equal, but are 2 and 1 for '{{node t2v_2/MatMul}} = MatMul[T=DT_FLOAT,
transpose_a=false, transpose_b=false](t2v_2/Reshape, t2v_2/Reshape_1)' with input shapes: [?,2], [1,128].
Could you help me to solve this?
You have to change the parameters inside the T2V layer and inside your network so that the shapes match correctly:
class T2V(Layer):
    """Time2Vec-style layer whose weight shapes follow the input shape.

    Args:
        output_dim: Number of sine features produced.
    """

    def __init__(self, output_dim=None, **kwargs):
        self.output_dim = output_dim
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the weights, sized from ``input_shape[1]`` and ``input_shape[-1]``."""
        axis1_len = input_shape[1]
        last_dim = input_shape[-1]
        # W maps the last input dimension onto output_dim sine features.
        self.W = self.add_weight(
            name='W', shape=(last_dim, self.output_dim),
            initializer='uniform', trainable=True)
        # P, w and p each carry one row per position along axis 1.
        self.P = self.add_weight(
            name='P', shape=(axis1_len, self.output_dim),
            initializer='uniform', trainable=True)
        self.w = self.add_weight(
            name='w', shape=(axis1_len, 1),
            initializer='uniform', trainable=True)
        self.p = self.add_weight(
            name='p', shape=(axis1_len, 1),
            initializer='uniform', trainable=True)
        super().build(input_shape)

    def call(self, x):
        """Return ``[sin(x·W + P), w*x + p]`` joined along the last axis."""
        linear_part = self.w * x + self.p
        periodic_part = K.sin(K.dot(x, self.W) + self.P)
        return K.concatenate([periodic_part, linear_part], -1)
Create a dummy example:
# Dimensions of the dummy problem: 2 input features, 4 output values.
n_sample = 90
timesteps = 5
feat_inp = 2
feat_out = 4
# Uniform random inputs of shape (n_sample, timesteps, feat_inp) and targets
# of shape (n_sample, feat_out).
X = np.random.uniform(0,1, (n_sample, timesteps, feat_inp))
y = np.random.uniform(0,1, (n_sample, feat_out))
def T2V_NN():
    """Build and compile the T2V(32) -> LSTM(8) -> Dense(feat_out) model.

    Reads the module-level ``timesteps``, ``feat_inp`` and ``feat_out``.

    Returns:
        The compiled model (MSE loss, ``'adam'`` optimizer).
    """
    inputs = Input(shape=(timesteps, feat_inp))
    encoded = T2V(32)(inputs)
    hidden = LSTM(8)(encoded)
    outputs = Dense(feat_out)(hidden)
    model = Model(inputs, outputs)
    model.compile(loss='mse', optimizer='adam')
    return model
# Build the model and train it on the dummy data for 3 epochs.
model = T2V_NN()
model.fit(X,y, epochs=3)

Keras Dense layer gets input_shape wrong

I wrote the below custom layer and when I try to add a Dense layer afterwards it gets the input_shape wrong and expects the shape[-1] dimension of the tensor before the layer.
from keras import backend as K
from keras.engine.topology import Layer
from keras.layers import Conv2D, Dense, Input
class SMSO(Layer):
    """Custom layer: centre, 1x1-convolve, square, sum over axes (1, 2), sqrt, affine.

    Args:
        feature_dim: Filter count of the 1x1 convolution and width of the
            ``scale``/``offset`` weights (default 256).
    """

    def __init__(self, feature_dim=256, **kwargs):
        self.feature_dim = feature_dim
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the ``scale`` (ones) and ``offset`` (zeros) weights."""
        affine_shape = (1, self.feature_dim)
        self.scale = self.add_weight(
            'scale', shape=affine_shape, initializer='ones', trainable=True)
        self.offset = self.add_weight(
            'offset', shape=affine_shape, initializer='zeros', trainable=True)
        super().build(input_shape)

    def call(self, x):
        """Return ``scale * sqrt(sum(conv(x - mean)^2)) + offset``."""
        # Subtract the mean taken over axes 1 and 2.
        centered = x - K.mean(x, axis=(1, 2), keepdims=True)
        # A new Conv2D layer object is instantiated on every call.
        projected = Conv2D(self.feature_dim, 1)(centered)
        magnitude = K.sqrt(K.sum(K.square(projected), axis=(1, 2)))
        return self.scale * magnitude + self.offset
# Symbolic input of shape (10, 10, 32) per sample.
x = Input(shape=(10, 10, 32))
l1 = SMSO(16)(x)
print(l1.shape)
# This Dense call is the line reported to fail.
l2 = Dense(10)(l1)
Here is the code to reproduce the error. l1.shape gives (?, 16) as expected but the next line fails.
Adding a compute_output_shape function solves the problem.
def compute_output_shape(self, input_shape):
    """Return ``(input_shape[0], feature_dim)`` as the layer's output shape."""
    leading_dim = input_shape[0]
    return (leading_dim, self.feature_dim)
Any layer that modifies shape needs to have a compute_output_shape.

How to input csv data in an autoencoder

I am using the code below that implements an autoencoder. How can I feed the autoencoder with data for training and testing?
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
class Autoencoder(object):
    """Single-hidden-layer autoencoder written against the TensorFlow 1.x graph API.

    The constructor builds the graph (placeholder, hidden layer,
    reconstruction, squared-error cost, training op), opens a ``tf.Session``
    and initialises all global variables.

    Args:
        n_input: Width of each input row.
        n_hidden: Width of the hidden layer.
        transfer_function: Activation applied to the hidden layer
            (default ``tf.nn.softplus``).
        optimizer: Optimizer whose ``minimize`` builds the training op.
            ``None`` (the default) creates a fresh ``tf.train.AdamOptimizer()``.
    """

    def __init__(self, n_input, n_hidden, transfer_function=tf.nn.softplus, optimizer=None):
        # A default of `tf.train.AdamOptimizer()` written in the signature is
        # evaluated once, when the class is defined, and the same optimizer
        # object is then shared by every instance. Create it per instance.
        if optimizer is None:
            optimizer = tf.train.AdamOptimizer()
        self.n_input = n_input
        self.n_hidden = n_hidden
        self.transfer = transfer_function
        network_weights = self._initialize_weights()
        self.weights = network_weights
        # model
        self.x = tf.placeholder(tf.float32, [None, self.n_input])
        self.hidden = self.transfer(tf.add(tf.matmul(self.x, self.weights['w1']), self.weights['b1']))
        self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']), self.weights['b2'])
        # cost: half the summed squared reconstruction error
        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
        self.optimizer = optimizer.minimize(self.cost)
        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)

    def _initialize_weights(self):
        """Create and return the dict of variables ``w1``, ``b1``, ``w2``, ``b2``."""
        all_weights = dict()
        # Only w1 uses the Xavier initializer; the other variables start at zero.
        all_weights['w1'] = tf.get_variable("w1", shape=[self.n_input, self.n_hidden],
                                            initializer=tf.contrib.layers.xavier_initializer())
        all_weights['b1'] = tf.Variable(tf.zeros([self.n_hidden], dtype=tf.float32))
        all_weights['w2'] = tf.Variable(tf.zeros([self.n_hidden, self.n_input], dtype=tf.float32))
        all_weights['b2'] = tf.Variable(tf.zeros([self.n_input], dtype=tf.float32))
        return all_weights

    def partial_fit(self, X):
        """Run one optimisation step on ``X`` and return the cost for that batch."""
        cost, opt = self.sess.run((self.cost, self.optimizer), feed_dict={self.x: X})
        return cost

    def calc_total_cost(self, X):
        """Return the cost on ``X`` without running the training op."""
        return self.sess.run(self.cost, feed_dict={self.x: X})

    def transform(self, X):
        """Return the hidden-layer activations for ``X``."""
        return self.sess.run(self.hidden, feed_dict={self.x: X})

    def generate(self, hidden=None):
        """Decode ``hidden`` into a reconstruction.

        When ``hidden`` is ``None``, a random normal sample of shape
        ``[1, n_hidden]`` is drawn and decoded instead.
        """
        if hidden is None:
            hidden = self.sess.run(tf.random_normal([1, self.n_hidden]))
        # Feed the hidden tensor directly, bypassing the encoder.
        return self.sess.run(self.reconstruction, feed_dict={self.hidden: hidden})

    def reconstruct(self, X):
        """Return the reconstruction of ``X`` (encode, then decode)."""
        return self.sess.run(self.reconstruction, feed_dict={self.x: X})

    def getWeights(self):
        """Return the current value of ``w1``."""
        return self.sess.run(self.weights['w1'])

    def getBiases(self):
        """Return the current value of ``b1``."""
        return self.sess.run(self.weights['b1'])
# I instantiate the Autoencoder class: 5 is the dimension of a raw input,
# 2 is the dimension of the hidden layer
# Input width 5, hidden width 2, with softplus and Adam passed explicitly.
autoencoder = Autoencoder(5, 2, transfer_function=tf.nn.softplus, optimizer
= tf.train.AdamOptimizer())
# I prepare my data
# Path of the CSV file holding the training data.
IRIS_TRAINING = "C:\\Users\\Desktop\\iris_training.csv"
# Feeding data to Autoencoder ???
# Train and Test ??
How can I train this model with data from a csv file? I think I need to run an instruction such as _, c = sess.run([optimizer, cost], feed_dict={self.x: batch_of_data}) inside a loop over epochs, but I am struggling with it.
Check out Stanford CS20SI's tutorial.
https://github.com/chiphuyen/tf-stanford-tutorials/blob/master/examples/05_csv_reader.py