I have a model that I compile with binary_crossentropy; training goes well and the loss is printed.
model = MyModel()
model.compile(optimizer="adadelta", loss="binary_crossentropy")
data1, data2 = get_random_data(4, 3)  # this method returns data1: (1000, 4), data2: (1000, 3)
model.fit([data1, data2], y, batch_size=4)
Then I wrote a custom loss function, and the loss becomes NaN:
import tensorflow.keras.backend as K

class MyModel():
    ...
    def batch_loss(self, y_true, y_pred_batch):
        bottom = K.sum(K.exp(y_pred_batch))
        batch_softmax = K.exp(y_pred_batch) / bottom
        batch_log_likelihood = K.log(batch_softmax)
        loss = K.sum(batch_log_likelihood)
        return loss

model.compile(optimizer="adadelta", loss=model.batch_loss)  # change above compile code to this
I tested my loss function with batch_loss(tf.ones((1,))) and it seems to return the correct result.
But when it runs as part of training, the loss becomes NaN. Where should I start debugging?
model and data code (for those who need it to reproduce):
class MyModel(tf.keras.models.Model):
    def __init__(self):
        super().__init__()
        self.t1A = tf.keras.layers.Dense(300, activation='relu', input_dim=1)
        self.t1B = tf.keras.layers.Dense(300, activation='relu', input_dim=1)
        self.t1v = tf.keras.layers.Dense(128, activation='relu')
        self.t2A = tf.keras.layers.Dense(300, activation='relu')
        self.t2B = tf.keras.layers.Dense(300, activation='relu')
        self.t2v = tf.keras.layers.Dense(128, activation='relu')
        self.out = tf.keras.layers.Dot(axes=1)

    def call(self, inputs, training=None, mask=None):
        u, i = inputs[0], inputs[1]
        u = self.t1A(u)
        u = self.t1B(u)
        u = self.t1v(u)
        i = self.t2A(i)
        i = self.t2B(i)
        i = self.t2v(i)
        out = self.out([u, i])
        return out
def get_random_data(user_feature_num, item_feature_num):
    def get_random_ndarray(data_size, dis_list, feature_num):
        data_list = []
        for i in range(feature_num):
            arr = np.random.randint(dis_list[i], size=data_size)
            data_list.append(arr)
        data = np.array(data_list)
        return np.transpose(data, axes=(1, 0))

    uf_dis, if_dis, data_size = [1000, 2, 10, 20], [10000, 50, 60], 1000
    y = np.zeros(data_size)
    for i in range(int(data_size / 10)):
        y[i] = 1
    return get_random_ndarray(data_size, uf_dis, feature_num=user_feature_num), \
           get_random_ndarray(data_size, if_dis, feature_num=item_feature_num), y
The values output by your model are quite big. Combined with the call to exp() in your loss function, they quickly grow to NaN. You might consider applying an activation function such as a sigmoid to keep the values between 0 and 1.
I think your error is caused by calling exp(): it grows very quickly and overflows, producing NaN.
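If exp() overflowing is indeed the culprit, one standard mitigation (a sketch, not the asker's confirmed fix) is the log-sum-exp trick, which computes the same log-softmax sum as the loss above without ever exponentiating large values; note that, like the original, it still ignores y_true:
import tensorflow.keras.backend as K

def batch_loss(self, y_true, y_pred_batch):
    # log(softmax(x)) = (x - max(x)) - log(sum(exp(x - max(x)))),
    # so exp() never sees large positive values.
    shifted = y_pred_batch - K.max(y_pred_batch)
    batch_log_softmax = shifted - K.log(K.sum(K.exp(shifted)))
    return K.sum(batch_log_softmax)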
I am using graph convolutions in Deepchem/Keras for predicting molecular properties. Following the Deepchem tutorials I created a data generator. While there is no error in my code below, I fail to understand why the size of pred changes with epoch and batch_size.
First we create some dummy data.
!pip install --pre deepchem
!pip install --pre rdkit
import deepchem as dc
import numpy as np
import tensorflow as tf
from deepchem.feat.mol_graphs import ConvMol
mol = ['C-C-O']*240
ftr = dc.feat.ConvMolFeaturizer(per_atom_fragmentation=False)
X=ftr.featurize(mol)
y = np.arange(0,240,1)
w = np.arange(0,240,1)
ids = np.arange(0,240,1)
ds = dc.data.NumpyDataset(X=X, y=y, ids=ids)
Edit: we use the following function as the generator:
def data_generator(dataset, epochs=1, batch_size=100, pad_batches=True):
    print(dataset)
    for ind, (X_b, y_b, w_b, ids_b) in enumerate(dataset.iterbatches(batch_size, epochs,
                                                                     deterministic=False, pad_batches=pad_batches)):
        multiConvMol = ConvMol.agglomerate_mols(X_b)
        inputs = [multiConvMol.get_atom_features(), multiConvMol.deg_slice, np.array(multiConvMol.membership)]
        for i in range(1, len(multiConvMol.get_deg_adjacency_lists())):
            inputs.append(multiConvMol.get_deg_adjacency_lists()[i])
        labels = [y_b]
        weights = [w_b]
        yield (inputs, labels, weights)
(end edit)
Then we define the model and fit it to the dataset generated above:
# Imports assumed by this snippet: GraphConv/GraphGather live in deepchem.models.layers
from deepchem.models.layers import GraphConv, GraphGather
from tensorflow.keras import layers

batch_size = 100
n_tasks = 1

class TestModel(tf.keras.Model):
    def __init__(self, model=1):
        super(TestModel, self).__init__()
        self.model = model
        #____________Test Model 1___________
        if self.model == 1:
            self.gc1 = GraphConv(128, activation_fn=tf.nn.tanh)
            self.readout = GraphGather(batch_size=batch_size,
                                       activation_fn=tf.nn.tanh)
            self.dense2 = layers.Dense(1)

    def call(self, inputs):
        #____________Test Model 1___________
        if self.model == 1:
            gc1_output = self.gc1(inputs)
            readout_output = self.readout([gc1_output] + inputs[1:])
            dense2_output = self.dense2(readout_output)
        return dense2_output

#Fit_generator
print("_________\nFitting:")
testmodel = dc.models.KerasModel(TestModel(1), loss=dc.models.losses.L2Loss())
testmodel.fit_generator(data_generator(ds, epochs=1, batch_size=100))
Finally we try to predict the dataset labels setting epochs = 2:
#Predict
print("_________\nPredicting:")
pred = testmodel.predict_on_generator(data_generator(ds, epochs = 2, batch_size = 100, pad_batches = True))
print(ds.y.shape, pred.shape)
Giving:
_________
Predicting:
<NumpyDataset X.shape: (240,), y.shape: (240,), w.shape: (240,), ids: [0 1 2 ... 237 238 239], task_names: [0]>
(240,) (600, 1)
However, if I change epochs to 1, the shape of pred changes to (300, 1), i.e. half of what we had before. Similarly, changing the batch_size affects the prediction shape too.
Can anyone explain what I'm doing wrong?
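For what it's worth, the shapes reported above are consistent with the generator settings: predict_on_generator consumes everything the generator yields, and with pad_batches=True each epoch emits ceil(240 / 100) = 3 padded batches of 100 rows. A quick sanity check of that reading (plain Python; the numbers come from the snippet above):
import math

n_samples, batch_size = 240, 100
for epochs in (1, 2):
    rows = math.ceil(n_samples / batch_size) * batch_size * epochs
    print(epochs, rows)  # 1 -> 300, 2 -> 600, matching the shapes above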
I have a GAN model in which I need to integrate the output of the generator before passing it to the discriminator. The method of integration I have chosen is scipy.integrate.cumtrapz(). This method keeps the integral at every timestep, so I basically get a discrete anti-derivative instead of a single numeric integral. I am aware that there is a tf.trapz(), but it only returns the numerical value.
My issue is that when I pass the output of the generator, x_fake = self.generator(latent_vec), to cumtrapz(), I get an error because the generator output is a tensor, not a numpy array. Okay, I get it: the input to cumtrapz() needs to be an array. But I cannot convert the generator output tensor x_fake to a numpy array because it is a symbolic tensor. I understand why Keras constructs the model symbolically at first, but when a numeric input is passed to that model (in my case a random latent vector), why is the output still symbolic? I can view the numeric value of this output using tf.print(), but that does not help, as I need it in a variable. What is further confusing is that if I pass input to the generator outside of training, I get a numeric tensor with a numpy() method.
I have read that this symbolic behavior happens when using the Keras Functional API, but I switched to the Sequential API for the generator model and saw no difference in behavior. I have tried running a TF session and got roughly a 1000-line error (no exaggeration). So how can I convert the generator output to a numpy array during training to perform the integration?
Here is my model with loss functions:
# Imports assumed for these snippets
from tensorflow.keras.layers import Input, Conv1D, Conv1DTranspose, LeakyReLU
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

def define_generator(latent_dim):
    inputs = Input(shape=latent_dim)
    h = Conv1DTranspose(1, 11, strides=1, activation=LeakyReLU(alpha=0.2))(inputs)
    h = Conv1DTranspose(1, 11, strides=1, activation=LeakyReLU(alpha=0.2))(h)
    h = Conv1DTranspose(1, 11, strides=1, activation=LeakyReLU(alpha=0.2))(h)
    outputs = Conv1DTranspose(1, 12, strides=1, activation='linear')(h)
    model = Model(inputs=inputs, outputs=outputs, name='GENERATOR')
    return model

def generator_loss(D_labels, D_pred):
    bce = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    loss = bce(D_labels, D_pred)
    return loss

def define_discriminator(data_shape):
    inputs = Input(shape=data_shape)
    h = Conv1D(1, 12, strides=1, activation=LeakyReLU(alpha=0.2))(inputs)
    h = Conv1D(1, 11, strides=1, activation=LeakyReLU(alpha=0.2))(h)
    h = Conv1D(1, 11, strides=1, activation=LeakyReLU(alpha=0.2))(h)
    h = Conv1D(1, 11, strides=1, activation=LeakyReLU(alpha=0.2))(h)
    outputs = Conv1D(1, 10, strides=1, activation=LeakyReLU(alpha=0.2))(h)
    model = Model(inputs=inputs, outputs=outputs, name='DISCRIMINATOR')
    return model

def discriminator_loss(D_labels, D_pred):
    bce = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    loss = bce(D_labels, D_pred)
    return loss
And here is my code for the training:
class define_GAN(Model):
    def __init__(self, gen, disc, latent_dim, n_disc_train):
        super().__init__()
        self.generator = gen
        self.discriminator = disc
        self.latent_dim = latent_dim
        self.n_disc_train = n_disc_train

    def compile(self, gen_loss_fcn, disc_loss_fcn, gen_lr=0.001, gen_beta1=0.9, gen_beta2=0.999, disc_lr=0.001, disc_beta1=0.9, disc_beta2=0.999):
        super().compile()
        self.gen_optimizer = Adam(learning_rate=gen_lr, beta_1=gen_beta1, beta_2=gen_beta2)
        self.disc_optimizer = Adam(learning_rate=disc_lr, beta_1=disc_beta1, beta_2=disc_beta2)
        self.gen_loss_fcn = gen_loss_fcn
        self.disc_loss_fcn = disc_loss_fcn

    def train_step(self, x_real):
        batch_size = tf.shape(x_real)[0]

        for i in range(self.n_disc_train):
            latent_vec = tf.random.normal(shape=(batch_size, self.latent_dim))
            x_fake = self.generator(latent_vec)
            #tf.print(x_fake)
            #x_fake = integrate.cumtrapz(x_fake.numpy(), t)
            with tf.GradientTape() as tape:
                D_real = self.discriminator(x_real, training=True)
                y_real = tf.ones([batch_size, 1])
                D_fake = self.discriminator(x_fake, training=True)
                y_fake = tf.zeros([batch_size, 1])
                pred = tf.concat([D_real, D_fake], 0)
                label = tf.concat([y_real, y_fake], 0)
                disc_loss = self.disc_loss_fcn(label, pred)
            disc_gradient = tape.gradient(disc_loss, self.discriminator.trainable_variables)
            self.disc_optimizer.apply_gradients(zip(disc_gradient, self.discriminator.trainable_variables))

        latent_vec = tf.random.normal(shape=(batch_size, self.latent_dim))
        with tf.GradientTape() as tape:
            x_fake = self.generator(latent_vec, training=True)
            #x_fake = integrate.cumtrapz(x_fake, t)
            D_fake = self.discriminator(x_fake, training=False)
            label = tf.zeros([batch_size, 1])
            gen_loss = self.gen_loss_fcn(label, D_fake)
        gen_gradient = tape.gradient(gen_loss, self.generator.trainable_variables)
        self.gen_optimizer.apply_gradients(zip(gen_gradient, self.generator.trainable_variables))

        return {"gen_loss": gen_loss, "disc_loss": disc_loss}
This piece of standalone code works fine:
latent_vec = tf.random.normal(shape=(1,10,1))
fake = GAN.generator(latent_vec).numpy()
fake
I think my attempt at running a TF session was this (I wasn't sure what it was actually doing):
sess = tf.compat.v1.Session
with sess:
    x_fake.eval()
    sess.run(x_fake)
I have also tried disabling eager execution which throws an error about an unexpected keyword argument to compile(). The GAN will run and train without error in eager mode and without trying to convert the generator output to an array, so I don't think eager execution is an issue.
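One observation that may help frame this: during model.fit, train_step is traced into a graph via tf.function, so tensors inside it are symbolic, while the standalone call above runs eagerly and therefore exposes a numpy() method. As a side note, a cumulative trapezoid can also be written with plain TensorFlow ops, which would keep the generator output inside the graph (and differentiable) instead of round-tripping through NumPy/SciPy. A minimal sketch, assuming a constant step dt, a (batch, time, channels) generator output, and integration along the time axis; the function name is illustrative, not from the question:
import tensorflow as tf

def tf_cumtrapz(x, dt=1.0):
    # x: (batch, time, channels). Cumulative trapezoidal rule along time:
    # running[i] = sum_{k < i} (x[k] + x[k+1]) / 2 * dt, with a leading zero
    # so the output keeps the same time length as the input.
    increments = (x[:, 1:, :] + x[:, :-1, :]) * 0.5 * dt
    running = tf.cumsum(increments, axis=1)
    zero = tf.zeros_like(x[:, :1, :])
    return tf.concat([zero, running], axis=1)

# e.g. inside train_step: x_fake = tf_cumtrapz(self.generator(latent_vec), dt=sample_spacing)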
I am trying to create a design that would look something like this:
Right now I am trying the following code, but what I need is the output of the second hidden layer inserted at the 15th position of the next layer's input. With this code it ends up at the 31st position instead.
# Imports assumed for this snippet
from tensorflow.keras.layers import Input, Dense, concatenate
from tensorflow.keras.models import Model

inputs = Input(shape=(30,), name='first_input')
hn = Dense(4, activation='relu')(inputs)
output = Dense(1, activation='linear')(hn)
first_model = Model(inputs=inputs, outputs=output)

second_input = Input(shape=(30,), name='second_input')
from_first_model = first_model.output
merge_layer = concatenate([second_input, from_first_model])
hn = Dense(4, activation="relu")(merge_layer)
dnn_op_layer = Dense(1, activation='linear')(hn)
model_b = Model(inputs=[second_input, first_model.input], outputs=dnn_op_layer)
This should work: slice the tensor into two parts, then concatenate the two parts with the output from the second hidden layer.
test = tf.convert_to_tensor(np.random.rand(30), dtype=tf.float32)
test2 = tf.convert_to_tensor([100], dtype=tf.float32)
# create layer 3 as desired
temp, temp2 = test[:15], test[15:]
layer3 = tf.concat([temp, test2, temp2], axis=0)
Edit: the error you were getting might have been from using the Functional API. Additionally, the previous example assumes the input has shape (30,), when really the input should have a shape like (1, 30) to be consistent with TensorFlow.
class model(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.hidden1 = tf.keras.layers.Dense(4, activation='relu')
        self.hidden2 = tf.keras.layers.Dense(1)
        self.hidden4 = tf.keras.layers.Dense(4, activation='relu')
        self.hidden5 = tf.keras.layers.Dense(1)

    def call(self, inputs):
        x = self.hidden1(inputs)
        x = self.hidden2(x)
        temp, temp2 = inputs[:, :15], inputs[:, 15:]
        # (1, 31): first 15 input features, then the hidden2 output, then the last 15
        layer3 = tf.concat([temp, x, temp2], axis=1)
        x = self.hidden4(layer3)
        x = self.hidden5(x)
        return x

# test
mymodel = model()
inputs = tf.convert_to_tensor(np.random.rand(1, 30), dtype=tf.float32)
mymodel(inputs)
I have implemented the basic MNIST model with a custom convolution layer, as shown below. The problem is that the gradients are always None for the custom layer, so no learning happens during backpropagation.
I have debugged the outputs of the layers during the forward pass and they look fine.
Here is the sample code; for simplicity I pass an image of ones and just return the matrix from the custom layer.
I have tried my best but could not make it work. Any help is very much appreciated.
The following code is executable and raises this warning:
WARNING:tensorflow:Gradients do not exist for variables ['cnn/custom_conv2d/kernel:0', 'cnn/custom_conv2d/bias:0', 'cnn/custom_conv2d_1/kernel:0', 'cnn/custom_conv2d_1/bias:0', 'cnn/custom_conv2d_2/kernel:0', 'cnn/custom_conv2d_2/bias:0'] when minimizing the loss.
import numpy as np
import tensorflow as tf
from grpc.beta import interfaces
class CustomConv2D(tf.keras.layers.Conv2D):
    def __init__(self, filters,
                 kernel_size,
                 strides=(1, 1),
                 padding='valid',
                 data_format=None,
                 dilation_rate=(1, 1),
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='glorot_uniform',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 __name__='CustomConv2D',
                 **kwargs
                 ):
        super(CustomConv2D, self).__init__(
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs)

    def call(self, input):
        (unrolled_mat, filters, shape) = self.prepare(input)
        # unrolled_mat = unrolled inputs
        # filters = unrolled kernels of the layer
        # convolution through unrolling
        conv_result = tf.tensordot(unrolled_mat, filters, axes=1)
        result = tf.convert_to_tensor(tf.reshape(conv_result, shape))
        return result

    def prepare(self, matrix):
        batches, rows, cols, channels = matrix.shape
        kernel_size = self.kernel_size[0]
        unrolled_matrices = None
        # start = timer()
        for batch in range(batches):
            unrolled_maps = None
            for chanel in range(channels):
                unrolled_map = self.unroll(batch, cols, kernel_size, matrix, rows, chanel)
                if unrolled_maps is None:
                    unrolled_maps = unrolled_map
                else:
                    unrolled_maps = np.append(unrolled_maps, unrolled_map, axis=1)
            unrolled_maps = np.reshape(unrolled_maps, (-1, unrolled_maps.shape[0], unrolled_maps.shape[1]))
            if unrolled_matrices is None:
                unrolled_matrices = unrolled_maps
            else:
                unrolled_matrices = np.concatenate((unrolled_matrices, unrolled_maps))
        kernels = self.get_weights()
        kernels = np.reshape(kernels[0], (unrolled_matrices[0].shape[1], -1))
        shp = (batches, rows - (kernel_size - 1), cols - (kernel_size - 1), self.filters)
        matrix = unrolled_matrices
        return (matrix, kernels, shp)

    def unroll(self, batch, cols, kernel_size, matrix, rows, chanel):
        # a = np.zeros((shape))
        unrolled_feature_map = None
        for x in range(0, rows - (kernel_size - 1)):
            for y in range(0, (cols - (kernel_size - 1))):
                temp_row = None  # flattened kernel at a single position
                for k in range(kernel_size):
                    for l in range(kernel_size):
                        if temp_row is None:
                            temp_row = matrix[batch, x + k, y + l, chanel]
                            # print(matrix[batch, x + k, y + l])
                        else:
                            temp_row = np.append(temp_row, matrix[batch, x + k, y + l, chanel])
                            # print(matrix[batch, x + k, y + l])
                if unrolled_feature_map is None:
                    # first row of the unrolled matrix
                    unrolled_feature_map = np.reshape(temp_row, (-1, kernel_size * kernel_size))
                else:
                    # concatenate subsequent rows to the unrolled matrix
                    unrolled_feature_map = np.concatenate(
                        (unrolled_feature_map, np.reshape(temp_row, (-1, kernel_size * kernel_size))))
        unrolled_feature_map = np.reshape(unrolled_feature_map, (unrolled_feature_map.shape[0], unrolled_feature_map.shape[1]))
        # print(unrolled_feature_map.shape)
        matrix = unrolled_feature_map
        return matrix
class CNN(tf.keras.Model):
    def __init__(self):
        super(CNN, self).__init__()
        self.learning_rate = 0.001
        self.momentum = 0.9
        self.optimizer = tf.keras.optimizers.Adam(self.learning_rate, self.momentum)
        self.conv1 = CustomConv2D(filters=6, kernel_size=3, activation='relu')  # valid means no padding
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=2)  # default stride??-
        self.conv2 = CustomConv2D(filters=16, kernel_size=3, activation='relu')
        self.pool2 = tf.keras.layers.MaxPool2D(pool_size=2)
        self.conv3 = CustomConv2D(filters=120, kernel_size=3, activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.fc1 = tf.keras.layers.Dense(units=82, kernel_initializer='glorot_uniform')
        self.fc2 = tf.keras.layers.Dense(units=10, activation='softmax', kernel_initializer='glorot_uniform')

    def call(self, x):
        x = self.conv1(x)    # shape (32, 26, 26, 6)
        x = self.pool1(x)    # shape (32, 13, 13, 6)
        x = self.conv2(x)    # shape (32, 11, 11, 16)
        x = self.pool2(x)    # shape (32, 5, 5, 16)
        x = self.conv3(x)    # shape (32, 3, 3, 120)
        x = self.flatten(x)  # shape (32, 1080)
        x = self.fc1(x)      # shape (32, 82)
        x = self.fc2(x)      # shape (32, 10)
        return x

    def feedForward(self, image, label):
        accuracy_object = tf.metrics.Accuracy()
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        with tf.GradientTape() as tape:
            feedForwardCompuation = self(image, training=True)
            self.loss_value = loss_object(label, feedForwardCompuation)
        grads = tape.gradient(self.loss_value, self.variables)
        self.optimizer.apply_gradients(zip(grads, self.variables))
        accuracy = accuracy_object(tf.argmax(feedForwardCompuation, axis=1, output_type=tf.int32), label)

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype('float32')
y_train = y_train.astype('float32')
image = x_train[0].reshape((1, 28, 28, 1))
label = y_train[0]
cnn = CNN()
cnn.feedForward(image, label)
UPDATE: I am not using the built-in TF conv function; rather, I am implementing my own custom convolution operation via a matrix-unrolling method (unrolled map * unrolled filters). But tape.gradient returns None for the custom layers, whereas when I use the built-in conv2d function of TF it works fine!
I have added the actual code of the operation above.
Snapshot of grads while debugging
The problem is that the convolution operation is not happening in the CustomConv2D class. Neither the call method nor the customConv method performs the convolution operation; the input is just returned as it is.
Replacing the line return self.customConv(matrix) in the call method of the CustomConv2D class with return super(tf.keras.layers.Conv2D, self).call(matrix) will perform the actual convolution operation.
One more change is to invoke the call method of the CNN class by adding the line _ = cnn(X_reshaped) before the line cnn.feedForward(image, label).
With these two changes, gradients will be computed.
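A condensed sketch of those two changes (this assumes the earlier revision of the question whose call() delegated to a customConv helper; X_reshaped is taken to be the (1, 28, 28, 1) array built in the question):
class CustomConv2D(tf.keras.layers.Conv2D):
    def call(self, matrix):
        # Delegate to the built-in convolution so the op is part of the traced
        # TF graph; the NumPy-based unrolling is invisible to the gradient tape.
        return super(tf.keras.layers.Conv2D, self).call(matrix)

cnn = CNN()
X_reshaped = image             # alias for the question's (1, 28, 28, 1) input
_ = cnn(X_reshaped)            # run call() once so the model gets built
cnn.feedForward(image, label)  # gradients are no longer None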
I am trying to convert a Keras functional model into a class derived from tensorflow.keras.models.Model, and I'm facing 2 issues.
1. I need to multiply 2 layers using tensorflow.keras.layers.multiply, but it returns a ValueError: A merge layer should be called on a list of inputs.
2. If I remove this layer, thus working with a classical CNN, it returns a tensorflow.python.eager.core._SymbolicException: Inputs to eager execution function cannot be Keras symbolic tensors, but found [<tf.Tensor 'patch:0' shape=(None, 64, 64, 3) dtype=float32>].
I would appreciate some guidance to convert my code. I'm using Python 3.7, TensorFlow 2.0rc2 and Keras 2.3.0. The class I have defined is the following:
class TestCNN(Model):
    """
    conv1 > conv2 > fc1 > fc2 > alpha * fc2 > Sigmoid > output
    """
    def __init__(self, input_dimension, n_category, **kwargs):
        """
        Instantiator
        :param input_dimension: tuple of int, theoretically (patch_size x patch_size x channels)
        :param n_category: int, the number of categories to classify
        :param weight_decay: float, weight decay parameter for all the kernel regularizers
        :return: the Keras model
        """
        super(TestCNN, self).__init__(name='testcnn', **kwargs)
        self.input_dimension = input_dimension
        self.n_category = n_category
        self.conv1 = Conv2D(36, activation='relu', name='conv1/relu')
        self.conv1_maxpooling = MaxPooling2D((2, 2), name='conv1/maxpooling')
        self.conv2 = Conv2D(48, activation='relu', name='conv2/relu')
        self.conv2_maxpooling = MaxPooling2D((2, 2), name='conv2/maxpooling')
        self.flatten1 = Flatten(name='flatten1')
        self.fc1 = Dense(512, activation='relu', name='fc1/relu')
        self.fc2 = Dense(512, activation='relu', name='fc2/relu')
        self.alpha = TestLayer(layer_dim=128, name='alpha')
        self.output1 = TestSigmoid(output_dimension=n_category, name='output_layer')

    @tensorflow.function
    def call(self, x):
        x = self.conv1(x)
        x = self.conv1_maxpooling(x)
        x = self.conv2(x)
        x = self.conv2_maxpooling(x)
        x = self.flatten1(x)
        x = self.fc1(x)
        x = self.fc2(x)
        alpha_times_fc2 = multiply([alpha_output, fc2_output], name='alpha_times_fc2')
        return self.output1(alpha_times_fc2)

    def build(self, **kwargs):
        inputs = Input(shape=self.input_dimension, dtype='float32', name='patch')
        outputs = self.call(inputs)
        super(TestCNN, self).__init__(name="TestCNN", inputs=inputs, outputs=outputs, **kwargs)
Then, in my main loop, I create the instance as follows:
testcnn = TestCNN(input_dimension=input_dimension, n_category=training_set.category_count)
optimizer = tensorflow.keras.optimizers.Adam(
    lr=parameter['training']['adam']['learning_rate'],
    beta_1=parameter['training']['adam']['beta1'],
    beta_2=parameter['training']['adam']['beta2'])
metrics_list = [tensorflow.keras.metrics.TruePositives]
loss_function = tensorflow.keras.losses.categorical_crossentropy
loss_metrics = tensorflow.keras.metrics.Mean()
testcnn.build()
testcnn.summary()
This code raises the tensorflow.python.eager.core._SymbolicException. If I comment out some lines and return the result of the fc2 layer directly, I get the ValueError.
I have commented out the build() function in my model and call build() in my main script as follows:
testcnn.build(input_dimension)
testcnn.compile(optimizer=adam_optimizer, loss=loss_function, metrics=metrics_list)
testcnn.summary()
input_dimension is formatted as follows:
input_dimension = (batch_size, image_size, image_size, channels)
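For reference, one thing that stands out in the posted call(): multiply() is given alpha_output and fc2_output, but neither is assigned inside the method. A sketch of how that wiring is usually written in a subclassed model, assuming alpha is meant to act on fc2's output (this only illustrates the pattern, not necessarily the intended architecture; it assumes multiply is imported from tensorflow.keras.layers):
    def call(self, x):
        x = self.conv1(x)
        x = self.conv1_maxpooling(x)
        x = self.conv2(x)
        x = self.conv2_maxpooling(x)
        x = self.flatten1(x)
        x = self.fc1(x)
        fc2_output = self.fc2(x)
        alpha_output = self.alpha(fc2_output)  # assumption: alpha acts on fc2's output
        # multiply() must be called on a list of already-computed tensors
        alpha_times_fc2 = multiply([alpha_output, fc2_output], name='alpha_times_fc2')
        return self.output1(alpha_times_fc2)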