keras triplet loss crashes when training - tensorflow

I am trying to implement a simple face recognition application, but I have been stuck on a problem for days now. Hopefully someone with more experience in the subject can help. Fingers crossed.
Here is the program:
import tensorflow as tf
#from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.preprocessing import image
#import PIL
#from PIL import Image
from keras.models import Model
from keras.layers import Dense, Input, subtract, concatenate, Lambda, add, maximum
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
import numpy as np

def identity_loss(y_true, y_pred):
    return K.mean(y_pred - 0 * y_true)

def triplet_loss(inputs, dist='euclidean', margin='maxplus'):
    anchor, positive, negative = inputs
    positive_distance = K.square(anchor - positive)
    negative_distance = K.square(anchor - negative)
    if dist == 'euclidean':
        positive_distance = K.sqrt(K.sum(positive_distance, axis=-1, keepdims=True))
        negative_distance = K.sqrt(K.sum(negative_distance, axis=-1, keepdims=True))
    elif dist == 'sqeuclidean':
        positive_distance = K.sum(positive_distance, axis=-1, keepdims=True)
        negative_distance = K.sum(negative_distance, axis=-1, keepdims=True)
    loss = positive_distance - negative_distance
    if margin == 'maxplus':
        loss = K.maximum(0.0, 1 + loss)
    elif margin == 'softplus':
        loss = K.log(1 + K.exp(loss))
    return K.mean(loss)

model = ResNet50(weights='imagenet')
model.layers.pop()
x = model.get_layer('flatten_1').output
model_out = Dense(128, activation='relu', name='model_out')(x)
model_out = Lambda(lambda x: K.l2_normalize(x, axis=-1))(model_out)
new_model = Model(inputs=model.input, outputs=model_out)

anchor_input = Input(shape=(224, 224, 3), name='anchor_input')
pos_input = Input(shape=(224, 224, 3), name='pos_input')
neg_input = Input(shape=(224, 224, 3), name='neg_input')
encoding_anchor = new_model(anchor_input)
encoding_pos = new_model(pos_input)
encoding_neg = new_model(neg_input)

loss = Lambda(triplet_loss)([encoding_anchor, encoding_pos, encoding_neg])
siamese_network = Model(inputs=[anchor_input, pos_input, neg_input],
                        outputs=loss)
siamese_network.compile(optimizer=Adam(lr=.00001), loss=identity_loss)
######################### For reading img path info - start ##########
train_path1 = '/home/cesncn/Desktop/github_projects/face_recog_proj_with_triplet_loss/training_img_pairs.csv'
TRAIN_INPUT_PATHS = [train_path1]
RECORD_DEFAULTS_TRAIN = [[0], [''], [''], ['']]

def decode_csv_train(line):
    parsed_line = tf.decode_csv(line, RECORD_DEFAULTS_TRAIN)
    anchor_path = parsed_line[1]
    pos_path = parsed_line[2]
    neg_path = parsed_line[3]
    return anchor_path, pos_path, neg_path
######################### For reading img path info - end ##########

batch_size = 16
filenames = tf.placeholder(tf.string, shape=[None])
dataset = tf.data.Dataset.from_tensor_slices(filenames)
dataset = dataset.flat_map(lambda filename: tf.data.TextLineDataset(filename).skip(1).map(decode_csv_train))
dataset = dataset.shuffle(buffer_size=1000)
dataset = dataset.batch(batch_size)
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()
init_global_var = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init_global_var)
    nr_epochs = 5
    for i in range(0, nr_epochs):
        print("\nnr_epoch: ", str(i), "\n")
        sess.run(iterator.initializer, feed_dict={filenames: TRAIN_INPUT_PATHS})
        while True:
            try:
                anchor_path, pos_path, neg_path = sess.run(next_element)
                anchor_imgs = np.empty((0, 224, 224, 3))
                pos_imgs = np.empty((0, 224, 224, 3))
                neg_imgs = np.empty((0, 224, 224, 3))
                for j in range(0, len(anchor_path)):
                    anchor_img = image.load_img(anchor_path[j], target_size=(224, 224))
                    anchor_img = image.img_to_array(anchor_img)
                    anchor_img = np.expand_dims(anchor_img, axis=0)
                    anchor_img = preprocess_input(anchor_img)
                    anchor_imgs = np.append(anchor_imgs, anchor_img, axis=0)

                    pos_img = image.load_img(pos_path[j], target_size=(224, 224))
                    pos_img = image.img_to_array(pos_img)
                    pos_img = np.expand_dims(pos_img, axis=0)
                    pos_img = preprocess_input(pos_img)
                    pos_imgs = np.append(pos_imgs, pos_img, axis=0)

                    neg_img = image.load_img(neg_path[j], target_size=(224, 224))
                    neg_img = image.img_to_array(neg_img)
                    neg_img = np.expand_dims(neg_img, axis=0)
                    neg_img = preprocess_input(neg_img)
                    neg_imgs = np.append(neg_imgs, neg_img, axis=0)

                # HERE IT CRASHES WHEN I CALL THE FIT FUNCTION! ! !
                siamese_network.fit([anchor_imgs, pos_imgs, neg_imgs],
                                    batch_size=batch_size,
                                    epochs=1,
                                    verbose=2)
            except tf.errors.OutOfRangeError:
                print("Out of range error triggered (looped through training set 1 time)")
                break
When I call the function siamese_network.fit(...), it crashes and gives the following error, which tells me absolutely nothing!!
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-16-65401d04281c> in <module>()
61 batch_size = batch_size,
62 epochs = 1,
---> 63 verbose = 2)
64
65 #siamese_network.fit({'anchor_input': anchor_imgs, 'pos_input': pos_imgs, 'neg_input': neg_imgs},
~/anaconda3/envs/tensorflow/lib/python3.6/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
1628 sample_weight=sample_weight,
1629 class_weight=class_weight,
-> 1630 batch_size=batch_size)
1631 # Prepare validation data.
1632 do_validation = False
~/anaconda3/envs/tensorflow/lib/python3.6/site-packages/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)
1485 sample_weights = [_standardize_weights(ref, sw, cw, mode)
1486 for (ref, sw, cw, mode)
-> 1487 in zip(y, sample_weights, class_weights, self._feed_sample_weight_modes)]
1488
1489 if check_array_lengths:
~/anaconda3/envs/tensorflow/lib/python3.6/site-packages/keras/engine/training.py in <listcomp>(.0)
1484 self._feed_output_names)
1485 sample_weights = [_standardize_weights(ref, sw, cw, mode)
-> 1486 for (ref, sw, cw, mode)
1487 in zip(y, sample_weights, class_weights, self._feed_sample_weight_modes)]
1488
~/anaconda3/envs/tensorflow/lib/python3.6/site-packages/keras/engine/training.py in _standardize_weights(y, sample_weight, class_weight, sample_weight_mode)
538 else:
539 if sample_weight_mode is None:
--> 540 return np.ones((y.shape[0],), dtype=K.floatx())
541 else:
542 return np.ones((y.shape[0], y.shape[1]), dtype=K.floatx())
AttributeError: 'NoneType' object has no attribute 'shape'
Does anyone have any idea about how to solve this?

I have been trying to implement several variations of the triplet loss function. The version above is just one of those tries...
Anyhow, in the version above, the problem appears to be the following:
Here is how I create and compile the model:
siamese_network = Model(inputs=[anchor_input, pos_input, neg_input],
                        outputs=loss)
siamese_network.compile(optimizer=Adam(lr=.00001), loss=identity_loss)
Obviously, I say here that there is an output from the model, called "loss".
And later, when I train the model, I leave y unset in the fit function. Big mistake.
z = [0]  # ADDED LINE
siamese_network.fit(x=[anchor_imgs, pos_imgs, neg_imgs],
                    y=z,  # ADDED LINE
                    batch_size=batch_size,
                    epochs=1,
                    verbose=2)
And this fix solved the problem.
I chose to answer rather than delete the question. Hopefully it helps others who run into a similar problem..
P.S. In Keras, y defaults to None if you do not pass it to fit().
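For readers adapting this pattern: the usual dummy target is a zero array, and because identity_loss multiplies y_true by 0, its values never affect training. A minimal sketch of the same call with a per-sample dummy (my variant, not the poster's exact fix; since this model's Lambda output is the already-reduced mean loss, the dummy's shape may need to match that reduced output, which is why the single-element list above also passes):

import numpy as np

# One dummy entry per input sample; the values are irrelevant because
# identity_loss multiplies y_true by 0.
y_dummy = np.zeros(len(anchor_imgs))

siamese_network.fit(x=[anchor_imgs, pos_imgs, neg_imgs],
                    y=y_dummy,
                    batch_size=batch_size,
                    epochs=1,
                    verbose=2)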

Related

tf.data.Dataset.from_generator: value error "not enough values to unpack (expected 2, got 1)"

I am currently trying to do a classification problem using a pre-trained transformer model. I wrote a custom generator using the tf.data.Dataset.from_generator method. The model takes two inputs: input_id and attn_mask. While calling model.fit I am getting the value error "not enough values to unpack (expected 2, got 1)". The received arguments list shows it got both input_id and attn_mask. Can anyone help me solve this?
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import Model
from transformers import TFBertModel, BertConfig

def _input_fn():
    x = (train_data.iloc[:, 0:512]).to_numpy()
    y = (train_data.iloc[:, 512:516]).to_numpy()
    attn = np.asarray(np.tile(attn_mask, x.shape[0]).reshape(-1, 512))

    def generator():
        for s1, s2, l in zip(x, attn, y):
            yield {"input_id": s1, "attn_mask": s2}, l

    dataset = tf.data.Dataset.from_generator(generator, output_types=({"input_id": tf.int32, "attn_mask": tf.int32}, tf.int32))
    #dataset = dataset.batch(2)
    #dataset = dataset.shuffle
    return dataset
train_data is the dataframe containing the training data (16000 x 516). The last four columns are one-hot encoded labels. Since I am not using the AutoTokenizer function, I am passing the attention mask as attn_mask.
My model:
bert = 'bert-base-uncased'
config = BertConfig(dropout=0.2, attention_dropout=0.2)
config.output_hidden_states = False
transformer_model = TFBertModel.from_pretrained(bert, config=config)

input_ids_in = tf.keras.layers.Input(shape=(512,), name='input_id', dtype='int32')
input_masks_in = tf.keras.layers.Input(shape=(512,), name='attn_mask', dtype='int32')

embedding_layer = transformer_model(input_ids_in, attention_mask=input_masks_in)[0]
#cls_token = embedding_layer[:,0,:]
#X = tf.keras.layers.BatchNormalization()(cls_token)
X = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))(embedding_layer)
X = tf.keras.layers.GlobalMaxPool1D()(X)
#X = tf.keras.layers.BatchNormalization()(X)
X = tf.keras.layers.Dense(50, activation='relu')(X)
X = tf.keras.layers.Dropout(0.2)(X)
X = tf.keras.layers.Dense(4, activation='softmax')(X)
model = tf.keras.Model(inputs=[input_ids_in, input_masks_in], outputs=X)

for layer in model.layers[:3]:
    layer.trainable = False

optimizer = tf.keras.optimizers.Adam(0.001, beta_1=0.9, beta_2=0.98,
                                     epsilon=1e-9)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

epochs = 1
batch_size = 2
history = model.fit(_input_fn(), epochs=epochs, batch_size=batch_size, verbose=2)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_16908/300834086.py in <module>
2 batch_size =2
3 #history = model.fit(trainDataGenerator(batch_size), epochs= epochs, validation_data=valDataGenerator(batch_size), verbose=2) #
----> 4 history = model.fit(_input_fn(), epochs= epochs, batch_size= batch_size, verbose=2) #validation_data=val_ds,
~/.local/lib/python3.8/site-packages/keras/utils/traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
~/.local/lib/python3.8/site-packages/transformers/models/bert/modeling_tf_bert.py in call(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict, training, **kwargs)
1124 kwargs_call=kwargs,
1125 )
-> 1126 outputs = self.bert(
1127 input_ids=inputs["input_ids"],
1128 attention_mask=inputs["attention_mask"],
~/.local/lib/python3.8/site-packages/transformers/models/bert/modeling_tf_bert.py in call(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict, training, **kwargs)
771 raise ValueError("You have to specify either input_ids or inputs_embeds")
772
--> 773 batch_size, seq_length = input_shape
774
775 if inputs["past_key_values"] is None:
ValueError: Exception encountered when calling layer "bert" (type TFBertMainLayer).
not enough values to unpack (expected 2, got 1)
Call arguments received:
• input_ids=tf.Tensor(shape=(512,), dtype=int32)
• attention_mask=tf.Tensor(shape=(512,), dtype=int32)
• token_type_ids=None
• position_ids=None
• head_mask=None
• inputs_embeds=None
• encoder_hidden_states=None
• encoder_attention_mask=None
• past_key_values=None
• use_cache=True
• output_attentions=False
• output_hidden_states=False
• return_dict=True
• training=True
• kwargs=<class 'inspect._empty'>
Edit:
Adding the output of calling _input_fn()
<FlatMapDataset shapes: ({input_id: <unknown>, attn_mask: <unknown>}, <unknown>), types: ({input_id: tf.int32, attn_mask: tf.int32}, tf.int32)>
I solved this error by batching my tf.data.Dataset. Batching gives the TensorSpec in the dataset a shape with two values to unpack:
TensorSpec(shape=(16, 200)...
That is what the error refers to.
Solution

print(train_ds)  # Before batching
new_train_ds = train_ds.batch(16, drop_remainder=True)
print(new_train_ds)  # After batching
# Before batching
<MapDataset element_spec=({'input_ids': TensorSpec(shape=(200,), dtype=tf.float64, name=None),
 'attention_mask': TensorSpec(shape=(200,), dtype=tf.float64, name=None)},
 TensorSpec(shape=(11,), dtype=tf.float64, name=None))>

# After batching
<BatchDataset element_spec=({'input_ids': TensorSpec(shape=(16, 200), dtype=tf.float64, name=None),
 'attention_mask': TensorSpec(shape=(16, 200), dtype=tf.float64, name=None)},
 TensorSpec(shape=(16, 11), dtype=tf.float64, name=None))>
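Applied to the question's _input_fn, the fix is a one-line batch before returning the dataset; a minimal sketch (batch size 2 mirrors the asker's commented-out line, and drop_remainder is my addition to keep the shapes static):

def _input_fn():
    # ... build x, y, attn and generator exactly as in the question ...
    dataset = tf.data.Dataset.from_generator(
        generator,
        output_types=({"input_id": tf.int32, "attn_mask": tf.int32}, tf.int32))
    # Batching turns each element from shape (512,) into (batch, 512),
    # giving TFBertMainLayer the two values (batch_size, seq_length) it unpacks.
    dataset = dataset.batch(2, drop_remainder=True)
    return dataset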

DeblurGAN can't load its own weights anymore

Hey, I really need some help =)
Firstly, sorry that it's so long^^ but I hope you won't need the full code at the end.
I coded a GAN for deblurring and I am now training it. The first 71 epochs trained without any problems: I trained some epochs until the Colab GPU time limit was reached, and the next day I loaded my weights into the GAN and continued training.
2 or 3 weeks ago I wanted to load the weights of epoch 71 into my GAN, but I received the following error (I'm quite sure I didn't change anything in the code). Since that moment I can only load the first 65 weights, and I get the same error for every epoch higher than 65:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-16-a35c9a2bbf3a> in <module>()
1 # Load weights
----> 2 gan.load_weights(F"/content/gdrive/My Drive/Colab Notebooks/data/deblurGAN_weights66_batchsize_1.h5")
5 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in load_weights(self, filepath, by_name, skip_mismatch, options)
2209 f, self.layers, skip_mismatch=skip_mismatch)
2210 else:
-> 2211 hdf5_format.load_weights_from_hdf5_group(f, self.layers)
2212
2213 def _updated_config(self):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/saving/hdf5_format.py in load_weights_from_hdf5_group(f, layers)
706 str(len(weight_values)) + ' elements.')
707 weight_value_tuples += zip(symbolic_weights, weight_values)
--> 708 K.batch_set_value(weight_value_tuples)
709
710
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
199 """Call target, and fall back on dispatchers if there is a TypeError."""
200 try:
--> 201 return target(*args, **kwargs)
202 except (TypeError, ValueError):
203 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py in batch_set_value(tuples)
3574 if ops.executing_eagerly_outside_functions():
3575 for x, value in tuples:
-> 3576 x.assign(np.asarray(value, dtype=dtype(x)))
3577 else:
3578 with get_graph().as_default():
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/resource_variable_ops.py in assign(self, value, use_locking, name, read_value)
856 with _handle_graph(self.handle):
857 value_tensor = ops.convert_to_tensor(value, dtype=self.dtype)
--> 858 self._shape.assert_is_compatible_with(value_tensor.shape)
859 assign_op = gen_resource_variable_ops.assign_variable_op(
860 self.handle, value_tensor, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/tensor_shape.py in assert_is_compatible_with(self, other)
1132 """
1133 if not self.is_compatible_with(other):
-> 1134 raise ValueError("Shapes %s and %s are incompatible" % (self, other))
1135
1136 def most_specific_compatible_shape(self, other):
ValueError: Shapes (4, 4, 64, 128) and (64,) are incompatible
I looked for a solution for a long time and didn't find a real one. But I found out that if I train one epoch with one of the old weights (1-65), afterwards I can load one of the new weights. So I thought I could use this "workaround", but yesterday I plotted the test-set metric scores for every epoch and got this picture:
[plot: PSNR score per epoch]
As you can see, it looks like I've been producing trash since epoch 65 (on the picture since 60, because I lost the first 5 epochs, so it starts at 6).
I'm really frustrated and hope that someone can help me =D
Here's the full code of the GAN:
# Libraries to build the model
from tensorflow import pad
from tensorflow.keras.layers import Layer
from keras.layers import Input, Activation, Add, UpSampling2D
from keras.layers.merge import Add
from keras.layers.core import Dropout, Dense, Flatten
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.core import Lambda
from keras.layers.normalization import BatchNormalization
from keras.models import Model
import keras.backend as K
from keras.applications.vgg16 import VGG16
from keras.optimizers import Adam
import keras

# Reflection padding
from keras.engine import InputSpec
import tensorflow as tf
from keras.engine.topology import Layer

'''
2D Reflection Padding
Attributes:
  - padding: (padding_width, padding_height) tuple
'''
class ReflectionPadding2D(Layer):
    def __init__(self, padding=(1, 1), **kwargs):
        self.padding = tuple(padding)
        self.input_spec = [InputSpec(ndim=4)]
        super(ReflectionPadding2D, self).__init__(**kwargs)

    def compute_output_shape(self, s):
        """If you are using the "channels_last" configuration"""
        return (s[0], s[1] + 2 * self.padding[0], s[2] + 2 * self.padding[1], s[3])

    def call(self, x, mask=None):
        w_pad, h_pad = self.padding
        return tf.pad(x, [[0, 0], [h_pad, h_pad], [w_pad, w_pad], [0, 0]], 'REFLECT')
# Res Block
def res_block(input, filters, kernel_size=(3, 3), strides=(1, 1), use_dropout=False):
    """
    Instantiate a Keras ResNet block using the functional API.
    :param input: Input tensor
    :param filters: Number of filters to use
    :param kernel_size: Shape of the kernel for the convolution
    :param strides: Shape of the strides for the convolution
    :param use_dropout: Boolean value to determine the use of dropout
    :return: Keras Model
    """
    x = ReflectionPadding2D((1, 1))(input)
    x = Conv2D(filters=filters,
               kernel_size=kernel_size,
               strides=strides,)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    if use_dropout:
        x = Dropout(0.5)(x)
    x = ReflectionPadding2D((1, 1))(x)
    x = Conv2D(filters=filters,
               kernel_size=kernel_size,
               strides=strides,)(x)
    x = BatchNormalization()(x)
    # Two convolution layers followed by a direct connection between input and output (skip connection)
    out = Add()([input, x])
    return out
# Generator
n_res_blocks = 9

def generator_model():
    # encoder
    inputs = Input(shape=img_shape)
    x = ReflectionPadding2D((3, 3))(inputs)
    x = Conv2D(filters=64, kernel_size=(7, 7), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(128, (3, 3), strides=2, padding='same')(x)  # DIM(15,15,128)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(256, (3, 3), strides=2, padding='same')(x)  # DIM(7,7,256)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    # Apply 9 res blocks
    for i in range(n_res_blocks):
        x = res_block(x, 256, use_dropout=True)
    # decoder
    #x = Conv2DTranspose(128, (3,3), strides=2, padding='same')(x)
    x = UpSampling2D()(x)
    x = Conv2D(filters=128, kernel_size=(3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    #x = Conv2DTranspose(64, (3,3), strides=2, padding='same')(x)
    x = UpSampling2D()(x)
    x = Conv2D(filters=64, kernel_size=(3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = ReflectionPadding2D((3, 3))(x)
    x = Conv2D(filters=3, kernel_size=(7, 7), padding='valid')(x)
    x = Activation('tanh')(x)
    # Add direct connection from input to output and recenter to [-1, 1] (skip connection)
    outputs = Add()([x, inputs])
    outputs = Lambda(lambda z: z / 2)(outputs)  # to keep normalized outputs
    model = Model(inputs=inputs, outputs=outputs, name='Generator')
    return model
# Discriminator
def discriminator_model():
    Input_img = Input(shape=(img_shape))
    x = Conv2D(filters=64, kernel_size=(4, 4), strides=2, padding='same')(Input_img)
    x = LeakyReLU(0.2)(x)
    nf_mult, nf_mult_prev = 1, 1
    for n in range(3):
        nf_mult_prev, nf_mult = nf_mult, min(2**n, 8)
        x = Conv2D(filters=64*nf_mult, kernel_size=(4, 4), strides=2, padding='same')(x)
        x = BatchNormalization()(x)
        x = LeakyReLU(0.2)(x)
    nf_mult_prev, nf_mult = nf_mult, 8
    x = Conv2D(filters=64*nf_mult, kernel_size=(4, 4), strides=1, padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(0.2)(x)
    x = Conv2D(filters=1, kernel_size=(4, 4), strides=1, padding='same')(x)
    x = Flatten()(x)
    x = Dense(1024, activation='tanh')(x)
    x = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=Input_img, outputs=x, name='discriminator')
    return model

def gan_model(generator, discriminator):
    inputs = Input(shape=img_shape)
    generated_images = generator(inputs)
    outputs = discriminator(generated_images)
    model = Model(inputs=inputs, outputs=[generated_images, outputs])
    return model
# Losses
# Wasserstein loss:
def wasserstein_loss(y_true, y_pred):
    return K.mean(y_true * y_pred)

# vgg16 model for perceptual loss
vgg = VGG16(include_top=False, weights='imagenet', input_shape=img_shape)
loss_model = Model(inputs=vgg.input, outputs=vgg.get_layer('block3_conv3').output)
loss_model.trainable = False

# Perceptual loss:
def perceptual_loss(y_true, y_pred):
    return K.mean(K.square(loss_model(y_true) - loss_model(y_pred)))

# Metrics:
# SSIM:
def ssim_metric(y_true, y_pred):
    return tf.reduce_mean(tf.image.ssim(tf.convert_to_tensor(y_true), tf.convert_to_tensor(y_pred), max_val=1.0))

# PSNR:
def psnr_metric(y_true, y_pred):
    return tf.reduce_mean(tf.image.psnr(y_true, y_pred, max_val=1.0))
def training(epochs, batch_size):
    path_psnr = F"/content/gdrive/My Drive/Colab Notebooks/data/psnr"
    path_ssim = F"/content/gdrive/My Drive/Colab Notebooks/data/ssim"
    GAN_losses = []
    #psnrs = []
    #ssims = []
    random_idx = np.arange(0, X_train.shape[0])
    n_batches = int(len(random_idx)/batch_size)  # divide training set into batches of batch_size
    for e in range(epochs):
        #weights_name = "deblurGAN_weights%s_batchsize_%r.h5" %(e + 66, batch_size)
        weights_name = "deblurGAN_weights_test.h5"
        print("epoch: %s " %(e + 66))
        # randomize index of training set
        random.shuffle(random_idx)
        for i in range(n_batches):
            img_batch_blured = X_train[i*batch_size:(i+1)*batch_size]
            img_batch_generated = generator.predict(img_batch_blured)
            img_batch_original = Y_train[i*batch_size:(i+1)*batch_size]
            img_batch = np.concatenate((img_batch_generated, img_batch_original), 0)
            valid0 = -np.ones(batch_size)
            valid1 = np.ones(batch_size)
            valid = np.concatenate((valid0, valid1))
            discriminator.trainable = True
            for k in range(5):
                loss = discriminator.train_on_batch(img_batch, valid)
            discriminator.trainable = False
            GAN_loss = gan.train_on_batch(img_batch_blured, [img_batch_original, valid1])
            GAN_losses.append(GAN_loss)
            if (100*i/n_batches).is_integer():
                psnr = psnr_metric(img_batch_original, img_batch_generated)
                ssim = ssim_metric(img_batch_original, img_batch_generated)
                psnrs.append(psnr)
                ssims.append(ssim)
                # creating 2 files in Google Drive where the psnr and ssim data will be saved
                pickle.dump(psnrs, open(path_psnr, "wb"))
                pickle.dump(ssims, open(path_ssim, "wb"))
                print((100*i/n_batches) + 1, "% psnr: ", psnr, " ssim: ", ssim)
        # Save weights: modify the path to your directory
        gan.save_weights(F"/content/gdrive/My Drive/Colab Notebooks/data/{weights_name}")
    return [GAN_losses, psnrs, ssims]
# Initialize models
generator = generator_model()
discriminator = discriminator_model()
gan = gan_model(generator, discriminator)

# Initialize optimizers
d_opt = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
gan_opt = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

# Compile models
discriminator.trainable = True
discriminator.compile(optimizer=d_opt, loss=wasserstein_loss)
discriminator.trainable = False
loss = [perceptual_loss, wasserstein_loss]
loss_weights = [100, 1]
gan.compile(optimizer=gan_opt, loss=loss, loss_weights=loss_weights)
discriminator.trainable = True
gan.summary()

# Load weights
gan.load_weights(F"/content/gdrive/My Drive/Colab Notebooks/data/deblurGAN_weights66_batchsize_1.h5")

# Connect to GPU
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

loss = training(1, 1)  # epochs, batch size
It is solved and can be closed. I hadn't realized that the discriminator.trainable = True/False toggling had been changed; it appears to be the reason for a different ordering of the weights in the saved file.
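A closing note for readers with the same symptom: load_weights without by_name matches weights positionally, and toggling discriminator.trainable between compiling and saving can reorder the combined model's weight list. One defensive pattern (a sketch of my own, not the poster's code) is to save and restore each sub-model separately, so the GAN wrapper's weight ordering never matters:

# Save generator and discriminator independently; each file's weight
# order then depends only on that sub-model, not on the GAN wrapper.
generator.save_weights("generator_weights.h5")
discriminator.save_weights("discriminator_weights.h5")

# Restore the same way, then rebuild the combined model.
generator.load_weights("generator_weights.h5")
discriminator.load_weights("discriminator_weights.h5")
gan = gan_model(generator, discriminator)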

Implementing HuggingFace BERT using TensorFlow for sentence classification

I am trying to train a model for real disaster tweet prediction (a Kaggle competition) using the Hugging Face BERT model for classification of the tweets.
I have followed many tutorials and used many BERT models, but none could run in Colab; each throws the error below.
My code is:
!pip install transformers
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import ModelCheckpoint
from transformers import DistilBertTokenizer, RobertaTokenizer

train = pd.read_csv("/content/drive/My Drive/Kaggle_disaster/train.csv")
test = pd.read_csv("/content/drive/My Drive/Kaggle_disaster/test.csv")

roberta = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizer.from_pretrained(roberta, do_lower_case=True, add_special_tokens=True, max_length=128, pad_to_max_length=True)

def tokenize(sentences, tokenizer):
    input_ids, input_masks, input_segments = [], [], []
    for sentence in sentences:
        inputs = tokenizer.encode_plus(sentence, add_special_tokens=True, max_length=128, pad_to_max_length=True, return_attention_mask=True, return_token_type_ids=True)
        input_ids.append(inputs['input_ids'])
        input_masks.append(inputs['attention_mask'])
        input_segments.append(inputs['token_type_ids'])
    return np.asarray(input_ids, dtype="int32"), np.asarray(input_masks, dtype="int32"), np.asarray(input_segments, dtype="int32")

input_ids, input_masks, input_segments = tokenize(train.text.values, tokenizer)
from transformers import TFDistilBertForSequenceClassification, DistilBertConfig, TFDistilBertModel

distil_bert = 'distilbert-base-uncased'
config = DistilBertConfig(dropout=0.2, attention_dropout=0.2)
config.output_hidden_states = False
transformer_model = TFDistilBertModel.from_pretrained(distil_bert, config=config)

input_ids_in = tf.keras.layers.Input(shape=(128,), name='input_token', dtype=tf.int32)
input_masks_in = tf.keras.layers.Input(shape=(128,), name='masked_token', dtype=tf.int32)

embedding_layer = transformer_model(input_ids_in, attention_mask=input_masks_in)[0]
X = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))(embedding_layer)
X = tf.keras.layers.GlobalMaxPool1D()(X)
X = tf.keras.layers.Dense(50, activation='relu')(X)
X = tf.keras.layers.Dropout(0.2)(X)
X = tf.keras.layers.Dense(1, activation='sigmoid')(X)
model = tf.keras.Model(inputs=[input_ids_in, input_masks_in], outputs=X)
model.compile(Adam(lr=1e-5), loss='binary_crossentropy', metrics=['accuracy'])

for layer in model.layers[:3]:
    layer.trainable = False

bert_input = [
    input_ids,
    input_masks
]

checkpoint = ModelCheckpoint('/content/drive/My Drive/disaster_model/model_hugging_face.h5', monitor='val_loss', save_best_only=True)
train_history = model.fit(
    bert_input,
    validation_split=0.2,
    batch_size=16,
    epochs=10,
    callbacks=[checkpoint]
)
On running the above code in Colab, I get the following error:
Epoch 1/10
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-91-9df711c91040> in <module>()
9 batch_size = 16,
10 epochs = 10,
---> 11 callbacks = [checkpoint]
12 )
10 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
966 except Exception as e: # pylint:disable=broad-except
967 if hasattr(e, "ag_error_metadata"):
--> 968 raise e.ag_error_metadata.to_exception(e)
969 else:
970 raise
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:571 train_function *
outputs = self.distribute_strategy.run(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:951 run **
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:541 train_step **
self.trainable_variables)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:1804 _minimize
trainable_variables))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:521 _aggregate_gradients
filtered_grads_and_vars = _filter_grads(grads_and_vars)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:1219 _filter_grads
([v.name for _, v in grads_and_vars],))
ValueError: No gradients provided for any variable: ['tf_distil_bert_model_23/distilbert/embeddings/word_embeddings/weight:0', 'tf_distil_bert_model_23/distilbert/embeddings/position_embeddings/embeddings:0', 'tf_distil_bert_model_23/distilbert/embeddings/LayerNorm/gamma:0', 'tf_distil_bert_model_23/distilbert/embeddings/LayerNorm/beta:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/q_lin/kernel:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/q_lin/bias:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/k_lin/kernel:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/k_lin/bias:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/v_lin/kernel:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/v_lin/bias:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/out_lin/kernel:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/out_lin/bias:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/sa_layer_norm/gamma:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/sa_layer_norm/beta:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/ffn/lin1/kernel:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/ffn/lin1/bias:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/ffn/lin2/kernel:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/ffn/lin2/bias:0', 'tf_...
Follow this tutorial on Text classification using BERT: https://pysnacks.com/machine-learning/bert-text-classification-with-fine-tuning/
It has working code on Google Colab(using GPU) and Kaggle for binary, multi-class and multi-label text classification using BERT.
Hope that helps.
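For context, "No gradients provided for any variable" is typically what Keras raises when fit() receives inputs but no targets, and the call in the question passes only bert_input. A minimal sketch of a corrected call, assuming the competition's train.csv has its usual target column (my assumption, not shown in the question):

labels = train.target.values  # hypothetical: 0/1 labels from the Kaggle train.csv

train_history = model.fit(
    bert_input,
    labels,              # without targets there is no loss, hence no gradients
    validation_split=0.2,
    batch_size=16,
    epochs=10,
    callbacks=[checkpoint]
)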
You need to use GPU.
Try this:
with torch.no_grad():

loss value doesn't change (neural network)

I implemented a neural network model with TensorFlow (version 2.0) on Python 3.
I am not sure the code works properly, because the loss value hardly changes.
Is the code wrong, or does the model simply have too many parameters (which would mean the code is right)?
Please tell me whether the code works properly.
The following is the code.
import tensorflow as tf
import numpy as np

fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

class Model(object):
    def __init__(self):
        self.var_list = []
        self.w_layer1 = tf.Variable(tf.random.normal(shape=[28*28, 1000], stddev=0.3, dtype=tf.float64))
        self.b_layer1 = tf.Variable(tf.random.normal(shape=[1,], stddev=0.1, dtype=tf.float64))
        self.w_layer2 = tf.Variable(tf.random.normal(shape=[1000, 100], stddev=0.3, dtype=tf.float64))
        self.b_layer2 = tf.Variable(tf.random.normal(shape=[1,], stddev=0.1, dtype=tf.float64))
        self.w_layer3 = tf.Variable(tf.random.normal(shape=[100, 100], stddev=0.3, dtype=tf.float64))
        self.b_layer3 = tf.Variable(tf.random.normal(shape=[1,], stddev=0.1, dtype=tf.float64))
        self.w_layer4 = tf.Variable(tf.random.normal(shape=[100, 10], stddev=0.3, dtype=tf.float64))
        self.b_layer4 = tf.Variable(tf.random.normal(shape=[1,], stddev=0.1, dtype=tf.float64))
        self.var_list.append(self.w_layer1)
        self.var_list.append(self.b_layer1)
        self.var_list.append(self.w_layer2)
        self.var_list.append(self.b_layer2)
        self.var_list.append(self.w_layer3)
        self.var_list.append(self.b_layer3)
        self.var_list.append(self.w_layer4)
        self.var_list.append(self.b_layer4)

    def __call__(self, x):
        return self.w*x + self.b

    def dense_layer(self, inputs, w, b):
        z = tf.matmul(inputs, w) + b
        return tf.nn.relu(z)

    def output_layer(self, inputs, w, b):
        return tf.matmul(inputs, w) + b

    def flattend(self, inputs):
        inputs = tf.cast(inputs, tf.float64)
        return tf.reshape(inputs, [-1, 28*28])

    def loss(self, outputs, targets):
        predicted_y = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=outputs, labels=targets))
        return predicted_y

    def grad(self, x, target_y):
        with tf.GradientTape() as tape:
            tape.watch(self.var_list)
            loss_value = self.loss(self.run(x), target_y)
        return tape.gradient(loss_value, self.var_list)

    def run(self, inputs):
        inputs = self.flattend(inputs)
        layer1 = self.dense_layer(inputs, self.w_layer1, self.b_layer1)
        layer2 = self.dense_layer(layer1, self.w_layer2, self.b_layer2)
        layer3 = self.dense_layer(layer2, self.w_layer3, self.b_layer3)
        layer4 = self.output_layer(layer3, self.w_layer4, self.b_layer4)
        return layer4

    def optimizer(self):
        opt = tf.keras.optimizers.SGD(learning_rate=0.01)
        return opt

def make_onehot_labels(labels):
    depth = 10
    one_hot_labels = tf.one_hot(labels, depth)
    return one_hot_labels

fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
train_images = train_images/255.0
test_images = test_images/255.0
train_labels = make_onehot_labels(train_labels)
test_labels = make_onehot_labels(test_labels)

ds_train_x = tf.data.Dataset.from_tensor_slices(train_images)
ds_train_y = tf.data.Dataset.from_tensor_slices(train_labels)
train_dataset = tf.data.Dataset.zip((ds_train_x, ds_train_y)).shuffle(1000).repeat().batch(300)

train_images = tf.convert_to_tensor(train_images)
train_labels = tf.convert_to_tensor(train_labels)
test_images = tf.convert_to_tensor(test_images)
test_labels = tf.convert_to_tensor(test_labels)

count = 1
model = Model()
opt = model.optimizer()
print(model.loss(model.run(train_images), train_labels))
for epoch in range(10):
    for data in train_dataset:
        if count % 200 == 0:
            print(model.loss(model.run(train_images), train_labels))
            #print(grads)
            break
        grads = model.grad(data[0], data[1])
        opt.apply_gradients(zip(grads, model.var_list))
        count = count + 1
The following is the output from running the code above:
tf.Tensor(184.81706096058622, shape=(), dtype=float64)
tf.Tensor(1.2104797483683287, shape=(), dtype=float64)
tf.Tensor(1.2104797483683287, shape=(), dtype=float64)
tf.Tensor(1.2104797483683287, shape=(), dtype=float64)
tf.Tensor(1.2104797483683287, shape=(), dtype=float64)
The issue is in the following part:
for epoch in range(10):
    for data in train_dataset:
        if count % 200 == 0:
            print(model.loss(model.run(train_images), train_labels))
            #print(grads)
            break
        grads = model.grad(data[0], data[1])
        opt.apply_gradients(zip(grads, model.var_list))
        count = count + 1
You have a break within the if condition, meaning you break your training loop (and restart a new epoch) as soon as you hit count % 200 == 0. Remove the break and you'll see the error rate going down.
To elaborate: once you reach count == 200 you break the loop, and the counter never increases beyond that, so after 200 iterations nothing past that if condition is ever reached (and that includes your gradient application).
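For completeness, a minimal sketch of the corrected loop (the steps_per_epoch bound is my addition, needed because the dataset uses .repeat() and would otherwise iterate forever):

steps_per_epoch = 200  # hypothetical bound per epoch; .repeat() makes the dataset infinite

count = 1
for epoch in range(10):
    for step, data in enumerate(train_dataset):
        if step >= steps_per_epoch:
            break  # end this epoch only; training continues
        grads = model.grad(data[0], data[1])
        opt.apply_gradients(zip(grads, model.var_list))
        if count % 200 == 0:
            # log progress without breaking out of the loop
            print(model.loss(model.run(train_images), train_labels))
        count = count + 1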

Tensorflow multi-label with NCE or sampled softmax

Are there any code examples for using TensorFlow's sampled_softmax_loss or nce_loss functions with multi-label problems? That is, where num_true is more than one?
What follows is my attempt to create a wrapper for nce_loss() and sampled_softmax_loss() based on Jeff Chao's work (https://github.com/joelthchao/keras). In the following code, if you change num_true to 1, both samplers work. But with num_true > 1, both samplers throw slightly different exceptions involving tensor shape.
The main program is a simple autoencoder that replicates the class of problem I'm trying to solve: multi-label prediction with a huge number of output classes following a Zipfian distribution. Comments and stack trace at the end.
import tensorflow as tf
import numpy as np
import keras.layers as layers
from keras.models import Model
from keras import backend as K
from keras import initializers, regularizers, constraints
from keras.models import Model
from keras.layers import Dense
from keras.engine.base_layer import InputSpec
from keras.engine.topology import Layer
from keras.engine.input_layer import Input
from tensorflow.keras.optimizers import Nadam, Adam

np.random.seed(10)
import random
def nce_loss_function(weights, biases, labels, inputs, num_sampled, num_classes, num_true):
    if K.learning_phase() == 1:
        loss = tf.nn.nce_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true,
                              partition_strategy="div")
    else:
        logits = tf.matmul(inputs, tf.transpose(weights))
        logits = tf.nn.bias_add(logits, biases)
        labels_one_hot = tf.one_hot(labels, num_classes)
        loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=labels_one_hot[:][0][:],
            logits=logits)
        loss = tf.reduce_sum(loss, axis=1)
    return loss

def sampled_softmax_loss_function(weights, biases, labels, inputs, num_sampled, num_classes, num_true):
    if K.learning_phase() == 1:
        return tf.nn.sampled_softmax_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true,
                                          partition_strategy="div")
    else:
        logits = tf.matmul(inputs, tf.transpose(weights))
        logits = tf.nn.bias_add(logits, biases)
        labels_one_hot = tf.one_hot(labels, num_classes)
        loss = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=labels_one_hot,
            logits=logits)
        return loss
class Sampling(Layer):
    """Regular densely-connected NN layer with various sampling losses.

    `Sampling` implements the operation:
    `output = dot(input, kernel) + bias`
    `kernel` is a weights matrix created by the layer, and `bias` is a bias vector
    created by the layer. It also adds a sampling loss to the model.
    See [reference](http://proceedings.mlr.press/v9/gutmann10a/gutmann10a.pdf).

    # Example

    ```python
        inputs = Input(shape=(4,))
        target = Input(shape=(1,))  # sparse format, e.g. [1, 3, 2, 6, ...]
        net = Dense(8)(inputs)
        net = Sampling(units=128, num_sampled=32)([net, target])
        model = Model(inputs=[inputs, target], outputs=net)
        model.compile(optimizer='adam', loss=None)
        x = np.random.rand(1000, 4)
        y = np.random.randint(128, size=1000)
        model.fit([x, y], None)
    ```

    # Arguments
        units: Positive integer, dimensionality of the output space (num classes).
        num_sampled: Positive integer, number of classes to sample in the sampling loss.
        type: 'sampled_softmax', 'nce'
        num_true: Max # of positive classes, pad to this for variable inputs
        kernel_initializer: Initializer for the `kernel` weights matrix
            (see [initializers](../initializers.md)).
        bias_initializer: Initializer for the bias vector
            (see [initializers](../initializers.md)).
        kernel_regularizer: Regularizer function applied to
            the `kernel` weights matrix
            (see [regularizer](../regularizers.md)).
        bias_regularizer: Regularizer function applied to the bias vector
            (see [regularizer](../regularizers.md)).
        activity_regularizer: Regularizer function applied to
            the output of the layer (its "activation").
            (see [regularizer](../regularizers.md)).
        kernel_constraint: Constraint function applied to
            the `kernel` weights matrix
            (see [constraints](../constraints.md)).
        bias_constraint: Constraint function applied to the bias vector
            (see [constraints](../constraints.md)).

    # Input shape
        Two tensors. The first is a 2D tensor with shape `(batch_size, input_dim)`.
        The second is a 1D tensor of length `batch_size`.

    # Output shape
        2D tensor with shape: `(batch_size, units)`.
        For instance, for a 2D input with shape `(batch_size, input_dim)`,
        the output would have shape `(batch_size, units)`.
    """

    def __init__(self,
                 units,
                 num_sampled,
                 type='sampled_softmax',
                 num_true=1,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(Sampling, self).__init__(**kwargs)
        self.units = units
        self.num_sampled = num_sampled
        if self.num_sampled > self.units:
            raise Exception('num_sample: {} cannot be greater than units: {}'.format(
                num_sampled, units))
        self.type = type
        if not (self.type == 'nce' or self.type == 'sampled_softmax'):
            raise Exception('type {} is not a valid sampling loss type'.format(type))
        self.num_true = num_true
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.input_spec = [InputSpec(min_ndim=2), InputSpec(min_ndim=1)]
        self.supports_masking = True

    def build(self, input_shape):
        assert len(input_shape) == 2
        input_dim = input_shape[0][-1]
        self.kernel = self.add_weight(shape=(input_dim, self.units),
                                      initializer=self.kernel_initializer,
                                      name='kernel',
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)
        self.bias = self.add_weight(shape=(self.units,),
                                    initializer=self.bias_initializer,
                                    name='bias',
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
        self.input_spec[0] = InputSpec(min_ndim=2, axes={-1: input_dim})
        self.built = True

    def call(self, inputs):
        pred, target = inputs
        output = K.dot(pred, self.kernel)
        output = K.bias_add(output, self.bias, data_format='channels_last')

        # TODO : check train or test mode
        if self.type == 'nce':
            nce_loss = nce_loss_function(
                K.transpose(self.kernel), self.bias, target, pred, self.num_sampled, self.units, self.num_true)
            self.add_loss(K.mean(nce_loss))
        else:
            sampled_softmax_loss = sampled_softmax_loss_function(
                K.transpose(self.kernel), self.bias, target, pred, self.num_sampled, self.units, self.num_true)
            self.add_loss(K.mean(sampled_softmax_loss))
        return output

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 2
        assert input_shape[0][-1]
        output_shape = list(input_shape[0])
        output_shape[-1] = self.units
        return tuple(output_shape)

    def get_config(self):
        config = {
            'units': self.units,
            'num_sampled': self.num_sampled,
            'kernel_initializer': initializers.serialize(self.kernel_initializer),
            'bias_initializer': initializers.serialize(self.bias_initializer),
            'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
            'bias_regularizer': regularizers.serialize(self.bias_regularizer),
            'activity_regularizer': regularizers.serialize(self.activity_regularizer),
            'kernel_constraint': constraints.serialize(self.kernel_constraint),
            'bias_constraint': constraints.serialize(self.bias_constraint)
        }
        base_config = super(Sampling, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
def fill_zipf(length, num_classes, num_true=1):
    data_onehot = np.zeros((length, num_classes), dtype='float32')
    data_labels = np.zeros((length, num_true), dtype='int32')
    # all indexes outside of num_classes scattered in existing space
    rand = np.random.zipf(1.3, length * num_true) % num_classes
    for i in range(length):
        for j in range(num_true):
            k = rand[i]
            data_onehot[i][k] = 1.0
            data_labels[i][j] = k
    return data_onehot, data_labels
# number of test samples
num_train = 32*500
num_test = 32*500
num_valid = 100
num_epochs = 5
num_hidden = 10
# number of classes
num_classes = 2000
# number of samples for NCE
num_sampled = 24
# number of labels
num_true = 1
# type of negative sampler
sampler_type = 'sampled_softmax'

inputs = Input(shape=(num_classes,))
target = Input(shape=(num_true,), dtype=tf.int32)  # sparse format, e.g. [1, 3, 2, 6, ...]
net = Dense(num_classes)(inputs)
net = Dense(num_hidden, activation='relu')(net)
net = Sampling(units=num_classes, num_sampled=num_sampled, type=sampler_type)([net, target])
model = Model(inputs=[inputs, target], outputs=net)
model.compile(optimizer='adam', loss=None, metrics=['binary_crossentropy'])
model.summary()

train_input, train_output = fill_zipf(num_train, num_classes, num_true)
valid_input, valid_output = fill_zipf(num_valid, num_classes, num_true)
history = model.fit([train_input, train_output], None,
                    validation_data=([valid_input, valid_output], None),
                    epochs=num_epochs, verbose=2)

test_input, test_output = fill_zipf(num_test, num_classes, num_true)
predicts = model.predict([test_input, test_output], batch_size=32)
count = 0
for test in range(num_test):
    pred = predicts[test]
    imax = np.argmax(pred)
    if imax == test_output[test]:
        count += 1
print("Found {0} out of {1}".format(count/num_true, num_test))
This test works for the single-label case, both 'nce' and 'sampled_softmax'. But, when I set num_true to greater than one, both NCE and Sampled Softmax throw a tensor mismatch exception.
num_true=3
width=2000
sampler_type='sampled_softmax'
With these parameters, for Sampled Softmax, the code throws this exception trace:
File "postable_sampling_tests.py", line 220, in <module>
epochs=num_epochs, verbose=2)
File "/opt/ds/lib/python3.6/site-packages/keras/engine/training.py", line 1039, in fit
validation_steps=validation_steps)
File "/opt/ds/lib/python3.6/site-packages/keras/engine/training_arrays.py", line 199, in fit_loop
outs = f(ins_batch)
File "/opt/ds/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2715, in __call__
return self._call(inputs)
File "/opt/ds/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2675, in _call
fetched = self._callable_fn(*array_vals)
File "/opt/ds/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1399, in __call__
run_metadata_ptr)
File "/opt/ds/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 526, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: logits and labels must be broadcastable: logits_size=[32,2000] labels_size=[96,2000]
[[{{node sampling_1/softmax_cross_entropy_with_logits}} = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _class=["loc:#train...s_grad/mul"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](sampling_1/BiasAdd_1, sampling_1/softmax_cross_entropy_with_logits/Reshape_1)]]
32 is the batch_size. Clearly, something is being expanded to num_true * batch_size (96 = 3 × 32), but I don't know how to fix this.
If we change the sampler to NCE:
num_true=3
width=2000
sampler_type='nce'
The final two lines of the exception stack:
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [32,2000] vs. [3,2000]
[[{{node sampling_1/logistic_loss/mul}} = Mul[T=DT_FLOAT, _class=["loc:#training/Adam/gradients/sampling_1/logistic_loss/mul_grad/Reshape"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](sampling_1/BiasAdd_1, sampling_1/strided_slice_2)]]
In this case, the labels have not been multiplied by batch_size.
What am I doing wrong? How can I get this wrapper system working for multi-label cases?
You can also use sampled softmax with multiple labels; you just have to take the mean of each sample's softmax:
embeddings = tf.get_variable('embeddings',
    initializer=tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))

softmax_weights = tf.get_variable('softmax_weights',
    initializer=tf.truncated_normal([vocabulary_size, embedding_size],
                                    stddev=1.0 / math.sqrt(embedding_size)))

softmax_biases = tf.get_variable('softmax_biases',
    initializer=tf.zeros([vocabulary_size]), trainable=False)

embed = tf.nn.embedding_lookup(embeddings, train_dataset)  # train data set is
embed_reshaped = tf.reshape(embed, [batch_size*num_inputs, embedding_size])

segments = np.arange(batch_size).repeat(num_inputs)
averaged_embeds = tf.segment_mean(embed_reshaped, segments, name=None)

loss = tf.reduce_mean(
    tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases, inputs=averaged_embeds,
                               labels=train_labels, num_sampled=num_sampled, num_classes=vocabulary_size))

optimizer = tf.train.AdagradOptimizer(1.0).minimize(loss)  # original learning rate was 1.0
From https://github.com/Santosh-Gupta/Research2Vec/blob/master/Research2VecTraining2.ipynb
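To make the segment-averaging step concrete, here is a tiny self-contained sketch (sizes invented for illustration) of what the segment ids do: rows sharing an id are averaged, collapsing num_inputs embeddings per example into one input vector for sampled_softmax_loss. Plain NumPy is used here to mirror the behavior of tf.segment_mean:

import numpy as np

batch_size, num_inputs, embedding_size = 2, 3, 4
# Fake per-token embeddings, shape (batch_size * num_inputs, embedding_size)
embed_reshaped = np.arange(batch_size * num_inputs * embedding_size,
                           dtype=np.float32).reshape(-1, embedding_size)
# segments = [0, 0, 0, 1, 1, 1]; rows with the same id belong to one example
segments = np.arange(batch_size).repeat(num_inputs)
averaged = np.stack([embed_reshaped[segments == s].mean(axis=0)
                     for s in range(batch_size)])
print(averaged.shape)  # (2, 4): one averaged embedding per example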