tensorflow simple estimator input function problems - numpy

I am trying to create a simple input function with the feature data being the numbers 1-10 and the labels being 0 when x < 5, 5 when x = 5, and 10 when x > 5.
Example:
# data
nmbrs = [10., 1., 2., 3., 4., 5., 6. , 7., 8., 9.]
labels = [10., 0., 0., 0., 0., 5., 10., 10., 10., 10.]
# input function
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'numbers': np.array(nmbrs)}, y=np.array(labels),
    batch_size=batch_size, num_epochs=None, shuffle=True)
The problem I am having is that the nmbrs and labels arrays don't seem to be in the right form. I tried making them into a 2-D array, but that didn't work either. I'm sure I'm doing something really simple wrong here...
EDIT: model and neural net functions
def neural_net(x_dict):
    # TF Estimator input is a dict, in case of multiple inputs
    x = x_dict['numbers']
    # Hidden fully connected layer with 128 neurons
    layer_1 = tf.layers.dense(x, n_hidden_1)
    # Hidden fully connected layer with 128 neurons
    layer_2 = tf.layers.dense(layer_1, n_hidden_2)
    # Output fully connected layer with a neuron for each class
    out_layer = tf.layers.dense(layer_2, num_classes)
    return out_layer

# Define the model function (following TF Estimator Template)
def model_fn(features, labels, mode):
    # Build the neural network
    logits = neural_net(features)

    # Predictions
    pred_classes = tf.argmax(logits, axis=1)
    pred_probas = tf.nn.softmax(logits)

    # If prediction mode, early return
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)

    # Define loss and optimizer
    loss_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.cast(labels, dtype=tf.int32)))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op, global_step=tf.train.get_global_step())
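A minimal sketch of one possible fix, assuming the shape problem comes from tf.layers.dense receiving 1-D features: reshape the feature array into a column vector and feed integer class indices (0, 1, 2, with num_classes = 3) to the sparse softmax loss. The batch size here is illustrative.

import numpy as np
import tensorflow as tf

nmbrs = [10., 1., 2., 3., 4., 5., 6., 7., 8., 9.]
# class 0: x < 5, class 1: x == 5, class 2: x > 5
class_ids = np.array([2, 0, 0, 0, 0, 1, 2, 2, 2, 2], dtype=np.int32)

# tf.layers.dense expects a [batch, features] tensor, so make each number its own row
features_2d = np.array(nmbrs, dtype=np.float32).reshape(-1, 1)   # shape (10, 1)

input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'numbers': features_2d}, y=class_ids,
    batch_size=4, num_epochs=None, shuffle=True)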

Related

Create a weighted MSE loss function in Tensorflow

I want to train a recurrent neural network using Tensorflow. My model outputs a 1 by 100 vector for each training sample. Assume that y = [y_1, y_2, ..., y_100] is my output for training sample x and the expected output is y'= [y'_1, y'_2, ..., y'_100].
I wish to write a custom loss function that calculates the loss of this specific sample as follows:
Loss = 1/sum(weights) * sqrt(w_1*(y_1-y'_1)^2 + ... + w_100*(y_100-y'_100)^2)
where weights = [w_1, ..., w_100] is a given weight array.
Could someone help me with implementing such a custom loss function? (I also use mini-batches while training)
I want to underline that you have two possibilities depending on your problem:
[1] If the weights are equal for all your samples:
You can build a loss wrapper. Here is a dummy example:
import numpy as np
import tensorflow as tf
# assuming tf.keras; adjust the imports if you use standalone Keras
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

n_sample = 200
X = np.random.uniform(0, 1, (n_sample, 10))
y = np.random.uniform(0, 1, (n_sample, 100))
W = np.random.uniform(0, 1, (100,)).astype('float32')

def custom_loss_wrapper(weights):
    def loss(true, pred):
        sum_weights = tf.reduce_sum(weights) * tf.cast(tf.shape(pred)[0], tf.float32)
        resid = tf.sqrt(tf.reduce_sum(weights * tf.square(true - pred)))
        return resid / sum_weights
    return loss

inp = Input((10,))
x = Dense(256)(inp)
pred = Dense(100)(x)

model = Model(inp, pred)
model.compile('adam', loss=custom_loss_wrapper(W))
model.fit(X, y, epochs=3)
[2] If the weights are different between samples:
You should build your model using add_loss in order to dynamically take the weights of each sample into account. Here is a dummy example:
n_sample = 200
X = np.random.uniform(0, 1, (n_sample, 10))
y = np.random.uniform(0, 1, (n_sample, 100))
W = np.random.uniform(0, 1, (n_sample, 100))

def custom_loss(true, pred, weights):
    sum_weights = tf.reduce_sum(weights)
    resid = tf.sqrt(tf.reduce_sum(weights * tf.square(true - pred)))
    return resid / sum_weights

inp = Input((10,))
true = Input((100,))
weights = Input((100,))
x = Dense(256)(inp)
pred = Dense(100)(x)

model = Model([inp, true, weights], pred)
model.add_loss(custom_loss(true, pred, weights))
model.compile('adam', loss=None)
model.fit([X, y, W], y=None, epochs=3)
When using add_loss you should pass all the tensors involved in the loss as input layers and pass them into the loss function for the computation.
At inference time you can compute predictions as usual, simply removing true and weights from the inputs:
final_model = Model(model.input[0], model.output)
final_model.predict(X)
You can implement a custom weighted MSE in the following way:
import numpy as np
from tensorflow.keras import backend as K

def custom_mse(class_weights):
    def weighted_mse(gt, pred):
        # Formula:
        # (w_1*(y_1-y'_1)^2 + ... + w_100*(y_100-y'_100)^2) / sum(weights)
        return K.sum(class_weights * K.square(gt - pred)) / K.sum(class_weights)
    return weighted_mse
y_true = np.array([[0., 1., 1., 0.], [0., 0., 1., 1.]])
y_pred = np.array([[0., 1., 0., 1.], [1., 0., 1., 1.]])
weights = np.array([0.25, 0.50, 1., 0.75])

print(y_true.shape, y_pred.shape, weights.shape)
# (2, 4) (2, 4) (4,)

loss = custom_mse(class_weights=weights)
loss(y_true, y_pred).numpy()
# 0.8
Using it with model compilation:
model.compile(loss=custom_mse(weights))
This will compute the MSE with the provided weights. However, in your question you use sqrt(...), from which I presume you meant root MSE (RMSE). To do that, apply K.sqrt to the weighted sum, i.e. K.sqrt(K.sum(...)) / K.sum(...), inside weighted_mse.
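For example, the RMSE variant could look like this (a sketch, following the same closure pattern as custom_mse above):

def custom_rmse(class_weights):
    def weighted_rmse(gt, pred):
        # sqrt of the weighted sum of squared errors, divided by the sum of the weights
        return K.sqrt(K.sum(class_weights * K.square(gt - pred))) / K.sum(class_weights)
    return weighted_rmse

model.compile(loss=custom_rmse(weights))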
FYI, you may also be interested in class_weight and sample_weight in Model.fit. From the source:
class_weight: Optional dictionary mapping class indices (integers) to a weight (float) value, used for weighting the loss
function (during training only). This can be useful to tell the model
to "pay more attention" to samples from an under-represented class.
sample_weight: Optional Numpy array of weights for the training samples, used for weighting the loss function (during training only).
You can either pass a flat (1D) Numpy array with the same length as
the input samples (1:1 mapping between weights and samples), or in the
case of temporal data, you can pass a 2D array with shape (samples,
sequence_length), to apply a different weight to every timestep of
every sample. This argument is not supported when x is a dataset,
generator, or keras.utils.Sequence instance; instead, provide
sample_weights as the third element of x.
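For example, per-sample weights can be passed straight to fit (a minimal sketch, reusing X, y and n_sample from the example above, with one weight per sample):

# one weight per training sample, shape (n_sample,)
sample_w = np.random.uniform(0, 1, (n_sample,))
model.fit(X, y, sample_weight=sample_w, epochs=3)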
And also loss_weights in Model.compile, from the source:
loss_weights: Optional list or dictionary specifying scalar coefficients (Python floats) to weight the loss contributions of
different model outputs. The loss value that will be minimized by the
model will then be the weighted sum of all individual losses, weighted
by the loss_weights coefficients. If a list, it is expected to have a
1:1 mapping to the model's outputs. If a dict, it is expected to map
output names (strings) to scalar coefficients.
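A minimal sketch of loss_weights for a hypothetical two-output model (the output names out_a and out_b are made up for illustration):

model.compile(optimizer='adam',
              loss={'out_a': 'mse', 'out_b': 'mae'},
              loss_weights={'out_a': 1.0, 'out_b': 0.5})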
A class version of the weighted mean squared error loss function.
class WeightedMSE(object):
    def __init__(self):
        pass

    def __call__(self, y_true, y_pred, weights):
        sum_weights = tf.reduce_sum(weights)
        resid = tf.reduce_sum(weights * tf.square(y_true - y_pred))
        return resid / sum_weights
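A quick usage sketch, assuming TF 2.x eager execution (the tensor values are illustrative):

wmse = WeightedMSE()
loss_value = wmse(y_true=tf.constant([[0., 1., 1., 0.]]),
                  y_pred=tf.constant([[0.1, 0.8, 0.9, 0.2]]),
                  weights=tf.constant([0.25, 0.50, 1., 0.75]))
print(float(loss_value))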

T5 Encoder model output all zeros?

I am trying out a project where I use the T5EncoderModel from HuggingFace in order to obtain hidden representations of my input sentences. I have 100K sentences which I tokenize and pad as follows:
for sentence in dataset[original]:
    sentence = tokenizer(sentence, max_length=40, padding='max_length',
                         return_tensors='tf', truncation=True)
    original_sentences.append(sentence.input_ids)
    org_mask.append(sentence.attention_mask)
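As a side note, the tokenizer can also encode the whole list in one call (a minimal sketch, assuming sentences is the Python list of strings):

encoded = tokenizer(sentences, max_length=40, padding='max_length',
                    truncation=True, return_tensors='tf')
input_ids = encoded.input_ids            # shape (num_sentences, 40)
attention_mask = encoded.attention_mask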
The loop above gives me the right outputs and tokenizes everything decently. The problem I encounter is when I try to actually train the model. The setup is a bit complex and is taken from https://keras.io/examples/vision/semantic_image_clustering/, which I am trying to apply to text.
The set-up for training is as follows:
def create_encoder(rep_dim):
    encoder = TFT5EncoderModel.from_pretrained('t5-small', output_hidden_states=True)
    encoder.trainable = True
    original_input = Input(shape=(max_length), name='originalIn', dtype=tf.int32)
    augmented_input = Input(shape=(max_length), name='originalIn', dtype=tf.int32)
    concat = keras.layers.Concatenate(axis=1)([original_input, augmented_input])
    # Take 0-index because it returns a TFBERTmodel type, and 0 returns a tensor
    encoded = encoder(input_ids=concat)[0]
    # This outputs shape: [sentences, max_length, encoded_dims]
    output = Dense(rep_dim, activation='relu')(encoded)
    return encoder
This function is fed into the RepresentationLearner class from the above link as such:
class RepresentationLearner(keras.Model):
    def __init__(
        self,
        encoder,
        projection_units,
        temperature=0.8,
        dropout_rate=0.1,
        l2_normalize=False,
        **kwargs
    ):
        super(RepresentationLearner, self).__init__(**kwargs)
        self.encoder = encoder
        # Create projection head.
        self.projector = keras.Sequential(
            [
                layers.Dropout(dropout_rate),
                layers.Dense(units=projection_units, use_bias=False),
                layers.BatchNormalization(),
                layers.ReLU(),
            ]
        )
        self.temperature = temperature
        self.l2_normalize = l2_normalize
        self.loss_tracker = keras.metrics.Mean(name="loss")

    @property
    def metrics(self):
        return [self.loss_tracker]

    def compute_contrastive_loss(self, feature_vectors, batch_size):
        num_augmentations = tf.shape(feature_vectors)[0] // batch_size
        if self.l2_normalize:
            feature_vectors = tf.math.l2_normalize(feature_vectors, -1)
        # The logits shape is [num_augmentations * batch_size, num_augmentations * batch_size].
        logits = (
            tf.linalg.matmul(feature_vectors, feature_vectors, transpose_b=True)
            / self.temperature
        )
        # Apply log-max trick for numerical stability.
        logits_max = tf.math.reduce_max(logits, axis=1)
        logits = logits - logits_max
        # The shape of targets is [num_augmentations * batch_size, num_augmentations * batch_size].
        # targets is a matrix that consists of num_augmentations submatrices of shape [batch_size, batch_size].
        # Each [batch_size, batch_size] submatrix is an identity matrix (diagonal entries are ones).
        targets = tf.tile(tf.eye(batch_size), [num_augmentations, num_augmentations])
        # Compute cross entropy loss
        return keras.losses.categorical_crossentropy(
            y_true=targets, y_pred=logits, from_logits=True
        )

    def call(self, inputs):
        features = self.encoder(inputs[0])[0]
        # Apply projection head.
        return self.projector(features[0])

    def train_step(self, inputs):
        batch_size = tf.shape(inputs)[0]
        # Run the forward pass and compute the contrastive loss
        with tf.GradientTape() as tape:
            feature_vectors = self(inputs, training=True)
            loss = self.compute_contrastive_loss(feature_vectors, batch_size)
        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update loss tracker metric
        self.loss_tracker.update_state(loss)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}

    def test_step(self, inputs):
        batch_size = tf.shape(inputs)[0]
        feature_vectors = self(inputs, training=False)
        loss = self.compute_contrastive_loss(feature_vectors, batch_size)
        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}
In order to train it, I use the Colab TPU and train it as such:
with strategy.scope():
    encoder = create_encoder(rep_dim)
    training_model = RepresentationLearner(encoder=encoder, projection_units=128, temperature=0.1)
    lr_scheduler = keras.experimental.CosineDecay(initial_learning_rate=0.001, decay_steps=500, alpha=0.1)
    training_model.compile(optimizer=tfa.optimizers.AdamW(learning_rate=lr_scheduler, weight_decay=0.0001))
    history = training_model.fit(x=[original_train, augmented_train], batch_size=32*8, epochs=10)
    training_model.save_weights('representation_learner.h5', overwrite=True)
Note that I am giving my model two inputs. When I predict on my input data, I get all zeros, and I can not seem to understand why. I predict as follows:
training_model.load_weights('representation_learner.h5')
feature_vectors= training_model.predict([[original_train, augmented_train]], verbose = 1)
And the output is:
array([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]], dtype=float32)
with a far too large shape of (1000000, 128).

Custom Keras binary_crossentropy loss function not working

I'm trying to re-define Keras's binary_crossentropy loss function so that I can customize it, but it's not giving me the same results as the existing one.
I'm using TF 1.13.1 with Keras 2.2.4.
I went through Keras's GitHub code. My understanding is that the loss in model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) is defined in losses.py, using binary_crossentropy defined in tensorflow_backend.py.
I ran dummy data and a model to test it. Here are my findings:
The custom loss function outputs the same results as Keras's one.
Using the custom loss in a Keras model gives different accuracy results.
from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(2)

import tensorflow as tf
from keras import losses
import keras.backend as K
import keras.backend.tensorflow_backend as tfb
from keras.layers import Dense
from keras import Sequential

# Dummy check of loss output
def binary_crossentropy_custom(y_true, y_pred):
    return K.mean(binary_crossentropy_custom_tf(y_true, y_pred), axis=-1)

def binary_crossentropy_custom_tf(target, output, from_logits=False):
    """Binary crossentropy between an output tensor and a target tensor.

    # Arguments
        target: A tensor with the same shape as `output`.
        output: A tensor.
        from_logits: Whether `output` is expected to be a logits tensor.
            By default, we consider that `output` encodes a probability distribution.

    # Returns
        A tensor.
    """
    # Note: tf.nn.sigmoid_cross_entropy_with_logits
    # expects logits, Keras expects probabilities.
    if not from_logits:
        # transform back to logits
        _epsilon = tfb._to_tensor(tfb.epsilon(), output.dtype.base_dtype)
        output = tf.clip_by_value(output, _epsilon, 1 - _epsilon)
        output = tf.log(output / (1 - output))
    return tf.nn.sigmoid_cross_entropy_with_logits(labels=target,
                                                   logits=output)

logits = tf.constant([[-3., -2.11, -1.22],
                      [-0.33, 0.55, 1.44],
                      [2.33, 3.22, 4.11]])
labels = tf.constant([[1., 1., 1.],
                      [1., 1., 0.],
                      [0., 0., 0.]])

custom_sigmoid_cross_entropy_with_logits = binary_crossentropy_custom(labels, logits)
keras_binary_crossentropy = losses.binary_crossentropy(y_true=labels, y_pred=logits)

with tf.Session() as sess:
    print('CUSTOM sigmoid_cross_entropy_with_logits: ', sess.run(custom_sigmoid_cross_entropy_with_logits), '\n')
    print('KERAS keras_binary_crossentropy: ', sess.run(keras_binary_crossentropy), '\n')

# CUSTOM sigmoid_cross_entropy_with_logits:  [16.118095 10.886106 15.942386]
# KERAS keras_binary_crossentropy:  [16.118095 10.886106 15.942386]
# Dummy check of model accuracy
X_train = tf.random.uniform((3, 5), minval=0, maxval=1, dtype=tf.dtypes.float32)
labels = tf.constant([[1., 0., 0.],
                      [0., 0., 1.],
                      [1., 0., 0.]])

model = Sequential()
# First Hidden Layer
model.add(Dense(5, activation='relu', kernel_initializer='random_normal', input_dim=5))
# Output Layer
model.add(Dense(3, activation='sigmoid', kernel_initializer='random_normal'))

# I ran model.fit for each model.compile below 10 times using the same X_train
# and provide the range of accuracy measurements:
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])       # 0.748 < acc < 0.779
# model.compile(optimizer='adam', loss=losses.binary_crossentropy, metrics=['accuracy'])  # 0.761 < acc < 0.778
model.compile(optimizer='adam', loss=binary_crossentropy_custom, metrics=['accuracy'])    # 0.617 < acc < 0.663

history = model.fit(X_train, labels, steps_per_epoch=100, epochs=1)
I'd expect the custom loss function to give similar model accuracy output but it does not. Any idea? Thanks!
Keras automatically selects which accuracy implementation to use according to the loss, and this won't work if you use a custom loss. But in this case you can just explicitly use the right accuracy, which is binary_accuracy:
model.compile(optimizer='adam', loss=binary_crossentropy_custom, metrics=['binary_accuracy'])
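If you prefer passing the metric function itself rather than the string, this should be equivalent (a small sketch using keras.metrics):

from keras.metrics import binary_accuracy

model.compile(optimizer='adam', loss=binary_crossentropy_custom, metrics=[binary_accuracy])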

How can I pre-compute a mask for each input and adjust the weights according to this mask?

I want to provide a mask, the same size as the input image, and adjust the weights learned from the image according to this mask (similar to attention, but pre-computed for each image input). How can I do this with Keras (or TensorFlow)?
Question
How can I add another feature layer to an image, like a Mask, and have the neural network take this new feature layer into account?
Answer
The short answer is to add it as another colour channel to the image. If your image already has 3 colour channels (red, green, blue), then adding another channel of 1s and 0s for the mask gives the neural network that much more information to use when making decisions.
Thought Experiment
As a thought experiment, let's tackle MNIST. MNIST images are 28x28. Let's take 1 image, the 'true' image, and 3 other images, the 'distractions', and form a 56x56 image from the 4 28x28 images. MNIST is black and white so it only has 1 colour channel, brightness. Let's now add another colour channel which is a mask: 1s in the area of the 56x56 image where the 'true' image is and 0s elsewhere.
If we use the same architecture as usual for solving MNIST, convolution all the way down, we can imagine that it can use this new information to learn to only pay attention to the 'true' area and categorize the image correctly.
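A minimal numpy sketch of the idea, assuming img is a 56x56 brightness array and mask marks the quadrant where the 'true' digit sits (names and the top-left placement are illustrative):

import numpy as np

img = np.random.rand(56, 56)                  # brightness channel
mask = np.zeros((56, 56))
mask[:28, :28] = 1.0                          # 1s where the 'true' image sits

# stack to shape (56, 56, 2): channel 0 = image, channel 1 = mask
img_with_mask = np.stack([img, mask], axis=-1)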
Code Example
In this example we try to solve the XOR problem. We take a classic XOR input, double it with noise, and add a channel that is 1s for the non-noise values and 0s for the noise.
# Adapted from https://github.com/panchishin/learn-to-tensorflow/blob/master/solutions/04-xor-2d.py
import numpy as np

# -- The xor problem --
x = np.array([[0., 0.], [1., 1.], [1., 0.], [0., 1.]])
y_ = [[1., 0.], [1., 0.], [0., 1.], [0., 1.]]

def makeBatch():
    # Add an additional 2 channels of noise
    # either before or after the two real 'x's.
    global x
    rx = np.random.rand(4, 4, 2) > 0.5
    # set the mask to 0 for all items
    rx[:, :, 1] = 0
    index = int(np.random.random() * 3)
    rx[:, index:index+2, 0] = x
    # set the mask to 1 for 'real' values
    rx[:, index:index+2, 1] = 1
    return rx

# -- imports --
import tensorflow as tf

# np.set_printoptions(precision=2) reduces np precision output to 2 digits
np.set_printoptions(precision=2, suppress=True)
# -- induction --
# Layer 0
x0 = tf.placeholder(dtype=tf.float32, shape=[None, 4, 2])
y0 = tf.placeholder(dtype=tf.float32, shape=[None, 2])
# Layer 1
f1 = tf.reshape(x0,shape=[-1,8])
m1 = tf.Variable(tf.random_uniform([8, 9], minval=0.1, maxval=0.9, dtype=tf.float32))
b1 = tf.Variable(tf.random_uniform([9], minval=0.1, maxval=0.9, dtype=tf.float32))
h1 = tf.sigmoid(tf.matmul(f1, m1) + b1)
# Layer 2
m2 = tf.Variable(tf.random_uniform([9, 2], minval=0.1, maxval=0.9, dtype=tf.float32))
b2 = tf.Variable(tf.random_uniform([2], minval=0.1, maxval=0.9, dtype=tf.float32))
y_out = tf.nn.softmax(tf.matmul(h1, m2) + b2)
# -- loss --
# loss : sum of the squares of y0 - y_out
loss = tf.reduce_sum(tf.square(y0 - y_out))
# training step : gradient descent (1.0) to minimize loss
train = tf.train.GradientDescentOptimizer(1.0).minimize(loss)
# -- training --
# run 5000 times using all the X and Y
# print out the loss and any other interesting info
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("\nloss")
    for step in range(5000):
        sess.run(train, feed_dict={x0: makeBatch(), y0: y_})
        if (step + 1) % 1000 == 0:
            print(sess.run(loss, feed_dict={x0: makeBatch(), y0: y_}))

    results = sess.run([m1, b1, m2, b2, y_out, loss], feed_dict={x0: makeBatch(), y0: y_})

    labels = "m1,b1,m2,b2,y_out,loss".split(",")
    for label, result in zip(labels, results):
        print("")
        print(label)
        print(result)
    print("")
Output
We can see that the network correctly solves the problem and gives the correct output with high certainty.
y_ (truth) = [[1., 0.], [1., 0.], [0., 1.], [0., 1.]]
y_out
[[0.99 0.01]
[0.99 0.01]
[0.01 0.99]
[0.01 0.99]]
loss
0.00056630466
Confirmation that the mask is doing something
Let's change the mask function so that it is just random, by commenting out the lines that set 0s for noise and 1s for signal:
def makeBatch():
    global x
    rx = np.random.rand(4, 4, 2) > 0.5
    # rx[:, :, 1] = 0
    index = int(np.random.random() * 3)
    rx[:, index:index+2, 0] = x
    # rx[:, index:index+2, 1] = 1
    return rx
and then rerun the code. Indeed we can see that the network cannot learn without the mask.
y_out
[[0.99 0.01]
[0.76 0.24]
[0.09 0.91]
[0.58 0.42]]
loss
0.8080765
Conclusion
If you have some signal and noise in an image (or other data structure), and successfully add another channel (a mask) that indicates where the signal is and where the noise is, a neural net can leverage that mask to focus on the signal yet still have access to the noise.
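As a rough Keras sketch of the same idea for image inputs, assuming a 28x28 input where channel 0 is the image and channel 1 is the mask (the layer sizes and the 10-class output are illustrative):

from tensorflow import keras
from tensorflow.keras import layers

inputs = keras.Input(shape=(28, 28, 2))             # channel 0: image, channel 1: mask
x = layers.Conv2D(16, 3, activation='relu')(inputs)
x = layers.MaxPooling2D()(x)
x = layers.Flatten()(x)
outputs = layers.Dense(10, activation='softmax')(x)

model = keras.Model(inputs, outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])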

tensorflow NameError: name 'eval_input_fn' is not defined

I am following TensorFlow's getting started guide. I downloaded and installed Anaconda today. When run, the program below produces:
File "p3.py", line 35, in <module>
eval_loss = estimator.evaluate(input_fn=eval_input_fn)
NameError: name 'eval_input_fn' is not defined
import numpy as np
import tensorflow as tf

# Declare list of features, we only have one real-valued feature
def model(features, labels, mode):
    # Build a linear model and predict values
    W = tf.get_variable("W", [1], dtype=tf.float64)
    b = tf.get_variable("b", [1], dtype=tf.float64)
    y = W * features['x'] + b
    # Loss sub-graph
    loss = tf.reduce_sum(tf.square(y - labels))
    # Training sub-graph
    global_step = tf.train.get_global_step()
    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train = tf.group(optimizer.minimize(loss),
                     tf.assign_add(global_step, 1))
    # ModelFnOps connects subgraphs we built to the
    # appropriate functionality.
    return tf.contrib.learn.ModelFnOps(
        mode=mode, predictions=y,
        loss=loss,
        train_op=train)

estimator = tf.contrib.learn.Estimator(model_fn=model)

# define our data sets
x_train = np.array([1., 2., 3., 4.])
y_train = np.array([0., -1., -2., -3.])
x_eval = np.array([2., 5., 8., 1.])
y_eval = np.array([-1.01, -4.1, -7, 0.])

input_fn = tf.contrib.learn.io.numpy_input_fn({"x": x_train}, y_train, 4, num_epochs=1000)

# train
estimator.fit(input_fn=input_fn, steps=1000)

# Here we evaluate how well our model did.
train_loss = estimator.evaluate(input_fn=input_fn)
eval_loss = estimator.evaluate(input_fn=eval_input_fn)  # line 35
print("train loss: %r" % train_loss)
print("eval loss: %r" % eval_loss)
For some reason you haven't copy-pasted the definition of the evaluation input function.
You can find it directly in the tutorial you linked.
Here's the line:
eval_input_fn = tf.contrib.learn.io.numpy_input_fn(
    {"x": x_eval}, y_eval, batch_size=4, num_epochs=1000)