Is there an alternative to tf.one_hot()? - tensorflow

I was trying to one-hot encode tensors, but tf.one_hot takes too much memory and keeps crashing. I cannot use Keras' to_categorical because I work with tensors. So I was wondering whether there is an alternative to tf.one_hot, or any way to make it less resource-intensive. Below is my code:
def mrr_metric_fn(y_true, y_pred):
    """Keras-style metric wrapper around ``mrr_metric``.

    ``y_pred`` is flattened to one dimension, cast to ``int64`` and one-hot
    encoded with a depth of 64; the encoded tensor is then scored against
    ``y_true`` by ``mrr_metric``.

    Args:
        y_true: Ground-truth tensor, forwarded unchanged to ``mrr_metric``.
        y_pred: Prediction tensor to flatten and one-hot encode.

    Returns:
        Whatever ``mrr_metric`` returns for the pair.
    """
    flat_indices = tf.cast(tf.reshape(y_pred, [-1]), tf.int64)
    return mrr_metric(y_true, tf.one_hot(flat_indices, 64))
def create_classifier_bert_model():
    """Build and compile the softmax classifier on top of ``pretrained_bert_model``.

    The model takes ``int64`` inputs of length ``config.MAX_LEN``, feeds them
    through ``pretrained_bert_model`` and a dense softmax layer sized to the
    vocabulary of ``vectorize_layer``. It is compiled with Adam
    (``config.LR``), sparse categorical cross-entropy, and two weighted
    metrics: sparse categorical accuracy and ``mrr_metric_fn``.

    Returns:
        The compiled ``keras.Model`` named ``"prediction"``.
    """
    token_ids = layers.Input((config.MAX_LEN,), dtype=tf.int64)
    bert_features = pretrained_bert_model(token_ids)
    softmax_head = layers.Dense(
        vectorize_layer.vocabulary_size(), activation="softmax"
    )
    probabilities = softmax_head(bert_features)

    model = keras.Model(token_ids, outputs=[probabilities], name="prediction")
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=config.LR),
        loss="sparse_categorical_crossentropy",
        weighted_metrics=["sparse_categorical_accuracy", mrr_metric_fn],
    )
    return model
# Build the compiled classifier, print the configured learning rate, then
# train for one epoch on `mlm_ds_ft_cs`.
classifer_model = create_classifier_bert_model()
print(config.LR)
classifer_model.fit(mlm_ds_ft_cs, epochs=1)
Basically, I'm trying to calculate MRR (mean reciprocal rank) for my model.

Since you are working with tensors, note that tf.keras.utils.to_categorical() also returns a tensor, so you should try it. However, you then need to change the loss to loss = "CategoricalCrossentropy" and the metrics to metrics = ["CategoricalAccuracy", ...].

Related

How to save and reload a Subclassed model in TF 2.6.0 / Python 3.9.7 without performance drop?

Looks like the million-dollar question. I have the model below, built by subclassing Model in Keras.
The model trains fine and has good performance, but I cannot find a way to save and restore it without incurring a significant performance loss.
I track AUC on ROC curves for anomaly detection, and the ROC curve after loading the model is worse than before, using exactly the same validation data set.
I suspect the problem to come from the BatchNormalization, but I could be wrong.
I've tried several options:
This works but leads to a performance drop:
model.save() / tf.keras.models.load()
This works but also leads to a performance drop:
model.save_weights() / model.load_weights()
This does not work and I get the following error:
tf.saved_model.save() / tf.saved_model.load()
AttributeError: '_UserObject' object has no attribute 'predict'
This does not work either, as subclassed models do not support JSON export:
model.to_json()
Here is the model:
class Deep_Seq2Seq_Detector(Model):
    """Stacked-LSTM sequence-to-sequence model built by subclassing ``Model``.

    The encoder is a batch-normalisation layer followed by three LSTM layers,
    with a batch-normalisation + tanh block between consecutive LSTMs. The
    third LSTM returns its final output and states; the output is repeated
    ``flight_len`` times and decoded by another LSTM that starts from those
    states. A time-distributed dense layer maps every decoded step to
    ``param_len`` values.

    Args:
        flight_len: Number of time steps; also the ``RepeatVector`` count.
        param_len: Number of features per step; width of the final dense layer.
        hidden_state: Number of units used by every LSTM layer.
    """

    def __init__(self, flight_len, param_len, hidden_state=16):
        super().__init__()
        self.input_dim = (None, flight_len, param_len)
        self._name_ = "LSTM"
        self.units = hidden_state

        # Layers are created in the same order as they are applied in call().
        self.regularizer0 = tf.keras.Sequential([
            layers.BatchNormalization(),
        ])
        self.encoder1 = layers.LSTM(
            self.units,
            return_state=False,
            return_sequences=True,
            name='encoder1',
            input_shape=self.input_dim,
        )
        self.regularizer1 = tf.keras.Sequential([
            layers.BatchNormalization(),
            layers.Activation("tanh"),
        ])
        self.encoder2 = layers.LSTM(
            self.units,
            return_state=False,
            return_sequences=True,
            name='encoder2',
        )
        self.regularizer2 = tf.keras.Sequential([
            layers.BatchNormalization(),
            layers.Activation("tanh"),
        ])
        # Last encoder stage: returns (output, hidden state, cell state).
        self.encoder3 = layers.LSTM(
            self.units,
            return_state=True,
            return_sequences=False,
            activation="tanh",
            name='encoder3',
        )
        self.repeat = layers.RepeatVector(self.input_dim[1])
        self.decoder = layers.LSTM(
            self.units,
            return_sequences=True,
            activation="tanh",
            name="decoder",
            input_shape=(self.input_dim[1], self.units),
        )
        self.dense = layers.TimeDistributed(layers.Dense(self.input_dim[2]))

    # The listing showed "#tf.function" here; the leading "@" of the decorator
    # had been mangled to "#", so the decorator is restored.
    @tf.function
    def call(self, x):
        """Encode ``x``, then decode it back to a sequence of the same length.

        Args:
            x: Input batch fed to the first batch-normalisation layer.

        Returns:
            The output of the time-distributed dense layer.
        """
        # Encoder
        normed_input = self.regularizer0(x)
        first_seq = self.regularizer1(self.encoder1(normed_input))
        second_seq = self.regularizer2(self.encoder2(first_seq))
        summary, hidden_state, cell_state = self.encoder3(second_seq)

        # see https://www.tensorflow.org/guide/keras/rnn
        decoder_init = [hidden_state, cell_state]
        decoder_input = self.repeat(summary)

        # Decoder
        decoded_seq = self.decoder(decoder_input, initial_state=decoder_init)
        return self.dense(decoded_seq)
I've seen Git threads, but no straight answer:
https://github.com/keras-team/keras/issues/4875
Has anyone found a solution? Do I have to use the Functional or Sequential API instead?
It seems the problem was coming from the Subclassing API.
I reconstructed the exact same model using the Functional API, and now model.save / model.load yields similar results.

Completely different results using Tensorflow and Pytorch for MobilenetV3 Small

I am using transfer learning from MobileNetV3 Small to predict 5 different points on an image. I am doing this as a regression task.
For both models:
Setting the last 50 layers trainable and adding the same fully connected layers to the end.
Learning rate 3e-2
Batch size 32
Adam optimizer with the same betas
100 epochs
The inputs consist of RGB unscaled images
Pytorch
Model
def _init_weights(m):
    """Initialise ``nn.Linear`` modules; leave every other module untouched.

    Meant to be passed to ``Module.apply``. The weight gets Xavier-uniform
    values and the bias, when the layer has one, is filled with 0.01.

    Args:
        m: The module currently visited by ``apply``.
    """
    # Exact type match on purpose: subclasses of nn.Linear are skipped.
    if type(m) is not nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)
    # nn.Linear(bias=False) stores ``bias`` as None; filling it would raise.
    if m.bias is not None:
        nn.init.constant_(m.bias, 0.01)
def get_mob_v3_small():
    """Load pretrained MobileNetV3-Small and freeze most of its parameters.

    Every module returned by ``get_children`` except the last 50 has
    ``requires_grad`` switched off on all of its parameters.

    Returns:
        The torchvision model with the frozen parameters.
    """
    backbone = torchvision.models.mobilenet_v3_small(pretrained=True)
    frozen_children = get_children(backbone)[:-50]
    for child in frozen_children:
        for param in child.parameters():
            param.requires_grad = False
    return backbone
class TransferMobileNetV3_v2(nn.Module):
    """MobileNetV3-Small backbone with a three-layer regression head.

    The backbone comes from ``get_mob_v3_small``; its ``classifier`` is
    replaced by Linear(1024, 1024) -> ReLU -> Linear(1024, 512) -> ReLU ->
    Linear(512, 2 * num_keypoints). ``_init_weights`` is then applied to the
    whole backbone, including the new head.

    Args:
        num_keypoints: Number of keypoints; the head emits two values per
            keypoint.
    """

    def __init__(self, num_keypoints: int = 5):
        super().__init__()
        self.classifier_neurons = num_keypoints * 2
        self.base_model = get_mob_v3_small()

        head_layers = [
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, self.classifier_neurons),
        ]
        self.base_model.classifier = nn.Sequential(*head_layers)
        self.base_model.apply(_init_weights)

    def forward(self, x):
        """Return the backbone's output for ``x``."""
        return self.base_model(x)
Training Script
def train(net, trainloader, testloader, train_loss_fn, optimizer, scaler, args):
    """Train ``net`` for ``args.epochs`` epochs with optional mixed precision.

    Args:
        net: Model to optimise; put into training mode at the start of every epoch.
        trainloader: Iterable yielding ``(inputs, labels)`` batches. It does
            not need to support ``len()``.
        testloader: Accepted for interface compatibility; not used here.
        train_loss_fn: Callable ``(prediction, labels) -> loss``.
        optimizer: Optimizer stepped once per batch.
        scaler: Gradient scaler used to scale the loss and step the optimizer.
        args: Object providing ``epochs``, ``device`` and ``use_amp``.
    """
    for _epoch in range(1, args.epochs + 1):
        net.train()
        for inputs, labels in trainloader:
            inputs = inputs.to(args.device)
            labels = labels.to(args.device)
            optimizer.zero_grad()
            # Only the forward pass and the loss run under autocast.
            with torch.cuda.amp.autocast(args.use_amp):
                prediction = net(inputs)
                loss = train_loss_fn(prediction, labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
def main():
    """Parse arguments, seed the RNGs, and launch training.

    NOTE(review): ``net``, ``train_loader`` and ``test_loader`` are not
    created in this function; they are expected to exist in the enclosing
    scope.
    """
    args = make_args_parser()
    args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Seed torch (CPU and all GPUs) and NumPy, and make cuDNN deterministic.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(args.seed)

    criterion = nn.MSELoss()
    adam = optim.Adam(net.parameters(), lr=3e-2, betas=(0.9, 0.999))
    grad_scaler = torch.cuda.amp.GradScaler(enabled=args.use_amp)
    train(net, train_loader, test_loader, criterion, adam, grad_scaler, args)
Tensorflow
Model
# ImageNet-pretrained MobileNetV3-Small for 224x224x3 inputs.
base_model = tf.keras.applications.MobileNetV3Small(
    weights="imagenet", input_shape=(224, 224, 3)
)

# Branch off at the sixth layer from the end and attach the dense head.
x_in = base_model.layers[-6].output
x = Dense(units=1024, activation="relu")(x_in)
x = Dense(units=512, activation="relu")(x)
x = Dense(units=10, activation="linear")(x)
model = Model(inputs=base_model.input, outputs=x)

# Freeze every layer except the last 50 of the combined model.
for layer in model.layers[:-50]:
    layer.trainable = False
Training Script
# MSE regression with Adam at a learning rate of 3e-2.
model.compile(
    loss="mse",
    optimizer=tf.keras.optimizers.Adam(learning_rate=3e-2),
)
# 100 epochs, batches of 32, with 20% of the data held out for validation.
history = model.fit(
    input_numpy,
    output_numpy,
    batch_size=32,
    epochs=100,
    validation_split=0.2,
    verbose=1,
)
Results
The PyTorch model predicts one single point around the center for all 5 different points.
The Tensorflow model predicts the points quite well and are quite accurate.
The loss in the Pytorch model is much higher than the Tensorflow model.
Please do let me know what is going wrong, as I am trying my best to shift to PyTorch for this work and I need this model to give me similar/identical results.
Note: I also noticed that the MobileNetV3 Small model seems to be different in PyTorch and in TensorFlow. I do not know if I am interpreting it wrong, but I'm putting it here just in case.

Can't apply gradients on tf.Variable

I am trying to learn a similarity matrix (M) between two image embeddings. A single training instance is a pair of images: (anchor, positive). So ideally the model will return 0 distance for embeddings of similar images.
The problem is that when I declare the distance matrix (M) as a tf.Variable, it returns an error
on this line
self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
TypeError: 'Variable' object is not iterable.
I think I should use a tensorflow datatype for M, that is iterable
Please tell me how I can fix this issue
import tensorflow as tf
from tensorflow import keras
# metric learning model
class MetricLearningModel:
    """Learns a 2048x2048 matrix ``M`` relating anchor and positive embeddings.

    Args:
        lr: Learning rate for the Adam optimizer.
    """

    def __init__(self, lr):
        # ``learning_rate`` is the supported keyword; ``lr`` is a deprecated alias.
        self.optimizer = keras.optimizers.Adam(learning_rate=lr)
        self.lr = lr
        self.loss_object = keras.losses.MeanSquaredError()
        # Despite the plural name this is a single tf.Variable (the matrix M).
        self.trainable_variables = tf.Variable(
            tf.ones((2048, 2048), dtype=tf.float32),
            trainable=True,
        )

    def similarity_function(self, anchor_embeddings, positive_embeddings):
        """Return the bilinear form ``X_j @ M @ X_i^T``.

        Args:
            anchor_embeddings: ``X_i``; transposed before the second product.
            positive_embeddings: ``X_j``; left operand of the first product.

        Returns:
            The tensor produced by the two matrix products.
        """
        M = self.trainable_variables
        X_i = anchor_embeddings
        X_j = positive_embeddings
        similarity_value = tf.matmul(X_j, M, name='Tensor')
        similarity_value = tf.matmul(similarity_value, tf.transpose(X_i), name='Tensor')
        # For reference: distance(x, y) = sqrt( (x-y) @ M @ (x-y).T )
        return similarity_value

    def train_step(self, anchor, positive):
        """Run one optimisation step that pushes the similarity towards zero.

        Args:
            anchor: Anchor embeddings.
            positive: Positive embeddings.
        """
        anchor_embeddings, positive_embeddings = anchor, positive
        # Calculate gradients
        with tf.GradientTape() as tape:
            # Calculate similarity between anchors and positives.
            similarities = self.similarity_function(anchor_embeddings, positive_embeddings)
            y_pred = similarities
            y_true = tf.zeros(1)
            print(y_true, y_pred)
            loss_value = self.loss_object(
                y_true=y_true,
                y_pred=y_pred,
            )
        # zip() needs iterables, and tape.gradient mirrors the structure of its
        # sources, so the single Variable is wrapped in a list for both calls.
        variables = [self.trainable_variables]
        gradients = tape.gradient(loss_value, variables)
        # Apply gradients via optimizer
        self.optimizer.apply_gradients(zip(gradients, variables))
# Run a single training step on a pair of all-ones (1, 2048) embeddings.
metric_model = MetricLearningModel(lr=1e-3)
anchor = tf.ones((1, 2048), dtype=tf.float32)
positive = tf.ones((1, 2048), dtype=tf.float32)
metric_model.train_step(anchor, positive)
The python zip function expects iterable objects, like for example a list or a tuple.
In your calls to tape.gradient, or optimizer.apply_gradients, you can put your Variable in a list to solve the issue :
with tf.GradientTape() as tape:
    ...  # forward pass that computes loss_value goes here
# Passing a list as sources makes tape.gradient return a list as well.
gradients = tape.gradient(loss_value, [self.trainable_variables])
# Apply gradients via optimizer
self.optimizer.apply_gradients(zip(gradients, [self.trainable_variables]))
tape.gradient respects the shape of the sources object passed to compute the gradients of, so if you feed it with a list, you will get a list out of it. It is stated in the documentation:
Returns
a list or nested structure of Tensors (or IndexedSlices, or None), one for each element in sources. Returned structure is the same as the structure of sources.

How to create a recurrent connection between 2 layers in Tensorflow/Keras?

Essentially what I would like to do is take the following very simple feedforward graph:
And then add a recurrent layer that feeds the outputs of the second Dense layer as Input to the first Dense layer, like demonstrated below. Both models are obviously simplifications of my actual use case, though I suppose the general principle for which I am asking holds true for both.
I wonder if there is an efficient way in TensorFlow or even Keras to accomplish this, especially regarding GPU processing efficiency. While I am fairly confident that I could hack together a custom model in TensorFlow that would accomplish this function-wise, I am pessimistic about the GPU processing efficiency of such a custom model. I would therefore very much appreciate it if someone knows an efficient way to accomplish these recurrent connections between 2 layers. Thank you for your time! =)
For completeness sake, here is the code to create the first simple feedforward graph. The recurrent graph I created through image editing.
# Plain feedforward stack: 128 -> 64 -> 32 -> 16, no activations specified.
inputs = tf.keras.Input(shape=(128,))
h_1 = tf.keras.layers.Dense(64)(inputs)
h_2 = tf.keras.layers.Dense(32)(h_1)
out = tf.keras.layers.Dense(16)(h_2)
model = tf.keras.Model(inputs=inputs, outputs=out)
Since my question hasn't received any answers, I would like to share the solution I came up with in case someone finds this question via search.
Please let me know if you find or come up with a better solution - thanks!
class SimpleModel(tf.keras.Model):
    """Feedforward model with a recurrent connection from node_3 back to node_2.

    The output of ``node_3`` is stored in a non-trainable variable and
    concatenated to the input of ``node_2`` on the next call.
    """

    def __init__(self, input_shape, *args, **kwargs):
        """Create the layers and the variable holding the recurrent state.

        Args:
            input_shape: Shape passed to the ``InputLayer`` and used to derive
                the shapes of the following layers.
            *args: Forwarded to ``tf.keras.Model.__init__``.
            **kwargs: Forwarded to ``tf.keras.Model.__init__``.
        """
        super(SimpleModel, self).__init__(*args, **kwargs)
        # Create node layers
        self.node_1 = tf.keras.layers.InputLayer(input_shape=input_shape)
        self.node_2 = tf.keras.layers.Dense(64, activation='sigmoid')
        self.node_3 = tf.keras.layers.Dense(32, activation='sigmoid')
        self.node_4 = tf.keras.layers.Dense(16, activation='sigmoid')
        self.conn_3_2_recurrent_state = None
        # Create recurrent connection states
        node_1_output_shape = self.node_1.compute_output_shape(input_shape)
        node_2_output_shape = self.node_2.compute_output_shape(node_1_output_shape)
        node_3_output_shape = self.node_3.compute_output_shape(node_2_output_shape)
        # The initial state is node_3 applied to an all-ones tensor shaped like
        # node_2's output; the variable is excluded from training.
        self.conn_3_2_recurrent_state = tf.Variable(initial_value=self.node_3(tf.ones(shape=node_2_output_shape)),
                                                    trainable=False,
                                                    validate_shape=False,
                                                    dtype=tf.float32)
        # OR
        # self.conn_3_2_recurrent_state = tf.random.uniform(shape=node_3_output_shape, minval=0.123, maxval=4.56)
        # OR
        # self.conn_3_2_recurrent_state = tf.ones(shape=node_3_output_shape)
        # OR
        # self.conn_3_2_recurrent_state = tf.zeros(shape=node_3_output_shape)

    def call(self, inputs):
        """Run one forward pass and overwrite the stored recurrent state.

        Args:
            inputs: Input tensor passed to ``node_1``.

        Returns:
            The output of ``node_4``.
        """
        x = self.node_1(inputs)
        #tf.print(self.conn_3_2_recurrent_state)
        #tf.print(self.conn_3_2_recurrent_state.shape)
        # Feed the previous node_3 output into node_2 alongside the fresh input.
        x = tf.keras.layers.Concatenate(axis=-1)([x, self.conn_3_2_recurrent_state])
        x = self.node_2(x)
        x = self.node_3(x)
        # Remember this call's node_3 output for the next call.
        self.conn_3_2_recurrent_state.assign(x)
        #tf.print(self.conn_3_2_recurrent_state)
        #tf.print(self.conn_3_2_recurrent_state.shape)
        x = self.node_4(x)
        return x
# Demonstrate statefulness of model (uncomment tf prints in model.call())
model = SimpleModel(input_shape=(10, 128))
x = tf.ones((10, 128))
model(x)
model(x)

# Demonstrate trainability of the recurrent connection TF model
x = tf.random.uniform((10, 128))
y = tf.ones((10, 16))
model = SimpleModel(input_shape=(10, 128))
model.compile(optimizer="adam", loss="binary_crossentropy")
model.fit(x, y, epochs=100)

tf.keras.backend.function for transforming embeddings inside tf.data.dataset

I am trying to use the output of a neural network to transform data inside tf.data.dataset. Specifically, I am using a Delta-Encoder to manipulate embeddings inside the tf.data pipeline. In so doing, however, I get the following error:
OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.
I have searched the dataset pipeline page and stack overflow, but I could not find something that addresses my question. In the code below I am using an Autoencoder, as it yields an identical error with more concise code.
The offending part seems to be
[[x,]] = tf.py_function(Auto_Func, [x], [tf.float32])
inside
tf_auto_transform.
# Synthetic data: 100 random embeddings of width 1000 with all-zero targets.
num_embeddings = 100
input_dims = 1000
embeddings = np.random.normal(size=(num_embeddings, input_dims)).astype(np.float32)
target = np.zeros(num_embeddings)

# creating Autoencoder
inp = Input(shape=(input_dims,), name='input')
hidden = Dense(10, activation='relu', name='hidden')(inp)
out = Dense(input_dims, activation='relu', name='output')(hidden)
auto_encoder = tf.keras.models.Model(inputs=inp, outputs=out)

# The model is bound to ``auto_encoder``; the listing referenced an undefined
# name ``Autoencoder`` here, which raises NameError.
Auto_Func = tf.keras.backend.function(
    inputs=auto_encoder.get_layer(name='input').input,
    # NOTE(review): this taps the *input* of the 'output' layer, i.e. the
    # hidden activations, not the reconstruction; confirm that `.output`
    # was not intended.
    outputs=auto_encoder.get_layer(name='output').input,
)
#Autoencoder transform for dataset.map
#Autoencoder transform for dataset.map
def tf_auto_transform(x, target):
    """Run ``x`` through ``Auto_Func`` inside a ``tf.data`` pipeline.

    Args:
        x: Tensor handed to ``Auto_Func`` via ``tf.py_function``.
        target: Returned unchanged alongside the transformed tensor.

    Returns:
        ``(transformed_x, target)``; the static shape of ``x`` is re-applied
        to ``transformed_x``, since ``py_function`` outputs carry none.
    """
    x_shape = x.shape
    # Earlier attempt, kept for reference (it did not remove the error):
    # @tf.function
    # def func(x):
    #     return tf.py_function(Auto_Func, [x], [tf.float32])
    # [[x,]] = func(x)

    # Tout is a one-element list, so py_function returns a one-element list of
    # tensors. Unpack only that list: a second level of unpacking iterates
    # over the tensor itself, which graph execution does not allow.
    [x] = tf.py_function(Auto_Func, [x], [tf.float32])
    x.set_shape(x_shape)
    return x, target
def get_dataset(X, y, batch_size=32):
    """Build a dataset that maps ``tf_auto_transform`` over samples, then batches.

    Args:
        X: Features, sliced along the first axis.
        y: Targets, sliced along the first axis.
        batch_size: Number of elements per batch.

    Returns:
        The batched ``tf.data.Dataset``.
    """
    return (
        tf.data.Dataset.from_tensor_slices((X, y))
        .map(tf_auto_transform)
        .batch(batch_size)
    )


dataset = get_dataset(embeddings, target, 2)
The above code yields the following error:
OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.
I tried to eliminate the error by running the commented out section of the tf_auto_transform function, but the error persisted.
SideNote: While it is true that the Delta encoder paper has code, it is written in tf 1.x. I am trying to use tf 2.x with the tf functional API instead. Thank you for your help!
At the risk of outing myself as a n00b, the answer is to switch the order of the map and batch functions. I am trying to apply a neural network to make some changes on data. tf.keras models take batches as input, not individual samples. By batching the data first, I can run batches through my nn.
def get_dataset(X, y, batch_size=32):
    """Build a dataset that batches first, then maps ``tf_auto_transform``.

    Batching before mapping means the transform receives whole batches
    rather than individual samples.

    Args:
        X: Features, sliced along the first axis.
        y: Targets, sliced along the first axis.
        batch_size: Number of elements per batch.

    Returns:
        The batched and transformed ``tf.data.Dataset``.
    """
    train_ds = tf.data.Dataset.from_tensor_slices((X, y))
    # The changed order: batch before map.
    train_ds = train_ds.batch(batch_size)
    train_ds = train_ds.map(tf_auto_transform)
    return train_ds
It really is that simple.