I would like to clear the memory / network after every time I am done with the training. I used the alternatives proposed online, but it seems like they are not working if I am correctly interpreting my results. I use tf.compat.v1.reset_default_graph() and tf.keras.backend.clear_session() since they are mostly recommended online.
import numpy as np
import random
import tensorflow as tf
from tensorflow import keras
from tensorflow.python.keras import backend as K
# Bounds used to scale the uniformly sampled random data below.
upper_limit = 2
lower_limit = -2
span = upper_limit - lower_limit

# 100 random samples with 5 features each, and one random target per sample.
training_input = np.random.random([100, 5]) * span + lower_limit
training_output = np.random.random([100, 1]) * 10 * span + lower_limit

# Small regression network: Flatten -> Dense(12, relu) -> Dense(1).
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(5,)),
    tf.keras.layers.Dense(12, activation='relu'),
    tf.keras.layers.Dense(1),
])
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    loss="mse",
)


def _print_layer_weights(*prefix):
    """Print the weights of every layer of ``model``, optionally prefixed."""
    for layer in model.layers:
        print(*prefix, layer.get_weights(), "\n")  # weights


_print_layer_weights("layer weights before fitting: ")

model.fit(training_input, training_output, epochs=5, batch_size=100, verbose=0)

_print_layer_weights("layer weights after fitting: ")
print("\n")

# Attempt to clear the graph / Keras session, then inspect the same model
# object again to see whether its weights changed.
tf.compat.v1.reset_default_graph()
tf.keras.backend.clear_session()

print("after clear", "\n")
_print_layer_weights()
When I print the layer weights after attempting to clear the network, I get the same weight values as before cleaning the session.
I think what you are looking for is a way to reset the weights of your model, which is not really related to the session or the graph (with some exceptions).
Resetting the weights is currently a debated topic. You can find how to do it in most cases here, but as you can see, nobody is currently planning to implement this function.
for easy access I post the current proposition below
def reset_weights(model):
    """Re-run the initializers of every layer in ``model`` and assign the results.

    For each layer, every attribute whose name contains ``"initializer"`` is
    paired with the variable named after it (``kernel_initializer`` ->
    ``kernel``, ``recurrent_initializer`` -> ``recurrent_kernel``) and the
    variable is overwritten in place with a freshly initialized value.
    Nested models are handled recursively.

    Initializers without a matching variable (for example a bias initializer
    on a layer whose ``bias`` attribute is ``None``, or a layer whose
    variables do not exist yet) are skipped instead of raising.

    Args:
        model: Object exposing a ``layers`` iterable (e.g. a ``tf.keras.Model``).
    """
    for layer in model.layers:
        if isinstance(layer, tf.keras.Model):  # a model used as a layer
            reset_weights(layer)  # apply function recursively
            continue

        # Layers exposing a `cell` keep their initializers on that cell.
        init_container = layer.cell if hasattr(layer, 'cell') else layer

        for key, initializer in init_container.__dict__.items():
            if "initializer" not in key:  # not an initializer attribute
                continue

            # Find the corresponding variable, like the kernel or the bias.
            if key == 'recurrent_initializer':  # special case check
                var_name = 'recurrent_kernel'
            else:
                var_name = key.replace("_initializer", "")
            var = getattr(init_container, var_name, None)

            # Nothing to reset when the variable is absent or the stored
            # "initializer" is not something that can be called.
            if var is None or not callable(initializer):
                continue

            var.assign(initializer(var.shape, var.dtype))
Remember that if you do not define a seed, the weights will be different each time you call reset.
Related
I have a network which contains Conv2D layers followed by ReLU activations, declared as such:
# Convolution followed by a separate, explicit ReLU layer applied to its output.
x = layers.Conv2D(self.hparams['channels_count'], kernel_size=(4,1))(x)
x = layers.ReLU()(x)
And it is ported to TFLite with the following representation:
Basic TFLite network without Q-aware training
However, after performing quantization-aware training on the network and porting it again, the ReLU layers are now explicit in the graph:
TFLite network after Q-aware training
This results in them being processed separately on the target instead of during the evaluation of the Conv2D kernel, inducing a 10% performance loss in my overall network.
Declaring the activation with the following implicit syntax does not produce the problem:
# Same convolution, with the ReLU requested through Conv2D's `activation` argument.
x = layers.Conv2D(self.hparams['channels_count'], kernel_size=(4,1), activation='relu')(x)
Basic TFLite network with implicit ReLU activation
TFLite network with implicit ReLU after Q-aware training
However, this restricts the network to basic ReLU activation, whereas I would like to use ReLU6 which cannot be declared in this way.
Is this a TFLite issue? If not, is there a way to prevent the ReLU layer from being split? Or alternatively, is there a way to manually merge the ReLU layers back into the Conv2D layers after the quantization-aware training?
Edit:
QA training code:
def learn_qaware(self):
    """Wrap ``self.model`` for quantization-aware training, then compile and fit it.

    The model is replaced by the result of
    ``tfmot.quantization.keras.quantize_model``; training and validation data
    come from ``SCDataGenerator`` instances built on ``self.training_set`` and
    ``self.validate_set``.
    """
    self.model = tfmot.quantization.keras.quantize_model(self.model)

    training_generator = SCDataGenerator(self.training_set)
    validate_generator = SCDataGenerator(self.validate_set)

    # Optimizer, loss, metrics and callbacks are supplied by the owning object.
    compile_options = dict(
        optimizer=self.configure_optimizers(qa_learn=True),
        loss=self.get_LLP_loss(),
        metrics=self.get_metrics(),
        run_eagerly=config['eager_mode'],
    )
    self.model.compile(**compile_options)

    fit_options = dict(
        epochs=self.hparams['max_epochs'],
        batch_size=1,
        shuffle=self.hparams['shuffle_curves'],
        validation_data=validate_generator,
        callbacks=self.get_callbacks(qa_learn=True),
    )
    self.model.fit(training_generator, **fit_options)
Quantized TFLite model generation code:
def tflite_convert(classifier):
    """Convert ``classifier.model`` to an int8-quantized TFLite file on disk.

    The Keras model's input is temporarily forced to batch size 1 while the
    converter is created. The converter is configured for default
    optimizations, int8-only builtin ops and int8 input/output, and is
    calibrated with the samples returned by
    ``classifier.validate_set.get_all_inputs()``.

    Args:
        classifier: Object exposing ``model``, ``model_path`` and
            ``validate_set`` as used below.

    Returns:
        A ``TFLite_model`` built from the path of the written file.
    """
    output_file = get_tflite_filename(classifier.model_path)

    # Force batch size to 1 for generation. Work on a copy so that
    # `saved_shape` still holds the original dimensions for the restore below
    # (the original code aliased both names to the same list).
    saved_shape = classifier.model.input.shape.as_list()
    fixed_shape = list(saved_shape)
    fixed_shape[0] = 1
    classifier.model.input.set_shape(fixed_shape)
    try:
        converter = tf.lite.TFLiteConverter.from_keras_model(classifier.model)
    finally:
        # NOTE(review): whether set_shape can relax a dimension back to its
        # previous value depends on the TensorFlow version - confirm.
        classifier.model.input.set_shape(saved_shape)

    # Set the optimization flag.
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    # Enforce integer only quantization
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.int8
    converter.inference_output_type = tf.int8

    # NOTE(review): tf.executing_eagerly() only reports the current mode, it
    # does not enable eager execution; kept as in the original.
    if config['eager_mode']:
        tf.executing_eagerly()

    # Provide a representative dataset to ensure we quantize correctly.
    def representative_dataset():
        for x in classifier.validate_set.get_all_inputs():
            # One sample per step, reshaped to (1, x.shape[0], 1, 1) float32.
            rs = x.reshape(1, x.shape[0], 1, 1).astype(np.float32)
            yield [rs]

    converter.representative_dataset = representative_dataset
    model_tflite = converter.convert()

    # Save the model to disk; the context manager closes the file handle.
    with open(output_file, "wb") as tflite_file:
        tflite_file.write(model_tflite)

    return TFLite_model(output_file)
I have found a workaround which works by instantiating a non-trained version of the model, then copying over the weights from the quantization aware trained model before converting to TFLite.
This seems like quite a hack, so I'm still on the lookout for a cleaner solution.
Code for the workaround:
def dequantize(self):
    """Copy the trained weights of ``self.model`` into a plain model and swap it in.

    A model from ``self.get_default_model()`` is created once and cached in
    ``self.fp_model``. For every one of its layers (except those whose name
    contains ``input`` or ``quantize_layer``) the layer named
    ``"quant_" + layer.name`` is looked up in ``self.model`` and each trainable
    weight is overwritten with the weight of the same name (with or without
    the ``quant_`` prefix). Finally ``self.model`` is replaced by the plain
    model.

    Raises:
        RuntimeError: If a layer or a weight has no counterpart in
            ``self.model``.
    """
    QUANT_TAG = "quant_"

    if not hasattr(self, 'fp_model') or not self.fp_model:
        self.fp_model = self.get_default_model()

    def find_layer_in_model(name, model):
        """Return the layer of ``model`` called ``name``, or None."""
        for layer in model.layers:
            if layer.name == name:
                return layer
        return None

    def find_weight_group_in_layer(name, layer):
        """Return the trainable weight of ``layer`` called ``name``, or None."""
        # Search the layer that was passed in, not a variable captured from
        # the enclosing scope.
        for weight_group in layer.trainable_weights:
            if weight_group.name == name:
                return weight_group
        return None

    for layer in self.fp_model.layers:
        if 'input' in layer.name or 'quantize_layer' in layer.name:
            continue

        quant_layer = find_layer_in_model(QUANT_TAG + layer.name, self.model)
        if quant_layer is None:
            raise RuntimeError('Failed to match layer ' + layer.name)

        for weight_group in layer.trainable_weights:
            # Try the prefixed weight name first, then the unprefixed one.
            quant_weight_group = find_weight_group_in_layer(
                QUANT_TAG + weight_group.name, quant_layer)
            if quant_weight_group is None:
                quant_weight_group = find_weight_group_in_layer(
                    weight_group.name, quant_layer)
            if quant_weight_group is None:
                raise RuntimeError('Failed to match weight group ' + weight_group.name)
            weight_group.assign(quant_weight_group)

    self.model = self.fp_model
You can pass activation=tf.nn.relu6 to use ReLU6 activation.
Essentially what I would like to do is take the following very simple feedforward graph:
And then add a recurrent layer that feeds the outputs of the second Dense layer as Input to the first Dense layer, like demonstrated below. Both models are obviously simplifications of my actual use case, though I suppose the general principle for which I am asking holds true for both.
I wonder if there may be an efficient way in TensorFlow or even Keras to accomplish this, especially regarding GPU processing efficiency. While I am fairly confident that I could hack together a custom model in TensorFlow that would accomplish this function-wise, I am pessimistic about the GPU processing efficiency of such a custom model. I would therefore very much appreciate it if someone knows about an efficient way to accomplish these recurrent connections between 2 layers. Thank you for your time! =)
For completeness sake, here is the code to create the first simple feedforward graph. The recurrent graph I created through image editing.
# Plain feed-forward graph: a 128-feature input followed by three Dense
# layers with 64, 32 and 16 units, wired with the Keras functional API.
inputs = tf.keras.Input(shape=(128,))
h_1 = tf.keras.layers.Dense(64)(inputs)
h_2 = tf.keras.layers.Dense(32)(h_1)
out = tf.keras.layers.Dense(16)(h_2)
model = tf.keras.Model(inputs, out)
Since my question hasn't received any answers, I would like to share the solution I came up with in case someone finds this question via search.
Please let me know if you find or come up with a better solution - thanks!
class SimpleModel(tf.keras.Model):
    """Feed-forward stack in which the output of ``node_3`` is fed back into ``node_2``.

    On every call the previous ``node_3`` output, held in the non-trainable
    variable ``conn_3_2_recurrent_state``, is concatenated to the input of
    ``node_2``; the variable is then overwritten with the new ``node_3``
    output.

    Args:
        input_shape: Shape handed to the ``InputLayer`` and used to size the
            initial recurrent state.
        *args, **kwargs: Forwarded to ``tf.keras.Model``.
    """

    def __init__(self, input_shape, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Create node layers
        self.node_1 = tf.keras.layers.InputLayer(input_shape=input_shape)
        self.node_2 = tf.keras.layers.Dense(64, activation='sigmoid')
        self.node_3 = tf.keras.layers.Dense(32, activation='sigmoid')
        self.node_4 = tf.keras.layers.Dense(16, activation='sigmoid')
        # Built once here rather than on every forward pass in call().
        self.concat = tf.keras.layers.Concatenate(axis=-1)

        # Create recurrent connection states
        node_1_output_shape = self.node_1.compute_output_shape(input_shape)
        node_2_output_shape = self.node_2.compute_output_shape(node_1_output_shape)
        # Only needed by the alternative initializations listed below.
        node_3_output_shape = self.node_3.compute_output_shape(node_2_output_shape)

        # Initial state: node_3 applied to a tensor of ones shaped like the
        # output of node_2.
        self.conn_3_2_recurrent_state = tf.Variable(
            initial_value=self.node_3(tf.ones(shape=node_2_output_shape)),
            trainable=False,
            validate_shape=False,
            dtype=tf.float32,
        )
        # OR
        # self.conn_3_2_recurrent_state = tf.random.uniform(shape=node_3_output_shape, minval=0.123, maxval=4.56)
        # OR
        # self.conn_3_2_recurrent_state = tf.ones(shape=node_3_output_shape)
        # OR
        # self.conn_3_2_recurrent_state = tf.zeros(shape=node_3_output_shape)

    def call(self, inputs):
        """Run one forward pass and store the new ``node_3`` output as state."""
        x = self.node_1(inputs)
        #tf.print(self.conn_3_2_recurrent_state)
        #tf.print(self.conn_3_2_recurrent_state.shape)

        # Feed the previous node_3 output back in next to the fresh input.
        x = self.concat([x, self.conn_3_2_recurrent_state])
        x = self.node_2(x)
        x = self.node_3(x)

        # Remember this output for the next call.
        self.conn_3_2_recurrent_state.assign(x)
        #tf.print(self.conn_3_2_recurrent_state)
        #tf.print(self.conn_3_2_recurrent_state.shape)

        x = self.node_4(x)
        return x
# Demonstrate statefulness of model (uncomment tf prints in model.call())
# The same input is passed twice; the recurrent state is updated by each call.
model = SimpleModel(input_shape=(10, 128))
x = tf.ones(shape=(10, 128))
model(x)
model(x)
# Demonstrate trainability of the recurrent connection TF model
# A fresh model is fitted on random inputs against all-ones targets.
x = tf.random.uniform(shape=(10, 128))
y = tf.ones(shape=(10, 16))
model = SimpleModel(input_shape=(10, 128))
model.compile(optimizer='adam', loss='binary_crossentropy')
model.fit(x=x, y=y, epochs=100)
I have implemented my own version of MobileNet in TensorFlow and would like to verify it by comparing it against the official tensorflow_hub version.
I can get something working easily as so:
# TF Hub handle of the MobileNet v1 feature-vector module used as the backbone.
URL = "https://tfhub.dev/google/imagenet/mobilenet_v1_100_224/feature_vector/4"
# Backbone followed by dropout and one output unit per entry of `class_names`.
# NOTE(review): the positional `True` is presumably KerasLayer's `trainable`
# argument - confirm against the tensorflow_hub documentation.
model = tf.keras.Sequential([
hub.KerasLayer(URL, True, input_shape=(IMG_SIZE, IMG_SIZE, 3)),
Layers.Dropout(0.2),
Layers.Dense(len(class_names))
])
But this model is already trained.
I've tried calling initializers, but tensorflow_hub.KerasLayers don't have them.
I've also tried resetting all of the weights with glorot_uniform() and RandomUniform(), but then the model does not learn at all (and neither does mine, when I do the same randomization of weights).
Can you reinitialize a pre-trained model?
A lot of searching only provided methods for resetting layers that you already have, or restoring them back to what they were when you started with the model.
This is probably far from complete, but maybe someone can build off it!
import tensorflow as tf
import tensorflow.keras.backend as K
def untrain_layer(layer):
    """Overwrite every weight of ``layer`` with a freshly initialized value.

    The initializer is chosen from the weight's name: names containing
    ``beta`` or ``moving_mean`` get zeros, names containing ``gamma`` or
    ``moving_variance`` get ones, and everything else gets Glorot-uniform
    values. Each weight name is printed as it is processed.

    Args:
        layer: Object exposing ``weights`` and ``set_weights`` (e.g. a Keras
            layer).
    """
    zeros = tf.keras.initializers.zeros()
    ones = tf.keras.initializers.ones()
    glorot = tf.keras.initializers.glorot_uniform()

    # Checked in order; the first matching substring wins.
    rules = (
        ("beta", zeros),
        ("gamma", ones),
        ("moving_mean", zeros),
        ("moving_variance", ones),
    )

    new_weights = []
    for w in layer.weights:
        print(w.name)
        initializer = next(
            (init for tag, init in rules if tag in w.name), glorot)
        # Build the tensor first, then evaluate it; the original wrapped
        # K.eval around the initializer object instead of its result.
        new_weights.append(K.eval(initializer(w.shape)))
    layer.set_weights(new_weights)
Use:
import tensorflow_hub as hub
feature_extractor = hub.KerasLayer(URL, True, input_shape=(IMG_SIZE, IMG_SIZE, 3))
# Call the helper by the name it is defined under (`untrain_layer`).
untrain_layer(feature_extractor)
I learnt about ResNet's skip connections recently, and I found that this network structure can improve training a lot, and that it also applies to convolutional networks such as U-Net. However, I don't know how to implement a similar structure with an LSTM autoencoder network. It looks like I got trapped by some dimensional problems...
I'm using keras' method to implement, but I kept getting errors.
So here is the network code:
# lstm autoencoder recreate sequence
from numpy import array
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
from keras.utils import plot_model
# from keras import regularizers
from keras.regularizers import l1
from keras.optimizers import Adam
import keras.backend as K
# LSTM autoencoder: three encoder LSTMs (512 -> 256 -> 20 units), the 20-unit
# code repeated n_in times, three decoder LSTMs (20 -> 256 -> 512 units) and a
# per-timestep Dense(1) output.
model = Sequential([
    LSTM(512, activation='selu', input_shape=(n_in, 1), return_sequences=True),
    LSTM(256, activation='selu', return_sequences=True),
    LSTM(20, activation='selu'),
    RepeatVector(n_in),
    LSTM(20, activation='selu', return_sequences=True),
    LSTM(256, activation='selu', return_sequences=True),
    LSTM(512, activation='selu', return_sequences=True),
    TimeDistributed(Dense(1)),
])
plot_model(model=model, show_shapes=True)
Just like skip connection diagram in resnet or unet, I'm trying to modify the network like this:
The output of an encoder LSTM layer is combined (concat, or add?) with the output of the former layer to form the input of a decoder LSTM layer. As the picture shows, the corresponding layers are symmetric. Is such a connection possible? I'm new to the Keras API and to skip-connection structures, so I don't know how to implement it.
First you need to start using the functional API instead of the Sequential.
The functional API allows you to build arbitrary input and output connections in each layer, instead of stacked networks.
Learn more about the functional API in:
https://keras.io/guides/functional_api/
About building skip connections from LSTM layers, it is as easy as building skip for any kind of layer. I will show you a sample code:
# Two residual-style blocks of LSTMs built with the functional API. In each
# block a single LSTM on the skip side brings the block input to the same
# number of units as the main path so both can be summed by Add.
# NOTE(review): `input` shadows the Python builtin of the same name.
input = Input(shape=input_shape)
a = LSTM(32, return_sequences=True)(input)
# Block 1: main path of three 64-unit LSTMs, skip path of one 64-unit LSTM.
x = LSTM(64, return_sequences=True)(a) # main1
a = LSTM(64, return_sequences=True)(a) # skip1
x = LSTM(64, return_sequences=True)(x) # main1
x = LSTM(64, return_sequences=True)(x) # main1
b = Add()([a,x]) # main1 + skip1
# Block 2: same layout with 128 units.
x = LSTM(128, return_sequences=True)(b) # main2
b = LSTM(128, return_sequences=True)(b) # skip2
x = LSTM(128, return_sequences=True)(x) # main2
x = LSTM(128, return_sequences=True)(x) # main2
c = Add()([b,x]) # main2 + skip2
# Head: a final LSTM returning only its last output, then Dense layers
# ending in a 2-unit softmax.
x = LSTM(256, return_sequences=False)(c)
x = Dense(512, activation='relu')(x)
x = Dense(128, activation='relu')(x)
x = Dense(2, activation='softmax')(x)
model = Model(input, x)
This code will produce the following network:
As you can see, the Add layer receives as arguments the previous layer plus the layer before the block (a in the first block).
As Add requires all arguments to have the same shape, you must add an extra LSTM on the skip side to equalize the shapes of the start and the end of the blocks (same concept as the original ResNet).
Of course you should experiment with this network, adding different kinds of layers, Dropout, regularizers, Activation, or whatever you choose to work for your case. This is only a stub network to show the skip connections with LSTM.
The rest is pretty much the same as any other networks you have already trained.
I'm using Keras (with tensorflow backend) and trying to get layers output(actual activation) on my training set during train time (using 'fit' function)
Is there any way to get the activations of last batch used for training as part of the on_batch_end Callback? or any other way to be able to access layers output?
I found the code below, but it runs a forward pass again on new data. I'm trying to utilize the fact that my network already did a forward pass as part of training on the batch itself and just pull the current activations. Is that possible?
def get_activations(model, model_inputs, print_shape_only=False, layer_name=None):
    """Run ``model_inputs`` through ``model`` and return the selected layer outputs.

    Args:
        model: Object exposing ``input`` and ``layers``.
        model_inputs: Input data; treated as a sequence of inputs when
            ``model.input`` is a list, otherwise as a single input.
        print_shape_only: When true, print only each activation's shape
            instead of its values.
        layer_name: Name of the single layer to evaluate; ``None`` selects
            every layer.

    Returns:
        A list with one activation per selected layer.
    """
    print('----- activations -----')

    inp = model.input
    model_multi_inputs_cond = isinstance(inp, list)
    if not model_multi_inputs_cond:
        # only one input! let's wrap it in a list.
        inp = [inp]

    # Output tensors of the requested layer(s); every layer when no name is given.
    selected_outputs = []
    for layer in model.layers:
        if layer.name == layer_name or layer_name is None:
            selected_outputs.append(layer.output)

    # One evaluation function per selected output.
    evaluators = [
        K.function(inp + [K.learning_phase()], [tensor])
        for tensor in selected_outputs
    ]

    # The trailing 0. is the learning phase:
    # 0 = Test mode (no dropout or batch normalization)
    if model_multi_inputs_cond:
        feed = [*model_inputs, 0.]
    else:
        feed = [model_inputs, 0.]

    # Evaluate everything first, then report.
    layer_outputs = [evaluate(feed)[0] for evaluate in evaluators]

    activations = []
    for layer_activations in layer_outputs:
        activations.append(layer_activations)
        print(layer_activations.shape if print_shape_only else layer_activations)
    return activations
You can write a custom Callback to obtain per-layer activations over the epochs. The answer here is helpful for understanding how to build the callback. I have added some lines for accessing the activations.
from keras.callbacks import LambdaCallback
from keras import backend as k
# One entry per save_act() call; each entry holds one result per layer.
activation_list = []


def save_act(model):
    """Evaluate every layer of ``model`` on the global ``X`` and record the results.

    A backend function mapping ``model.input`` to the layer's output is built
    for each layer and called with ``[X]``; the list of results is appended to
    ``activation_list``.
    """
    # get_output backend function for each layer's output tensor
    fetchers = []
    for layer in model.layers:
        fetchers.append(k.function([model.input], [layer.output]))

    # run each function on X to obtain the actual activations
    activation_list.append([fetch([X]) for fetch in fetchers])
# Save activations for all layers for each epoch:
# the lambda ignores `epoch` and `logs` and calls save_act on the global `model`.
activations_callback= LambdaCallback(on_epoch_end=lambda epoch, logs: save_act(model))
# NOTE: the `...` below are placeholders for the remaining fit() arguments;
# this line is illustrative and not valid Python as written.
model.fit(..., callbacks= [activations_callback], ...)
# The dimensionality of the activation object: [epoch][layer][0][input][unit]
# I usually use the mean activations for each hidden layer over the epochs
# (to visualise and check for signs of saturation):
n_layers = 2
n_epochs = 100
# Kept for reference; the averaging below no longer depends on it.
layer_sizes = [10, 10, 10]
input_size = X.shape[0]

# act_layers[layer, epoch] = mean activation of that layer at that epoch.
act_layers = np.zeros((n_layers, n_epochs))
for ep in range(n_epochs):
    for layer in range(n_layers):
        # One mean per input sample. A 1-D buffer is used so that the final
        # average runs over exactly `input_size` values; the original stored
        # the means in one row of a (layer_size, input_size) zero matrix and
        # averaged the whole matrix, shrinking the result by `layer_size`.
        per_input_means = np.zeros(input_size)
        for i in range(input_size):
            perlayer_outs = activation_list[ep][layer][0][i]
            per_input_means[i] = np.mean(perlayer_outs)
        act_layers[layer, ep] = np.mean(per_input_means)

acts_L1 = act_layers[0]
acts_L2 = act_layers[1]

# x axis with one point per recorded epoch (the original referenced an
# undefined name `epochs`).
epoch_axis = np.arange(n_epochs)
# NOTE(review): assumes `plt` is matplotlib.pyplot, imported elsewhere.
plt.plot(epoch_axis, acts_L1, linestyle='dotted', color='red', label='layer 1')
plt.plot(epoch_axis, acts_L2, linestyle='dotted', color='blue', label='layer 2')
plt.legend()
plt.show()