Memory increase when training a TensorFlow 2.2.0 Keras graph with inputs of different shapes

Below is a simple example:
import os
import psutil
import numpy as np
import tensorflow as tf

process = psutil.Process(os.getpid())

class TestKeras3:
    def __init__(self):
        pass

    def build_graph(self):
        inputs = tf.keras.Input(shape=(None, None, 3), batch_size=1)
        x = tf.keras.layers.Conv2D(100, (2, 2), padding='SAME', name='x')(inputs)
        y = tf.reshape(x, (-1,))
        z = tf.multiply(y, y)
        model = tf.keras.models.Model(inputs=inputs, outputs=z)
        return model

    def train(self):
        model = self.build_graph()
        model.summary()
        size = np.arange(1000)
        for i in range(1000):
            inputs = tf.random.normal([1, size[999 - i], size[999 - i], 3])
            with tf.GradientTape() as tape:
                output = model(inputs)
            print(i, tf.shape(output), process.memory_info().rss)
And the output (last iterations shown) is:
i    tf.shape(output)                              memory (RSS, bytes)
979 tf.Tensor([40000], shape=(1,), dtype=int32) 2481123328
980 tf.Tensor([36100], shape=(1,), dtype=int32) 2481582080
981 tf.Tensor([32400], shape=(1,), dtype=int32) 2482122752
982 tf.Tensor([28900], shape=(1,), dtype=int32) 2482393088
983 tf.Tensor([25600], shape=(1,), dtype=int32) 2482933760
984 tf.Tensor([22500], shape=(1,), dtype=int32) 2483453952
985 tf.Tensor([19600], shape=(1,), dtype=int32) 2483793920
986 tf.Tensor([16900], shape=(1,), dtype=int32) 2484330496
987 tf.Tensor([14400], shape=(1,), dtype=int32) 2484871168
988 tf.Tensor([12100], shape=(1,), dtype=int32) 2485137408
989 tf.Tensor([10000], shape=(1,), dtype=int32) 2485665792
990 tf.Tensor([8100], shape=(1,), dtype=int32) 2486206464
991 tf.Tensor([6400], shape=(1,), dtype=int32) 2486579200
992 tf.Tensor([4900], shape=(1,), dtype=int32) 2487119872
993 tf.Tensor([3600], shape=(1,), dtype=int32) 2487390208
994 tf.Tensor([2500], shape=(1,), dtype=int32) 2487930880
995 tf.Tensor([1600], shape=(1,), dtype=int32) 2488463360
996 tf.Tensor([900], shape=(1,), dtype=int32) 2488811520
997 tf.Tensor([400], shape=(1,), dtype=int32) 2489335808
998 tf.Tensor([100], shape=(1,), dtype=int32) 2489868288
999 tf.Tensor([0], shape=(1,), dtype=int32) 2490241024
I found that every time I changed the input size, memory consumption increased.
The Conv2D kernel in the model has a fixed shape of (2, 2, 3, 100), so my question is: does the model cache some tensors during the forward pass, causing memory to grow indefinitely? If so, how can these resources be released during training? If not, what else could be the reason?

After trying many methods, I solved this problem.
It seems that using plain tf ops in a Keras graph causes a memory leak, which can be solved by wrapping the tf ops in a tf.keras.layers.Layer subclass.
class ReshapeMulti(tf.keras.layers.Layer):
    def __init__(self):
        super(ReshapeMulti, self).__init__()

    def call(self, inputs):
        y = tf.reshape(inputs, (-1,))
        z = tf.multiply(y, y)
        return z

class TestKeras3:
    def __init__(self):
        pass

    def build_graph(self):
        inputs = tf.keras.Input(shape=(None, None, 3), batch_size=1)
        x = tf.keras.layers.Conv2D(100, (2, 2), padding='SAME', name='x')(inputs)
        # y = tf.reshape(x, (-1,))
        # z = tf.multiply(y, y)
        z = ReshapeMulti()(x)
        model = tf.keras.models.Model(inputs=inputs, outputs=z)
        return model

    def train(self):
        model = self.build_graph()
        model.summary()
        size = np.arange(1000)
        for i in range(1000):
            inputs = tf.random.normal([1, size[999 - i], size[999 - i], 3])
            with tf.GradientTape() as tape:
                output = model(inputs)
            print(i, tf.shape(output), process.memory_info().rss)
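An equivalent, untested sketch that keeps every op inside built-in Keras layers (tf.keras.layers.Reshape and tf.keras.layers.Multiply) instead of a custom subclass; note that Reshape's target shape excludes the batch axis:

def build_graph(self):
    inputs = tf.keras.Input(shape=(None, None, 3), batch_size=1)
    x = tf.keras.layers.Conv2D(100, (2, 2), padding='SAME', name='x')(inputs)
    y = tf.keras.layers.Reshape((-1,))(x)    # flatten everything but the batch axis
    z = tf.keras.layers.Multiply()([y, y])   # element-wise square
    return tf.keras.models.Model(inputs=inputs, outputs=z)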

Related

Subclass of tf.keras.Model cannot get summary() result

I want to build a subclass of tf.keras.Model and see the model structure with the summary() function, but it does not work. The following is my code:
import tensorflow as tf

class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(32, 3, activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.d1 = tf.keras.layers.Dense(128, activation='relu')
        self.d2 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

model = MyModel()
model.summary()
The error:
ValueError: This model has not yet been built. Build the model first
by calling build() or calling fit() with some data, or specify an
input_shape argument in the first layer(s) for automatic build.
You need to call each layer once so the shapes can be inferred, and then call the build() method of tf.keras.Model with the model's input shape as the argument:
import tensorflow as tf
import numpy as np

class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(32, 3, activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.d1 = tf.keras.layers.Dense(128, activation='relu')
        self.d2 = tf.keras.layers.Dense(10, activation='softmax')
        x = np.random.normal(size=(1, 32, 32, 3))
        x = tf.convert_to_tensor(x)
        _ = self.call(x)

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

model = MyModel()
model.build((32, 32, 3))
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
conv2d (Conv2D)              multiple                  896
_________________________________________________________________
flatten (Flatten)            multiple                  0
_________________________________________________________________
dense (Dense)                multiple                  3686528
_________________________________________________________________
dense_1 (Dense)              multiple                  1290
=================================================================
Total params: 3,688,714
Trainable params: 3,688,714
Non-trainable params: 0
_________________________________________________________________
A better solution is listed here: provide a model() method that builds an explicit functional Model:
import tensorflow as tf
from tensorflow.keras import Input, Model

class MyModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(1)

    def call(self, inputs, **kwargs):
        return self.dense(inputs)

    def model(self):
        x = Input(shape=(1,))
        return Model(inputs=[x], outputs=self.call(x))

MyModel().model().summary()
Editing @Vlad's answer to avoid this error:
ValueError: Input 0 of layer conv2d_10 is incompatible with the layer: expected min_ndim=4, found ndim=3. Full shape received: (32, 32, 3)
Change this line from:
model.build((32, 32, 3))
to:
model.build((None, 32, 32, 3))
Final code:
import numpy as np
import tensorflow as tf

class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(32, 3, activation='relu')
        self.flatten = tf.keras.layers.Flatten()
        self.d1 = tf.keras.layers.Dense(128, activation='relu')
        self.d2 = tf.keras.layers.Dense(10, activation='softmax')
        x = np.random.normal(size=(1, 32, 32, 3))
        x = tf.convert_to_tensor(x)
        _ = self.call(x)

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

model = MyModel()
model.build((None, 32, 32, 3))
model.summary()

Eager tf.GradientTape() returns only Nones

I try to calculate gradients with TensorFlow in eager mode, but tf.GradientTape() returns only None values and I cannot understand why. The gradients are calculated in the update_policy() function.
The output of the line:
grads = tape.gradient(loss, self.model.trainable_variables)
is
{list} <class 'list'>: [None, None, ..., None]
Here is the code:
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
import numpy as np

tf.enable_eager_execution()
print(tf.executing_eagerly())

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
set_session(sess)

class PGEagerAtariNetwork:
    def __init__(self, state_space, action_space, lr, gamma):
        self.state_space = state_space
        self.action_space = action_space
        self.gamma = gamma
        self.model = tf.keras.Sequential()
        # Conv
        self.model.add(
            tf.keras.layers.Conv2D(filters=32, kernel_size=[8, 8], strides=[4, 4], activation='relu',
                                   input_shape=(84, 84, 4,), name='conv1'))
        self.model.add(
            tf.keras.layers.Conv2D(filters=64, kernel_size=[4, 4], strides=[2, 2], activation='relu', name='conv2'))
        self.model.add(
            tf.keras.layers.Conv2D(filters=128, kernel_size=[4, 4], strides=[2, 2], activation='relu', name='conv3'))
        self.model.add(tf.keras.layers.Flatten(name='flatten'))
        # Fully connected
        self.model.add(tf.keras.layers.Dense(units=512, activation='relu', name='fc1'))
        self.model.add(tf.keras.layers.Dropout(rate=0.4, name='dr1'))
        self.model.add(tf.keras.layers.Dense(units=256, activation='relu', name='fc2'))
        self.model.add(tf.keras.layers.Dropout(rate=0.3, name='dr2'))
        self.model.add(tf.keras.layers.Dense(units=128, activation='relu', name='fc3'))
        self.model.add(tf.keras.layers.Dropout(rate=0.1, name='dr3'))
        # Logits
        self.model.add(tf.keras.layers.Dense(units=self.action_space, activation=None, name='logits'))
        self.model.summary()
        # Optimizer
        self.optimizer = tf.train.AdamOptimizer(learning_rate=lr)

    def get_probs(self, s):
        s = s[np.newaxis, :]
        logits = self.model.predict(s)
        probs = tf.nn.softmax(logits).numpy()
        return probs

    def update_policy(self, s, r, a):
        with tf.GradientTape() as tape:
            logits = self.model.predict(s)
            policy_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=a, logits=logits)
            policy_loss = policy_loss * tf.stop_gradient(r)
            loss = tf.reduce_mean(policy_loss)
        grads = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
You don't have a forward pass in your model: the Model.predict() method returns a NumPy array without taping the forward pass. Take a look at this example.
Given the following data and model:
import tensorflow as tf
import numpy as np

x_train = tf.convert_to_tensor(np.ones((1, 2), np.float32), dtype=tf.float32)
y_train = tf.convert_to_tensor([[0, 1]])
model = tf.keras.models.Sequential([tf.keras.layers.Dense(2, input_shape=(2,))])
First we use predict():
with tf.GradientTape() as tape:
    logits = model.predict(x_train)
    print('`logits` has type {0}'.format(type(logits)))
    # `logits` has type <class 'numpy.ndarray'>
    xentropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_train, logits=logits)
    reduced = tf.reduce_mean(xentropy)

grads = tape.gradient(reduced, model.trainable_variables)
print('grads are: {0}'.format(grads))
# grads are: [None, None]
Now we call the model on the input directly:
with tf.GradientTape() as tape:
    logits = model(x_train)
    print('`logits` has type {0}'.format(type(logits)))
    # `logits` has type <class 'tensorflow.python.framework.ops.EagerTensor'>
    xentropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_train, logits=logits)
    reduced = tf.reduce_mean(xentropy)

grads = tape.gradient(reduced, model.trainable_variables)
print('grads are: {0}'.format(grads))
# grads are: [<tf.Tensor: id=2044, shape=(2, 2), dtype=float32, numpy=
#  array([[ 0.77717704, -0.777177 ],
#         [ 0.77717704, -0.777177 ]], dtype=float32)>,
#  <tf.Tensor: id=2042, shape=(2,), dtype=float32, numpy=array([ 0.77717704, -0.777177 ], dtype=float32)>]
So use the model's __call__() method (i.e. model(x)) for the forward pass, not predict().
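Applied to the question's update_policy(), a minimal sketch of the fix (everything else unchanged) is:

def update_policy(self, s, r, a):
    with tf.GradientTape() as tape:
        logits = self.model(s)  # direct call is recorded by the tape, unlike predict()
        policy_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=a, logits=logits)
        policy_loss = policy_loss * tf.stop_gradient(r)
        loss = tf.reduce_mean(policy_loss)
    grads = tape.gradient(loss, self.model.trainable_variables)
    self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))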

Calculate gradients of intermediate nodes in tensorflow eager execution

I use TensorFlow eager execution to do the following calculation:
y = x^2
z = y + 2
My goal is to calculate dz/dx and dz/dy (the gradients of z with respect to x and y):
dx, dy = tape.gradient(z, [x, y])
However, only dy is calculated and dx is None; it seems only the gradient with respect to the tensor that z directly depends on can be calculated. The output, identical for all five training steps, is:
[None, <tf.Tensor: id=11, shape=(), dtype=float32, numpy=1.0>]
The following is the full code.
from __future__ import absolute_import, division, print_function
import tensorflow as tf
tf.enable_eager_execution()
tfe = tf.contrib.eager

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "6"
import warnings
warnings.filterwarnings('ignore')

train_steps = 5
for i in range(train_steps):
    x = tf.contrib.eager.Variable(0.)
    with tf.GradientTape() as tape:
        y = tf.square(x)
        z = y + 2
    print(tape.gradient(z, [x, y]))
Any solution?
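For reference, a minimal sketch of the same computation in TF 2.x, where x is a tf.Variable and y is an intermediate tensor recorded on the tape, returns both gradients:

import tensorflow as tf  # assumes TF 2.x, where eager execution is the default

x = tf.Variable(0.)
with tf.GradientTape() as tape:
    y = tf.square(x)
    z = y + 2
# dz/dx = 2x = 0.0 and dz/dy = 1.0, both non-None
print(tape.gradient(z, [x, y]))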

Keras Fully-Connected Dense Output M x N?

I am looking at the examples/image_ocr.py example in Keras; when I run it I see something like:
____________________________________________________________________________________________________
max2 (MaxPooling2D)              (None, 32, 16, 16)    0           conv2[0][0]
____________________________________________________________________________________________________
reshape (Reshape)                (None, 32, 256)       0           max2[0][0]
____________________________________________________________________________________________________
dense1 (Dense)                   (None, 32, 32)        8224        reshape[0][0]
____________________________________________________________________________________________________
The Dense layer outputs a 32x32 tensor. I am trying to replicate this in pure TensorFlow, where tf.matmul would be used, but how can I output 32x32 using matmul?
Addition: I am not trying to replicate the Keras example exactly. Here is my code:
import numpy as np
import tensorflow as tf

w = 128; h = 64
# junk image, only one
dataset = np.zeros((1, w, h, 1))

pool_size = 1
num_filters = 16

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')

inputs = tf.placeholder(tf.float32, [None, w, h, 1])

W_conv1 = weight_variable([3, 3, 1, num_filters])
b_conv1 = bias_variable([num_filters])
h_conv1 = tf.nn.relu(conv2d(inputs, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

W_conv2 = weight_variable([3, 3, num_filters, num_filters])
b_conv2 = bias_variable([num_filters])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

h_pool2_flat = tf.reshape(h_pool2, [-1, 32, 256])
W_fc1 = weight_variable([256, 32])
b_fc1 = bias_variable([32])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

print(inputs.shape)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    output = sess.run(h_pool2_flat, feed_dict={inputs: dataset})
print('output', output.shape)
And I get
ValueError: Shape must be rank 2 but is rank 3 for 'MatMul_5' (op: 'MatMul') with input shapes: [?,32,256], [256,32].
A smaller example:
import numpy as np
import tensorflow as tf

dataset = np.zeros((3, 2, 4))
inputs = tf.placeholder(tf.float32, [None, 2, 4])
print(inputs)
W = tf.zeros((4, 5))
print(W)
W2 = tf.matmul(inputs, W)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    output = sess.run(W2, feed_dict={inputs: dataset})
print('output', output.shape)
This gives a similar error:
ValueError: Shape must be rank 2 but is rank 3 for 'MatMul_12' (op: 'MatMul') with input shapes: [?,2,4], [4,5].
Any ideas? Thanks.
That 32 is there because it was in the previous layer; it remains unchanged.
tf.matmul multiplies over the two last dimensions, as stated here (see the examples that take more than two dimensions).
I see you've got a Dense(32) there, with input size 256. This means that the weights matrix is (256, 32). In Keras, the multiplication, as seen here, is inputs x kernel.
So, if you have the input tensor shaped as (?, any, 256) and the weights matrix shaped as (256, 32), all you need is:
output = tf.matmul(input, weights)
This will output a shape (?, any, 32); any is untouched because it just was there before.
You may also want to add the biases, which follow the same principle. You need a bias vector of shape (32,).
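A minimal sketch of that multiplication (assuming a TF version whose tf.matmul broadcasts the leading batch dimension, e.g. TF 2.x):

import tensorflow as tf

x = tf.random.normal((1, 32, 256))     # (batch, any, 256)
weights = tf.random.normal((256, 32))  # same shape as the Dense(32) kernel
bias = tf.zeros((32,))

output = tf.matmul(x, weights) + bias  # bias broadcasts over the last axis
print(output.shape)                    # (1, 32, 32)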

Keras Input layer (None, 2) with LSTM didn't work

I tried to create samples, X_train and y_train, that are similar to the format of my real data. Here is my code:
import matplotlib.pyplot as plt
import numpy as np
import time
import csv
import keras
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.layers.core import Masking
from keras.layers.wrappers import TimeDistributed
from openpyxl import load_workbook
from datetime import datetime

X_arryA = np.array([[1, 2], [3, 8], [9, 10], [6, 7]])
X_arryB = np.array([[1, 2], [3, 8]])
X_arryC = np.array([[1, 2], [3, 8], [9, 10], [6, 7], [9, 10], [6, 7]])
X_train = np.array([X_arryA, X_arryB, X_arryC])

y_arryA = np.array([1, 5, 3, 4])
y_arryB = np.array([2, 1])
y_arryC = np.array([6, 7, 4, 2, 3, 1])
y_train = np.array([y_arryA, y_arryB, y_arryC])

model = Sequential()
layers = [2, 50, 100, 1]
model.add(LSTM(
    input_shape=(None, 2),
    output_dim=layers[1],
    return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(
    layers[2],
    return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(
    output_dim=layers[3]))
model.add(Activation("linear"))

start = time.time()
model.compile(loss="mse", optimizer="rmsprop")
# print("Compilation Time : ", time.time() - start)
model.summary()
model.fit(X_train, y_train, batch_size=1, nb_epoch=1, validation_split=0.05)
I have checked model.summary() and I think the structure is okay. These messages were shown:
C:\Users\user\Anaconda3\envs\py35\lib\site-packages\ipykernel_launcher.py:14: UserWarning: Update your `LSTM` call to the Keras 2 API: `LSTM(units=50, input_shape=(None, 2), return_sequences=True)`
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
lstm_77 (LSTM)               (None, None, 50)          10600
_________________________________________________________________
dropout_65 (Dropout)         (None, None, 50)          0
_________________________________________________________________
lstm_78 (LSTM)               (None, 100)               60400
_________________________________________________________________
dropout_66 (Dropout)         (None, 100)               0
_________________________________________________________________
dense_36 (Dense)             (None, 1)                 101
_________________________________________________________________
activation_33 (Activation)   (None, 1)                 0
=================================================================
Total params: 71,101
Trainable params: 71,101
Non-trainable params: 0
_________________________________________________________________
C:\Users\user\Anaconda3\envs\py35\lib\site-packages\ipykernel_launcher.py:23: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(units=1)`
C:\Users\user\Anaconda3\envs\py35\lib\site-packages\keras\models.py:848: UserWarning: The `nb_epoch` argument in `fit` has been renamed `epochs`.
warnings.warn('The `nb_epoch` argument in `fit` '
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-509-c6f954bdb474> in <module>()
28 #print "Compilation Time : ", time.time() - start
29 model.summary()
---> 30 model.fit(X_train, y_train, batch_size=1, nb_epoch=1, validation_split=0.05)
C:\Users\user\Anaconda3\envs\py35\lib\site-packages\keras\models.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
865 class_weight=class_weight,
866 sample_weight=sample_weight,
--> 867 initial_epoch=initial_epoch)
868
869 def evaluate(self, x, y, batch_size=32, verbose=1,
C:\Users\user\Anaconda3\envs\py35\lib\site-packages\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
1520 class_weight=class_weight,
1521 check_batch_axis=False,
-> 1522 batch_size=batch_size)
1523 # Prepare validation data.
1524 do_validation = False
C:\Users\user\Anaconda3\envs\py35\lib\site-packages\keras\engine\training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_batch_axis, batch_size)
1376 self._feed_input_shapes,
1377 check_batch_axis=False,
-> 1378 exception_prefix='input')
1379 y = _standardize_input_data(y, self._feed_output_names,
1380 output_shapes,
C:\Users\user\Anaconda3\envs\py35\lib\site-packages\keras\engine\training.py in _standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
130 ' to have ' + str(len(shapes[i])) +
131 ' dimensions, but got array with shape ' +
--> 132 str(array.shape))
133 for j, (dim, ref_dim) in enumerate(zip(array.shape, shapes[i])):
134 if not j and not check_batch_axis:
ValueError: Error when checking input: expected lstm_77_input to have 3 dimensions, but got array with shape (3, 1)
I have spent 5 hours on this question, but it still doesn't work. Any help is appreciated.
LSTM layers only accept shapes like (numberOfSequences, numberOfSteps, featuresPerStep).
These are the 3 expected dimensions mentioned in the error message, so you need to prepare your data to fit them.
The problem is that numpy arrays cannot hold rows of variable length; an array must be a well-defined matrix. When you give three X_arry arrays of different lengths to np.array, the result cannot form a single array, so numpy makes an array of arrays instead, which Keras can't handle: it expects one single array.
With variable lengths, you will have to either pad each array with dummy values and add a Masking layer (a sketch of this follows the code below), or simply train each length individually:
X_arryLen4 = np.asarray([[[1, 2],[3, 8],[9, 10],[6, 7]]])
X_arryLen2 = np.asarray([[[1, 2],[3, 8]]])
X_arryLen6 = np.asarray([[[1, 2],[3, 8],[9, 10],[6, 7],[9, 10],[6, 7]]])
model.fit(X_arryLen4, .....)
model.fit(X_arryLen2, .....)
model.fit(X_arryLen6, .....)
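For the padding alternative, a minimal sketch (assuming 0.0 never occurs as real data, so it is safe as the mask value; y_train would need matching treatment):

from keras.preprocessing.sequence import pad_sequences

# pad all sequences to the longest one (6 steps) -> shape (3, 6, 2)
X_padded = pad_sequences([X_arryA, X_arryB, X_arryC],
                         padding='post', value=0.0, dtype='float32')

model = Sequential()
model.add(Masking(mask_value=0.0, input_shape=(None, 2)))  # skip padded steps
model.add(LSTM(50, return_sequences=True))
# ... rest of the model as before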
Answers that may help:
Keras misinterprets training data shape