Calculate gradients of intermediate nodes in tensorflow eager execution - tensorflow

I use tensorflow eager execution to do the following calculation:
y = x^2
z = y + 2.
My goal is to calculate dz/dx and dz/dy (the gradients of z with respect to x and y):
dx, dy = GradientTape.gradient(z, [x, y]).
However, only dy is calculated and dx is None. In other words, only the gradient with respect to the tensor that z directly depends on is calculated.
[None, <tf.Tensor: id=11, shape=(), dtype=float32, numpy=1.0>]
[None, <tf.Tensor: id=11, shape=(), dtype=float32, numpy=1.0>]
[None, <tf.Tensor: id=11, shape=(), dtype=float32, numpy=1.0>]
[None, <tf.Tensor: id=11, shape=(), dtype=float32, numpy=1.0>]
[None, <tf.Tensor: id=11, shape=(), dtype=float32, numpy=1.0>]
The following is the full code.
from __future__ import absolute_import, division, print_function
import tensorflow as tf
tf.enable_eager_execution()
tfe = tf.contrib.eager
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "6"
import warnings
warnings.filterwarnings('ignore')
# Repeat the same tiny computation a few times; each step prints the list
# of gradients of z with respect to x and y returned by the tape.
train_steps = 5
for i in range(train_steps):
    x = tf.contrib.eager.Variable(0.)
    with tf.GradientTape() as tape:
        # Forward pass recorded by the tape: y = x^2, z = y + 2.
        y = tf.square(x)
        z = y + 2
    grads = tape.gradient(z, [x, y])
    print(grads)
Any solution?

Related

use tfds.load download the datasets error

When I try to download the dataset with tfds.load(), like this:
ratings = tfds.load('movielens/100k-ratings', split="train")
the error is:
Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to C:\Users\samsung\tensorflow_datasets\movielens\100k-ratings\0.1.0...
Dl Completed...: 0%
0/1 [00:21<?, ? url/s]
Dl Size...:
0/0 [00:21<?, ? MiB/s]
Extraction completed...:
0/0 [00:21<?, ? file/s]
HTTPConnectionPool(host='files.grouplens.org', port=80): Max retries exceeded with url: /datasets/movielens/ml-100k.zip (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001E81008F910>: Failed to establish a new connection: [WinError 10060]
By the way, I am using a company computer.
Could someone help me, please?
Do you have the same dataset in the download folder?
It cannot use the shuffle Fn but you can export and updates
# Load the training split of MovieLens 100k.
ds = tfds.load('movielens/100k-ratings', split='train', shuffle_files=True)
# Shuffle with a 1024-element buffer, batch by 64 and prefetch.
ds = ds.shuffle(1024).batch(64).prefetch(tf.data.experimental.AUTOTUNE)
assert isinstance(ds, tf.data.Dataset)
# Inspect the first element produced by the pipeline.
for example in ds.take(1):
    print(example)
# {'bucketized_user_age': <tf.Tensor: shape=(), dtype=float32, numpy=45.0>, 'movie_genres': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([7], dtype=int64)>,
# 'movie_id': <tf.Tensor: shape=(), dtype=string, numpy=b'357'>, 'movie_title': <tf.Tensor: shape=(), dtype=string, numpy=b"One Flew Over the Cuckoo's Nest (1975)">,
# 'raw_user_age': <tf.Tensor: shape=(), dtype=float32, numpy=46.0>, 'timestamp': <tf.Tensor: shape=(), dtype=int64, numpy=879024327>,
# 'user_gender': <tf.Tensor: shape=(), dtype=bool, numpy=True>, 'user_id': <tf.Tensor: shape=(), dtype=string, numpy=b'138'>,
# 'user_occupation_label': <tf.Tensor: shape=(), dtype=int64, numpy=4>, 'user_occupation_text': <tf.Tensor: shape=(), dtype=string, numpy=b'doctor'>,
# 'user_rating': <tf.Tensor: shape=(), dtype=float32, numpy=4.0>, 'user_zip_code': <tf.Tensor: shape=(), dtype=string, numpy=b'53211'>}

memory increase when training in tensorflow2.2.0 keras graph with different shape

Below is the simple example:
import os
import psutil
import numpy as np
process = psutil.Process(os.getpid())
class TestKeras3:
    """Reproduction case: run a small Keras model on inputs of shrinking
    spatial size and print the process memory after every step."""

    def __init__(self):
        pass

    def build_graph(self):
        """Return a functional model: Conv2D, flatten to 1-D, square each element."""
        image = tf.keras.Input(shape=(None, None, 3), batch_size=1)
        conv = tf.keras.layers.Conv2D(100, (2, 2), padding='SAME', name='x')
        features = conv(image)
        # Plain TensorFlow ops applied directly to the Keras tensor.
        flat = tf.reshape(features, (-1,))
        squared = tf.multiply(flat, flat)
        return tf.keras.models.Model(inputs=image, outputs=squared)

    def train(self):
        """Run 1000 forward passes with side lengths 999 down to 0 and print
        the step index, the output shape and the resident set size."""
        model = self.build_graph()
        model.summary()
        size = np.arange(1000)
        for i in range(1000):
            side = size[999 - i]
            inputs = tf.random.normal([1, side, side, 3])
            with tf.GradientTape() as tape:
                output = model(inputs)
            print(i, tf.shape(output), process.memory_info().rss)
and the output is:
id output_shape memory cost
979 tf.Tensor([40000], shape=(1,), dtype=int32) 2481123328
980 tf.Tensor([36100], shape=(1,), dtype=int32) 2481582080
981 tf.Tensor([32400], shape=(1,), dtype=int32) 2482122752
982 tf.Tensor([28900], shape=(1,), dtype=int32) 2482393088
983 tf.Tensor([25600], shape=(1,), dtype=int32) 2482933760
984 tf.Tensor([22500], shape=(1,), dtype=int32) 2483453952
985 tf.Tensor([19600], shape=(1,), dtype=int32) 2483793920
986 tf.Tensor([16900], shape=(1,), dtype=int32) 2484330496
987 tf.Tensor([14400], shape=(1,), dtype=int32) 2484871168
988 tf.Tensor([12100], shape=(1,), dtype=int32) 2485137408
989 tf.Tensor([10000], shape=(1,), dtype=int32) 2485665792
990 tf.Tensor([8100], shape=(1,), dtype=int32) 2486206464
991 tf.Tensor([6400], shape=(1,), dtype=int32) 2486579200
992 tf.Tensor([4900], shape=(1,), dtype=int32) 2487119872
993 tf.Tensor([3600], shape=(1,), dtype=int32) 2487390208
994 tf.Tensor([2500], shape=(1,), dtype=int32) 2487930880
995 tf.Tensor([1600], shape=(1,), dtype=int32) 2488463360
996 tf.Tensor([900], shape=(1,), dtype=int32) 2488811520
997 tf.Tensor([400], shape=(1,), dtype=int32) 2489335808
998 tf.Tensor([100], shape=(1,), dtype=int32) 2489868288
999 tf.Tensor([0], shape=(1,), dtype=int32) 2490241024
I found that every time I changed the size of the input, the consumption of memory also increased.
I have a question that the size (2,2,3,100) of the conv2D parameter in the model is fixed. Is it true that the model will cache some Tensor during the forward calculation process, which will cause the memory to increase all the time? If so, how can these resources be released during training? If not, what else is the reason?
After trying many methods, I solved this problem.
It seems that using plain TensorFlow ops in a Keras graph causes a memory leak, which can be solved by wrapping those ops in a tf.keras.layers.Layer subclass.
class ReshapeMulti(tf.keras.layers.Layer):
    """Keras layer that flattens its input to 1-D and multiplies it by itself element-wise."""

    def __init__(self):
        super().__init__()

    def call(self, inputs):
        """Return the flattened input multiplied element-wise by itself."""
        flat = tf.reshape(inputs, (-1, ))
        return tf.multiply(flat, flat)
class TestKeras3:
    """Same experiment as before, but the reshape/multiply step is wrapped
    in the ReshapeMulti layer instead of being applied as raw TensorFlow ops."""

    def __init__(self):
        pass

    def build_graph(self):
        """Return a functional model: Conv2D followed by ReshapeMulti."""
        image = tf.keras.Input(shape=(None, None, 3), batch_size=1)
        conv = tf.keras.layers.Conv2D(100, (2, 2), padding='SAME', name='x')
        features = conv(image)
        # Replaces the direct tf.reshape / tf.multiply calls on the Keras tensor.
        squared = ReshapeMulti()(features)
        return tf.keras.models.Model(inputs=image, outputs=squared)

    def train(self):
        """Run 1000 forward passes with side lengths 999 down to 0 and print
        the step index, the output shape and the resident set size."""
        model = self.build_graph()
        model.summary()
        size = np.arange(1000)
        for i in range(1000):
            side = size[999 - i]
            inputs = tf.random.normal([1, side, side, 3])
            with tf.GradientTape() as tape:
                output = model(inputs)
            print(i, tf.shape(output), process.memory_info().rss)

tf.keras model.predict each time provides different values

Each time I run:
# First pass over train_ds: collect the one-hot labels and turn them into class indices.
labels = tf.concat([y for x, y in train_ds], axis=0)
y_true = np.argmax(labels, axis=1)
# Second pass over train_ds: predict and take the most probable class.
probabilities = model.predict(train_ds)
y_pred = np.argmax(probabilities, axis=1)
confusion_matrix(y_true, y_pred)
The result is different each time. To my understanding, the output of the line
y_pred = np.argmax(model.predict(train_ds), axis=1) is different each time.
Clarification: I run cell 1 (training) once. And cell 2 (inference) few times.
Why?
THE CODE:
Cell 1 (jupyter)
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, experimental
from tensorflow.keras.layers import MaxPool2D, Flatten, Dense
from tensorflow.keras import Model
from tensorflow.keras.losses import categorical_crossentropy
from sklearn.metrics import accuracy_score
image_size = (100, 100)
batch_size = 32

# Training subset (validation_split=0.2, subset="training") with one-hot
# labels and grayscale images.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory,
    label_mode='categorical',
    validation_split=0.2,
    subset="training",
    seed=1337,
    color_mode="grayscale",
    image_size=image_size,
    batch_size=batch_size,
)

inputs = Input(shape=(100, 100, 1))
x = experimental.preprocessing.Rescaling(1./255)(inputs)
# Two stages, each: two 3x3 ReLU convolutions followed by 2x2 max pooling.
for filters in (4, 8):
    for _ in range(2):
        x = Conv2D(filters=filters, kernel_size=3, padding='same', activation='relu')(x)
    x = MaxPool2D(pool_size=2, strides=2, padding='same')(x)
x = Flatten()(x)
# Two small dense layers, then a 5-way softmax.
for _ in range(2):
    x = Dense(units=4, activation='relu')(x)
output = Dense(units=5, activation='softmax')(x)

model = Model(inputs=inputs, outputs=output)
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss=categorical_crossentropy,
    metrics=["accuracy"],
)
model.fit(train_ds, epochs=5)
Cell 2:
# Predict twice on the same dataset object and score each run against y_true
# (computed earlier) to show that the two results differ.
print("Accuracy:")
y_pred = np.argmax(model.predict(train_ds), axis=1)
print(accuracy_score(y_true, y_pred))
y_pred = np.argmax(model.predict(train_ds), axis=1)
print(accuracy_score(y_true, y_pred))
OUTPUT:
118/118 [==============================] - 7s 57ms/step - loss: 0.1888 - accuracy: 0.9398
Accuracy:
0.593
0.586
Are you sure you do not train the model again every time you run the code? If the parameters of the model are the same the predicted result for the same input should be the same every time.
As far as I currently understand, the cause of the above is:
tf.keras.preprocessing.image_dataset_from_directory
While instance of it is:
type(train_ds)
tensorflow.python.data.ops.dataset_ops.BatchDataset
Reproduction:
First run:
[x for x, y in train_ds]
Output:
[<tf.Tensor: shape=(32, 100, 100, 1), dtype=float32, numpy= array([[[[157.],
[155.],
[159.],
Second run:
[x for x, y in train_ds]
Output:
[<tf.Tensor: shape=(32, 100, 100, 1), dtype=float32, numpy= array([[[[ 34.],
[ 36.],
[ 39.],
...,
The possible solution
# Materialize the dataset in ONE pass so that images and labels come from
# the same iteration and stay aligned.
batches = list(train_ds)
imgs = tf.concat([img for img, _ in batches], axis=0)
y_true = np.argmax(tf.concat([label for _, label in batches], axis=0), axis=1)
# Predict twice on the fixed tensor and print the accuracy of each run.
for _ in range(2):
    y_pred = np.argmax(model.predict(imgs), axis=1)
    print (accuracy_score(y_true, y_pred))
OUTPUT
0.944044764
0.944044764
Is there any better solution?
UPDATE 2:
Maybe a more appropriate approach in the case of a validation dataset (train_ds is used here just as an example) is to add the argument shuffle=False:
# Same dataset as before, but with shuffling disabled. The keyword is
# lower-case `shuffle`; Python keyword arguments are case-sensitive.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory,
    label_mode='categorical',
    validation_split=0.2,
    subset="training",
    seed=1337,
    color_mode="grayscale",
    image_size=image_size,
    batch_size=batch_size,
    shuffle=False,
)
UPDATE 3:
Here it's probably the best option in case if your test images are in a separate folder.
path = 'your path to test folder'
# Stream the test images from disk in a fixed (unshuffled) order.
datagen = ImageDataGenerator()
test_generator = datagen.flow_from_directory(
    directory=path,
    class_mode='categorical',
    shuffle=False,
    batch_size=32,
    target_size=(512, 512),
)
test_generator.reset()
This is better than OPTION 1, since it also works on a dataset that doesn't fit into memory (RAM).

How to solve ValueError in model.predict()?

I am new to neural network problems. I have searched for a couple of hours but could not understand what I should do to fix this issue! I'm working with the NSL-KDD dataset for an intrusion detection system with a convolutional neural net.
I am stuck with this problem: ValueError: Input 0 of layer dense_14 is incompatible with the layer: expected axis -1 of input shape to have value 3904 but received input with shape [None, 3712]
Shapes:
x_train (125973, 122)
y_train (125973, 5)
x_test (22544, 116)
y_test (22544,)
After reshape :
# Append a trailing length-1 axis to each 2-D feature matrix.
x_train = np.expand_dims(x_train, axis=-1)  # (125973, 122, 1)
x_test = np.expand_dims(x_test, axis=-1)  # (22544, 116, 1)
Model :
# 1-D CNN classifier; the expected input length is taken from x_train.
model = Sequential([
    Convolution1D(64, 3, padding="same", activation="relu", input_shape=(x_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation="relu"),
    Dropout(0.5),
    Dense(5, activation="softmax"),
])
Compile :
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(x_train, Y_train, epochs=5, batch_size=32)
# The problem occurs on the following line.
pred = model.predict(x_test)
# Index of the highest-scoring class for every sample.
y_pred = pred.argmax(axis=1)
model summary
Your x_test should have same dimensions as x_train.
x_train = (125973, 122, 1)
x_test = (22544, 116, 1) # the second parameter must match the train set
Code sample:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras.layers import *
from tensorflow.keras import *
# Random stand-in data with the shapes from the question, except that the
# test features use the same length (122) as the training features.
n_train, n_test, n_features = 125973, 22544, 122
x1 = np.random.uniform(100, size=(n_train, n_features, 1))
x2 = np.random.uniform(100, size=(n_test, n_features, 1))
y1 = np.random.randint(100, size=(n_train, 5), dtype=np.int32)
y2 = np.random.randint(2, size=(n_test,), dtype=np.int32)


def create_model2():
    """Build and compile the 1-D CNN; the input length is read from x1."""
    stack = [
        Convolution1D(64, 3, padding="same", activation="relu", input_shape=(x1.shape[1], 1)),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(128, activation="relu"),
        Dropout(0.5),
        Dense(5, activation="softmax"),
    ]
    net = Sequential(stack)
    net.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return net


model = create_model2()
# Write a diagram of the layers, including their shapes, to a PNG file.
tf.keras.utils.plot_model(model, 'my_first_model.png', show_shapes=True)
Your model looks like this:
Now, if you use your test set to create your model, keeping its dimensions as (22544, 116, 1),
you get a model that looks like this:
As the dimensions are different the expected input and output of each layers are different.
When you have appropriate test dimensions the output works as expected:
pred = model.predict(x2)
pred
Output:
array([[1., 0., 0., 0., 0.],
[1., 0., 0., 0., 0.],
[1., 0., 0., 0., 0.],
...,
[1., 0., 0., 0., 0.],
[1., 0., 0., 0., 0.],
[1., 0., 0., 0., 0.]], dtype=float32)
Problem: The problem is that your test set does not have the same dimensions as your training set. The test set should look as if you took a sample from your training set. So if your training set has the dimensions x_train.shape = (125973, 122) and y_train.shape = (125973, 5), then your test set should have the dimensions x_test.shape = (sample_num, 122) and y_test.shape = (sample_num, 5).
Possible Solution: If you don't want to use your test set, an easy way to do testing would be a validation split in .fit().
So this: model.fit(x_train, Y_train, epochs = 5, batch_size = 32)
would turn into this: model.fit(x_train, Y_train, epochs = 5, batch_size = 32, validation_split=0.2)
This would chop off 20% of your training data and use that for testing. Then after every epoch, TensorFlow will print how the network performed on that validation data so that you can see how your model performs on data it has never seen before.

Eager tf.GradientTape() returns only Nones

I am trying to calculate the gradients with TensorFlow in eager mode, but
tf.GradientTape() returns only None values. I cannot understand why.
The gradients are calculated in the update_policy () function.
The output of the line:
grads = tape.gradient(loss, self.model.trainable_variables)
is
{list}<class 'list'>:[None, None, ... ,None]
Here is the code.
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
import numpy as np
# Switch TensorFlow to eager execution and print whether it is active.
tf.enable_eager_execution()
print(tf.executing_eagerly())
# Session configuration with the GPU allow_growth option turned on
# (presumably so GPU memory is allocated on demand; confirm against the
# ConfigProto docs).
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# Create a session with that configuration and register it with the Keras backend.
sess = tf.Session(config=config)
set_session(sess)
class PGEagerAtariNetwork:
    """Policy network: three convolutional layers, three dense layers with
    dropout, and a final layer of action logits, trained with Adam."""

    def __init__(self, state_space, action_space, lr, gamma):
        """Store the settings, build the Keras model and create the optimizer."""
        self.state_space = state_space
        self.action_space = action_space
        self.gamma = gamma

        layers = tf.keras.layers
        conv_stack = [
            layers.Conv2D(filters=32, kernel_size=[8, 8], strides=[4, 4], activation='relu',
                          input_shape=(84, 84, 4,),
                          name='conv1'),
            layers.Conv2D(filters=64, kernel_size=[4, 4], strides=[2, 2], activation='relu', name='conv2'),
            layers.Conv2D(filters=128, kernel_size=[4, 4], strides=[2, 2], activation='relu', name='conv3'),
            layers.Flatten(name='flatten'),
        ]
        dense_stack = [
            layers.Dense(units=512, activation='relu', name='fc1'),
            layers.Dropout(rate=0.4, name='dr1'),
            layers.Dense(units=256, activation='relu', name='fc2'),
            layers.Dropout(rate=0.3, name='dr2'),
            layers.Dense(units=128, activation='relu', name='fc3'),
            layers.Dropout(rate=0.1, name='dr3'),
        ]
        # One raw (un-activated) output per action.
        logits_layer = layers.Dense(units=self.action_space, activation=None, name='logits')

        self.model = tf.keras.Sequential(conv_stack + dense_stack + [logits_layer])
        self.model.summary()
        self.optimizer = tf.train.AdamOptimizer(learning_rate=lr)

    def get_probs(self, s):
        """Return softmax action probabilities for a single state `s`."""
        batch = s[np.newaxis, :]  # add a leading batch axis
        return tf.nn.softmax(self.model.predict(batch)).numpy()

    def update_policy(self, s, r, a):
        """Compute the reward-weighted cross-entropy loss and apply one optimizer step."""
        with tf.GradientTape() as tape:
            # NOTE: predict() returns a NumPy array, so the forward pass is not
            # recorded on the tape and every gradient below comes back as None.
            logits = self.model.predict(s)
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=a, logits=logits)
            # The rewards act as constant weights; no gradient flows through r.
            weighted = cross_entropy * tf.stop_gradient(r)
            loss = tf.reduce_mean(weighted)
        variables = self.model.trainable_variables
        grads = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
Your model has no forward pass recorded on the tape. The Model.predict() method returns a NumPy array without recording the forward pass on the tape. Take a look at this example:
Given a following data and model:
import tensorflow as tf
import numpy as np
# One sample with two features, all ones.
ones = np.ones((1, 2), np.float32)
x_train = tf.convert_to_tensor(ones, dtype=tf.float32)
# One-hot label for the second class.
y_train = tf.convert_to_tensor([[0, 1]])
# A single dense layer mapping 2 inputs to 2 logits.
dense = tf.keras.layers.Dense(2, input_shape=(2, ))
model = tf.keras.models.Sequential([dense])
First we use predict():
with tf.GradientTape() as tape:
    # Forward pass through predict().
    logits = model.predict(x_train)
    print('`logits` has type {0}'.format(type(logits)))
    # `logits` has type <class 'numpy.ndarray'>
    reduced = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_train, logits=logits)
    )
grads = tape.gradient(reduced, model.trainable_variables)
print('grads are: {0}'.format(grads))
# grads are: [None, None]
Now we use model's input:
with tf.GradientTape() as tape:
    # Forward pass by calling the model directly.
    logits = model(x_train)
    print('`logits` has type {0}'.format(type(logits)))
    # `logits` has type <class 'tensorflow.python.framework.ops.EagerTensor'>
    reduced = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_train, logits=logits)
    )
grads = tape.gradient(reduced, model.trainable_variables)
print('grads are: {0}'.format(grads))
# grads are: [<tf.Tensor: id=2044, shape=(2, 2), dtype=float32, numpy=
# array([[ 0.77717704, -0.777177 ],
# [ 0.77717704, -0.777177 ]], dtype=float32)>, <tf.Tensor: id=2042,
# shape=(2,), dtype=float32, numpy=array([ 0.77717704, -0.777177 ], dtype=float32)>]
So use model's __call__() (i.e. model(x)) for forward pass and not predict().