Are there any code examples for using Tensorflow's sampled_softmax_loss or nce_loss functions with multi-label problems? That is, where num_true is more than one?
What follows is my attempt to create a wrapper for nce_loss() and sampled_softmax_loss() based Jeff Chao's work (https://github.com/joelthchao/keras). In the following code, if you change num_true to 1, both samplers work. But with num_true > 1, both samplers throw slightly different exceptions involving tensor shape.
The main program is a simple autoencoder that replicates the class of problem I'm trying to solve: multi-label testing with a huge number of output classes, with a Zipfian distribution. Comments and stack trace at the end.
import tensorflow as tf
import numpy as np
import keras.layers as layers
from keras.models import Model
from keras import backend as K
from keras import initializers,regularizers,constraints
from keras.models import Model
from keras.layers import Dense
from keras.engine.base_layer import InputSpec
from keras.engine.topology import Layer
from keras.engine.input_layer import Input
from tensorflow.keras.optimizers import Nadam, Adam
np.random.seed(10)
import random
def nce_loss_function(weights, biases, labels, inputs, num_sampled, num_classes, num_true):
if K.learning_phase() == 1:
loss = tf.nn.nce_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true,
partition_strategy="div")
else:
logits = tf.matmul(inputs, tf.transpose(weights))
logits = tf.nn.bias_add(logits, biases)
labels_one_hot = tf.one_hot(labels, num_classes)
loss = tf.nn.sigmoid_cross_entropy_with_logits(
labels=labels_one_hot[:][0][:],
logits=logits)
loss = tf.reduce_sum(loss, axis=1)
return loss
def sampled_softmax_loss_function(weights, biases, labels, inputs, num_sampled, num_classes, num_true):
if K.learning_phase() == 1:
return tf.nn.sampled_softmax_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true,
partition_strategy="div")
else:
logits = tf.matmul(inputs, tf.transpose(weights))
logits = tf.nn.bias_add(logits, biases)
labels_one_hot = tf.one_hot(labels, num_classes)
loss = tf.nn.softmax_cross_entropy_with_logits_v2(
labels=labels_one_hot,
logits=logits)
return loss
class Sampling(Layer):
"""Regular densely-connected NN layer with various sampling Loss.
`Sampling` implements the operation:
`output = dot(input, kernel) + bias`
`kernel` is a weights matrix created by the layer, and `bias` is a bias vector
created by the layer. Also, it adds a sampling Loss to the model.
See [reference](http://proceedings.mlr.press/v9/gutmann10a/gutmann10a.pdf).
# Example
```python
inputs = Input(shape=(4,))
target = Input(shape=(1,)) # sparse format, e.g. [1, 3, 2, 6, ...]
net = Dense(8)(inputs)
net = Sampling(units=128, num_sampled=32)([net, target])
model = Model(inputs=[inputs, target], outputs=net)
model.compile(optimizer='adam', loss=None)
x = np.random.rand(1000, 4)
y = np.random.randint(128, size=1000)
model.fit([x, y], None)
```
# Arguments
units: Positive integer, dimensionality of the output space (num classes).
num_sampled: Positive integer, number of classes to sample in Sampling Loss.
type: 'sampled_softmax', 'nce'
num_true: Max # of positive classes, pad to this for variable inputs
kernel_initializer: Initializer for the `kernel` weights matrix
(see [initializers](../initializers.md)).
bias_initializer: Initializer for the bias vector
(see [initializers](../initializers.md)).
kernel_regularizer: Regularizer function applied to
the `kernel` weights matrix
(see [regularizer](../regularizers.md)).
bias_regularizer: Regularizer function applied to the bias vector
(see [regularizer](../regularizers.md)).
activity_regularizer: Regularizer function applied to
the output of the layer (its "activation").
(see [regularizer](../regularizers.md)).
kernel_constraint: Constraint function applied to
the `kernel` weights matrix
(see [constraints](../constraints.md)).
bias_constraint: Constraint function applied to the bias vector
(see [constraints](../constraints.md)).
# Input shape
Two tensors. First one is 2D tensor with shape: `(batch_size, input_dim)`.
Second one is 1D tensor with length `batch_size`
# Output shape
2D tensor with shape: `(batch_size, units)`.
For instance, for a 2D input with shape `(batch_size, input_dim)`,
the output would have shape `(batch_size, units)`.
"""
def __init__(self,
units,
num_sampled,
type='sampled_softmax',
num_true=1,
kernel_initializer='glorot_uniform',
bias_initializer='zeros',
kernel_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
kernel_constraint=None,
bias_constraint=None,
**kwargs):
if 'input_shape' not in kwargs and 'input_dim' in kwargs:
kwargs['input_shape'] = (kwargs.pop('input_dim'),)
super(Sampling, self).__init__(**kwargs)
self.units = units
self.num_sampled = num_sampled
if self.num_sampled > self.units:
raise Exception('num_sample: {} cannot be greater than units: {}'.format(
num_sampled, units))
self.type = type
if not (self.type == 'nce' or self.type == 'sampled_softmax'):
raise Exception('type {} is not a valid sampling loss type'.format(type))
self.num_true = num_true
self.kernel_initializer = initializers.get(kernel_initializer)
self.bias_initializer = initializers.get(bias_initializer)
self.kernel_regularizer = regularizers.get(kernel_regularizer)
self.bias_regularizer = regularizers.get(bias_regularizer)
self.activity_regularizer = regularizers.get(activity_regularizer)
self.kernel_constraint = constraints.get(kernel_constraint)
self.bias_constraint = constraints.get(bias_constraint)
self.input_spec = [InputSpec(min_ndim=2), InputSpec(min_ndim=1)]
self.supports_masking = True
def build(self, input_shape):
assert len(input_shape) == 2
input_dim = input_shape[0][-1]
self.kernel = self.add_weight(shape=(input_dim, self.units),
initializer=self.kernel_initializer,
name='kernel',
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint)
self.bias = self.add_weight(shape=(self.units,),
initializer=self.bias_initializer,
name='bias',
regularizer=self.bias_regularizer,
constraint=self.bias_constraint)
self.input_spec[0] = InputSpec(min_ndim=2, axes={-1: input_dim})
self.built = True
def call(self, inputs):
pred, target = inputs
output = K.dot(pred, self.kernel)
output = K.bias_add(output, self.bias, data_format='channels_last')
# TODO : check train or test mode
if self.type == 'nce':
nce_loss = nce_loss_function(
K.transpose(self.kernel), self.bias, target, pred, self.num_sampled, self.units, self.num_true)
self.add_loss(K.mean(nce_loss))
else:
sampled_softmax_loss = sampled_softmax_loss_function(
K.transpose(self.kernel), self.bias, target, pred, self.num_sampled, self.units, self.num_true)
self.add_loss(K.mean(sampled_softmax_loss))
return output
def compute_output_shape(self, input_shape):
assert input_shape and len(input_shape) == 2
assert input_shape[0][-1]
output_shape = list(input_shape[0])
output_shape[-1] = self.units
return tuple(output_shape)
def get_config(self):
config = {
'units': self.units,
'num_sampled': self.num_sampled,
'kernel_initializer': initializers.serialize(self.kernel_initializer),
'bias_initializer': initializers.serialize(self.bias_initializer),
'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
'bias_regularizer': regularizers.serialize(self.bias_regularizer),
'activity_regularizer': regularizers.serialize(self.activity_regularizer),
'kernel_constraint': constraints.serialize(self.kernel_constraint),
'bias_constraint': constraints.serialize(self.bias_constraint)
}
base_config = super(Sampling, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def fill_zipf(length, num_classes, num_true=1):
data_onehot = np.zeros((length, num_classes), dtype='float32')
data_labels = np.zeros((length, num_true), dtype='int32')
# all indexes outside of num_classes scattered in existing space
rand = np.random.zipf(1.3, length * num_true) % num_classes
for i in range(length):
for j in range(num_true):
k = rand[i]
data_onehot[i][k] = 1.0
data_labels[i][j] = k
return data_onehot, data_labels
# number of test samples
num_train = 32*500
num_test = 32*500
num_valid = 100
num_epochs = 5
num_hidden = 10
# number of classes
num_classes = 2000
# number of samples for NCE
num_sampled = 24
# number of labels
num_true = 1
# type of negative sampler
sampler_type='sampled_softmax'
inputs = Input(shape=(num_classes,))
target = Input(shape=(num_true,), dtype=tf.int32) # sparse format, e.g. [1, 3, 2, 6, ...]
net = Dense(num_classes)(inputs)
net = Dense(num_hidden, activation='relu')(net)
net = Sampling(units=num_classes, num_sampled=num_sampled, type=sampler_type)([net, target])
model = Model(inputs=[inputs, target], outputs=net)
model.compile(optimizer='adam', loss=None, metrics=['binary_crossentropy'])
model.summary()
train_input, train_output = fill_zipf(num_train, num_classes, num_true)
valid_input, valid_output = fill_zipf(num_valid, num_classes, num_true)
history = model.fit([train_input, train_output], None,
validation_data=([valid_input, valid_output], None),
epochs=num_epochs, verbose=2)
test_input, test_output = fill_zipf(num_test, num_classes, num_true)
predicts = model.predict([test_input, test_output], batch_size=32)
count = 0
for test in range(num_test):
pred = predicts[test]
imax = np.argmax(pred)
if imax == test_output[test]:
count += 1
print("Found {0} out of {1}".format(count/num_true, num_test))
This test works for the single-label case, both 'nce' and 'sampled_softmax'. But, when I set num_true to greater than one, both NCE and Sampled Softmax throw a tensor mismatch exception.
num_true=3
width=2000
sampler_type='sampled_softmax'
With these parameters, for Sampled Softmax, the code throws this exception trace:
File "postable_sampling_tests.py", line 220, in <module>
epochs=num_epochs, verbose=2)
File "/opt/ds/lib/python3.6/site-packages/keras/engine/training.py", line 1039, in fit
validation_steps=validation_steps)
File "/opt/ds/lib/python3.6/site-packages/keras/engine/training_arrays.py", line 199, in fit_loop
outs = f(ins_batch)
File "/opt/ds/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2715, in __call__
return self._call(inputs)
File "/opt/ds/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2675, in _call
fetched = self._callable_fn(*array_vals)
File "/opt/ds/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1399, in __call__
run_metadata_ptr)
File "/opt/ds/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 526, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: logits and labels must be broadcastable: logits_size=[32,2000] labels_size=[96,2000]
[[{{node sampling_1/softmax_cross_entropy_with_logits}} = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _class=["loc:#train...s_grad/mul"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](sampling_1/BiasAdd_1, sampling_1/softmax_cross_entropy_with_logits/Reshape_1)]]
32 is the batch_size. Clearly, something is num_true * batch_size but I don't know how to fix this.
If we change the sampler to NCE:
num_true=3
width=2000
sampler_type='nce'
The final two lines of the exception stack:
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [32,2000] vs. [3,2000]
[[{{node sampling_1/logistic_loss/mul}} = Mul[T=DT_FLOAT, _class=["loc:#training/Adam/gradients/sampling_1/logistic_loss/mul_grad/Reshape"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](sampling_1/BiasAdd_1, sampling_1/strided_slice_2)]]
In this case, the labels have not been multiplied by batch_size.
What am I doing wrong? How can I get this wrapper system working for multi-label cases?
You can also use samples softmax with multiple labels, you just have to take the mean of each samples softmax
embeddings = tf.get_variable( 'embeddings',
initializer= tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
softmax_weights = tf.get_variable( 'softmax_weights',
initializer= tf.truncated_normal([vocabulary_size, embedding_size],
stddev=1.0 / math.sqrt(embedding_size)))
softmax_biases = tf.get_variable('softmax_biases',
initializer= tf.zeros([vocabulary_size]), trainable=False )
embed = tf.nn.embedding_lookup(embeddings, train_dataset) #train data set is
embed_reshaped = tf.reshape( embed, [batch_size*num_inputs, embedding_size] )
segments= np.arange(batch_size).repeat(num_inputs)
averaged_embeds = tf.segment_mean(embed_reshaped, segments, name=None)
loss = tf.reduce_mean(
tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases, inputs=averaged_embeds,
labels=train_labels, num_sampled=num_sampled, num_classes=vocabulary_size))
optimizer = tf.train.AdagradOptimizer(1.0).minimize(loss) #Original learning rate was 1.0
from
https://github.com/Santosh-Gupta/Research2Vec/blob/master/Research2VecTraining2.ipynb
Related
I am trying to implement a deep dream model using ResNet. there are many problems appear, one of them is the proble says that the object being unpacked is of type "NoneType," which means it has a value of None. In Python, you cannot unpack an object of type "NoneType" because it is not iterable. sometimes there is an error says that the value being converted is of type "NoneType" and that this type is unsupported. This means that TensorFlow cannot create an EagerTensor from a value of None.
this is a piece of code we try to use for solving error but it fail. and the second code is the all code
def deep_dream(input_image, model, steps=100, step_size=0.01):
# Define the loss and the optimizer
loss, intermediate_layer_model = calc_loss(input_image, model)
optimizer = tf.optimizers.SGD(learning_rate=step_size)
# Keep a list to hold the evolution of the image
image_list = []
# Run the optimization
for i in range(steps):
with tf.GradientTape() as tape:
tape.watch(input_image)
loss = calc_loss(input_image, model)[0]
grads, = tape.gradient(loss, input_image)
grads = tape.gradient(loss, input_image)
if grads is None:
return
optimizer.apply_gradients([(grads, input_image)])
image_list.append(input_image.numpy().copy())
# Return the final image
return input_image
# Load the ResNet50 model
#model = ResNet50(weights='imagenet')
from tensorflow import keras
model = keras.applications.ResNet50(weights='imagenet', include_top=False)
# Iterate over the layers in the ResNet50 model
for layer in model.layers:
print(f'{layer.name}---> {layer.output_shape}')
import cv2
# Function to calculate the loss
def calc_loss(input_image, model):
input_image_batch = tf.expand_dims(input_image, axis=0)
preprocessed_input = preprocess_input(input_image_batch.numpy().copy())
# Get the activations of a specific layer
layer_name = "conv5_block2_1_bn"
intermediate_layer_model = tf.keras.models.Model(inputs=model.input, outputs=model.get_layer(layer_name).output)
intermediate_output = intermediate_layer_model(preprocessed_input)
# Define the loss
loss = tf.math.reduce_mean(intermediate_output)
return loss, intermediate_layer_model
def deep_dream(input_image, model, steps=100, step_size=0.01):
for i in range(steps):
with tf.GradientTape() as tape:
# Compute the loss
loss, model_ = calc_loss(input_image, model)
# Get the gradients of the input image with respect to the loss
grads = tape.gradient(loss, input_image)
# Normalize the gradients
grads /= tf.math.reduce_std(grads) + 1e-8
# Update the input image
input_image += grads * step_size
input_image = tf.clip_by_value(input_image, 0, 255)
return input_image
# Load an image
# Load an image
#img_path = '/content/drive/MyDrive/Baghdad images/market.png'
#img = cv2.imread(img_path)
#img = cv2.resize(img, (224, 224))
#img = np.array(img, dtype=float)
# Preprocess the image
#original_image = np.copy(img)
#img = preprocess_input(np.expand_dims(img, axis=0))
img_path = cv2.resize(cv2.imread('/content/drive/MyDrive/Baghdad images/market.png'), (224, 224))
#img = image.load_img(img_path, target_size=(224, 224))
img = image.img_to_array(img)
# Preprocess the image
original_image = np.copy(img)
#img = preprocess_input(np.expand_dims(img, axis=0))
#input_image.set_shape([1,224,224,3])
#input_image = tf.constant(img, dtype=tf.float32)
#input_image = tf.expand_dims(input_image, axis=0)
#input_image = tf.squeeze(input_image, axis=0)
# Convert the image to a Tensor
input_image = tf.constant(img, dtype=tf.float32)
# Run the deep dream algorithm
dream_img = deep_dream(input_image, model)
# Deprocess the image
dream_img = tf.clip_by_value(dream_img[0], 0, 255).numpy().astype('uint8')
# Plot the original and dream images
plt.figure(figsize=(10,10))
plt.subplot(121)
plt.imshow(original_image.astype('uint8'))
plt.axis('off')
plt.title('Original Image')
plt.subplot(122)
plt.imshow(dream_img)
plt.axis('off')
plt.title('Dream Image')
plt.show()
I try to build deep dream model with ResNet to generate deep dream image, but the result is an error.
this error
TypeError Traceback (most recent call last)
<ipython-input-95-b14eebcfe038> in <module>
1 # Run the deep dream algorithm
----> 2 dream_img = deep_dream(input_image, model)
<ipython-input-92-27c78a6e0618> in deep_dream(input_image, model, steps, step_size)
12 tape.watch(input_image)
13 loss = calc_loss(input_image, model)[0]
---> 14 grads, = tape.gradient(loss, input_image)
15 grads = tape.gradient(loss, input_image)
16 if grads is None:
TypeError: cannot unpack non-iterable NoneType object
and this error
ValueError Traceback (most recent call last)
<ipython-input-48-09c4ef33f56c> in <module>
72
73 # Run the deep dream algorithm
---> 74 dream_img = deep_dream(input_image, model)
75
76 # Deprocess the image
16 frames
/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/constant_op.py in convert_to_eager_tensor(value, ctx, dtype)
96 dtype = dtypes.as_dtype(dtype).as_datatype_enum
97 ctx.ensure_initialized()
---> 98 return ops.EagerTensor(value, ctx.device_name, dtype)
99
100
ValueError: Attempt to convert a value (None) with an unsupported type (<class 'NoneType'>) to a Tensor.
I am working on a custom problem, and i have to change the fully connected layer (Dense with softmax), My model code is something like this (with Keras Framework):
.......
batch_size = 8
inputs = tf.random.uniform(shape=[batch_size,1024,256],dtype=tf.dtypes.float32)
preds = Dense(num_classes,activation='softmax')(x) #final layer with softmax activation
....
model = Model(inputs=base_model.input,outputs=preds)
So, i have to change the Code of Dense Layer to output a Tensor of probabilities with the shape of [batch_size, 1024, num_classes], without using a for loop, i need it to be optimized and not a consuming time function
The Dense code version that i want to change:
class Dense(Layer):
"""Just your regular densely-connected NN layer.
`Dense` implements the operation:
`output = activation(dot(input, kernel) + bias)`
where `activation` is the element-wise activation function
passed as the `activation` argument, `kernel` is a weights matrix
created by the layer, and `bias` is a bias vector created by the layer
(only applicable if `use_bias` is `True`).
Note: if the input to the layer has a rank greater than 2, then
it is flattened prior to the initial dot product with `kernel`.
# Example
```python
# as first layer in a sequential model:
model = Sequential()
model.add(Dense(32, input_shape=(16,)))
# now the model will take as input arrays of shape (*, 16)
# and output arrays of shape (*, 32)
# after the first layer, you don't need to specify
# the size of the input anymore:
model.add(Dense(32))
```
# Arguments
units: Positive integer, dimensionality of the output space.
activation: Activation function to use
(see [activations](../activations.md)).
If you don't specify anything, no activation is applied
(ie. "linear" activation: `a(x) = x`).
use_bias: Boolean, whether the layer uses a bias vector.
kernel_initializer: Initializer for the `kernel` weights matrix
(see [initializers](../initializers.md)).
bias_initializer: Initializer for the bias vector
(see [initializers](../initializers.md)).
kernel_regularizer: Regularizer function applied to
the `kernel` weights matrix
(see [regularizer](../regularizers.md)).
bias_regularizer: Regularizer function applied to the bias vector
(see [regularizer](../regularizers.md)).
activity_regularizer: Regularizer function applied to
the output of the layer (its "activation").
(see [regularizer](../regularizers.md)).
kernel_constraint: Constraint function applied to
the `kernel` weights matrix
(see [constraints](../constraints.md)).
bias_constraint: Constraint function applied to the bias vector
(see [constraints](../constraints.md)).
# Input shape
nD tensor with shape: `(batch_size, ..., input_dim)`.
The most common situation would be
a 2D input with shape `(batch_size, input_dim)`.
# Output shape
nD tensor with shape: `(batch_size, ..., units)`.
For instance, for a 2D input with shape `(batch_size, input_dim)`,
the output would have shape `(batch_size, units)`.
"""
def __init__(self, units,
activation=None,
use_bias=True,
kernel_initializer='glorot_uniform',
bias_initializer='zeros',
kernel_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
kernel_constraint=None,
bias_constraint=None,
**kwargs):
if 'input_shape' not in kwargs and 'input_dim' in kwargs:
kwargs['input_shape'] = (kwargs.pop('input_dim'),)
super(Dense, self).__init__(**kwargs)
self.units = units
self.activation = activations.get(activation)
self.use_bias = use_bias
self.kernel_initializer = initializers.get(kernel_initializer)
self.bias_initializer = initializers.get(bias_initializer)
self.kernel_regularizer = regularizers.get(kernel_regularizer)
self.bias_regularizer = regularizers.get(bias_regularizer)
self.activity_regularizer = regularizers.get(activity_regularizer)
self.kernel_constraint = constraints.get(kernel_constraint)
self.bias_constraint = constraints.get(bias_constraint)
self.input_spec = InputSpec(min_ndim=2)
self.supports_masking = True
def build(self, input_shape):
assert len(input_shape) >= 2
input_dim = input_shape[-1]
self.kernel = self.add_weight(shape=(input_dim, self.units),
initializer=self.kernel_initializer,
name='kernel',
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint)
if self.use_bias:
self.bias = self.add_weight(shape=(self.units,),
initializer=self.bias_initializer,
name='bias',
regularizer=self.bias_regularizer,
constraint=self.bias_constraint)
else:
self.bias = None
self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
self.built = True
def call(self, inputs):
output = K.dot(inputs, self.kernel)
if self.use_bias:
output = K.bias_add(output, self.bias)
if self.activation is not None:
output = self.activation(output)
return output
def compute_output_shape(self, input_shape):
assert input_shape and len(input_shape) >= 2
assert input_shape[-1]
output_shape = list(input_shape)
output_shape[-1] = self.units
return tuple(output_shape)
def get_config(self):
config = {
'units': self.units,
'activation': activations.serialize(self.activation),
'use_bias': self.use_bias,
'kernel_initializer': initializers.serialize(self.kernel_initializer),
'bias_initializer': initializers.serialize(self.bias_initializer),
'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
'bias_regularizer': regularizers.serialize(self.bias_regularizer),
'activity_regularizer': regularizers.serialize(self.activity_regularizer),
'kernel_constraint': constraints.serialize(self.kernel_constraint),
'bias_constraint': constraints.serialize(self.bias_constraint)
}
base_config = super(Dense, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
There are three different ways in which this can be done (that I can think of). If you want to have a single dense layer, that maps a vector of 256 elements to a vector of num_classes elements, and apply it all across your batch of data (that is, use the same 256 x num_classes matrix of weights for every sample), then you don't need to do anything special, just use a regular Dense layer:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
layer = Dense(num_classes, activation='softmax')
out = layer(inp)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2570
Another way would be to have a single huge Dense layer that takes all 1024 * 256 values in at the same time and produces all 1024 * num_classes values at the output, that is, a layer with a matrix of weights with shape (1024 * 256) x (1024 * num_classes) (in the order if gigabytes of memory!). This is easy to do too, although it seems unlikely to be what you need:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import Flatten, Dense, Reshape, Softmax
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
res = Flatten()(inp)
# This takes _a lot_ of memory!
layer = Dense(1024 * num_classes, activation=None)
out_res = layer(res)
# Apply softmax after reshaping
out_preact = Reshape((-1, num_classes))(out_res)
out = Softmax()(out_preact)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2684364800
Finally, you may want to have a set of 1024 weight matrices, each one applied to the corresponding sample in the input, which would imply an array of weights with shape (1024, 256, num_classes). I don't think this can be done with one of the standard Keras layers (or don't know how to)1, but it's easy enough to write a custom layer based on Dense to do that:
import tensorflow as tf
from tensorflow.keras.layers import Dense, InputSpec
class Dense2D(Dense):
def __init__(self, *args, **kwargs):
super(Dense2D, self).__init__(*args, **kwargs)
def build(self, input_shape):
assert len(input_shape) >= 3
input_dim1 = input_shape[-2]
input_dim2 = input_shape[-1]
self.kernel = self.add_weight(shape=(input_dim1, input_dim2, self.units),
initializer=self.kernel_initializer,
name='kernel',
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint)
if self.use_bias:
self.bias = self.add_weight(shape=(input_dim1, self.units),
initializer=self.bias_initializer,
name='bias',
regularizer=self.bias_regularizer,
constraint=self.bias_constraint)
else:
self.bias = None
self.input_spec = InputSpec(min_ndim=3, axes={-2: input_dim1, -1: input_dim2})
self.built = True
def call(self, inputs):
# Multiply each set of weights with each input element
output = tf.einsum('...ij,ijk->...ik', inputs, self.kernel)
if self.use_bias:
output += self.bias
if self.activation is not None:
output = self.activation(output)
return output
def compute_output_shape(self, input_shape):
assert input_shape and len(input_shape) >= 3
assert input_shape[-1]
output_shape = list(input_shape)
output_shape[-1] = self.units
return tuple(output_shape)
You would then use it like this:
import tensorflow as tf
from tensorflow.keras import Input
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
layer = Dense2D(num_classes, activation='softmax')
out = layer(inp)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2631680
1: As today points out in the comments, you can actually use a LocallyConnected1D layer to do the same that I tried to do with my Dense2D layer. It is as simple as this:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import LocallyConnected1D
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
layer = LocallyConnected1D(num_classes, 1, activation='softmax')
out = layer(inp)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2631680
I'm trying to work through the Google BERT tutorial in Google Colab, and am having a hard time following some of its steps.
Specifically, there is a function called create_model which reads like this:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
num_labels):
"""Creates a classification model."""
bert_module = hub.Module(
BERT_MODEL_HUB,
trainable=True)
bert_inputs = dict(
input_ids=input_ids,
input_mask=input_mask,
segment_ids=segment_ids)
bert_outputs = bert_module(
inputs=bert_inputs,
signature="tokens",
as_dict=True)
# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_outputs" for token-level output.
output_layer = bert_outputs["pooled_output"]
hidden_size = output_layer.shape[-1].value
# Create our own layer to tune for politeness data.
output_weights = tf.get_variable(
"output_weights", [num_labels, hidden_size],
initializer=tf.truncated_normal_initializer(stddev=0.02))
output_bias = tf.get_variable(
"output_bias", [num_labels], initializer=tf.zeros_initializer())
with tf.variable_scope("loss"):
# Dropout helps prevent overfitting
output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
logits = tf.matmul(output_layer, output_weights, transpose_b=True)
logits = tf.nn.bias_add(logits, output_bias)
log_probs = tf.nn.log_softmax(logits, axis=-1)
# Convert labels into one-hot encoding
one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
# If we're predicting, we want predicted labels and the probabiltiies.
if is_predicting:
return (predicted_labels, log_probs)
# If we're train/eval, compute loss between predicted and actual label
per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
loss = tf.reduce_mean(per_example_loss)
return (loss, predicted_labels, log_probs)
This would be fine, but when I look for where the create_model function is called in the notebook, the origin of the input_ids, input_mask and segment_ids arguments is not clear to me.
This is where the function is referenced later on in the notebook:
def model_fn_builder(num_labels, learning_rate, num_train_steps,
num_warmup_steps):
"""Returns `model_fn` closure for TPUEstimator."""
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
"""The `model_fn` for TPUEstimator."""
input_ids = features["input_ids"]
input_mask = features["input_mask"]
segment_ids = features["segment_ids"]
label_ids = features["label_ids"]
is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)
# TRAIN and EVAL
if not is_predicting:
(loss, predicted_labels, log_probs) = create_model(
is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)
The problem here is that the features argument is not listed as an argument in the parent function, and it's not defined anywhere else in the notebook. So I'm not sure why it works, and even more importantly, I'm not sure what things like features['input_ids'] are supposed to represent. Without this being clear to me, it'll be difficult to make sense of what BERT actually does.
Thank you for your help.
I am using the tf.estimator API and I have the following model_fn function:
def model_fn(features, labels, mode, params):
labels = tf.reshape(labels, (-1, 1))
model = word2vec.create_model(features, params)
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode, predictions=model)
loss = sampled_softmax_loss.create_loss(model['softmax_w'],
model['softmax_b'],
model['relu_layer_1'],
labels,
params['softmax_sample'],
params['vocabulary_size'])
cost = tf.reduce_mean(loss)
if mode == tf.estimator.ModeKeys.EVAL:
return tf.estimator.EstimatorSpec(mode=mode, loss=cost)
optimizer = adam_optimizer.create_optimizer(params['learning_rate'])
train_operation = optimizer.minimize(cost)
if mode == tf.estimator.ModeKeys.TRAIN:
return tf.estimator.EstimatorSpec(mode=mode, loss=cost, train_op=train_operation)
raise RuntimeError('Not a valid Mode value')
The word2vec.create_model function is given below. The function returns a python dictionary with the interesting nodes of the network (e.g. the embeddings matrix, the softmax weight and bias for training etc.).
def create_model(features, hyper_params):
model = {}
vocabulary_size = hyper_params['vocabulary_size']
hidden_size = hyper_params['hidden_size']
feature_columns = hyper_params['feature_columns']
with tf.variable_scope('word2vec'):
# Creating the Embedding layer
net = tf.feature_column.input_layer(features, feature_columns)
# Creating the hidden layer
net = dense_layer.create_layer(model, net, hidden_size)
# Creating the SoftMax weight and bias variables to use in the sampled loss function
softmax_w = tf.Variable(tf.truncated_normal((vocabulary_size, hidden_size), stddev=0.1), name='softmax_weights')
softmax_b = tf.Variable(tf.zeros(vocabulary_size), name='softmax_bias')
model['softmax_w'] = softmax_w
model['softmax_b'] = softmax_b
return model
Last but not least, my main function, which in turn I use the tf.app.run(main) command to run:
def main():
path = os.path.join('data', 'data.csv')
(train_x, train_y), (test_x, test_y) = prepare_data.load_data(path, path, columns, columns[-1])
vocabulary_size = len(train_x[columns[0]].unique())
feature_columns = []
for key in train_x.keys():
item_id = tf.feature_column.categorical_column_with_identity(key=key, num_buckets=vocabulary_size)
feature_columns.append(tf.feature_column.embedding_column(item_id, 512))
classifier = tf.estimator.Estimator(
model_fn=model_fn,
params={
'feature_columns': feature_columns,
'vocabulary_size': vocabulary_size,
'hidden_size': 256,
'learning_rate': 0.001,
'softmax_sample': 100,
})
print('Training the classifier...')
classifier.train(input_fn=lambda: prepare_data.train_input_fn(train_x, train_y, 128), steps=2)
print('Evaluating on test dataset...')
eval_result = classifier.evaluate(input_fn=lambda: prepare_data.eval_input_fn(test_x, test_y, 128))
print('Printing results...')
print(eval_result)
When I run this, I get a ValueError: GraphDef cannot be larger than 2GB. error. Why is that? What can am I doing wrong?
Below is my train_input_fn:
def train_input_fn(features, labels, batch_size):
def gen():
for f, l in zip(features, labels):
yield f, l
ds = tf.data.Dataset.from_generator(gen, (tf.int64, tf.int64), (tf.TensorShape([None]), tf.TensorShape([None])))
ds = ds.repeat().batch(batch_size)
feature, label = ds.make_one_shot_iterator().get_next()
return {"Input": feature}, label
The dataset is a simple csv like below:
Input Label
0 12600 838
1 12600 4558
2 12600 838
3 12600 4558
4 838 12600
Dataset.from_tensor_slices adds the whole dataset to the computational graph (see details), so better use Dataset.from_generator. I have shown an example of how to do it using mnist:How to load MNIST via TensorFlow (including download)?
I'm a newbie of Tensorflow. I have created CNNs of Tensorflow followingthis topic : A Guide to TF Layers: Building a Convolutional Neural Network
I want to create CNNs to using it for training traffic sign dataset. The dataset I use is : BelgiumTS. It includes two part, one part stores images for training, second parth stores images for testing. All of this is .ppm format.
I define a method to load the dataset :
def load_data(data_dir):
"""Load Data and return two numpy array"""
directories = [d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir,d))]
list_labels = []
list_images = []
for d in directories:
label_dir = os.path.join(data_dir,d)
file_names = [os.path.join(label_dir,f) for f in os.listdir(label_dir) if f.endswith(".ppm")]
for f in file_names:
list_images.append(skimage.data.imread(f))
list_labels.append(int(d))
#resize images to 32x32 pixel
list_images32 = [skimage.transform.resize(image,(32,32)) for image in list_images]
#Got Error "Value passed to parameter 'input' has DataType float64 not in list of allowed values: float16, float32" if I don't add this line
list_images32 = tf.cast(list_images32,tf.float32)
images = np.array(list_images32)
labels = np.asarray(list_labels,dtype=int32)
return images,labels
And this is CNNs define :
def cnn_model_fn(features, labels, mode):
#Input layer
input_layer = tf.reshape(features["x"],[-1,32,32,1])
#Convolutional layer 1
conv1 = tf.layers.conv2d(
inputs=input_layer,
filters=32,
kernel_size=[5,5],
padding="same",
activation=tf.nn.relu)
#Pooling layer 1
pool1 = tf.layers.max_pooling2d(inputs=conv1,pool_size=[2,2],strides=2)
#Convolutional layer 2
conv2 = tf.layers.conv2d(
inputs=pool1,
filters=64,
kernel_size=[5,5],
padding="same",
activation=tf.nn.relu)
#Pooling layer 2
pool2 = tf.layers.max_pooling2d(inputs=conv2,pool_size=[2,2],strides=2)
#Dense layer
pool2_flat = tf.reshape(pool2,[-1,7*7*64])
dense = tf.layers.dense(inputs=pool2_flat,units=1024,activation=tf.nn.relu)
#Dropout
dropout = tf.layers.dropout(inputs=dense,rate=0.4,training=mode == tf.estimator.ModeKeys.TRAIN)
#Logits layer
logits = tf.layers.dense(inputs=dropout,units=10)
predictions = {
"classes": tf.argmax(input=logits,axis=1),
"probabilities": tf.nn.softmax(logits,name="softmax_tensor")
}
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode,predictions=predictions)
#Calculate Loss Value
onehot_labels = tf.one_hot(indices=tf.cast(labels,tf.int32),depth=10)
loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,logits=logits)
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(
loss = loss,
global_step = tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode=mode,loss=loss,train_op=train_op)
eval_metric_ops = {
"accuracy": tf.metrics.accuracy(
labels=labels,predictions=predictions["classes"])}
return tf.estimator.EstimatorSpec(mode=mode,loss=loss,eval_metric_ops=eval_metric_ops)
I run my app in main :
def main(unused_argv):
# Load training and eval data
train_data_dir = "W:/Projects/AutoDrive/Training"
test_data_dir = "W:/Projects/AutoDrive/Testing"
images,labels = load_data(train_data_dir)
test_images,test_labels = load_data(test_data_dir)
# Create the Estimator
autoDrive_classifier = tf.estimator.Estimator(
model_fn=cnn_model_fn, model_dir="/tmp/autoDrive_convnet_model")
# Set up logging for predictions
# Log the values in the "Softmax" tensor with label "probabilities"
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(
tensors=tensors_to_log, every_n_iter=50)
# Train the model
train_input_fn = tf.estimator.inputs.numpy_input_fn(
x={"x": images},
y=labels,
batch_size=100,
num_epochs=None,
shuffle=True)
autoDrive_classifier.train(
input_fn=train_input_fn,
steps=10000,
hooks=[logging_hook])
# Evaluate the model and print results
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
x={"x": test_images},
y=test_labels,
num_epochs=1,
shuffle=False)
eval_results = autoDrive_classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)
But when I run it, I got this error : ValueError: Argument must be a dense tensor ... got shape [4575, 32, 32, 3], but wanted [4575] Did I lost something ?
Finally, this is full code :
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
import os
import skimage.data
import skimage.transform
import matplotlib
import matplotlib.pyplot as plt
tf.logging.set_verbosity(tf.logging.INFO)
def load_data(data_dir):
"""Load Data and return two lists"""
directories = [d for d in os.listdir(data_dir) if
os.path.isdir(os.path.join(data_dir,d))]
list_labels = []
list_images = []
for d in directories:
label_dir = os.path.join(data_dir,d)
file_names = [os.path.join(label_dir,f) for f in os.listdir(label_dir) if f.endswith(".ppm")]
for f in file_names:
list_images.append(skimage.data.imread(f))
list_labels.append(int(d))
list_images32 = [skimage.transform.resize(image,(32,32)) for image in list_images]
list_images32 = tf.cast(list_images32,tf.float32)
images = np.array(list_images32)
labels = np.asarray(list_labels,dtype=int32)
return images,labels
def cnn_model_fn(features, labels, mode):
#Input layer
input_layer = tf.reshape(features["x"],[-1,32,32,1])
#Convolutional layer 1
conv1 = tf.layers.conv2d(
inputs=input_layer,
filters=32,
kernel_size=[5,5],
padding="same",
activation=tf.nn.relu)
#Pooling layer 1
pool1 = tf.layers.max_pooling2d(inputs=conv1,pool_size=[2,2],strides=2)
#Convolutional layer 2
conv2 = tf.layers.conv2d(
inputs=pool1,
filters=64,
kernel_size=[5,5],
padding="same",
activation=tf.nn.relu)
#Pooling layer 2
pool2 = tf.layers.max_pooling2d(inputs=conv2,pool_size=[2,2],strides=2)
#Dense layer
pool2_flat = tf.reshape(pool2,[-1,7*7*64])
dense = tf.layers.dense(inputs=pool2_flat,units=1024,activation=tf.nn.relu)
#Dropout
dropout = tf.layers.dropout(inputs=dense,rate=0.4,training=mode == tf.estimator.ModeKeys.TRAIN)
#Logits layer
logits = tf.layers.dense(inputs=dropout,units=10)
predictions = {
"classes": tf.argmax(input=logits,axis=1),
"probabilities": tf.nn.softmax(logits,name="softmax_tensor")
}
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode,predictions=predictions)
#Calculate Loss Value
onehot_labels = tf.one_hot(indices=tf.cast(labels,tf.int32),depth=10)
loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,logits=logits)
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(
loss = loss,
global_step = tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode=mode,loss=loss,train_op=train_op)
eval_metric_ops = {
"accuracy": tf.metrics.accuracy(
labels=labels,predictions=predictions["classes"])}
return tf.estimator.EstimatorSpec(mode=mode,loss=loss,eval_metric_ops=eval_metric_ops)
def main(unused_argv):
# Load training and eval data
train_data_dir = "W:/Projects/TSRecognition/Training"
test_data_dir = "W:/Projects/TSRecognition/Testing"
images,labels = load_data(train_data_dir)
test_images,test_labels = load_data(test_data_dir)
# Create the Estimator
TSRecognition_classifier = tf.estimator.Estimator(
model_fn=cnn_model_fn, model_dir="/tmp/TSRecognition_convnet_model")
# Set up logging for predictions
# Log the values in the "Softmax" tensor with label "probabilities"
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(
tensors=tensors_to_log, every_n_iter=50)
# Train the model
train_input_fn = tf.estimator.inputs.numpy_input_fn(
x={"x": images},
y=labels,
batch_size=100,
num_epochs=None,
shuffle=True)
TSRecognition_classifier.train(
input_fn=train_input_fn,
steps=10000,
hooks=[logging_hook])
# Evaluate the model and print results
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
x={"x": test_images},
y=test_labels,
num_epochs=1,
shuffle=False)
eval_results = TSRecognition_classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)
if __name__ == "__main__":
tf.app.run()
Short answer for your code:
Get rid of the np.array and np.asarray calls in your load_data function. In particular, change:
list_images32 = [skimage.transform.resize(image,(32,32)) for image in list_images]
...to...
list_images32 = [skimage.transform.resize(image,(32,32)).astype(np.float32).tolist() for image in list_images]
...and return list_images32 AS IS from your load_data function. Don't "wrap it" with the np.asarray() call. The tolist() part of my suggestion is what is important. With the astype() call I'm just suggesting doing in numpy something you're doing in TensorFlow.
Simply getting rid of the np.asarray you have on list_labels should suffice for your labels.
The full answer for those that want to understand what's going on...
The "got shape...but wanted" exception is thrown from exactly one place in TensorFlow (tensor_util.py) and the reason is this function:
def _GetDenseDimensions(list_of_lists):
"""Returns the inferred dense dimensions of a list of lists."""
if not isinstance(list_of_lists, (list, tuple)):
return []
elif not list_of_lists:
return [0]
else:
return [len(list_of_lists)] + _GetDenseDimensions(list_of_lists[0])
It is trying to traverse what it assumes are nested plain Python lists or plain Python tuples; it doesn't know what to do with the Numpy array type it finds in your data structure because of the np.array/np.asarray calls.