TensorFlow padded_batch for sparse tensor?

I have some code, which looks like this:
import tensorflow as tf
import numpy as np

sequences = np.array([[1,3,4],[5,6,7,8],[9,10,11,12,13],[14,15]])

def generator():
    for el in sequences:
        yield el, np.random.randn(3, 5).astype('float32')

def parser(dense_tensor, spectrogram):
    labels = tf.contrib.layers.dense_to_sparse(dense_tensor)
    return spectrogram, labels

dataset = tf.data.Dataset.from_generator(generator, output_types=(tf.int64, tf.float32), output_shapes=([None], [None, None]))
dataset = dataset.map(lambda den, spec: parser(den, spec)).batch(2)
iter = dataset.make_initializable_iterator()
spectrogram, labels = iter.get_next()

with tf.Session() as sess:
    sess.run(iter.initializer)
    while True:
        try:
            spar, spe = sess.run([labels, spectrogram])
            print(spar, spe.shape)
        except tf.errors.OutOfRangeError:
            break
where I am using tf.data to get the labels and spectrograms for speech-to-text. The toy example above is fine when every signal in the batch has the same length, but for variable-length signals in a batch I need padded_batch, and dense_to_sparse does not allow a padded batch. Is there any solution where I can use padded_batch together with a sparse tensor?

One workaround, shown below, is to keep the labels dense through the pipeline, pad them with an EOS token (100 here) in padded_batch, and convert to sparse only after batching:

import tensorflow as tf
import numpy as np

sequences = np.array([[1,3,4],[5,6,7,8],[9,10,11,12,13],[14,15]])  # same toy data as above

def generator():
    for el in sequences:
        yield el, np.random.randn(np.random.randint(1, 4), 5).astype('float32')

def parser(dense_tensor, spectrogram):
    # labels = tf.contrib.layers.dense_to_sparse(dense_tensor, eos_token=100)
    labels = dense_tensor
    return spectrogram, labels

dataset = tf.data.Dataset.from_generator(generator, output_types=(tf.int64, tf.float32), output_shapes=([None], [None, None]))
dataset = dataset.map(lambda den, spec: parser(den, spec)).padded_batch(2, ([None, None], [None]), padding_values=(0., tf.constant(100, dtype=tf.int64)))
iter = dataset.make_initializable_iterator()
spectrogram, labels = iter.get_next()
res = tf.contrib.layers.dense_to_sparse(labels, eos_token=100)
print(res)

with tf.Session() as sess:
    sess.run(iter.initializer)
    while True:
        try:
            spar, spe, res1 = sess.run([labels, spectrogram, res])
            print(res1, spar, spe)
        except tf.errors.OutOfRangeError:
            break
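As an aside, on recent TensorFlow versions (where tf.contrib is gone) the same pad-then-sparsify trick can be written with the tf.sparse module. This is a minimal sketch, assuming the pad token is 100 and that labels never use 0 as a real token, since tf.sparse.from_dense treats zeros as missing entries:

import tensorflow as tf

# toy batch already padded with the EOS token 100
padded = tf.constant([[1, 3, 4, 100, 100],
                      [5, 6, 7, 8, 100]], dtype=tf.int64)
sp = tf.sparse.from_dense(padded)                        # note: drops zero entries
sp = tf.sparse.retain(sp, tf.not_equal(sp.values, 100))  # drop the padding token too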

Related

How can I compile batched training of a gpflow GPR into a tf.function?

I need to train a GPR model in multiple batches per epoch using a custom loss function. I would like to do this using GPflow, and I would like to compile my training using tf.function to increase efficiency. However, gpflow.models.GPR must be re-instantiated each time you supply new data, so tf.function has to re-trace each time. This makes the code slower rather than faster.
This is the initial setup:
import numpy as np
from itertools import islice
import tensorflow as tf
import tensorflow_probability as tfp
tfb = tfp.bijectors
from sklearn.model_selection import train_test_split
import gpflow
from gpflow.kernels import SquaredExponential
import time

data_size = 1000
train_fract = 0.8
batch_size = 250
n_epochs = 3
iterations_per_epoch = int(train_fract * data_size / batch_size)
tf.random.set_seed(3)

# Generate dummy data
x = np.arange(data_size)
y = np.arange(data_size) + np.random.rand(data_size)

# Slice into train and validate sets
x_train, x_validate, y_train, y_validate = train_test_split(x, y, random_state=1, test_size=1 - train_fract)

# Convert data into tensorflow constants
x_train = tf.constant(x_train[:, np.newaxis], dtype=np.float64)
x_validate = tf.constant(x_validate[:, np.newaxis], dtype=np.float64)
y_train = tf.constant(y_train[:, np.newaxis], dtype=np.float64)
y_validate = tf.constant(y_validate[:, np.newaxis], dtype=np.float64)

# Batch data
batched_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(buffer_size=len(x_train), seed=1)
    .repeat(count=None)
    .batch(batch_size)
)

# Create kernel
constrain_positive = tfb.Shift(np.finfo(np.float64).tiny)(tfb.Exp())
amplitude = tfp.util.TransformedVariable(initial_value=1, bijector=constrain_positive, dtype=np.float64, name="amplitude")
len_scale = tfp.util.TransformedVariable(initial_value=10, bijector=constrain_positive, dtype=np.float64, name="len_scale")
kernel = SquaredExponential(variance=amplitude, lengthscales=len_scale, name="squared_exponential_kernel")
obs_noise = tfp.util.TransformedVariable(initial_value=1e-3, bijector=constrain_positive, dtype=np.float64, name="observation_noise")

# Define custom loss function
@tf.function(autograph=False, experimental_compile=False)
def my_custom_loss(y_predict, y_true):
    return tf.math.reduce_mean(tf.math.squared_difference(y_predict, y_true))

# optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
This is how I train without a tf.function:
gpr_model_j_i = gpflow.models.GPR(data=(x_train, y_train), kernel=kernel, noise_variance=obs_noise)

# Start training loop
for j in range(n_epochs):
    for i, (x_train_j_i, y_train_j_i) in enumerate(islice(batched_dataset, iterations_per_epoch)):
        with tf.GradientTape() as tape:
            gpr_model_j_i = gpflow.models.GPR(data=(x_train_j_i, y_train_j_i), kernel=kernel, noise_variance=gpr_model_j_i.likelihood.variance)
            y_predict_j_i = gpr_model_j_i.predict_f(x_validate)[0]
            loss_j_i = my_custom_loss(y_predict_j_i, y_validate)
        grads_j_i = tape.gradient(loss_j_i, gpr_model_j_i.trainable_variables)
        optimizer.apply_gradients(zip(grads_j_i, gpr_model_j_i.trainable_variables))
This is how I train with a tf.function:
@tf.function(autograph=False, experimental_compile=False)
def tf_function_attempt_3(model):  # , optimizer):
    with tf.GradientTape() as tape:
        y_predict_j_i = model.predict_f(x_validate)[0]
        loss_j_i = my_custom_loss(y_predict_j_i, y_validate)
    grads_j_i = tape.gradient(loss_j_i, model.trainable_variables)
    optimizer.apply_gradients(zip(grads_j_i, model.trainable_variables))
    print("TRACING...", end="")

for j in range(n_epochs):
    for i, (x_train_j_i, y_train_j_i) in enumerate(islice(batched_dataset, iterations_per_epoch)):
        gpr_model_j_i = gpflow.models.GPR(data=(x_train_j_i, y_train_j_i), kernel=kernel, noise_variance=gpr_model_j_i.likelihood.variance)
        tf_function_attempt_3(gpr_model_j_i)  # , optimizer)
The tf.function retraces for each batch and is significantly slower than the normal training.
Is there a way to speed up the batched training of my GPR model with tf.function while using a custom loss function and GPflow? If not, I am open to suggestions for an alternative approach.
You don't have to re-instantiate GPR each time. You can construct tf.Variable holders with unconstrained shape and then .assign to them:
import gpflow
import numpy as np
import tensorflow as tf

input_dim = 1
initial_x, initial_y = np.zeros((0, input_dim)), np.zeros((0, 1))  # or your first batch
x_var = tf.Variable(initial_x, shape=(None, input_dim), dtype=tf.float64)
y_var = tf.Variable(initial_y, shape=(None, 1), dtype=tf.float64)
# in principle you could also set shape=(None, None)...
m = gpflow.models.GPR((x_var, y_var), gpflow.kernels.SquaredExponential())
loss = m.training_loss_closure()  # compile=True is the default and wraps the closure in tf.function()

N1 = 3
x1, y1 = np.random.randn(N1, input_dim), np.random.randn(N1, 1)
m.data[0].assign(x1)
m.data[1].assign(y1)
loss()  # traces the first time

N2 = 7
x2, y2 = np.random.randn(N2, input_dim), np.random.randn(N2, 1)
m.data[0].assign(x2)
m.data[1].assign(y2)
loss()  # does not trace again
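Building on this, the same assign-then-call pattern should extend to a compiled gradient step. The following is a minimal sketch, not an official GPflow recipe: it assumes GPflow 2.x, that m is the model built above, and that GPR exposes training_loss(); the names step, x_batch, y_batch, and opt are mine.

opt = tf.keras.optimizers.SGD(learning_rate=0.1)

@tf.function(input_signature=[tf.TensorSpec([None, 1], tf.float64),
                              tf.TensorSpec([None, 1], tf.float64)])
def step(x_batch, y_batch):
    # Swap in the new batch; the variable shapes are unconstrained, so no retrace.
    m.data[0].assign(x_batch)
    m.data[1].assign(y_batch)
    with tf.GradientTape() as tape:
        loss_value = m.training_loss()
    grads = tape.gradient(loss_value, m.trainable_variables)
    opt.apply_gradients(zip(grads, m.trainable_variables))
    return loss_value

Called as step(x1, y1) and then step(x2, y2), this should trace once and reuse the graph for batches of different sizes, thanks to the None dimension in the input_signature.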

How to print feature values after transformation inside a TensorFlow model

How can I see the values of the final features that are being trained inside a TensorFlow model? In the case below I am trying to multi-hot encode my column 'x', and I want to see how the features are fed to my model.
This is very easy to do in scikit-learn, but being new to TensorFlow I don't understand how it is possible.
import tensorflow as tf
import pandas as pd

data = {'x': ['a c', 'a b', 'b c'], 'y': [1, 1, 0]}
df = pd.DataFrame(data)
Y = df['y']
X = df.drop('y', axis=1)

indicator_features = [tf.feature_column.indicator_column(
    categorical_column=tf.feature_column.categorical_column_with_vocabulary_list(
        key='x', vocabulary_list=['a', 'b', 'c']))]

model = tf.estimator.LinearClassifier(feature_columns=indicator_features,
                                      model_dir="/tmp/samplemodel")

training_input_fn = tf.estimator.inputs.pandas_input_fn(x=X,
                                                        y=Y,
                                                        batch_size=64,
                                                        shuffle=True,
                                                        num_epochs=None)

model.train(input_fn=training_input_fn, steps=1000)
I have been able to print the values by enabling eager execution in TensorFlow.
I am posting my solution below; any other ideas are welcome as well.
import tensorflow as tf
import tensorflow.feature_column as fc
import pandas as pd

PATH = "/tmp/sample.csv"
tf.enable_eager_execution()
COLUMNS = ['education', 'label']
train_df = pd.read_csv(PATH, header=None, names=COLUMNS)
# train_df['education'] = train_df['education'].str.split(" ")

def easy_input_function(df, label_key, num_epochs, shuffle, batch_size):
    label = df[label_key]
    ed = tf.string_split(df['education'], " ")
    df['education'] = ed
    ds = tf.data.Dataset.from_tensor_slices((dict(df), label))
    if shuffle:
        ds = ds.shuffle(10000)
    ds = ds.batch(batch_size).repeat(num_epochs)
    return ds

ds = easy_input_function(train_df, label_key='label', num_epochs=5, shuffle=False, batch_size=5)

for feature_batch, label_batch in ds.take(1):
    print('Some feature keys:', list(feature_batch.keys())[:5])
    print()
    print('A batch of education:', feature_batch['education'])
    print()
    print('A batch of Labels:', label_batch)

print(feature_batch)

education_vocabulary_list = [
    'Bachelors', 'HS-grad', '11th', 'Masters', '9th', 'Some-college',
    'Assoc-acdm', 'Assoc-voc', '7th-8th', 'Doctorate', 'Prof-school',
    '5th-6th', '10th', '1st-4th', 'Preschool', '12th']
education = tf.feature_column.categorical_column_with_vocabulary_list('education', vocabulary_list=education_vocabulary_list)
fc.input_layer(feature_batch, [fc.indicator_column(education)])
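One caveat: in a plain script (as opposed to an interactive session or notebook), the final fc.input_layer(...) expression is computed but never displayed. Wrap it in print to actually see the dense multi-hot matrix:

print(fc.input_layer(feature_batch, [fc.indicator_column(education)]))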

How to create tf.feature_columns for data with no header (CSV file)?

I am working through multi-class classification of handwritten digits from the following link: google colab
I then tried to rewrite the code in my own way to feed and train the DNN.
Because the CSV file has no header, I am not able to create my feature columns, so I cannot train my model.
Can you please help me figure out how it has been done in the link, or how it needs to be done for my code? Thanks in advance.
import pandas as pd
import seaborn as sns
import tensorflow as tf

mnist_df = pd.read_csv("https://download.mlcc.google.com/mledu-datasets/mnist_train_small.csv", header=None)
mnist_df.columns
hand_df = mnist_df[0]
hand_df.head()
matrix_df = mnist_df.drop([0], axis=1)
matrix_df.head()
mnist_df = mnist_df.head(10000)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(matrix_df, hand_df, test_size=0.3, random_state=101)

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
matrix_df = pd.DataFrame(data=scaler.fit_transform(matrix_df),
                         columns=matrix_df.columns,
                         index=matrix_df.index)

input_func = tf.estimator.inputs.pandas_input_fn(x=X_train, y=y_train,
                                                 batch_size=10,
                                                 num_epochs=1000,
                                                 shuffle=True)

my_optimizer = tf.train.AdagradOptimizer(learning_rate=0.03)
my_optimizer = tf.contrib.estimator.clip_gradients_by_norm(my_optimizer, 5.0)

model = tf.estimator.LinearClassifier(feature_columns=feat_cols,  # feat_cols is undefined -- this is the problem
                                      n_classes=10,
                                      optimizer=my_optimizer,
                                      config=tf.estimator.RunConfig(keep_checkpoint_max=1))

model.train(input_fn=input_func, steps=1000)
The example code already splits the dataset into training and validation sets, and I don't think this has anything to do with the header in the CSV:
training_targets, training_examples = parse_labels_and_features(mnist_dataframe[:7500])
validation_targets, validation_examples = parse_labels_and_features(mnist_dataframe[7500:10000])
So here is the training code on its own.
import pandas as pd
import tensorflow as tf
from tensorflow.python.data import Dataset
import numpy as np

mnist_df = pd.read_csv("https://download.mlcc.google.com/mledu-datasets/mnist_train_small.csv", sep=",", header=None)
mnist_df = mnist_df.head(10000)

dataset = mnist_df[:7500]
labels = dataset[0]
print(labels.shape)

# DataFrame.loc index ranges are inclusive at both ends.
features = dataset.loc[:, 1:784]
print(features.shape)

# Scale the data to [0, 1] by dividing out the max value, 255.
features = features / 255

def create_training_input_fn(features, labels, batch_size, num_epochs=None, shuffle=True):
    """A custom input_fn for sending MNIST data to the estimator for training.

    Args:
      features: The training features.
      labels: The training labels.
      batch_size: Batch size to use during training.

    Returns:
      A function that returns batches of training features and labels during
      training.
    """
    def _input_fn(num_epochs=None, shuffle=True):
        # Input pipelines are reset with each call to .train(). To ensure model
        # gets a good sampling of data, even when number of steps is small, we
        # shuffle all the data before creating the Dataset object
        idx = np.random.permutation(features.index)
        raw_features = {"pixels": features.reindex(idx)}
        raw_targets = np.array(labels[idx])
        ds = Dataset.from_tensor_slices((raw_features, raw_targets))  # warning: 2GB limit
        ds = ds.batch(batch_size).repeat(num_epochs)
        if shuffle:
            ds = ds.shuffle(10000)
        # Return the next batch of data.
        feature_batch, label_batch = ds.make_one_shot_iterator().get_next()
        return feature_batch, label_batch
    return _input_fn

my_optimizer = tf.train.AdagradOptimizer(learning_rate=0.03)
my_optimizer = tf.contrib.estimator.clip_gradients_by_norm(my_optimizer, 5.0)

model = tf.estimator.LinearClassifier(feature_columns=set([tf.feature_column.numeric_column('pixels', shape=784)]),
                                      n_classes=10,
                                      optimizer=my_optimizer,
                                      config=tf.estimator.RunConfig(keep_checkpoint_max=1))

model.train(input_fn=create_training_input_fn(features, labels, batch_size=10), steps=1000)
Similarly, there is a function for preparing the validation set for prediction. You could use this pattern as it is.
But if you are splitting the dataframe with train_test_split, you can try this:
X_train, X_test = train_test_split(mnist_df, test_size=0.2)
You then have to repeat the following procedure for X_test as well to get the validation features and labels.
X_train_labels = X_train[0]
print(X_train_labels.shape)

# DataFrame.loc index ranges are inclusive at both ends.
X_train_features = X_train.loc[:, 1:784]
print(X_train_features.shape)

# Scale the data to [0, 1] by dividing out the max value, 255.
X_train_features = X_train_features / 255
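For completeness, the mirrored steps for the test split would look like this (a sketch following the same pattern; the names are mine):

X_test_labels = X_test[0]

# Same column slice and scaling as for the training split.
X_test_features = X_test.loc[:, 1:784] / 255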
Rather than trying to find a way to use the data without any column names, I had an idea: I named all my columns and appended them into cols=[], and then it was easy to assign and use them via feature_columns=cols.
Here is my full working code for my own question.
Thanks.
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn import metrics
from tensorflow.python.data import Dataset

mnist_df = pd.read_csv("https://download.mlcc.google.com/mledu-datasets/mnist_train_small.csv", header=None)
mnist_df.describe()
mnist_df.columns
hand_df = mnist_df[0]
matrix_df = mnist_df.drop([0], axis=1)
matrix_df.head()
hand_df.head()

# creating the cols array and appending a1 to a784 in order to name the columns
cols = []
for i in range(785):
    if i != 0:
        a = '{}{}'.format('a', i)
        cols.append(a)
matrix_df.columns = cols

mnist_df = mnist_df.head(10000)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(matrix_df, hand_df, test_size=0.3, random_state=101)

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
matrix_df = pd.DataFrame(data=scaler.fit_transform(matrix_df),
                         columns=matrix_df.columns,
                         index=matrix_df.index)

# naming columns so I will not get an error while assigning feature_columns
for i in range(len(cols)):
    a = i + 1
    b = '{}{}'.format('a', a)
    cols[i] = tf.feature_column.numeric_column(str(b))
matrix_df.head()

input_func = tf.estimator.inputs.pandas_input_fn(x=X_train, y=y_train,
                                                 batch_size=10, num_epochs=1000,
                                                 shuffle=True)

my_optimizer = tf.train.AdagradOptimizer(learning_rate=0.03)
my_optimizer = tf.contrib.estimator.clip_gradients_by_norm(my_optimizer, 5.0)

model = tf.estimator.DNNClassifier(feature_columns=cols,
                                   hidden_units=[32, 64],
                                   n_classes=10,
                                   optimizer=my_optimizer,
                                   config=tf.estimator.RunConfig(keep_checkpoint_max=1))

model.train(input_fn=input_func, steps=1000)

predict_input_func = tf.estimator.inputs.pandas_input_fn(x=X_test,
                                                         batch_size=50,
                                                         num_epochs=1,
                                                         shuffle=False)

pred_gen = model.predict(predict_input_func)
predictions = list(pred_gen)
predictions[0]
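As an aside, the 784 column names can also be assigned in one step at load time with pandas' names argument; a sketch:

import pandas as pd

names = ['label'] + ['a{}'.format(i) for i in range(1, 785)]  # one label column + 784 pixel columns
mnist_df = pd.read_csv("https://download.mlcc.google.com/mledu-datasets/mnist_train_small.csv",
                       header=None, names=names)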

Why does adding unusable tensors change the result of an RNN cell in TensorFlow?

Here is the simplest code that can reproduce the problem:
import numpy as np
import random
import tensorflow as tf

tf.set_random_seed(12345)
np.random.seed(12345)
random.seed(12345)

unusable1 = tf.constant(1e-3, tf.float32)
unusable2 = tf.constant(1e-3, tf.float32)
unusable3 = tf.constant(1e-3, tf.float32)

X = tf.placeholder(tf.float32, shape=[2, 3])
cell = tf.contrib.rnn.BasicRNNCell(5)
changed_data = tf.reduce_sum(cell(X, state=tf.zeros((2, 5)))[0])

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    output = sess.run(changed_data, feed_dict={X: np.ones((2, 3))})
    print(output)  # = -1.46618
The result of the above code is -1.46618 on my machine.
However, if I comment out the three unusable constant tensor declarations, the result becomes 1.76918!
import numpy as np
import random
import tensorflow as tf

tf.set_random_seed(12345)
np.random.seed(12345)
random.seed(12345)

# unusable1 = tf.constant(1e-3, tf.float32)
# unusable2 = tf.constant(1e-3, tf.float32)
# unusable3 = tf.constant(1e-3, tf.float32)

X = tf.placeholder(tf.float32, shape=[2, 3])
cell = tf.contrib.rnn.BasicRNNCell(5)
changed_data = tf.reduce_sum(cell(X, state=tf.zeros((2, 5)))[0])

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    output = sess.run(changed_data, feed_dict={X: np.ones((2, 3))})
    print(output)  # = 1.76918
In fact, you can add, delete, or modify constant tensor declarations as much as you like, and the result changes every time!
What's the problem?
The initializers for the variables are getting different op-level seeds, because seeding is based on (1) the graph-level seed and (2) the op id if an op-level seed is not explicitly set (a deterministic function of the previously created ops in the current graph). Adding or removing the constants shifts the op ids of everything created afterwards, so the initializers draw different values. This behavior exists so that setting a graph-level seed does not give every variable exactly the same initialization. See get_seed for the implementation.
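To see the mechanism in isolation: once an op-level seed is set explicitly, the op's random stream no longer depends on how many ops were created before it. A minimal sketch in the question's TF 1.x style:

import tensorflow as tf

tf.set_random_seed(12345)
unusable = tf.constant(1e-3, tf.float32)  # add or remove ops here freely
r = tf.random_uniform([3], seed=42)       # explicit op-level seed pins this op's values

with tf.Session() as sess:
    print(sess.run(r))  # same output regardless of the ops created above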

How to traverse the result of tf.unique?

After invoking tf.unique, the shape of the tensor will be unknown, but I want to traverse the result of tf.unique.
Suppose tensor = tf.unique(...)
I have tried:
for i in tf.range(tf.shape(tensor)[0])
tf.unstack(tensor, num=tf.shape(tensor)[0])
tf.split(tensor, num_or_size_splits=tf.shape(tensor)[0])
None of them works, because these functions all need a static shape, or require num/num_or_size_splits to be a Python integer. So how can I traverse the tensor?
Update
Example: I have two 1-D tensors with the same shape:
x = [1,3,2,1,3]
y = [3,6,5,8,9]
I want to do something like this (pseudocode):
x_u = unique(x)  # [1,3,2]
# get a bool mask for each unique value, and slice y with it
for i in x_u:
    y[x == i]
When i=1, y[x==i] = y[[True,False,False,True,False]], and I can get y[0] and y[3].
When i=3, I can get y[1] and y[4].
When i=2, I can get y[2].
Solution
After some trials, this may be a solution, using tf.while_loop:
import tensorflow as tf
import numpy as np

x = tf.constant(np.array([1,3,2,1,3]), dtype='int32')
y = tf.constant(np.array([3,6,5,8,9]), dtype='int32')
x_u, _ = tf.unique(x)
n = tf.shape(x_u)[0]

for_i = tf.constant(0)
re = tf.constant([], dtype=tf.int32)
cond = lambda i, res: i < n

def body(i, res):
    x_0 = tf.slice(x_u, [i], [1])
    selected = tf.boolean_mask(y, tf.equal(x_0, x))
    return i + 1, tf.concat([res, selected], axis=0)

op = tf.while_loop(cond, body, [for_i, re], shape_invariants=[for_i.get_shape(), tf.TensorShape([None])])
print(op[1].shape)

with tf.Session() as sess:
    print(sess.run(op[1]))
I just tried this:
import tensorflow as tf
import numpy as np

a = tf.constant(np.random.randn(200), dtype='float32')
b = tf.unique(a)
print(b[0])  # Tensor("Unique:0", shape=(?,), dtype=float32)
c = tf.map_fn(lambda x: x * x, b[0])

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
d = sess.run(c)
print(d)
And it works without knowing the shape of b here. Be careful: tf.unique returns a tuple (Tensor, Tensor) with the values and their indices.
Update
This is the only way I found to do it; your result cannot have an inconsistent shape in TensorFlow.
import tensorflow as tf
import numpy as np

x = tf.constant(np.array([1,3,2,1,3]), dtype='int32')
y = tf.constant(np.array([3,6,5,8,9]), dtype='int32')
x_u = tf.unique(x)
eq = tf.equal(x, tf.expand_dims(x_u[0], 1))
y_masked = y * tf.cast(eq, tf.int32)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
e = sess.run(y_masked)
print(e)
tf.boolean_mask can be used as well but you are going to get a flat output.
Last Update
This is what you want; it is much faster than what you proposed, and the flat-output caveat was already explained in the line just above.
import tensorflow as tf
import numpy as np

x = tf.constant(np.array([1,3,2,1,3]), dtype='int32')
y = tf.constant(np.array([3,6,5,8,9]), dtype='int32')
x_u, _ = tf.unique(x)
eq = tf.equal(x, tf.expand_dims(x_u, 1))
y_tiled = tf.tile(tf.expand_dims(y, 0), [tf.shape(x_u)[0], 1])
y_masked = tf.boolean_mask(y_tiled, eq)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
e = sess.run(y_masked)
print(e)
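On newer TensorFlow versions the inconsistent-shape restriction can be sidestepped with tf.RaggedTensor, which holds rows of different lengths directly. A sketch, assuming TF 2.x and tf.ragged.boolean_mask:

import tensorflow as tf

x = tf.constant([1, 3, 2, 1, 3])
y = tf.constant([3, 6, 5, 8, 9])
x_u, _ = tf.unique(x)
eq = tf.equal(x, tf.expand_dims(x_u, 1))                        # one boolean row per unique value
y_tiled = tf.tile(tf.expand_dims(y, 0), [tf.shape(x_u)[0], 1])
groups = tf.ragged.boolean_mask(y_tiled, eq)                    # ragged: rows may differ in length
print(groups)  # [[3, 8], [6, 9], [5]]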