I have a TensorFlow graph (stored in a protobuf file) with placeholder operations as inputs. I want to wrap this graph as a Keras layer or model.
Here is an example:
with tf.Graph().as_default() as gf:
    x = tf.placeholder(tf.float32, shape=(None, 123), name='x')
    c = tf.constant(100, dtype=tf.float32, name='C')
    y = tf.multiply(x, c, name='y')

    with tf.gfile.GFile("test_graph/y.pb", "wb") as f:
        raw = gf.as_graph_def().SerializeToString()
        f.write(raw)
Load it back as a TensorFlow graph:
persisted_sess = tf.Session()
with persisted_sess.as_default():
    with tf.gfile.FastGFile("./test_graph/y.pb", 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        persisted_sess.graph.as_default()
        tf.import_graph_def(graph_def, name='')

for i, op in enumerate(persisted_sess.graph.get_operations()):
    tensor = persisted_sess.graph.get_tensor_by_name(op.name + ':0')
    print(i, '\t', op.name, op.type, tensor)

x_tensor = persisted_sess.graph.get_tensor_by_name('x:0')
y_tensor = persisted_sess.graph.get_tensor_by_name('y:0')
We can see the x and y operations and tensors:
0 x Placeholder Tensor("x:0", shape=(?, 123), dtype=float32)
1 C Const Tensor("C:0", shape=(), dtype=float32)
2 y Mul Tensor("y:0", shape=(?, 123), dtype=float32)
Then I try to wrap it into a Keras model using different methods:
Method 1:
output_layer = Lambda(lambda x: y_tensor, name='output_y')(x_tensor)
model = Model(inputs=[x_tensor], outputs=[output_layer]) # ERROR!
This already produces the error InvalidArgumentError: You must feed a value for placeholder tensor 'x' with dtype float and shape [?,123] [[{{node x}}]]
Method 2:
input_x = Input(name='x', shape=(123,), dtype='float32')
output_layer = Lambda(lambda x: y_tensor, name='output_y')(input_x)
model = Model(inputs=[input_x], outputs=[output_layer]) # OK
model.predict({'x': np.ones((3, 123), dtype=np.float32)}) # ERROR!
This causes the same error at the predict call.
The closest info I can find relating to my question is this, but it doesn't address the handling of placeholders. What would be the correct way to do this?
I figured out the way: we need to use InputLayer instead of Input.
First, the code that creates the demo TensorFlow graph PB:
def dump_model():  # just to hide all vars during the demo creation
    import numpy as np
    import sys
    import tensorflow as tf

    with tf.Graph().as_default() as gf:
        x = tf.placeholder(tf.float32, shape=(None, 123), name='x')
        b = tf.placeholder(tf.float32, shape=(None, 123), name='b')
        c = tf.constant(100, dtype=tf.float32, name='C')
        y = tf.multiply(x, c, name='y')
        z = tf.add(y, x, name='z')
        print(x, b, c, y, z)

        with tf.gfile.GFile("test_graph/y.pb", "wb") as f:
            raw = gf.as_graph_def().SerializeToString()
            print(type(raw), len(raw))
            f.write(raw)

dump_model()
Then import the graph and find the input/output tensors:
import numpy as np
import sys
import tensorflow as tf

persisted_sess = tf.Session()
with persisted_sess.as_default():
    with tf.gfile.FastGFile("./test_graph/y.pb", 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        persisted_sess.graph.as_default()
        tf.import_graph_def(graph_def, name='')
        print(persisted_sess.graph.get_name_scope())

for i, op in enumerate(persisted_sess.graph.get_operations()):
    tensor = persisted_sess.graph.get_tensor_by_name(op.name + ':0')
    print(i, '\t', op.name, op.type, tensor)

x_tensor = persisted_sess.graph.get_tensor_by_name('x:0')
b_tensor = persisted_sess.graph.get_tensor_by_name('b:0')
y_tensor = persisted_sess.graph.get_tensor_by_name('y:0')
z_tensor = persisted_sess.graph.get_tensor_by_name('z:0')
Then we can create the Keras model and run inference:
from tensorflow.keras.layers import Lambda, InputLayer
from tensorflow.keras import Model
from tensorflow.python.util import nest
from tensorflow.python.keras.utils import layer_utils
input_x = InputLayer(name='x', input_tensor=x_tensor)
input_x.is_placeholder = True  # this is the critical bit
input_b = InputLayer(name='b2', input_tensor=b_tensor)  # note: the Keras name can differ from the TF name
input_b.is_placeholder = True

output_y = Lambda(lambda x: y_tensor, name='output_y')(input_x.output)
output_z = Lambda(lambda x_b: z_tensor, name='output_z')([input_x.output, input_b.output])

base_model_inputs = nest.flatten([layer_utils.get_source_inputs(input_x.output),
                                  layer_utils.get_source_inputs(input_b.output)])
base_model = Model(base_model_inputs, [output_y, output_z])

y_out, z_out = base_model.predict({'x': np.ones((3, 123), dtype=np.float32),
                                   'b2': np.full((3, 123), 100.0, dtype=np.float32)})
y_out.shape, z_out.shape
And we can even create a new model from the base model:
from tensorflow.keras.layers import Add
derived_output = Add(name='derived')([output_y, output_z])
derived_model = Model(base_model.inputs, [derived_output])
derived_out = derived_model.predict({'x': np.ones((3, 123), dtype=np.float32),
                                     'b2': np.full((3, 123), 100.0, dtype=np.float32)})
derived_out.shape
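As a quick sanity check (just an illustrative assertion, based on the graph defined above: y = 100*x and z = y + x, so derived = y + z), feeding ones for x should give 100, 101 and 201 everywhere:

# the 'b' input is fed but does not affect these outputs, since the graph computes z = y + x
assert np.allclose(y_out, 100.0)
assert np.allclose(z_out, 101.0)
assert np.allclose(derived_out, 201.0)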
Related
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score,\
accuracy_score, balanced_accuracy_score,classification_report,\
plot_confusion_matrix, confusion_matrix
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from tensorflow.keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, Concatenate
from tensorflow.keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D, LeakyReLU
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import RandomNormal
import tensorflow.keras.backend as K
from sklearn.utils import shuffle
import pickle
from tqdm import tqdm
from scipy import stats
np.random.seed(1635848)
def get_data_XYZ_one_dimensional(n, a=-2, c=1/2, random_state=None, verbose=True):
    """
    Generates pseudo-random data distributed according to the distribution defined in section 2.1 of the document
    "Math/Confounders and data generation.pdf".
    :param n: Number of data points to generate.
    :param a: Mean of X.
    :param c: Shape parameter for Weibull distribution.
    :param random_state: Used to set the seed of numpy.random before generation of random numbers.
    :param verbose: If True will display a progress bar. If False it will not display a progress bar.
    :return: Pandas DataFrame with three columns (corresponding to X, Y and Z) and n rows (corresponding to the n
             generated pseudo-random samples).
    """
    np.random.seed(random_state)
    output = []
    iterator = tqdm(range(n)) if verbose else range(n)
    for _ in iterator:
        X = stats.norm.rvs(loc=-2, scale=1)
        Y = stats.bernoulli.rvs(p=1/(1+np.exp(-X)))
        if Y == 0:
            Z = stats.expon.rvs(scale=np.exp(-X))  # note: np.exp(-X) could be cached for more computational efficiency but would render the code less useful
        elif Y == 1:
            Z = stats.weibull_min.rvs(c=c, scale=np.exp(-X))
        else:
            assert False
        output.append((X, Y, Z))
    return pd.DataFrame(output, columns=["Personal information", "Treatment", "Time to event"])
data = get_data_XYZ_one_dimensional(n=100, random_state=0)
print(data)
# The Architecture of CGAN
class cGAN():
    """
    Class containing 3 methods (and __init__): generator, discriminator and train.
    Generator is trained using random noise and label as inputs. Discriminator is trained
    using real/fake samples and labels as inputs.
    """
    def __init__(self, latent_dim=100, out_shape=3):
        self.latent_dim = latent_dim
        self.out_shape = out_shape
        self.num_classes = 2
        # using Adam as our optimizer
        optimizer = Adam(0.0002, 0.5)
        # building the discriminator
        self.discriminator = self.discriminator()
        self.discriminator.compile(loss=['binary_crossentropy'],
                                   optimizer=optimizer,
                                   metrics=['accuracy'])
        # building the generator
        self.generator = self.generator()
        noise = Input(shape=(self.latent_dim,))
        label = Input(shape=(1,))
        gen_samples = self.generator([noise, label])
        # we don't train the discriminator when training the generator
        self.discriminator.trainable = False
        valid = self.discriminator([gen_samples, label])
        # combining both models
        self.combined = Model([noise, label], valid)
        self.combined.compile(loss=['binary_crossentropy'],
                              optimizer=optimizer,
                              metrics=['accuracy'])

    def generator(self):
        init = RandomNormal(mean=0.0, stddev=0.02)
        model = Sequential()
        model.add(Dense(128, input_dim=self.latent_dim))
        model.add(Dropout(0.2))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(256))
        model.add(Dropout(0.2))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(512))
        model.add(Dropout(0.2))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(self.out_shape, activation='tanh'))
        noise = Input(shape=(self.latent_dim,))
        label = Input(shape=(1,), dtype='int32')
        label_embedding = Flatten()(Embedding(self.num_classes, self.latent_dim)(label))
        model_input = multiply([noise, label_embedding])
        gen_sample = model(model_input)
        model.summary()
        return Model([noise, label], gen_sample, name="Generator")

    def discriminator(self):
        init = RandomNormal(mean=0.0, stddev=0.02)
        model = Sequential()
        model.add(Dense(512, input_dim=self.out_shape, kernel_initializer=init))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(256, kernel_initializer=init))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.4))
        model.add(Dense(128, kernel_initializer=init))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.4))
        model.add(Dense(1, activation='sigmoid'))
        gen_sample = Input(shape=(self.out_shape,))
        label = Input(shape=(1,), dtype='int32')
        label_embedding = Flatten()(Embedding(self.num_classes, self.out_shape)(label))
        model_input = multiply([gen_sample, label_embedding])
        validity = model(model_input)
        model.summary()
        return Model(inputs=[gen_sample, label], outputs=validity, name="Discriminator")

    def train(self, X_train, y_train, pos_index, neg_index, epochs, sampling=False, batch_size=32, sample_interval=100, plot=True):
        # though not recommended, defining the losses as globals helps when analysing our cGAN outside the class
        global G_losses
        global D_losses
        G_losses = []
        D_losses = []
        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))
        for epoch in range(epochs):
            # if sampling==True --> train the discriminator with 3 samples from the positive class and the rest from the negative class
            if sampling:
                idx1 = np.random.choice(pos_index, 3)
                idx0 = np.random.choice(neg_index, batch_size-3)
                idx = np.concatenate((idx1, idx0))
            # if sampling!=True --> train the discriminator using random instances in batches of 32
            else:
                idx = np.random.choice(len(y_train), batch_size)
            samples, labels = X_train[idx], y_train[idx]
            samples, labels = shuffle(samples, labels)
            # Sample noise as generator input
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            gen_samples = self.generator.predict([noise, labels])
            # label smoothing
            if epoch < epochs//1.5:
                valid_smooth = (valid+0.1)-(np.random.random(valid.shape)*0.1)
                fake_smooth = (fake-0.1)+(np.random.random(fake.shape)*0.1)
            else:
                valid_smooth = valid
                fake_smooth = fake
            # Train the discriminator
            self.discriminator.trainable = True
            d_loss_real = self.discriminator.train_on_batch([samples, labels], valid_smooth)
            d_loss_fake = self.discriminator.train_on_batch([gen_samples, labels], fake_smooth)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
            # Train the generator
            self.discriminator.trainable = False
            sampled_labels = np.random.randint(0, 2, batch_size).reshape(-1, 1)
            g_loss = self.combined.train_on_batch([noise, sampled_labels], valid)
            if (epoch+1) % sample_interval == 0:
                print('[%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f'
                      % (epoch, epochs, d_loss[0], g_loss[0]))
            G_losses.append(g_loss[0])
            D_losses.append(d_loss[0])
            if plot:
                if epoch+1 == epochs:
                    plt.figure(figsize=(10, 5))
                    plt.title("Generator and Discriminator Loss")
                    plt.plot(G_losses, label="G")
                    plt.plot(D_losses, label="D")
                    plt.xlabel("iterations")
                    plt.ylabel("Loss")
                    plt.legend()
                    plt.show()
data.Treatment.value_counts()
scaler = StandardScaler()
X = scaler.fit_transform(data.drop('Treatment', 1))
y = data['Treatment'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
lgb_1 = lgb.LGBMClassifier()
lgb_1.fit(X_train, y_train)
y_pred = lgb_1.predict(X_test)
# evaluation
print(classification_report(y_test, y_pred))
plot_confusion_matrix(lgb_1, X_test, y_test)
plt.show()
le = preprocessing.LabelEncoder()
for i in ['Personal information', 'Treatment', 'Time to event']:
    data[i] = le.fit_transform(data[i].astype(str))
y_train = y_train.reshape(-1,1)
pos_index = np.where(y_train==1)[0]
neg_index = np.where(y_train==0)[0]
cgan.train(X_train, y_train, pos_index, neg_index, epochs=500)
Here, training gives the error ValueError: Input 0 of layer "Discriminator" is incompatible with the layer: expected shape=(None, 3), found shape=(100, 2). I understand that I have to fix the shape by changing the input, but where and how do I do that?
Also, there are 3 columns in data, so how do I go about making this work?
I think the fix is out_shape=2 and not 3, because the generated output has 2 features and you stated the number of classes to be 2 as well. Unless there is something else I am missing.
def __init__(self, latent_dim=100, out_shape=2):
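As a quick check (just an illustrative snippet, reusing the X built in your preprocessing): dropping the 'Treatment' column leaves exactly the two feature columns that the generator must produce and the discriminator must accept.

# X was built with scaler.fit_transform(data.drop('Treatment', 1)) above,
# so only 'Personal information' and 'Time to event' remain as features.
print(X.shape[1])  # expected: 2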
Does anyone know how to preserve the sparse zeros in a tensor when using tf.keras.experimental.SequenceFeatures?
It seems to require a sparse tensor, but that eliminates the zeros in a sequence. Does anyone know of another way to input sequence data into TensorFlow using feature columns? Thanks.
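To illustrate the point, here is a minimal standalone sketch (assuming TF 2.x eager execution; it is not part of the original code): a SparseTensor produced by tf.sparse.from_dense only stores the non-zero entries, so explicit zeros in the sequence are simply not represented.

import tensorflow as tf

dense = tf.constant([[0., 1., 0., 2.]])
sparse = tf.sparse.from_dense(dense)
print(sparse.indices.numpy())  # [[0 1] [0 3]] -- the positions holding zeros are gone
print(sparse.values.numpy())   # [1. 2.]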
This is a toy example of the issue:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dropout, Dense

inputs = {'x1': tf.keras.layers.Input(name='x1', sparse=True, shape=(10,), dtype='float32'),
          'x2': tf.keras.layers.Input(name='x2', sparse=True, shape=(10,), dtype='float32')}
columns = [tf.feature_column.sequence_numeric_column('x1', dtype=tf.float32),
           tf.feature_column.sequence_numeric_column('x2', dtype=tf.float32)]
input_layer, input_length = tf.keras.experimental.SequenceFeatures(columns)(inputs)
lstm_out = LSTM(128, return_sequences=False)(input_layer)
lstm_out = Dense(5)(lstm_out)
model = tf.keras.models.Model(inputs, lstm_out)
model.compile(loss='mse', metrics='mae', optimizer='Adam')

def generator():
    while True:
        x_1 = np.asarray([[0, 1, 2, 3, 4, 0, 6, 7, 8, 9], [1, 2, 3, 4, 5, 0, 7, 8, 9, 10]], np.float32)
        x_2 = np.asarray([[1, 0.1, 0, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
                          [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0]], np.float32)
        x1 = tf.sparse.from_dense(tf.convert_to_tensor(x_1, np.float32))
        x2 = tf.sparse.from_dense(tf.convert_to_tensor(x_2, np.float32))
        x = {'x1': x1, 'x2': x2}
        data_np = np.asarray([[11, 12, 13, 14, 15], [12, 13, 14, 15, 16]], np.float32)
        y = tf.convert_to_tensor(data_np, np.float32)
        yield x, y

x, y = generator().__next__()
data = tf.data.Dataset.from_generator(generator, output_signature=(
    {'x1': tf.SparseTensorSpec(tf.TensorShape([2, 10]), tf.float32),
     'x2': tf.SparseTensorSpec(tf.TensorShape([2, 10]), tf.float32)},
    tf.TensorSpec(shape=(2, 5), dtype=tf.float32)))
model.fit(data, steps_per_epoch=1, epochs=1, verbose=2)
Before reshaping xtraindata and xtestdata, I got the error:
"Input 0 of layer sequential_10 is incompatible with the layer: expected min_ndim=4, found ndim=2."
After reshaping xtraindata and xtestdata to (1400, 24, 24, 1) and (600, 24, 24, 1) respectively, I got this error:
"Incompatible shapes: [32,1] vs. [32,6,6,1]
[[node mean_squared_error/SquaredDifference (defined at C:\Users\User\Documents\car_person.py:188) ]] [Op:__inference_test_function_7945]
Function call stack:
test_function"
I cannot make the evaluate function work on the created model. What should I do to make the test data compatible with the model?
import numpy as np
import matplotlib.pyplot as plt
import os
import time
import cv2
import pandas as pd
import tensorflow as tf
import itertools as it
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)])
    except RuntimeError as e:
        print(e)
#gpu_options=K.tf.GPUOptions(per_process_gpu_memory_fraction=0.35)
path = "C:/Users/User/Desktop/tunel_data"
training_data=[]
def create_training_data(training_data, path):
    categories = ["tunel_data_other", "tunel_data_car"]
    for category in categories:
        path = os.path.join(path, category)
        for img in os.listdir(path):
            print(img)
            if category == "tunel_data_other":
                class_num = 0
                #image=Image.open(img)
                #new_image = image.resize((50, 50))
                #new_image.save('car'+img.index())
                #try:
                image_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)/255
                new_array = cv2.resize(image_array, (24, 24))
                print(new_array.shape)
                training_data.append([new_array, class_num])
                #except:
                #pass
            elif category == "tunel_data_car":
                class_num = 1
                #image=Image.open(img)
                #new_image = image.resize((50, 50))
                #new_image.save('person'+img.index())
                #try:
                image_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)/255
                new_array = cv2.resize(image_array, (24, 24))
                print(new_array.shape)
                training_data.append([new_array, class_num])
                #except:
                #pass
        path = "C:/Users/User/Desktop/tunel_data"
    return training_data
create_training_data(training_data, path)
x=[]
y=[]
for i in range(len(training_data)):
    x.append(training_data[i][0])
    y.append(training_data[i][1])
#print(x)
#print(y)
x = np.array(x).reshape(2000, 576)
"""
principle_features = PCA(n_components=250)
feature = principle_features.fit_transform(x)
"""
feature = x
label = y
feature_df = pd.DataFrame(feature)
#df = DataFrame (People_List,columns=['First_Name','Last_Name','Age'])
label_df = pd.DataFrame(label)
data = pd.concat([feature_df, label_df], axis=1).to_csv('complete.csv')
data = pd.read_csv("complete.csv")
data = data.sample(frac=1).reset_index(drop=True)
print(data)
x_test, x_train, y_test, y_train = train_test_split(x, y, test_size=0.7, random_state=65)
xtraindata=pd.DataFrame(data=x_train[:,:])
xtestdata=pd.DataFrame(data=x_test[:,:])
print(xtraindata)
ytraindata=pd.DataFrame(data=y_train[:])
ytestdata=pd.DataFrame(data=y_test[:])
print(ytraindata)
xtraindata = np.asarray(xtraindata)
ytraindata = np.asarray(ytraindata)
xtestdata = np.asarray(xtestdata)
ytestdata = np.asarray(ytestdata)
x=np.asarray(x)
y=np.asarray(y)
xtraindata = xtraindata.reshape(1400,24,24,1)
xtestdata = xtestdata.reshape(600,24,24,1)
activation = ["tanh", "relu", "sigmoid", "softmax"]
input_size1 = range(10)
input_size2 = range(10)
k_scores = []
in_size = []
possible = list(it.permutations(activation, 4))
for c in possible:
    for i in input_size1:
        for a in input_size2:
            model = tf.keras.Sequential([tf.keras.layers.Conv2D(256, kernel_size=(3,3), padding='same', activation='relu'),
                                         tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
                                         tf.keras.layers.Conv2D(512, kernel_size=(3,3), padding='same', activation='relu'),
                                         tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
                                         tf.keras.layers.Dense(250, activation=c[0]),
                                         tf.keras.layers.Dense(i, activation=c[1]),
                                         tf.keras.layers.Dense(a, activation=c[2]),
                                         tf.keras.layers.Dense(1, activation=c[3])])
            model.compile(optimizer='sgd', loss='mse')
            val_loss = model.evaluate(xtestdata, ytestdata, verbose=1)
            k_scores.append(val_loss)
            in_size.append([i, a])
print(k_scores)
print("Best activation functions for each layer:", possible[(k_scores.index((min(k_scores)))) % len(possible)],
      "\n Best input sizes:", "840", in_size[k_scores.index((min(k_scores)))][0], in_size[k_scores.index((min(k_scores)))][1], "1")
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(250, activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][0]))
model.add(tf.keras.layers.Dense(in_size[k_scores.index((min(k_scores)))][0], activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][1]))
model.add(tf.keras.layers.Dense(in_size[k_scores.index((min(k_scores)))][1], activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][2]))
model.add(tf.keras.layers.Dense(1, activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][3]))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy", "mse"])
model.fit(x, y, batch_size=16, epochs=5)
predictions = model.predict([x_test])
print(predictions)
print(predictions.shape)
The output layer size is different: you want size (32, 1), but the model's output is (32, 6, 6, 1).
Insert Flatten() between MaxPooling2D and Dense(); that should fix the shape mismatch.
And here is a tip: the .evaluate method is only for an already trained model, so you should call .fit first.
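Here is a minimal sketch of what I mean, reusing the 24x24x1 arrays (xtraindata, ytraindata, xtestdata, ytestdata) from your code; the activation and loss choices are just placeholders:

import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(256, kernel_size=(3, 3), padding='same', activation='relu', input_shape=(24, 24, 1)),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(512, kernel_size=(3, 3), padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),   # collapses the (6, 6, 512) feature map to a vector, so the final output is (batch, 1)
    tf.keras.layers.Dense(250, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(optimizer='sgd', loss='mse')
model.fit(xtraindata, ytraindata, batch_size=32, epochs=5)   # train first ...
model.evaluate(xtestdata, ytestdata, verbose=1)              # ... then evaluate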
I want to define a trainable scalar in my models. In TensorFlow this is done using tf.Variable, and in Keras, keras.backend.variable is supposed to behave the same way. However, when I use model.fit, Keras does not change the variable during the optimization process. Does anyone know why?
To test, please uncomment RUN_ON = "tensorflow" or RUN_ON = "keras" to run on either engine.
import numpy as np
import keras as k
import tensorflow as tf
import matplotlib.pyplot as plt
# RUN_ON = "tensorflow"
# RUN_ON = "keras"
b_true = 3.0
w_true = 5.0
x_true = np.linspace(0.0, 1.0, 1000).reshape(-1, 1)
y_true = x_true * w_true + b_true
ids = np.arange(0, x_true.shape[0])
if RUN_ON=="keras":
x = k.Input((1,), dtype="float32", name="x")
Fx = k.layers.Dense(1, use_bias=False, name="Fx")(x)
b = k.backend.variable(1.0, name="b")
y = k.layers.Lambda(lambda x: x+b, name="Add")(Fx)
model = k.Model(inputs=[x], outputs=[y])
model.compile("adam", loss="mse")
# model.summary()
model.fit(x_true, [y_true], epochs=100000, batch_size=1000)
y_pred = model.predict(x_true)
elif RUN_ON=="tensorflow":
x = tf.placeholder("float32", shape=[None, 1], name="x")
Fx = tf.layers.Dense(1, use_bias=False, name="Fx")(x)
b = tf.Variable(1.0, name="b")
y = Fx + b
yp = tf.placeholder("float32", shape=[None, 1], name="y")
loss = tf.reduce_mean(tf.square(yp - y))
opt = tf.train.AdamOptimizer(0.001).minimize(loss)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(100000):
np.random.shuffle(ids)
opt_out, loss_val, b_val = sess.run([opt, loss, b], feed_dict={x: x_true[ids], yp: y_true[ids]})
print("epoch={:d} loss={:e} b_val={:f}".format(i, loss_val, b_val))
if loss_val < 1.0e-9:
break
y_pred = sess.run([y], feed_dict={x: x_true, yp: y_true})[0]
else:
raise ValueError('`RUN_ON` should be either `keras` or `tensorflow`.')
plt.plot(x_true, y_true, '--b', linewidth=4)
plt.plot(x_true, y_pred, 'r')
plt.show()
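A commonly used alternative for a trainable scalar in Keras is to register it as a layer weight via add_weight, so the optimizer actually tracks it; a minimal sketch using tf.keras (the layer and names here are illustrative, not the code above):

import tensorflow as tf

class AddScalar(tf.keras.layers.Layer):
    """Adds a single trainable scalar to its input."""
    def build(self, input_shape):
        # add_weight registers the scalar with the layer, so model.fit will update it
        self.b = self.add_weight(name="b", shape=(), initializer="ones", trainable=True)

    def call(self, inputs):
        return inputs + self.b

x_in = tf.keras.Input((1,), name="x")
y_out = AddScalar(name="Add")(tf.keras.layers.Dense(1, use_bias=False, name="Fx")(x_in))
model = tf.keras.Model(x_in, y_out)
model.compile("adam", loss="mse")
# after fitting on the linear data above, model.get_layer("Add").b should approach b_true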
Based on this post, I tried to create another model where I'm adding both categorical and continuous variables.
Please find the code below:
from __future__ import print_function
import pandas as pd
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import LabelEncoder

if __name__ == '__main__':
    # 1 categorical input feature and a binary output
    df = pd.DataFrame({'cat2': np.array(['o', 'm', 'm', 'c', 'c', 'c', 'o', 'm', 'm', 'm']),
                       'num1': np.random.rand(10),
                       'label': np.array([0, 0, 1, 1, 0, 0, 1, 0, 1, 1])})
    encoder = LabelEncoder()
    encoder.fit(df.cat2.values)
    X1 = encoder.transform(df.cat2.values).reshape(-1, 1)
    X2 = np.array(df.num1.values).reshape(-1, 1)
    # X = np.concatenate((X1,X2), axis=1)
    Y = np.zeros((len(df), 2))
    Y[np.arange(len(df)), df.label.values] = 1
    # Neural net parameters
    training_epochs = 5
    learning_rate = 1e-3
    cardinality = len(np.unique(X))
    embedding_size = 2
    input_X_size = 1
    n_labels = len(np.unique(Y))
    n_hidden = 10
    # Placeholders for input, output
    cat2 = tf.placeholder(tf.int32, [None], name='cat2')
    x = tf.placeholder(tf.float32, [None, 1], name="input_x")
    y = tf.placeholder(tf.float32, [None, 2], name="input_y")
    embed_matrix = tf.Variable(
        tf.random_uniform([cardinality, embedding_size], -1.0, 1.0),
        name="embed_matrix"
    )
    embed = tf.nn.embedding_lookup(embed_matrix, cat2)
    inputs_with_embed = tf.concat([x, embedding_aggregated], axis=2, name="inputs_with_embed")
    # Neural network weights
    h = tf.get_variable(name='h2', shape=[inputs_with_embed, n_hidden],
                        initializer=tf.contrib.layers.xavier_initializer())
    W_out = tf.get_variable(name='out_w', shape=[n_hidden, n_labels],
                            initializer=tf.contrib.layers.xavier_initializer())
    # Neural network operations
    #embedded_chars = tf.nn.embedding_lookup(embeddings, x)
    layer_1 = tf.matmul(inputs_with_embed, h)
    layer_1 = tf.nn.relu(layer_1)
    out_layer = tf.matmul(layer_1, W_out)
    # Define loss and optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=out_layer, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    # Initializing the variables
    init = tf.global_variables_initializer()
    # Launch the graph
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(training_epochs):
            avg_cost = 0.
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost],
                            feed_dict={x: X2, cat2: X1, y: Y})
        print("Optimization Finished!")
But I'm getting the following error. It seems I'm not concatenating the continuous variable and the embedding properly, but I don't understand how to fix it.
Could someone please guide me?
ValueError: Shape must be at least rank 3 but is rank 2 for 'inputs_with_embed_2' (op: 'ConcatV2') with input shapes: [?,1], [?,2], [] and with computed input tensors: input[2] = <2>.
Thanks!
If by embedding_aggregated you mean embed (probably a typo): the error is that there is no axis=2 in your case; it should be axis=1.
inputs_with_embed = tf.concat([x, embed], axis=1, name="inputs_with_embed")
embed has shape [None, embedding_dimension] and x has shape [None, 1].
They are both 2D tensors, so you only have access to axis=0 and axis=1 (indexing starts at 0). Therefore, to get inputs_with_embed with shape [None, embedding_dimension + 1], you need to concatenate on axis=1.
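A tiny standalone illustration of the shapes involved (using embedding_size=2 as in your code; the zero values are just dummies):

import tensorflow as tf

x = tf.zeros([5, 1])       # continuous input: [batch, 1]
embed = tf.zeros([5, 2])   # embedding lookup output: [batch, embedding_size]
inputs_with_embed = tf.concat([x, embed], axis=1)
print(inputs_with_embed.shape)  # (5, 3) -> [batch, embedding_size + 1]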