I am new to TensorFlow and am trying to use the new version 2.0. I understand what is happening in the 1.3 tutorial, but a lot of things have been deprecated. If someone could please make the code below work with version 2.0, it would help me understand how to convert what is being done in the tutorials to the new version of TensorFlow.
Everything commented out doesn't work due to deprecations.
I do not want to use any tensorflow versions below 2.0
# Single dense layer: n_features inputs feeding n_dense_neurons units.
# NOTE(review): this fragment uses `tf` and `np` without importing them;
# presumably the imports were omitted from the excerpt.
n_features = 10
n_dense_neurons = 3
# TF 1.x-style input placeholder (commented out: reported as not working in 2.0).
#x = tf.placeholder(tf.float32,(None, n_features))
# Weights are drawn from a random normal; biases start at one.
W = tf.Variable(tf.random.normal([n_features,n_dense_neurons]))
b = tf.Variable(tf.ones([n_dense_neurons]))
# Intended forward pass: a = sigmoid(matmul(x, W) + b).
#xW = tf.matmul(x,W)
#z = tf.add(xW,b)
#a = tf.sigmoid(z)
# TF 1.x-style initialisation and session run with a random 1 x n_features
# input (commented out: reported as not working in 2.0).
#init = tf.global_variables_initializer()
#with tf.Session() as sess:
#layer_out = sess.run(a,feed_dict={x:np.random.random([1,n_features])})
It's pretty straightforward: forget about the tf.Session object and about tf.placeholder, and just execute the code line by line. It works! (This is eager execution, which is the default in TensorFlow 2.0.)
Thus, you don't have to declare a placeholder; you just use an input value directly (I added a tf.cast operation because np.random.random returns doubles, but the TensorFlow MatMul operation here works with tf.float32 values).
import numpy as np
import tensorflow as tf
# Layer dimensions: number of input features and number of dense units.
n_features = 10
n_dense_neurons = 3

# Under eager execution a concrete tensor takes the place of a placeholder.
# np.random.random produces float64, so cast to float32 to match W.
x = tf.cast(np.random.random([1, n_features]), tf.float32)

# Trainable parameters: random-normal weights and all-ones biases.
W = tf.Variable(tf.random.normal([n_features, n_dense_neurons]))
b = tf.Variable(tf.ones([n_dense_neurons]))

# Forward pass, evaluated immediately: affine transform, then sigmoid.
xW = x @ W
z = xW + b
a = tf.math.sigmoid(z)
The graph compiles, but the objective function produces NaN output values (note that the input is randomly generated data).
# reference https://www.tensorflow.org/probability/api_docs/python/tfp/math/ode/Solver
# reference https://www.tensorflow.org/probability/api_docs/python/tfp/optimizer/differential_evolution_minimize
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability import distributions as tfd
population_size = 40
initial_population = (tf.random.normal([population_size]),
pi = tf.constant(3.14159)
t_init, t0, t1 = 0., 0.5, 1.
def ode_fn(t, x):
x, y = initial_population
return -(tf.math.cos(x) * tf.math.cos(y) *
tf.math.exp(-(x-pi)**2 - (y-pi)**2))
def gradients(x):
results = tfp.math.ode.BDF().solve(ode_fn, t_init, initial_population[0],
solution_times=[t0, t1])
# The objective function and the gradient.
optim_results = tfp.optimizer.differential_evolution_minimize(
objective_value = optim_results[4]
DirSampleNoise = tfd.Dirichlet([tf.math.reduce_mean(objective_value), tf.math.reduce_std(objective_value)])
# Check that the argmin is close to the actual value.
# Print out the total number of function evaluations it took. Should be 5.
Current developments can be found at this link: https://gitlab.com/emmanuelnsanga/bayes-distil-model/-/blob/main/optimizer_non-parametric.py
I'm trying to fit a beta distribution to the results of a survey with discrete scores (1, 2, 3, 4, 5).
For that to work I need a working log_prob of a Beta in TensorFlow probability. However, there is a problem with how batching is handled in Beta.
Here is a minimal example that gives me an error:
InvalidArgumentError: Shapes of a and x are inconsistent: [3] vs. [1000,1] [Op:Betainc]
The same code seems to work ok with Normal distribution...
What am I doing wrong here?
import numpy as np
import tensorflow_probability as tfp
tfd = tfp.distributions
#Generate fake data
data = np.random.beta(2.,2.,1000)*5.0
data = np.ceil(data)
data = data[:,None]
# Create a batch of three Beta distributions.
alpha = np.array([1., 2., 3.]).astype(np.float32)
beta = np.array([1., 2., 3.]).astype(np.float32)
bt = tfd.Beta(alpha, beta)
#bt = tfd.Normal(loc=alpha, scale=beta)
#Scale beta to 0-5
scbt = tfd.TransformedDistribution(
# quantize beta to (1,2,3,4,5)
qdist = tfd.QuantizedDistribution(distribution=scbt,low=1,high=5)
#calc log_prob for 3 distributions
TensorFlow 2.0.0
tensorflow_probability 0.8.0
As suggested by Chris Suter, here is a solution that broadcasts by hand:
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions
from matplotlib import pyplot as plt
#Generate fake data
numdata = 100
numbeta = 3
data = np.random.beta(2.,2.,numdata)
data *= 5.0
data = np.ceil(data)
data = data[:,None].astype(np.float32)
#alpha and beta [[1., 2., 3.]]
alpha = np.expand_dims(np.arange(1,4),0).astype(np.float32)
beta = np.expand_dims(np.arange(1,4),0).astype(np.float32)
#tile to compensate for betainc
alpha = tf.tile(alpha,[numdata,1])
beta = tf.tile(beta,[numdata,1])
data = tf.tile(data,[1,numbeta])
bt = tfd.Beta(concentration1=alpha, concentration0=beta)
scbt = tfd.TransformedDistribution(
# quantize beta to (1,2,3,4,5)
qdist = tfd.QuantizedDistribution(distribution=scbt,low=1,high=5)
#calc log_prob for numbeta number of distributions
EDIT2: The above solution does not work when I try to apply it in MCMC sampling.
The new code looks like this:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
from time import time
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions
import numpy as np
#Generate fake data
numdata = 100
data = np.random.beta(2.,2.,numdata)
data *= 5.0
data = np.ceil(data)
data = data[:,None].astype(np.float32)
def sample_chain():
#Parameters of MCMC
num_burnin_steps = 300
num_results = 200
num_chains = 50
step_size = 0.01
#data tensor
outcomes = tf.convert_to_tensor(data, dtype=tf.float32)
def modeldist(alpha,beta):
bt = tfd.Beta(concentration1=alpha, concentration0=beta)
scbt = tfd.TransformedDistribution(
# quantize beta to (1,2,3,4,5)
qdist = tfd.QuantizedDistribution(distribution=scbt,low=1,high=5)
return qdist
def joint_log_prob(con1,con0):
#manual broadcast
tcon1 = tf.tile(con1[None,:],[numdata,1])
tcon0 = tf.tile(con0[None,:],[numdata,1])
toutcomes = tf.tile(outcomes,[1,num_chains])
#model distribution with manual broadcast
dist = modeldist(tcon1,tcon0)
#joint log prob
return tf.reduce_sum(dist.log_prob(toutcomes),axis=0)
kernel = tfp.mcmc.HamiltonianMonteCarlo(
kernel = tfp.mcmc.SimpleStepSizeAdaptation(
inner_kernel=kernel, num_adaptation_steps=int(num_burnin_steps * 0.8))
init_state = [tf.identity(tf.random.uniform([num_chains])*10.0,name='init_alpha'),
samples, [step_size, is_accepted] = tfp.mcmc.sample_chain(
trace_fn=lambda _, pkr: [pkr.inner_results.accepted_results.step_size,
return samples
samples = sample_chain()
This ends up with an error message:
ValueError: Encountered None gradient. fn_arg_list: [tf.Tensor 'init_alpha:0' shape=(50,) dtype=float32, tf.Tensor 'init_beta:0' shape=(50,) dtype=float32] grads: [None, None]
Sadly tf.math.betainc doesn't support broadcasting at present, which causes the cdf computation, which QuantizedDistribution calls, to fail. If you must use Beta, the only workaround I can think of is to broadcast "manually" by tiling the data and Beta params.
Alternatively, you might be able to get away with using the Kumaraswamy distribution, which is similar to Beta but has some nicer analytical properties.
I am trying to export a Keras TF model, without any luck:
import tensorflow as tf
import numpy as np
ssValues = np.zeros(shape=(640,800,6),dtype=np.float16)
ssValues += 3.
ssKerasConstant = tf.keras.backend.constant(value=ssValues, dtype=tf.dtypes.float16, shape=(1,640,800,6));
inputLayer = tf.keras.Input(shape=(640,800,6),
ssConstant = tf.constant(ssValues, dtype=tf.dtypes.float16, shape=(1,640,800,6), name='ss')
ssm = tf.keras.layers.Multiply()([inputLayer,ssKerasConstant])
model = tf.keras.models.Model(inputs=inputLayer, outputs=ssm)
tf.keras.experimental.export_saved_model(model, '~/models/model7.pb')
and I get the following error:
graph = inputs[0].graph
IndexError: list index out of range
even though I am able to run predictions with the model.
You can save the model successfully by replacing the last line of your code,
tf.keras.experimental.export_saved_model(model, '~/models/model7.pb')
with the below line:
tf.saved_model.save(model, '~/models/model7.pb')
This works in TensorFlow version 2.0. Please find the Gist here.
I want to wrap a TensorFlow function in a Keras Lambda layer as per the docs. However, my inputs are complex64. Here is a more complete example of the code I am using to replicate this behavior:
import numpy as np
from keras.models import Model
from keras.layers import Input, Lambda
import tensorflow as tf
np.set_printoptions(precision=3, threshold=3, edgeitems=3)
def layer0(inp):
    """Add the two input tensors and divide the sum by its own real part.

    Args:
        inp: Indexable pair of tensors; ``inp[0]`` and ``inp[1]`` are added.
            Presumably both are complex64 (the driver in this file builds
            complex64 inputs) -- confirm for other callers.

    Returns:
        ``z / real(z)`` where ``z = inp[0] + inp[1]``; the real part is cast
        to complex64 before the division.
    """
    z = inp[0] + inp[1]
    # Use tf.math.real: the top-level tf.real alias is not available in
    # TensorFlow 2.x. The real part is cast back to complex64 so that both
    # operands of the division share a dtype.
    num = tf.cast(tf.math.real(z), tf.complex64)
    return z / num
if __name__ == "__main__":
    # Full array shape including a batch axis of one; the Keras inputs are
    # declared with the per-sample shape (batch axis dropped).
    shape = (1, 10, 5)
    sample_shape = shape[1:]
    z1 = Input(shape=sample_shape, dtype=np.complex64)
    z2 = Input(shape=sample_shape, dtype=np.complex64)

    # Alternative call that passes output_shape explicitly:
    #s = Lambda(layer0, output_shape=shape)([z1, z2])
    s = Lambda(layer0)([z1, z2])
    model = Model(inputs=[z1, z2], outputs=s)

    # Random complex64 test inputs: normal real part plus normal imaginary part.
    z1_in = (np.random.normal(size=shape)
             + 1j * np.random.normal(size=shape)).astype('complex64')
    z2_in = (np.random.normal(size=shape)
             + 1j * np.random.normal(size=shape)).astype('complex64')

    s_out = model.predict([z1_in, z2_in])
which gives the following error:
Traceback (most recent call last):
File "complex_lambda.py", line 32, in <module>
s = Lambda(layer0)([z1, z2])
File "complex_lambda.py", line 18, in layer0
return z/num
TypeError: x and y must have the same dtype, got tf.float32 != tf.complex64
However, if I use the commented line instead:
s = Lambda(layer0, output_shape=shape)([z1, z2])
The code runs just fine. It seems that "output_shape=(...)" is necessary to make the division in the lambda function work. While this solution solves the problem for a single output variable, it doesn't work when having multiple outputs.
I cannot replicate your issue. Which version of tensorflow are you using? Are you using the keras package, or the tensorflow.keras submodule ?
At any rate, I think you can fix your issue by specifying the dtype of the Lambda layer: s = Lambda(lambda x: tf.math.real(x[0] + x[1]), dtype='complex64')([z1, z2])
I have trained a model with caffe tools under bin and now I am trying to do testing using python script, I read in an image and preprocess it myself (as I did for my training dataset) and I load the pretrained weights to the net, but I am almost always (99.99% of the time) receiving the same result -0- for every test image. I did consider that my model might be overfitting but after training a few models, I have come to realize the labels I get from predictions are most likely the cause. I have also increased dropout and took random crops to overcome overfitting and I have about 60K for training. The dataset is also roughly balanced. I get between 77 to 87 accuracy during evaluation step of training (depending on how I process data, what architecture I use etc)
Excuse my super hacky code; I have been away from Caffe testing for some time, so I suspect the problem is in how I pass the input data to the network, but I can't put my finger on it:
import h5py, os
import sys
from caffe.io import oversample
from caffe.io import resize_image
import caffe
from random import randint
import numpy as np
import cv2
import matplotlib.pyplot as plt
from collections import Counter as Cnt
# Evaluation script: runs a trained Caffe network over the images listed in a
# text file, using oversampled crops of each image, and writes one result line
# per image.
# NOTE(review): indentation was lost in this excerpt and some lines appear to
# be missing, so the nesting implied by the comments below is inferred.
# NOTE(review): meanImg is loaded but never referenced again in the visible
# code -- confirm whether mean subtraction was meant to be applied to the input.
meanImg = cv2.imread('/home/caffe/data/Ch/Final_meanImg.png')
model_def = '/home/X/Desktop/caffe-caffe-0.16/models/bvlc_googlenet/deploy.prototxt'
model_weights = '/media/X/DATA/SDet/Google__iter_140000.caffemodel'
# load the model
net = caffe.Net(model_def, # defines the structure of the model
model_weights, # contains the trained weights
caffe.TEST) # use test mode (e.g., don't perform dropout)
# Open the test list for reading and the results file for writing.
with open( '/home/caffe/examples/sdet/SDet/test_random.txt', 'r' ) as T, open('/media/X/DATA/SDet/results/testResults.txt','w') as testResultsFile:
# Running counters used to compute accuracy over the images seen so far.
readImgCounter = 0
runningCorrect = 0
runningAcc = 0.0
#testResultsFile.write('filename'+' '+'prediction'+' '+'GT')
lines = T.readlines()
for i,l in enumerate(lines):
# Each line is split on a single space: sp[0] is used as the image name and
# sp[1] is later compared (as an int) with the prediction.
sp = l.split(' ')
# The part of the image name before the first underscore selects the image
# sub-directory.
video = sp[0].split('_')[0]
impath = '/home/caffe/data/Ch/images/'+video+'/'+sp[0] +'.jpg'
img = cv2.imread(impath)
resized_img = resize_image(img, (255,255))
oversampledImages = oversample([resized_img], (224,224)) #5 crops x 2 mirror flips = return 10 images
# Buffers for all ten crops and for the single crop fed to the network.
transposed_img = np.zeros( (10, 3, 224, 224), dtype='f4' )
tp = np.zeros( (1, 3, 224, 224), dtype='f4' )
predictedLabels = []
# NOTE(review): the upper bound shape[0]-1 means the last crop is never
# visited -- confirm whether that is intended.
for j in range(0,oversampledImages.shape[0]-1):
# Move the last axis to the front (presumably HWC -> CHW, to match the
# (3, 224, 224) buffers above).
transposed_img[j] = oversampledImages[j].transpose((2,0,1))
tp[0] = transposed_img[j]
# The crop is written into the 'data' blob and also passed to forward() as
# the data keyword argument.
net.blobs['data'].data[0] = tp
pred = net.forward(data=tp)
# Majority vote over the labels collected for this image.
# NOTE(review): nothing in the visible code appends to predictedLabels (pred is
# unused), so the code that extracts a label from pred seems to be missing
# from this excerpt; with an empty list, most_common(1)[0] raises IndexError.
prediction,num_most_common = Cnt(predictedLabels).most_common(1)[0]
readImgCounter = readImgCounter + 1
if (prediction == int(sp[1])):
runningCorrect = runningCorrect + 1
# NOTE(review): under Python 2 this is integer division -- TODO confirm the
# interpreter version.
runningAcc = runningCorrect / readImgCounter
# Result line: image name, predicted label, and sp[1] written as-is.
testResultsFile.write(sp[0]+' '+str(prediction)+' '+sp[1])
I have fixed this problem eventually. I am not 100% sure what worked but it was most likely changing the bias to 0 while learning.