I am using this code. I have modified it to work with a car (0 Left, 1 Straight, 2 Right).
I would like to add some observations, such as Destination (XY), Car Location (XY), bearing (angle), distance_to_destination, and bearing_of_destination, in the hope that the car can find its way to the destination.
I have spent most of the day trying to get this to work, but have failed in many different ways. The crux of the problem seems to be getting the input shape to match.
I think the closest that I have got is this:
def __init__(self):
low = np.array([-5, -5, -5, -5, -5])
high = -np.array([ 5, 5, 5, 5, 5])
self.observation_space = gym.spaces.Box(low, high, dtype=np.float32)
self.action_space = gym.spaces.Box(low, high, dtype=np.float32)
def reset(self):
    self.state = Myarray  # [[1,2], [1,2], [1,2], [1,2], [1,2]]  # result.BearingToDest
self.shower_length = 60000
return self.state
def build_model(states, actions):
model = Sequential()
    model.add(Dense(units=24, activation='relu', input_shape=[2]))
model.add(Dense(units=24, activation='relu'))
model.add(Dense(actions, activation='linear'))
return model
When I run it and the model loads, the error message is:
ValueError: Error when checking input: expected dense_input to have 2 dimensions, but got array with shape (1, 1, 1, 2)
Training for 1000000 steps ...
Resetting ML
Interval 1 (0 steps performed)
First, just to make sure, the state you return in reset should look something like:
self.state = np.array([x1, y1, x2, y2, angle])
I don't see the step function in your code, but I assume you also modified it to return self.state?
Also, your action space is the same as the observation space; that is not normal, is it? Given what you said, there are 3 actions, so it should be:
self.action_space = Discrete(3)
Without the full code, it is not really possible to find the cause of your problem. Could you show it?
I also noticed a minus sign which I find strange (although it seems unrelated to your main problem):
high = -np.array([ 5, 5, 5, 5, 5])
^
HERE
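Putting those two points together, here is a minimal sketch of what __init__ and reset could look like (the class name and the bounds are placeholders; I am assuming the five scalar observations you listed, stored as one flat array):

import numpy as np
import gym
from gym.spaces import Discrete

class CarEnv(gym.Env):
    def __init__(self):
        # five scalar observations: car x/y, destination x/y, bearing
        low = np.array([-5, -5, -5, -5, -5], dtype=np.float32)
        high = np.array([5, 5, 5, 5, 5], dtype=np.float32)  # no stray minus sign
        self.observation_space = gym.spaces.Box(low, high, dtype=np.float32)
        self.action_space = Discrete(3)  # 0 Left, 1 Straight, 2 Right

    def reset(self):
        # a 1-D state that matches the observation space
        self.state = np.zeros(5, dtype=np.float32)
        self.shower_length = 60000
        return self.state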
The code that ran for me is below. It will take some tweaks to get everything learning in the way that I want, but at least it's running :)
import gym
from gym import Env
import numpy as np
from gym.spaces import Discrete, Box, Dict
import random
Myarray = np.array([[3, 2]])
x1 = y1 = x2 = y2 = angle = 1
# create a custom class
class ShowerEnv(Env):
def __init__(self, size=5):
self.size = size
high = np.array([[600, 600, 600, 600, 360]])
low = np.array([[-1, -1, -1, -1, -360]])
self.state = np.zeros((1, 5), dtype=np.float32)
self.x1, self.y1, self.x2, self.y2, self.angle = 1, 1, 1, 1, 1
self.action_space = Discrete(3)
self.observation_space = gym.spaces.Box(low, high, dtype=np.float32, shape=(1, 5))
        self.shower_length = 60  # episode length (note: reset() below overrides this with 60000)
def step(self, shower_action):
x1 = y1 = x2 = y2 = angle = 1
self.shower_length -= 1
        # this line sends a protobuf command to the car program and gets a response, i.e. the true environment
        result = client.ChangeCoarse(shower_action - 1, True)
self.state = np.array([result.X, result.Y, result.DesX, result.DesY, result.BearingToDest])
        if result.ResetML:
self.reset()
# x1 = y1 = x2 = y2 = angle = 1
# self.state = np.array([x1, y1, x2, y2, angle])
        # the reward is taken from the protobuf response
reward = result.BearingToDest
# reward = 1 # just put in to make the code run
if self.shower_length <= 0:
done = True
else:
done = False
        info = {}
return self.state, reward, done, info
def render(self):
pass
def reset(self):
result = client.ChangeResetDest()
self.shower_length = 60000
self.state = np.array([result.X, result.Y, result.DesX, result.DesY, result.BearingToDest])
print("Resetting ML")
return self.state
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
env = ShowerEnv()
states = env.observation_space.shape
actions = env.action_space.n
def build_model(states, actions):
model = Sequential()
model.add(Dense(units=24, activation='relu', input_shape=states))
model.add(Dense(units=24, activation='relu'))
model.add(Dense(actions, activation='linear'))
model.add(Flatten())
return model
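For reference, the usual keras-rl recipe flattens before the Dense stack rather than after the output layer; a sketch, assuming window_length=1 as in the memory below:

def build_model(states, actions):
    model = Sequential()
    # keras-rl prepends a window dimension to every observation,
    # so collapse (window_length,) + states to a flat vector first
    model.add(Flatten(input_shape=(1,) + states))
    model.add(Dense(units=24, activation='relu'))
    model.add(Dense(units=24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model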
# model =build_model(states,actions)
# model.compile(optimizer=Adam(learning_rate=1e-3), metrics=['mae'])
# del model
#print(model.summary())
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
import grpc
import Message_pb2_grpc as pb2_grpc, Message_pb2 as pb2
class UnaryClient(object):
"""
Client for gRPC functionality
"""
def __init__(self):
self.host = 'localhost'
self.server_port = 50052
# instantiate a channel
self.channel = grpc.insecure_channel(
'{}:{}'.format(self.host, self.server_port))
# bind the client and the server
self.stub = pb2_grpc.UnaryStub(self.channel)
def ChangeCoarse(self, val, TF):
"""
Client function to call the rpc for GetServerResponse
"""
message = pb2.MessageTo(MoveBoat=True, MoveBoatStep=TF, BoatDelta=val)
# (message="message", Val=9, MoveBoat=True,MoveBoatStep=True, SailAngle=4, BoatAngle=5.79878, SailDelta=0, BoatDelta=-1)
# print(f'{message}')
return self.stub.GetServerResponse(message)
def ChangeSail(self, val, TF):
"""
Client function to call the rpc for GetServerResponse
"""
message = pb2.MessageTo(MoveBoat=True, MoveBoatStep=TF, SailDelta=val)
# print(f'{message}')
return self.stub.GetServerResponse(message)
def ChangeWindDirection(self, val, TF):
"""
Client function to call the rpc for GetServerResponse
"""
message = pb2.MessageTo(MoveBoat=True, MoveBoatStep=TF, WindDelta=val)
# print(f'{message}')
return self.stub.GetServerResponse(message)
def ChangeResetDest(self):
"""
Client function to call the rpc for GetServerResponse
"""
message = pb2.MessageTo(MoveBoat=True, ResetTarget=True)
# (message="message", Val=9, MoveBoat=True,MoveBoatStep=True, SailAngle=4, BoatAngle=5.79878, SailDelta=0, BoatDelta=-1)
# print(f'{message}')
return self.stub.GetServerResponse(message)
client = UnaryClient()
result = client.ChangeCoarse(90, True)
def build_agent(model, actions):
policy = BoltzmannQPolicy()
memory = SequentialMemory(limit=900000, window_length=1)
dqn = DQNAgent(model=model, memory=memory, policy=policy, nb_actions=actions, nb_steps_warmup=100,
target_model_update=1e-2)
return dqn
dqn = build_agent(build_model(states, actions), actions)
dqn.compile(optimizer=Adam(learning_rate=1e-5), metrics=['mae'])
dqn.fit(env, nb_steps=1000000, visualize=False, verbose=1)
Related
I'm currently learning deep reinforcement learning and I wanted to apply what I learned to a problem from gym using Keras.
During training I realized that it is too slow; after checking the reason, I saw that the fit function takes most of the time.
Running each episode takes 3-4 minutes.
Is there something wrong at what I'm doing? Or can you suggest an improvement?
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.optimizers import Adam
from collections import deque
import random
import gym
import datetime
class DQN():
def __init__(self, env):
self.env = env
self.memory = deque(maxlen=2000)
self.gamma = 0.98
self.epsilon = 1
self.epsilon_min = 0.01
self.epsilon_decay = 0.998
self.learning_rate = 0.001
self.model = self.create_model()
self.target_model = self.create_model()
def create_model(self):
model = keras.Sequential()
state_shape = self.env.observation_space.shape
model.add(keras.layers.Dense(48, activation="relu", input_dim=state_shape[0]))
model.add(keras.layers.Dense(24, activation="relu"))
model.add(keras.layers.Dense(self.env.action_space.n, activation="relu"))
model.compile(loss="mse", optimizer=Adam(lr=self.learning_rate))
return model
def remember(self, state, action, reward, new_state, done):
self.memory.append([state, action, reward, new_state, done])
def replay(self):
batch_size = 32
if len(self.memory) < batch_size:
return
samples = random.sample(self.memory, batch_size)
# states, actions, rewards, states_, dones = samples
# targets = self.target_model.predict(states)
# _states = [i for i in range(len(samples))]
# targets = [[0 for j in range(self.env.action_space.n)] for i in range(len(samples))]
_states = np.zeros((len(samples), 8))
targets = np.zeros((len(samples), self.env.action_space.n))
for i, sample in enumerate(samples):
state, action, reward, new_state, done = sample
_states[i] = state
# target = self.target_model.predict(state)
if done:
targets[i][action] = reward
else:
Q_future = max(self.target_model.predict(new_state)[0])
targets[i][action] = reward + Q_future*self.gamma
self.model.fit(_states, targets, epochs=1, verbose=0)
# for sample in samples:
# state, action, reward, new_state, done = sample
# target = self.target_model.predict(state)
# if done:
# target[0][action] = reward
# else:
# Q_future = max(self.target_model.predict(new_state)[0])
# target[0][action] = reward + Q_future*self.gamma
# start_time = datetime.datetime.now()
# self.model.fit(state, target, epochs=1, verbose=0)
# end_time = datetime.datetime.now()
# print("--fit--")
# print(end_time-start_time)
def target_train(self):
weights = self.model.get_weights()
target_weights = self.target_model.get_weights()
for i in range(len(target_weights)):
target_weights[i] = weights[i]
self.target_model.set_weights(target_weights)
def act(self, state):
self.epsilon *= self.epsilon_decay
self.epsilon = max(self.epsilon_min, self.epsilon)
if np.random.random() < self.epsilon:
return self.env.action_space.sample()
return np.argmax(self.model.predict(state)[0])
def save_model(self, fn):
self.model.save(fn)
def act_eval(self, state):
return np.argmax(self.model.predict(state)[0])
def evaluation(self, n_eval=10):
total_reward = 0
for _ in range(n_eval):
self.env.reset()
cur_state = self.env.reset().reshape(1,8)
done = False
while not done:
action = self.act_eval(cur_state)
new_state, reward, done, _ = self.env.step(action)
total_reward += reward
cur_state = new_state.reshape(1,8)
return total_reward / n_eval
def main():
save_path = "policies/"
env = gym.make("LunarLander-v2")
trials = 2000
trial_len = 500
update_target_network = 500
agent = DQN(env=env)
for trial in range(trials):
cur_state = env.reset().reshape(1,8)
time_step_cntr = 0
# check execution durations
dur_replay = 0
dur_step = 0
dur_act = 0
for step in range(trial_len):
print("Trial {0}, step {1}".format(trial, step))
action = agent.act(cur_state) #
new_state, reward, done, _ = env.step(action) #
new_state = new_state.reshape(1,8)
agent.remember(cur_state, action, reward, new_state, done)
# learn from experience
agent.replay() #
# after "update_target_network" steps, update target network
if time_step_cntr % update_target_network == 0:
agent.target_train()
time_step_cntr += 1
cur_state = new_state
if done:
break
# print("Duration replay {0}, duration act {1}, duration step {2}".format(dur_replay, dur_act, dur_step))
# at each N steps, evaluate
print("Evaluation over 10 episodes", agent.evaluation())
print("Trial #{0} completed.".format(trial))
# # print the progress
# if trial % 100 == 0:
# print("Trial #{0} completed.".format(trial))
# save the model
# if trial % 20 == 0:
agent.save_model(save_path + str(trial) + "__.model")
agent.save_model(save_path + "_final" + "__.model")
if __name__ == "__main__":
main()
Your problem is not in the fit call, but in the loop that you have in the replay() method. Try to replace loops with numpy operations in cases like this; it makes the computation much faster.
Replace your replay method with the following one and let me know if it works faster for you:
def replay(self):
batch_size = 32
if len(self.memory) >= batch_size:
# Draw a sample
samples = random.sample(self.memory, batch_size)
# Prepare the batch
state, action, reward, new_state, done = zip(*samples)
next_state = np.concatenate(new_state)
done = np.array(done)[:,None]
state = np.concatenate(state)
reward = np.array(reward)[:,None]
q_future = self.target_model.predict(next_state)
targets = reward + self.gamma*np.max(q_future, axis=1, keepdims=True)*(1-done)
# Fit the model
self.model.fit(state, targets, epochs=1, verbose=0)
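Most of the speed-up comes from calling target_model.predict once per batch of 32 instead of 32 times on single states; each predict call carries a fixed overhead that dwarfs the actual computation at this size. One aside you may want to check: targets here has shape (batch, 1) while the model outputs one Q-value per action, so the MSE loss broadcasts the same target over every action. To keep the per-action semantics of your original loop, the last two lines could instead look like this (a sketch reusing the arrays already built inside replay):
        action = np.array(action)
        td = reward + self.gamma*np.max(q_future, axis=1, keepdims=True)*(1-done)
        q_targets = self.model.predict(state)
        q_targets[np.arange(len(samples)), action] = td[:, 0]
        self.model.fit(state, q_targets, epochs=1, verbose=0)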
I am performing NER in TensorFlow 2, but when I perform the F1Score calculation through TensorFlow Addons it keeps giving me this error:
TypeError: Expected int32 passed to parameter 'y' of op 'Greater', got 1e-12 of type 'float' instead. Error: Expected int32, got 1e-12 of type 'float' instead.
The function that is producing this error is model_fn.
The first part of the code, for data preparation:
__author__ = "Guillaume Genthial"
import functools
import json
import logging
from pathlib import Path
import sys
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow_addons as tf_ad
from sklearn.metrics import f1_score
# Directory path
DATADIR = '/content/drive/My Drive/7th Semester/FYP Related Stuff /data/example'
# Logging
Path('results').mkdir(exist_ok=True)
tf.autograph.set_verbosity(logging.INFO) # INFO --> Type of message like error, warning, etc
handlers = [
logging.FileHandler('results/main.log'),
logging.StreamHandler(sys.stdout)
]
logging.getLogger('tensorflow').handlers = handlers
# parse function
def parse_fn(line_words, line_tags):
# Encode in Bytes for TF
words = [word.encode() for word in line_words.strip().split()]
tags = [tag.encode() for tag in line_tags.strip().split()]
assert len(words) == len(tags), "Words and tags lengths don't match" # Returns assertion error
return (words, len(words)), tags
# Data Generator function
def generator_fn(words, tags):
with Path(words).open('r') as file_words, Path(tags).open('r') as file_tags:
for line_words, line_tags in zip(file_words, file_tags):
            yield parse_fn(line_words, line_tags)  # yield returns a value without ending the function; execution resumes here for the next item
# Input Function
def input_fn(words, tags, params=None, shuffle_and_repeat=False):
params = params if params is not None else {}
shapes = ((tf.TensorShape([None]), tf.TensorShape(())), tf.TensorShape([None]))
    types = ((tf.string, tf.int32), tf.string)  # data types for the tensors
defaults = (('<pad>', 0), 'O') # padding in case of mismatched length
dataset = tf.data.Dataset.from_generator(
functools.partial(generator_fn, words, tags),
output_shapes=shapes, output_types=types)
if shuffle_and_repeat:
dataset = dataset.shuffle(params['buffer']).repeat(params['epochs'])
dataset = (dataset
.padded_batch(params.get('batch_size', 20), shapes, defaults)
.prefetch(1))
return dataset
Model function
def model_fn(features, labels, mode, params):
# For serving, features are a bit different
if isinstance(features, dict):
# if not a tensor
features = features['words'], features['nwords']
dropout = params['dropout']
words, nwords = features # words and number of words
training = (mode == tf.estimator.ModeKeys.TRAIN)
vocab_words = tf.lookup.StaticVocabularyTable(
tf.lookup.TextFileInitializer(
params['words'],
key_dtype=tf.string, key_index=tf.lookup.TextFileIndex.WHOLE_LINE,
value_dtype=tf.int64, value_index=tf.lookup.TextFileIndex.LINE_NUMBER,
delimiter="\n" ),
num_oov_buckets=params['num_oov_buckets'])
with Path(params['tags']).open() as f:
indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
num_tags = len(indices) + 1 # number of tags in tag file
# Word Embeddings
word_ids = vocab_words.lookup(words)
glove = np.load(params['glove'])['embeddings'] # np.array
variable = np.vstack([glove, [[0.]*params['dim']]])
shape = variable.shape
# getting embeddings
variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
embeddings = tf.nn.embedding_lookup(variable, word_ids)
# LSTM
dropout_lstm = tf.keras.layers.Dropout(rate=dropout)
embeddings = dropout_lstm(embeddings, training=training)
layer_fw = tf.keras.layers.LSTM(params['lstm_size'], return_sequences=True)
layer_bw = tf.keras.layers.LSTM(params['lstm_size'], go_backwards=True)
bilstm = tf.keras.layers.Bidirectional(layer_fw)
output = bilstm(embeddings)
dropout_crf = tf.keras.layers.Dropout(rate=dropout)
output = dropout_crf(output)
dense_layer = tf.keras.layers.Dense(num_tags)
logits = dense_layer(output)
initial_val = np.zeros((num_tags,num_tags))# [[1.,1.,1.,1.],[1.,1.,1.,1.],[1.,1.,1.,1.],[1.,1.,1.,1.]]
# crf_params = tf.Variable(initial_value=initial_val ,name="crf",shape=[num_tags,num_tags], dtype=tf.float32)
crf_params = tf.convert_to_tensor(initial_val,dtype=tf.float32)
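    # NB: tf.convert_to_tensor produces a constant tensor; unlike the commented-out
    # tf.Variable above, these CRF transition parameters will not be trained.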
pred_ids, _ = tf_ad.text.crf_decode(logits,crf_params, nwords)
if mode == tf.estimator.ModeKeys.PREDICT:
# Predictions
reverse_vocab_tags = tf.lookup.StaticVocabularyTable(
tf.lookup.TextFileInitializer(
params['tags'],
key_dtype=tf.int64, key_index=tf.lookup.TextFileIndex.LINE_NUMBER,
value_dtype=tf.string, value_index=tf.lookup.TextFileIndex.WHOLE_LINE,
delimiter="\n"
),
num_oov_buckets=params['num_oov_buckets']
)
        pred_strings = reverse_vocab_tags.lookup(tf.cast(pred_ids, tf.int64))
predictions = {
'pred_ids': pred_ids,
'tags': pred_strings
}
return tf.estimator.EstimatorSpec(mode, predictions=predictions)
else:
# Loss
vocab_tags = tf.lookup.StaticVocabularyTable(
tf.lookup.TextFileInitializer(
params['tags'],
key_dtype=tf.string, key_index=tf.lookup.TextFileIndex.WHOLE_LINE,
value_dtype=tf.int64, value_index=tf.lookup.TextFileIndex.LINE_NUMBER,
delimiter="\n"
),
num_oov_buckets=params['num_oov_buckets']
)
tags = vocab_tags.lookup(labels)
log_likelihood, _ = tf_ad.text.crf.crf_log_likelihood(
logits, tags, nwords, crf_params)
loss = tf.reduce_mean(-log_likelihood)
print("\nloss of error",loss,"\n")
# Metrics
weights = tf.sequence_mask(nwords)
#Accuracy
acc_layer = tf.keras.metrics.Accuracy()
acc_layer.update_state(tags, pred_ids, weights)
acc = acc_layer
#Precision
pre_layer = tf.keras.metrics.Precision(top_k=num_tags)
pre_layer.update_state(tags, pred_ids, weights)
pre = pre_layer
#Recall
rec_layer = tf.keras.metrics.Recall(top_k=num_tags)
rec_layer.update_state(tags, pred_ids, weights)
rec = rec_layer
print(tf.keras.backend.cast(tags, dtype="int32"))
# f1 Score
f1_layer = tf_ad.metrics.F1Score(num_classes=num_tags, average="weighted")
f1_layer.update_state(tf.keras.backend.cast(tags, dtype="int32"), tf.keras.backend.cast(pred_ids, dtype="int32"), weights)
f1_score = f1_layer
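        # NB (assumption): tfa.metrics.F1Score expects one-hot/probability tensors of shape
        # (batch, num_classes), and tf.keras.metrics.Precision/Recall expect predictions in
        # [0, 1]; feeding integer tag ids here is a plausible source of both the TypeError
        # and the "predictions must be <= 1" assertion.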
metrics = {
'acc': acc,
'precision': pre,
'recall': rec,
'f1_score': f1_score,
}
# for metric_name, op in metrics.items():
# print(f"\nThe metric_name is:{metric_name} and op is:{op}\n")
# ret = tf.summary.scalar(metric_name, op.result())
# print(ret)
def loss_fn():
loss = tf.reduce_mean(-log_likelihood)
return loss
print("\nloss function\n",metrics)
if mode == tf.estimator.ModeKeys.EVAL:
return tf.estimator.EstimatorSpec(
mode, loss=loss, eval_metric_ops=metrics)
elif mode == tf.estimator.ModeKeys.TRAIN:
train_op = tf.compat.v1.train.AdamOptimizer().minimize(
loss)
return tf.estimator.EstimatorSpec(
mode, loss=loss, train_op=train_op)
Main.py
if __name__ == '__main__':
# Params
params = {
'dim': 300,
'dropout': 0.5,
'num_oov_buckets': 1,
'epochs': 25,
'batch_size': 20,
'buffer': 15000,
'lstm_size': 100,
'words': str(Path(DATADIR, 'vocab.words.txt')),
'chars': str(Path(DATADIR, 'vocab.chars.txt')),
'tags': str(Path(DATADIR, 'vocab.tags.txt')),
'glove': str(Path(DATADIR, 'glove.npz'))
}
with Path('results/params.json').open('w') as f:
json.dump(params, f, indent=4, sort_keys=True)
def fwords(name):
return str(Path(DATADIR, '{}.words.txt'.format(name)))
def ftags(name):
return str(Path(DATADIR, '{}.tags.txt'.format(name)))
# Estimator, train and evaluate
train_inpf = functools.partial(input_fn, fwords('train'), ftags('train'),
params, shuffle_and_repeat=True)
eval_inpf = functools.partial(input_fn, fwords('testa'), ftags('testa'))
cfg = tf.estimator.RunConfig(save_checkpoints_secs=120)
estimator = tf.estimator.Estimator(model_fn, 'results/model', cfg, params)
Path(estimator.eval_dir()).mkdir(parents=True, exist_ok=True)
    # a hook is used to stop training and run a specific function or piece of code after a specific time
hook = tf.estimator.experimental.stop_if_no_increase_hook(
estimator, 'f1', 500, min_steps=8000, run_every_secs=120)
train_spec = tf.estimator.TrainSpec(input_fn=train_inpf, hooks=[hook])
eval_spec = tf.estimator.EvalSpec(input_fn=eval_inpf, throttle_secs=120)
tf.compat.v1.enable_eager_execution()
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
# Write predictions to file
def write_predictions(name):
Path('results/score').mkdir(parents=True, exist_ok=True)
with Path('results/score/{}.preds.txt'.format(name)).open('wb') as f:
test_inpf = functools.partial(input_fn, fwords(name), ftags(name))
golds_gen = generator_fn(fwords(name), ftags(name))
preds_gen = estimator.predict(test_inpf)
for golds, preds in zip(golds_gen, preds_gen):
((words, _), tags) = golds
for word, tag, tag_pred in zip(words, tags, preds['tags']):
f.write(b' '.join([word, tag, tag_pred]) + b'\n')
f.write(b'\n')
for name in ['train', 'testa', 'testb']:
write_predictions(name)
Another error that is causing trouble is this:
InvalidArgumentError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1382 '\nsession_config.graph_options.rewrite_options.'
1383 'disable_meta_optimizer = True')
-> 1384 raise type(e)(node_def, op, message)
1385
1386 def _extend_graph(self):
InvalidArgumentError: assertion failed: [predictions must be <= 1] [Condition x <= y did not hold element-wise:] [x (Cast_6:0) = ] [[3 3 3...]...] [y (Cast_9/x:0) = ] [1]
[[{{node Assert}}]]
This happens when I comment out the F1Score calculation and remove it from the metrics dict.
Please help out; I am totally stuck on this. I am converting TF 1 code to TF 2. The code was working in TF 1, but when I made the necessary changes to run it in TF 2, it started giving these errors.
Also, I was trying to use tf.keras.optimizers.Adam().minimize(), but it gave the error "No gradients provided for any of the variables". I use loss_fn for the loss argument of minimize and tv for the trainable variables:
def loss_fn():
    return tf.reduce_mean(-log_likelihood)
tv = dense_layer.trainable_weights + bilstm.trainable_weights
I am seriously stuck; I have tried all the GitHub solutions that were posted for similar problems, but nothing worked.
Specifications of the code:
TensorFlow 2
GloVe for embeddings
My question is: I want to calculate the derivative of the output with respect to "time_input" and "dense_input". Before asking this question, I searched for a solution for calculating the Jacobian matrix with Keras backend functions.
After running it, I got this error:
File "\keras\backend\tensorflow_backend.py", line 2614, in _call
dtype=tensor.dtype.base_dtype.name))
AttributeError: 'list' object has no attribute 'dtype'
Here is my simple version:
from keras.models import *
from keras.layers import *
import keras.backend as K
import numpy as np
import pandas as pd
from keras import optimizers
def get_model(timestamp, features):
time_input = Input(shape=(timestamp, features,), name='time_input')
lstm_out = LSTM(4)(time_input)
dense_hidden_units = 2
dense_input_layer = Input(shape=(dense_length,), name='dense_input_layer')
final_input_layer = concatenate([lstm_out, dense_input_layer])
# Disable biases in the hidden layer
dense_1 = Dense(units=dense_hidden_units, use_bias=False, activation='sigmoid')(final_input_layer)
# Disable bias in output layer
output_layer = Dense(units=1, use_bias=False, name='final_output')(dense_1)
model = Model(
inputs=[time_input, dense_input_layer],
outputs=output_layer
)
print(model.summary())
return model
if __name__ == '__main__':
timestamp = 3
features = 1
dense_length = 3
temp_data = pd.DataFrame([
[1, 2, 3, 2, 3, 4],
])
time_data = temp_data.values.reshape(-1, timestamp, features)
dense_data = temp_data.values.reshape(-1, dense_length)
target_data = np.array([1, 2])
print(time_data.shape)
print(dense_data.shape)
print(target_data.shape)
model = get_model(
timestamp, features
)
Ada = optimizers.Adagrad(lr=0.09, epsilon=1e-04)
model.compile(loss='mse', optimizer=Ada, metrics=['mse'])
model.fit(
{
'time_input': time_data,
'dense_input_layer': dense_data,
},
{
'final_output': target_data
},
epochs=1, batch_size=1
)
time_input = model.get_layer('time_input').input
GPP_input_layer = model.get_layer('dense_input_layer').input
J = K.gradients(model.output, [time_input, GPP_input_layer])
jacobianTime = K.function([[time_input, GPP_input_layer], K.learning_phase()], J)
deriRes = jacobianTime([time_data, dense_data]) # this line throw exception
print(deriRes[0])
Thanks for the help!
You have an extra set of brackets.
jacobianTime = K.function([[time_input, GPP_input_layer], K.learning_phase()], J)
to
jacobianTime = K.function([time_input, GPP_input_layer, K.learning_phase()], J)
I was able to run your code like this at least.
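One caveat I have not verified: since K.learning_phase() is one of the function's inputs, the call itself probably needs the phase flag appended as well, e.g.:
deriRes = jacobianTime([time_data, dense_data, 0])  # 0 = test phase, 1 = training phase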
I wrote the following code to extract features from two images with a deep CNN, using TensorFlow:
# -*- coding: utf-8 -*-
# Implementation of Wang et al 2017: Automatic Brain Tumor Segmentation using Cascaded Anisotropic Convolutional Neural Networks. https://arxiv.org/abs/1709.00382
# Author: Guotai Wang
# Copyright (c) 2017-2018 University College London, United Kingdom. All rights reserved.
# http://cmictig.cs.ucl.ac.uk
#
# Distributed under the BSD-3 licence. Please see the file licence.txt
# This software is not certified for clinical use.
#
from __future__ import absolute_import, print_function
import numpy as np
from scipy import ndimage
import time
import os
import sys
import pickle
import tensorflow as tf
from tensorflow.contrib.data import Iterator
from util.data_loader import *
from util.data_process import *
from util.train_test_func import *
from util.parse_config import parse_config
from train import NetFactory
print("import finished")
def test(config_file):
# 1, load configure file
config = parse_config(config_file)
config_data = config['data']
config_net1 = config.get('network1', None)
config_net2 = config.get('network2', None)
config_net3 = config.get('network3', None)
config_test = config['testing']
batch_size = config_test.get('batch_size', 5)
print("configure file loaded")
# 2.1, network for whole tumor
if(config_net1):
net_type1 = config_net1['net_type']
net_name1 = config_net1['net_name']
data_shape1 = config_net1['data_shape']
label_shape1 = config_net1['label_shape']
class_num1 = config_net1['class_num']
print("configure file of whole tumor is loaded")
# construct graph for 1st network
full_data_shape1 = [batch_size] + data_shape1
x1 = tf.placeholder(tf.float32, shape = full_data_shape1)
net_class1 = NetFactory.create(net_type1)
net1 = net_class1(num_classes = class_num1,w_regularizer = None,
b_regularizer = None, name = net_name1)
net1.set_params(config_net1)
predicty1, caty1 = net1(x1, is_training = True)
proby1 = tf.nn.softmax(predicty1)
else:
config_net1ax = config['network1ax']
config_net1sg = config['network1sg']
config_net1cr = config['network1cr']
print("configure files of whole tumor in three planes are loaded")
# construct graph for 1st network axial
net_type1ax = config_net1ax['net_type']
net_name1ax = config_net1ax['net_name']
data_shape1ax = config_net1ax['data_shape']
label_shape1ax = config_net1ax['label_shape']
class_num1ax = config_net1ax['class_num']
full_data_shape1ax = [batch_size] + data_shape1ax
x1ax = tf.placeholder(tf.float32, shape = full_data_shape1ax)
net_class1ax = NetFactory.create(net_type1ax)
net1ax = net_class1ax(num_classes = class_num1ax,w_regularizer = None,
b_regularizer = None, name = net_name1ax)
net1ax.set_params(config_net1ax)
predicty1ax, caty1ax = net1ax(x1ax, is_training = True)
proby1ax = tf.nn.softmax(predicty1ax)
print("graph for 1st network1ax is constructed")
# construct graph for 1st network sagittal
net_type1sg = config_net1sg['net_type']
net_name1sg = config_net1sg['net_name']
data_shape1sg = config_net1sg['data_shape']
label_shape1sg = config_net1sg['label_shape']
class_num1sg = config_net1sg['class_num']
full_data_shape1sg = [batch_size] + data_shape1sg
x1sg = tf.placeholder(tf.float32, shape = full_data_shape1sg)
net_class1sg = NetFactory.create(net_type1sg)
net1sg = net_class1sg(num_classes = class_num1sg,w_regularizer = None,
b_regularizer = None, name = net_name1sg)
net1sg.set_params(config_net1sg)
predicty1sg, caty1sg = net1sg(x1sg, is_training = True)
proby1sg = tf.nn.softmax(predicty1sg)
print("graph for 1st network1sg is constructed")
# construct graph for 1st network coronal
net_type1cr = config_net1cr['net_type']
net_name1cr = config_net1cr['net_name']
data_shape1cr = config_net1cr['data_shape']
label_shape1cr = config_net1cr['label_shape']
class_num1cr = config_net1cr['class_num']
full_data_shape1cr = [batch_size] + data_shape1cr
x1cr = tf.placeholder(tf.float32, shape = full_data_shape1cr)
net_class1cr = NetFactory.create(net_type1cr)
net1cr = net_class1cr(num_classes = class_num1cr,w_regularizer = None,
b_regularizer = None, name = net_name1cr)
net1cr.set_params(config_net1cr)
predicty1cr, caty1cr = net1cr(x1cr, is_training = True)
proby1cr = tf.nn.softmax(predicty1cr)
print("graph for 1st network1cr is constructed")
# 3, create session and load trained models
all_vars = tf.global_variables()
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
if(config_net1):
net1_vars = [x for x in all_vars if x.name[0:len(net_name1) + 1]==net_name1 + '/']
saver1 = tf.train.Saver(net1_vars)
saver1.restore(sess, config_net1['model_file'])
else:
net1ax_vars = [x for x in all_vars if x.name[0:len(net_name1ax) + 1]==net_name1ax + '/']
saver1ax = tf.train.Saver(net1ax_vars)
saver1ax.restore(sess, config_net1ax['model_file'])
net1sg_vars = [x for x in all_vars if x.name[0:len(net_name1sg) + 1]==net_name1sg + '/']
saver1sg = tf.train.Saver(net1sg_vars)
saver1sg.restore(sess, config_net1sg['model_file'])
net1cr_vars = [x for x in all_vars if x.name[0:len(net_name1cr) + 1]==net_name1cr + '/']
saver1cr = tf.train.Saver(net1cr_vars)
saver1cr.restore(sess, config_net1cr['model_file'])
print("all variables of net1 is saved")
# 4, load test images
dataloader = DataLoader(config_data)
dataloader.load_data()
image_num = dataloader.get_total_image_number()
# 5, start to test
test_slice_direction = config_test.get('test_slice_direction', 'all')
save_folder = config_data['save_folder']
test_time = []
struct = ndimage.generate_binary_structure(3, 2)
margin = config_test.get('roi_patch_margin', 5)
x=['x1','x2']
paddings=tf.constant([[0,0],[0,0],[10,10],[0,0],[0,0]])
for i in range(image_num):
[temp_imgs, temp_weight, temp_name, img_names, temp_bbox, temp_size] = dataloader.get_image_data_with_name(i)
t0 = time.time()
# 5.1, test of 1st network
if(config_net1):
data_shapes = [ data_shape1[:-1], data_shape1[:-1], data_shape1[:-1]]
label_shapes = [label_shape1[:-1], label_shape1[:-1], label_shape1[:-1]]
nets = [net1, net1, net1]
outputs = [proby1, proby1, proby1]
inputs = [x1, x1, x1]
class_num = class_num1
else:
data_shapes = [ data_shape1ax[:-1], data_shape1sg[:-1], data_shape1cr[:-1]]
label_shapes = [label_shape1ax[:-1], label_shape1sg[:-1], label_shape1cr[:-1]]
nets = [net1ax, net1sg, net1cr]
outputs = [proby1ax, proby1sg, proby1cr]
inputs = [x1ax, x1sg, x1cr]
class_num = class_num1ax
predi=tf.concat([predicty1ax,tf.reshape(predicty1sg,[5,11,180,160,2]),tf.pad(predicty1cr,paddings,"CONSTANT")],0)
cati=tf.concat([caty1ax,tf.reshape(caty1sg,[5,11,180,160,14]),tf.pad(caty1cr,paddings,"CONSTANT")],0)
prob1 = test_one_image_three_nets_adaptive_shape(temp_imgs, data_shapes, label_shapes, data_shape1ax[-1], class_num,
batch_size, sess, nets, outputs, inputs, shape_mode = 0)
pred1 = np.asarray(np.argmax(prob1, axis = 3), np.uint16)
pred1 = pred1 * temp_weight
print("net1 is tested")
globals()[x[i]]=predi
test_time.append(time.time() - t0)
print(temp_name)
test_time = np.asarray(test_time)
print('test time', test_time.mean())
np.savetxt(save_folder + '/test_time.txt', test_time)
if __name__ == '__main__':
if(len(sys.argv) != 2):
print('Number of arguments should be 2. e.g.')
print(' python test.py config17/test_all_class.txt')
exit()
config_file = str(sys.argv[1])
assert(os.path.isfile(config_file))
test(config_file)
y=tf.stack([x1,x2],0)
z=tf.Session().run(y)
The output is a tensor (y) that I want to convert to a numpy array using tf.Session().run(), but I get this error:
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [5,19,180,160,4]
[[Node: Placeholder = Placeholderdtype=DT_FLOAT, shape=[5,19,180,160,4], _device="/job:localhost/replica:0/task:0/device:GPU:0"]]
Note: this answer is based on a deep look into the crystal ball, predicting code that seems to be classified -- at least it is not written in the question itself.
Have a look at the error message:
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor
This is exactly what is wrong with your code. Trimmed down, your code is essentially just this (and there are a lot of issues):
import tensorflow as tf
x1 = tf.placeholder(tf.float32, [None, 3])
y = tf.layers.dense(x1, 2)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
print(tf.Session().run(y))
The output tensor y cannot be evaluated without knowing the value of x1, since it depends on this value.
1. Fix: use proper naming
import tensorflow as tf
x1 = tf.placeholder(tf.float32, [None, 3], name='my_input')
y = tf.layers.dense(x1, 2, name='fc1')
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
print(tf.Session().run(y))
Now the error message becomes much clearer:
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'my_input' with dtype float and shape [?,3]
2. Fix: provide a feed_dict
To let TensorFlow know which value the computation of y should be based on, you need to feed it into the graph:
import tensorflow as tf
x1 = tf.placeholder(tf.float32, [None, 3], name='my_input')
y = tf.layers.dense(x1, 2, name='fc1')
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
np_result = tf.Session().run(y, feed_dict={x1: [[42, 43, 44]]})
Now, this reveals the second issue with your code. You have 2 sessions:
sess = tf.InteractiveSession() (session_a)
tf.Session() in tf.Session().run() (session_b)
Now, session_a gets all initialized variables, since your code contains
sess.run(tf.global_variables_initializer())
But during tf.Session().run(...) another session is created, leading to a new error message:
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value ...
3. Fix: use just one session
import tensorflow as tf
x1 = tf.placeholder(tf.float32, [None, 3], name='my_input')
y = tf.layers.dense(x1, 2, name='fc1')
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
np_result = sess.run(y, feed_dict={x1: [[42, 43, 44]]})
And to provide the best possible solution:
import tensorflow as tf
# construct graph somewhere
x1 = tf.placeholder(tf.float32, [None, 3], name='my_input')
y = tf.layers.dense(x1, 2, name='fc1')
with tf.Session() as sess:
# init variables / or load them
sess.run(tf.global_variables_initializer())
    # make sure that no operations will be added to the graph
sess.graph.finalize()
# fetch result as numpy array
np_result = sess.run(y, feed_dict={x1: [[42, 43, 44]]})
The code you either wrote yourself or copied from somewhere is the best demonstration of how not to write TensorFlow code.
One last remark:
TensorFlow forces you to create a clean structure, and this is important. It should become a habit to follow this structure. After a while, you immediately see the parts that smell like bad code.
If you use an entire network, then just replace tf.layers.dense with my_network_definition:
def my_network_definition(x1):
output = ...
return output
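so that the graph construction above becomes, for example:
y = my_network_definition(x1)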
In PyTorch, you can write in an arbitrary style like the one you provided in the question. I am not saying you should do that, but it is possible. So for now, try to follow the structure TensorFlow expects from you.
Dear PyTorch users, I am looking forward to your feedback.
I am learning TensorFlow by modifying some examples I've found. To start off with, I have taken an RNN example to try against the "Spam" data set from UCI.
My code and the sample data set can be found in full here:
https://trinket.io/python/c7d6b95452
When I run the code I get a 100% error rate. I figure that even if this data set were not well suited to this particular model, I'd get at least something better than that, so I don't think the problem is my choice of sample data set.
Below is my Python code. If anyone can suggest how to modify this to get the model to work properly, I would appreciate it! I'd also appreciate any general TensorFlow advice.
# Example for my blog post at:
# https://danijar.com/introduction-to-recurrent-networks-in-tensorflow/
import functools
import os
import sets
import random
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
def lazy_property(function):
attribute = '_' + function.__name__
    @property
    @functools.wraps(function)
def wrapper(self):
if not hasattr(self, attribute):
setattr(self, attribute, function(self))
return getattr(self, attribute)
return wrapper
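# lazy_property caches the result of the wrapped method in self._<name> the first
# time it is accessed and returns the cached value as a read-only property afterwards,
# so each part of the graph is only constructed once.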
class SequenceClassification:
def __init__(self, data, target, dropout, num_hidden=200, num_layers=3):
self.data = data
self.target = target
self.dropout = dropout
self._num_hidden = num_hidden
self._num_layers = num_layers
self.prediction
self.error
self.optimize
    @lazy_property
def prediction(self):
# Recurrent network.
network = rnn_cell.GRUCell(self._num_hidden)
network = rnn_cell.DropoutWrapper(
network, output_keep_prob=self.dropout)
network = rnn_cell.MultiRNNCell([network] * self._num_layers)
output, _ = tf.nn.dynamic_rnn(network, self.data, dtype=tf.float32)
# Select last output.
output = tf.transpose(output, [1, 0, 2])
last = tf.gather(output, int(output.get_shape()[0]) - 1)
# Softmax layer.
weight, bias = self._weight_and_bias(
self._num_hidden, int(self.target.get_shape()[1]))
prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
return prediction
    @lazy_property
def cost(self):
        cross_entropy = -tf.reduce_sum(self.target * tf.log(self.prediction))
return cross_entropy
    @lazy_property
def optimize(self):
learning_rate = 0.003
optimizer = tf.train.RMSPropOptimizer(learning_rate)
return optimizer.minimize(self.cost)
    @lazy_property
def error(self):
mistakes = tf.not_equal(
tf.argmax(self.target, 1), tf.argmax(self.prediction, 1))
return tf.reduce_mean(tf.cast(mistakes, tf.float32))
    @staticmethod
def _weight_and_bias(in_size, out_size):
weight = tf.truncated_normal([in_size, out_size], stddev=0.01)
bias = tf.constant(0.1, shape=[out_size])
return tf.Variable(weight), tf.Variable(bias)
def main():
sample_size=10
num_classes=2 #spam or ham
##
# import spam data
##
spam_data=[]
spam_data_train=[]
spam_data_test=[]
data_dir="."
data_file="spam.csv"
with open(os.path.join(data_dir, data_file), "r") as file_handle:
for row in file_handle:
spam_data.append(row)
spam_data=[line.rstrip().split(",") for line in spam_data if len(line) >=1]
random.shuffle(spam_data)
spam_data_train=spam_data[0:int(len(spam_data)*.8)]
spam_data_test=spam_data[int(len(spam_data)*.8):int(len(spam_data))]
def next_train_batch(batch_size):
a=random.sample(spam_data_train, batch_size)
return [np.array([line[:-1] for line in a]), np.array([line[len(line)-1] for line in a])]
def train_batch():
return [np.array([line[:-1] for line in spam_data_train]),np.array([line[len(line)-1] for line in spam_data_train])]
def next_test_batch(batch_size):
a=random.sample(spam_data_test, batch_size)
return [np.array([line[:-1] for line in a]), np.array([line[len(line)-1] for line in a])]
def test_batch():
return [np.array([line[:-1] for line in spam_data_test]),np.array([line[len(line)-1] for line in spam_data_test])]
    t = train_batch()
train_input=t[0]
train_target=t[1]
test=test_batch()
test_input=t[0]
test_target=t[1]
training_data = tf.placeholder(tf.float32, [None, sample_size, len(train_input[0])], "training_data")
training_target = tf.placeholder(tf.float32, [None, sample_size], "training_target")
testing_data = tf.placeholder(tf.float32, [None, len(test_input), len(test_input[0])], "testing_data")
testing_target = tf.placeholder(tf.float32, [None, len(test_target)], "testing_target")
dropout = tf.placeholder(tf.float32)
training_model = SequenceClassification(training_data, training_target, dropout)
tf.get_variable_scope().reuse_variables()
testing_model = SequenceClassification(testing_data, testing_target, dropout)
sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)
for epoch in range(sample_size):
for _ in range(100):
sample=random.sample(range(0,len(train_input)-1),sample_size)
batch_train = [train_input[i] for i in sample]
batch_target = [train_target[i] for i in sample]
sess.run(training_model.optimize, {
training_data: [batch_train], training_target: [batch_target] , dropout: 0.5})
error = sess.run(testing_model.error, {
testing_data: [test_input], testing_target: [test_target], dropout: 1.0})
print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, 100 * error))
if __name__ == '__main__':
main()