You must feed a value for placeholder tensor 'input_example_tensor' with dtype string and shape [1] - tensorflow

I am developing a TensorFlow Serving client/server application using the chatbot-retrieval project.
My code has two parts: a serving part and a client part.
Below is the code snippet for the serving part.
def get_features(context, utterance):
    context_len = 50
    utterance_len = 50
    features = {
        "context": context,
        "context_len": tf.constant(context_len, shape=[1, 1], dtype=tf.int64),
        "utterance": utterance,
        "utterance_len": tf.constant(utterance_len, shape=[1, 1], dtype=tf.int64),
    }
    return features

def my_input_fn(estimator, input_example_tensor):
    feature_configs = {
        'context': tf.FixedLenFeature(shape=[50], dtype=tf.int64),
        'utterance': tf.FixedLenFeature(shape=[50], dtype=tf.int64)
    }
    tf_example = tf.parse_example(input_example_tensor, feature_configs)
    context = tf.identity(tf_example['context'], name='context')
    utterance = tf.identity(tf_example['utterance'], name='utterance')
    features = get_features(context, utterance)
    return features
def my_signature_fn(input_example_tensor, features, predictions):
    feature_configs = {
        'context': tf.FixedLenFeature(shape=[50], dtype=tf.int64),
        'utterance': tf.FixedLenFeature(shape=[50], dtype=tf.int64)
    }
    tf_example = tf.parse_example(input_example_tensor, feature_configs)
    tf_context = tf.identity(tf_example['context'], name='tf_context_utterance')
    tf_utterance = tf.identity(tf_example['utterance'], name='tf_utterance')
    default_graph_signature = exporter.regression_signature(
        input_tensor=input_example_tensor,
        output_tensor=tf.identity(predictions)
    )
    named_graph_signatures = {
        'inputs': exporter.generic_signature(
            {
                'context': tf_context,
                'utterance': tf_utterance
            }
        ),
        'outputs': exporter.generic_signature(
            {
                'scores': predictions
            }
        )
    }
    return default_graph_signature, named_graph_signatures
def main():
    ## preliminary code here ##
    estimator.fit(input_fn=input_fn_train, steps=100, monitors=[eval_monitor])
    estimator.export(
        export_dir=FLAGS.export_dir,
        input_fn=my_input_fn,
        use_deprecated_input_fn=True,
        signature_fn=my_signature_fn,
        exports_to_keep=1
    )
Below is the code snippet for the client part.
def tokenizer_fn(iterator):
    return (x.split(" ") for x in iterator)

vp = tf.contrib.learn.preprocessing.VocabularyProcessor.restore(FLAGS.vocab_processor_file)
input_context = "biz banka kart farkli bir banka atmsinde para"
input_utterance = "farkli banka kart biz banka atmsinde para"
context_feature = np.array(list(vp.transform([input_context])))
utterance_feature = np.array(list(vp.transform([input_utterance])))
context_tensor = tf.contrib.util.make_tensor_proto(context_feature, shape=[1, context_feature.size])
utterance_tensor = tf.contrib.util.make_tensor_proto(utterance_feature, shape=[1, utterance_feature.size])
request.inputs['context'].CopyFrom(context_tensor)
request.inputs['utterance'].CopyFrom(utterance_tensor)
result_counter.throttle()
result_future = stub.Predict.future(request, 5.0)  # 5 seconds
result_future.add_done_callback(
    _create_rpc_callback(label[0], result_counter))
return result_counter.get_error_rate()
Both the serving and the client parts build with no errors. After running the serving application and then the client application, the following strange error is propagated to the client application when the RPC call completes:
AbortionError(code=StatusCode.INVALID_ARGUMENT, details="You must feed a value for placeholder tensor 'input_example_tensor' with dtype string and shape [1]
[[Node: input_example_tensor = Placeholder[_output_shapes=[[1]], dtype=DT_STRING, shape=[1], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]")
The error is strange, since there seems to be no way to feed the placeholder from the client application.
How can I provide data for the placeholder 'input_example_tensor' if I am accessing the model through TensorFlow Serving?
ANSWER:
(I posted my answer here since I couldn't post it as an answer due to a lack of Stack Overflow badges. Anyone who volunteers to submit it as their own answer to the question is more than welcome; I will accept it as the answer.)
I resolved the problem by setting use_deprecated_input_fn = False in the estimator.export function and changing the input signatures accordingly.
Below is the final code, which runs with no problems.
def get_features(input_example_tensor, context, utterance):
    context_len = 50
    utterance_len = 50
    features = {
        "my_input_example_tensor": input_example_tensor,
        "context": context,
        "context_len": tf.constant(context_len, shape=[1, 1], dtype=tf.int64),
        "utterance": utterance,
        "utterance_len": tf.constant(utterance_len, shape=[1, 1], dtype=tf.int64),
    }
    return features

def my_input_fn():
    input_example_tensor = tf.placeholder(tf.string, name='tf_example_placeholder')
    feature_configs = {
        'context': tf.FixedLenFeature(shape=[50], dtype=tf.int64),
        'utterance': tf.FixedLenFeature(shape=[50], dtype=tf.int64)
    }
    tf_example = tf.parse_example(input_example_tensor, feature_configs)
    context = tf.identity(tf_example['context'], name='context')
    utterance = tf.identity(tf_example['utterance'], name='utterance')
    features = get_features(input_example_tensor, context, utterance)
    return features, None
def my_signature_fn(input_example_tensor, features, predictions):
    default_graph_signature = exporter.regression_signature(
        input_tensor=input_example_tensor,
        output_tensor=predictions
    )
    named_graph_signatures = {
        'inputs': exporter.generic_signature(
            {
                'context': features['context'],
                'utterance': features['utterance']
            }
        ),
        'outputs': exporter.generic_signature(
            {
                'scores': predictions
            }
        )
    }
    return default_graph_signature, named_graph_signatures
def main():
    ## preliminary code here ##
    estimator.fit(input_fn=input_fn_train, steps=100, monitors=[eval_monitor])
    estimator._targets_info = tf.contrib.learn.estimators.tensor_signature.TensorSignature(tf.constant(0, shape=[1, 1]))
    estimator.export(
        export_dir=FLAGS.export_dir,
        input_fn=my_input_fn,
        input_feature_key="my_input_example_tensor",
        use_deprecated_input_fn=False,
        signature_fn=my_signature_fn,
        exports_to_keep=1
    )
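For reference, the original error arises because the deprecated export path places the whole graph behind a single [1]-shaped string placeholder that expects a serialized tf.Example, while the client was feeding raw feature tensors. A minimal sketch of what feeding that placeholder would look like from the client (the 'examples' input key below is hypothetical; the actual key depends on the export):
example = tf.train.Example(features=tf.train.Features(feature={
    'context': tf.train.Feature(int64_list=tf.train.Int64List(value=context_feature.ravel())),
    'utterance': tf.train.Feature(int64_list=tf.train.Int64List(value=utterance_feature.ravel())),
}))
# 'examples' is a hypothetical key; shape [1] matches the string placeholder.
request.inputs['examples'].CopyFrom(
    tf.contrib.util.make_tensor_proto([example.SerializeToString()],
                                      dtype=tf.string, shape=[1]))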


Related

How to concat laserembeddings with huggingface funnel transformers simple CLS output for NLP sequence classification task?

I was approaching an NLP sequence classification problem (3 classes) using Hugging Face transformers (funnel-transformer/large) and TensorFlow.
First I created LASER embeddings like this:
from laserembeddings import Laser
laser = Laser()
df = pd.read_csv("mycsv.csv")
embeds = laser.embed_sentences(df['text'].values, lang='en')
write_pickle_to_file('train.pkl', embeds)
Part 1: TensorFlow version
For data preparation I use code like below:
df['text'] = temp['column1'] + tokenizer.sep_token + temp['column2'] + tokenizer.sep_token + temp['column3']

def encode_text(texts):
    enc_di = tokenizer.batch_encode_plus(
        texts,
        padding='max_length',
        truncation=True,
        return_token_type_ids=True,
        pad_to_max_length=True,
        max_length=cfg.max_len
    )
    return [np.asarray(enc_di['input_ids'], dtype=np.int64),
            np.asarray(enc_di['attention_mask'], dtype=np.int64),
            np.asarray(enc_di['token_type_ids'], dtype=np.int64)]
Then inside the training function:
x_train = encode_text(df.text.to_list())
train_ds = (
    tf.data.Dataset
    .from_tensor_slices((
        {
            "input_ids": x_train[0],
            "input_masks": x_train[1],
            "input_segments": x_train[2],
            "lasers": np.array(train[laser_columns].values, dtype=np.float32)  # laser_columns contains all the laser embedded columns
        },
        tf.one_hot(df["label"].to_list(), 3)  # 3 classes
    ))
    .repeat()
    .shuffle(2048)
    .batch(cfg.batch_size)
    .prefetch(AUTO)
)
I add the laser embedding to my model like this:
def create_model():
    transformer = transformers.TFAutoModel.from_pretrained(cfg.pretrained, config=config, from_pt=True)
    max_len = 512
    # transformer
    input_ids = Input(shape=(max_len,), dtype="int32", name="input_ids")
    input_masks = Input(shape=(max_len,), dtype="int32", name="input_masks")
    input_segments = Input(shape=(max_len,), dtype="int32", name="input_segments")
    sequence_output = transformer(input_ids, attention_mask=input_masks, token_type_ids=input_segments)[0]
    cls_token = sequence_output[:, 0, :]
    # lasers
    lasers = Input(shape=(n_lasers,), dtype=tf.float32, name="lasers")  # n_lasers = 1024
    lasers_output = tf.keras.layers.Dense(n_lasers, activation='tanh')(lasers)
    x = tf.keras.layers.Concatenate()([cls_token, lasers_output])
    x = tf.keras.layers.Dropout(0.1)(x)
    x = tf.keras.layers.Dense(2048, activation='tanh')(x)
    x = tf.keras.layers.Dropout(0.1)(x)
    out = tf.keras.layers.Dense(3, activation='softmax')(x)
    model = Model(inputs=[input_ids, input_masks, input_segments, lasers], outputs=out)
    model.compile(Adam(lr=1e-5), loss=losses.CategoricalCrossentropy(), metrics=["acc", metrics.CategoricalCrossentropy(name='xentropy')])
    return model
Now my question is: how do we do the same with PyTorch for the exact same problem and the same dataset?
Part 2: PyTorch version
df = pd.read_csv("mytrain.csv")

class myDataset(Dataset):
    def __init__(self, df, max_length, tokenizer, training=True):
        self.df = df
        self.max_len = max_length
        self.tokenizer = tokenizer
        self.column1 = self.df['column1'].values
        self.column2 = self.df['column2'].values
        self.column3 = self.df['column3'].values
        self.column4 = self.df['column4'].values
        self.training = training
        if self.training:
            self.targets = self.df['label'].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        column1 = self.column1[index]
        column2 = self.column2[index]
        column3 = self.column3[index]
        text0 = self.column4[index]
        text1 = column1 + ' ' + column2 + ' ' + column3
        inputs = self.tokenizer.encode_plus(
            text1,
            text0,
            truncation=True,
            add_special_tokens=True,
            return_token_type_ids=True,
            is_split_into_words=False,
            max_length=self.max_len
        )
        samples = {
            'input_ids': inputs['input_ids'],
            'attention_mask': inputs['attention_mask'],
        }
        if 'token_type_ids' in inputs:
            samples['token_type_ids'] = inputs['token_type_ids']
        if self.training:
            samples['target'] = self.targets[index]
        return samples
collate_fn = DataCollatorWithPadding(tokenizer=CONFIG['tokenizer'])
class myModel(nn.Module):
    def __init__(self, model_name):
        super(myModel, self).__init__()
        self.model = AutoModel.from_pretrained(model_name)
        if True:
            print("using gradient_checkpoint...")
            self.model.gradient_checkpointing_enable()
        self.config = AutoConfig.from_pretrained(model_name)
        self.config.update(
            {
                "output_hidden_states": True,
                "hidden_dropout_prob": 0.0,
                "layer_norm_eps": 1e-7,
                "add_pooling_layer": False,
                "attention_probs_dropout_prob": 0.0,
            }
        )
        self.fc = nn.Linear(self.config.hidden_size, 3)

    def forward(self, ids, mask):
        out = self.model(input_ids=ids, attention_mask=mask, output_hidden_states=False)
        out = out[0][:, 0, :]
        outputs = self.fc(out)
        return outputs
And in the train and validation loop I have code like this:
bar = tqdm(enumerate(dataloader), total=len(dataloader))
for step, data in bar:
    ids = data['input_ids'].to(device, dtype=torch.long)
    mask = data['attention_mask'].to(device, dtype=torch.long)
    targets = data['target'].to(device, dtype=torch.long)
    batch_size = ids.size(0)
    optimizer.zero_grad()
    # forward pass with `autocast` context manager
    with autocast(enabled=True):
        outputs = model(ids, mask)
        loss = loss_fct(outputs, targets)
I would like to know where and how in my Hugging Face PyTorch pipeline I can use the LASER embeddings that I created earlier and used in the TensorFlow model.
I would like to concatenate the LASER embeddings with the funnel transformer's CLS token output and train the transformer with the LASER embeddings as an extra feature, exactly as I did in the TensorFlow example above (which works well). How can I modify my PyTorch code to achieve this? Your help is highly appreciated, thanks in advance.
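Since the LASER vector is just an extra per-sample feature, one way to mirror the TensorFlow architecture in PyTorch is a sketch like the following (the class name MyLaserModel and the lasers plumbing are hypothetical; it assumes n_lasers = 1024 as in the TF code): project the LASER vector through a tanh linear layer, concatenate it with the CLS token, and feed the result to the classification head.
import torch
import torch.nn as nn
from transformers import AutoConfig, AutoModel

class MyLaserModel(nn.Module):
    # Hypothetical sketch mirroring create_model() from the TF version above.
    def __init__(self, model_name, n_lasers=1024, n_classes=3):
        super().__init__()
        self.model = AutoModel.from_pretrained(model_name)
        self.config = AutoConfig.from_pretrained(model_name)
        # tanh projection of the LASER vector, like the TF Dense(n_lasers, activation='tanh')
        self.laser_proj = nn.Sequential(nn.Linear(n_lasers, n_lasers), nn.Tanh())
        # classification head on [CLS ; projected lasers], like the TF 2048-unit head
        self.head = nn.Sequential(
            nn.Dropout(0.1),
            nn.Linear(self.config.hidden_size + n_lasers, 2048),
            nn.Tanh(),
            nn.Dropout(0.1),
            nn.Linear(2048, n_classes),
        )

    def forward(self, ids, mask, lasers):
        out = self.model(input_ids=ids, attention_mask=mask)
        cls_token = out[0][:, 0, :]  # (batch, hidden_size) CLS embedding
        x = torch.cat([cls_token, self.laser_proj(lasers)], dim=1)
        return self.head(x)
The Dataset would then also return the LASER vector for each sample (for example samples['lasers'] = torch.tensor(self.lasers[index], dtype=torch.float32) in __getitem__, with self.lasers holding the precomputed embeddings), and the training loop would pass it through: outputs = model(ids, mask, data['lasers'].to(device, dtype=torch.float)). Note that unlike the TF model this head returns raw logits, which is what nn.CrossEntropyLoss expects.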

How can I preprocess my Mapdataset to fit my model input?

I use a MapDataset composed of a label in text and a vector of floats stored as a string.
Here is the way I read the content of my tfrecord:
def extract_data(tfrecord_ds):
    feature_description = {
        'classes_text': tf.io.FixedLenFeature((), tf.string),
        'data': tf.io.FixedLenFeature([], tf.string)
    }
    def _parse_data_function(example_proto):
        return tf.compat.v1.parse_single_example(example_proto, feature_description)
    parsed_dataset = tfrecord_ds.map(_parse_data_function)
    dataset = parsed_dataset.cache().shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)
    return dataset
I want to convert the label text to an int according to the label.txt file, and the data string to a vector of floats.
I want to use this data to train a custom model like this:
my_model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(1024,), dtype=tf.float32,
                          name='input_embedding'),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(num_classes)
], name='audio_detector')
How can I process my MapDataset from (string,string) to (int, float_array) to be able to train my model?
Edit:
Here is the way I encode my data:
features = {}
features['classes_text'] = tf.train.Feature(
    bytes_list=tf.train.BytesList(value=[audio_data_generator.label.encode()]))
bytes = embedding.numpy().tobytes()
features['data'] = tf.train.Feature(bytes_list=tf.train.BytesList(value=[bytes]))
tf_example = tf.train.Example(features=tf.train.Features(feature=features))
writer.write(tf_example.SerializeToString())
It is easier to encode the embedding using tf.train.FloatList.
When writing to tfrecords, use:
features = {
    'classes_text': tf.train.Feature(bytes_list=tf.train.BytesList(value=[label.encode()])),
    'data': tf.train.Feature(float_list=tf.train.FloatList(value=embedding))
}
tf_example = tf.train.Example(features=tf.train.Features(feature=features))
writer.write(tf_example.SerializeToString())
And when reading, give the embedding size to tf.io.FixedLenFeature, for example:
embedding_size = 10
feature_description = {
    'classes_text': tf.io.FixedLenFeature((), tf.string),
    'data': tf.io.FixedLenFeature([embedding_size], tf.float32)
}
To convert label_text to int you can use tf.lookup.StaticVocabularyTable.
# Assuming label.txt contains a single label per line.
with open('label.txt', 'r') as fin:
    categories = [line.strip() for line in fin.readlines()]

init = tf.lookup.KeyValueTensorInitializer(
    keys=tf.constant(categories),
    values=tf.constant(list(range(len(categories))), dtype=tf.int64))
label_table = tf.lookup.StaticVocabularyTable(init, num_oov_buckets=1)

feature_description = {
    'classes_text': tf.io.FixedLenFeature((), tf.string),
    'data': tf.io.FixedLenFeature([embedding_size], tf.float32)
}

def _parse_data_function(example_proto):
    example = tf.compat.v1.parse_single_example(example_proto, feature_description)
    # Apply the label lookup.
    example['classes_text'] = label_table.lookup(example['classes_text'])
    return example

parsed_dataset = tfrecord_ds.map(_parse_data_function)
dataset = parsed_dataset.cache().shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)
Edit
If you wish to keep the way you save the data, you can use np.frombuffer to convert the numpy vectors from binary strings. You will have to wrap this code in a tf.function and tf.py_function though.
def decode_embedding(embedding_bytes):
    # dtype must match what the embedding was saved with (float32 assumed here)
    return np.frombuffer(embedding_bytes.numpy(), dtype=np.float32)

@tf.function
def tf_decode_embedding(embedding_bytes):
    return tf.py_function(decode_embedding, inp=[embedding_bytes], Tout=tf.float32)

feature_description = {
    'classes_text': tf.io.FixedLenFeature((), tf.string),
    'data': tf.io.FixedLenFeature([], tf.string)
}

def _parse_data_function(example_proto):
    example = tf.compat.v1.parse_single_example(example_proto, feature_description)
    example['classes_text'] = label_table.lookup(example['classes_text'])
    example['data'] = tf_decode_embedding(example['data'])
    return example

parsed_dataset = tfrecord_ds.map(_parse_data_function)
dataset = parsed_dataset.cache().shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)
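With the parsing above, the dataset yields dictionaries, while model.fit expects (features, label) pairs. A short sketch (assuming integer labels from the lookup table and the logits-output Sequential model defined in the question) of the final mapping and training call:
# Sketch: turn the parsed dict into (embedding, label) pairs for model.fit.
def to_xy(example):
    return example['data'], example['classes_text']

train_ds = (parsed_dataset
            .map(to_xy)
            .cache()
            .shuffle(1000)
            .batch(32)
            .prefetch(tf.data.AUTOTUNE))

my_model.compile(optimizer='adam',
                 loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                 metrics=['accuracy'])
my_model.fit(train_ds, epochs=10)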

Tensorflow: converting H5 layer model to TFJS version leads to Unknown layer: TensorFlowOpLayer error when it works in TS

I'm trying to run the converted model from the repository https://github.com/HasnainRaz/Fast-SRGAN. The conversion was successful, but when I tried to initialize the model I saw the error "Unknown layer: TensorFlowOpLayer.". If we investigate the saved model, we can see the TensorFlowOpLayer:
The model structure
As I understood, it is this piece of code:
keras.layers.UpSampling2D(size=2, interpolation='bilinear')(layer_input)
I decided to write my own class "TensorFlowOpLayer".
import * as tf from '@tensorflow/tfjs';

export class TensorFlowOpLayer extends tf.layers.Layer {
    constructor() {
        super({});
    }

    computeOutputShape(shape: Array<number>) {
        return [1, null, null, 32];
    }

    call(input_3): tf.Tensor {
        const result = tf.layers.upSampling2d({ size: [2, 2], dataFormat: 'channelsLast', interpolation: 'bilinear' })
            .apply(input_3) as tf.Tensor;
        return result;
    }

    static get className() {
        return 'TensorFlowOpLayer';
    }
}
But it doesn't work. Can someone help me understand how to write the computeOutputShape method?
And a second misunderstanding: why, in the picture above, do we see the following order of layers:
Conv2D -> TensorFlowOpLayer -> PReLU
As I understood it, the TensorFlowOpLayer is the UpSampling2D in the Python code. The H5 model was investigated through the site https://netron.app:
u = keras.layers.UpSampling2D(size=2, interpolation='bilinear')(layer_input)
u = keras.layers.Conv2D(self.gf, kernel_size=3, strides=1, padding='same')(u)
u = keras.layers.PReLU(shared_axes=[1, 2])(u)
The initializing of the model in TS:
async loadModel() {
    this.model = await tf.loadLayersModel('/assets/fast_srgan/model.json');
    const inputs = tf.layers.input({shape: [null, null, 32]});
    const outputs = this.model.apply(inputs) as tf.SymbolicTensor;
    this.model = tf.model({inputs: inputs, outputs: outputs});
    console.log("Model has been loaded");
}
like in the Python code:
from tensorflow import keras
# Load the model
model = keras.models.load_model('models/generator.h5')
# Define arbitrary spatial dims, and 3 channels.
inputs = keras.Input((None, None, 3))
# Trace out the graph using the input:
outputs = model(inputs)
# Override the model:
model = keras.models.Model(inputs, outputs)
Then, how is it used:
tf.tidy(() => {
    let img = tf.browser.fromPixels(this.imgLr.nativeElement, 3);
    img = tf.div(img, 255.0);
    img = tf.image.resizeNearestNeighbor(img, [96, 96]);
    img = tf.expandDims(img, 0);
    let sr = this.model.predict(img) as tf.Tensor;
});
like in the Python code:
def predict(img):
    # Rescale to 0-1.
    lr = tf.math.divide(img, 255)
    # Get super resolution image
    sr = model.predict(tf.expand_dims(lr, axis=0))
    return sr[0]
When I added my own class "TensorFlowOpLayer" I got the next error:
"expected input1 to have shape [null,null,null,32] but got array with shape [1,96,96,3]."
Solved the issue. It was related to a mismatch between the version of the code and the saved model: the author of the repository refactored the code but didn't update the saved model. I rewrote the needed class:
import * as tf from '@tensorflow/tfjs';

export class DepthToSpace extends tf.layers.Layer {
    constructor() {
        super({});
    }

    computeOutputShape(shape: Array<number>) {
        return [null, ...shape.slice(1, 3).map(x => x * 2), 32];
    }

    call(input): tf.Tensor {
        input = input[0];
        const result = tf.depthToSpace(input, 2);
        return result;
    }

    static get className() {
        return 'TensorFlowOpLayer';
    }
}
And it works.
The author's original code is:
u = keras.layers.Conv2D(filters, kernel_size=3, strides=1, padding='same')(layer_input)
u = tf.nn.depth_to_space(u, 2)
u = keras.layers.PReLU(shared_axes=[1, 2])(u)

GoogleNet fails to classify images

I built the Keras GoogLeNet from here:
https://www.analyticsvidhya.com/blog/2018/10/understanding-inception-network-from-scratch/
The only difference is that I replaced the 1000 classes in the output layers with 3. The data is prepared this way:
def grey_preprocessor(xarray):
    xarray = (xarray / 127.5) - 1
    return xarray

img_resol = (224, 224)
train_batches = ImageDataGenerator(horizontal_flip=True, preprocessing_function=grey_preprocessor).flow_from_directory(
    directory=train_path, target_size=img_resol, classes=['bacterial', 'healthy', 'viral'], batch_size=10)
valid_batches = ImageDataGenerator(horizontal_flip=True, preprocessing_function=grey_preprocessor).flow_from_directory(
    directory=valid_path, target_size=img_resol, classes=['bacterial', 'healthy', 'viral'], batch_size=10)
test_batches = ImageDataGenerator(horizontal_flip=True, preprocessing_function=grey_preprocessor).flow_from_directory(
    directory=test_path, target_size=img_resol, classes=['bacterial', 'healthy', 'viral'], batch_size=10, shuffle=False)
assert train_batches.n == 4222
assert valid_batches.n == 300
assert test_batches.n == 150
assert train_batches.num_classes == valid_batches.num_classes == test_batches.num_classes == 3
I train it like this:
history = model.fit(train_batches, validation_data=valid_batches, epochs=epochs, batch_size=256, callbacks=[lr_sc])
However, the accuracy on every batch is 0.3333, which means it doesn't classify at all. I understand that it could be anything. What is a good way to troubleshoot this?
If you want to normalize your grayscale image, use this:
def gray_preprocessor(xarray):
    xarray = xarray / 255.0
    return xarray
Or you can also use a lambda function:
gray_preprocessor = lambda xarray: xarray / 255.0
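Beyond normalization, a useful general troubleshooting step for a model stuck at chance accuracy is to check whether it can memorize a tiny sample: if it cannot overfit a single batch, the problem is in the model/loss wiring rather than the data. A quick sketch, assuming a single softmax output head and the default one-hot labels from flow_from_directory:
# Sanity check (sketch): try to overfit one batch of 10 images.
# Chance-level accuracy here points at the model/loss wiring;
# near-100% accuracy points at the data pipeline or learning-rate schedule.
images, labels = next(train_batches)
model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(images, labels, epochs=50, verbose=2)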

A2C is not working due to critic loss is not converging

I'm trying to do my own implementation of the Advantage Actor-Critic algorithm using TensorFlow. I used the code in https://github.com/BoYanSTKO/Practical_RL-coursera/blob/master/week5_policy_based/practice_a3c.ipynb as a rough template for how to write the algorithm.
I tried it on the simple CartPole-v0 gym environment, but my implementation fails badly. The critic's loss just explodes and becomes way too large, while the actor's loss stays rather low.
I'm not sure what I'm doing wrong here. Any help? :)
I've tried separating the actor and critic from each other by having two different networks. This did not help either. I have also tried fine-tuning some things like gamma and the learning rate, without any success.
#!/usr/bin/python
import tensorflow as tf
import numpy as np
import gym
import random
from tensorboardX import SummaryWriter

class ActorCritic():
    def __init__(self, state_dim, n_actions, learning_rate, gamma=0.99):
        with tf.variable_scope("ActorCritic"):
            self.states_ph = tf.placeholder(tf.float32, (None, state_dim), name="states")
            self.action_ph = tf.placeholder(tf.int32, (None,), name="actions")
            self.n_actions = n_actions
            self.reward_ph = tf.placeholder(tf.float32, (None,), name="rewards")
            self.next_state_values = tf.placeholder(tf.float32, (None,), name="next_state_values")
            self.is_done_ph = tf.placeholder(tf.float32, (None,), name="is_done")
            net = tf.layers.dense(self.states_ph, 24, activation=tf.nn.relu)
            self.logits = tf.layers.dense(net, n_actions, activation=None)
            self.state_values = tf.layers.dense(net, 1, activation=None)
            self.action_probs = tf.nn.softmax(self.logits)
            self.log_prob = tf.nn.log_softmax(self.logits)
            self.entropy = -tf.reduce_sum(self.action_probs * self.log_prob, axis=-1, name="entropy")
            self.logp_actions = tf.reduce_sum(self.log_prob * tf.one_hot(self.action_ph, depth=n_actions), axis=-1)
            self.target_state_values = self.reward_ph + gamma * (1.0 - self.is_done_ph) * self.next_state_values
            self.advantage = self.target_state_values - self.state_values
            self.actor_loss = -tf.reduce_mean(self.logp_actions * tf.stop_gradient(self.advantage)) - 0.01 * tf.reduce_mean(self.entropy)
            self.critic_loss = tf.reduce_mean(self.advantage**2.0)
            self.train_opt = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.actor_loss + self.critic_loss)

    def train(self, states, actions, rewards, is_done, nxt_state_values_batch):
        sess = tf.get_default_session()
        return sess.run([self.critic_loss, self.actor_loss, self.train_opt], feed_dict={
            self.next_state_values: nxt_state_values_batch,
            self.states_ph: states,
            self.action_ph: actions,
            self.reward_ph: rewards,
            self.is_done_ph: is_done})

    def predict_state_values(self, states):
        sess = tf.get_default_session()
        return sess.run(self.state_values, feed_dict={self.states_ph: states})

    def sample_actions(self, states):
        sess = tf.get_default_session()
        action_probs = sess.run(self.action_probs, {self.states_ph: states})
        return [np.random.choice(range(self.n_actions), p=action_prob) for action_prob in action_probs]

class EnvBatch():
    def __init__(self, env_name, n_envs):
        self.envs = [gym.make(env_name) for env in range(n_envs)]
        self.n_actions = self.envs[0].action_space.n
        self.state_dim = self.envs[0].observation_space.shape[0]

    def reset(self):
        return [env.reset().tolist() for env in self.envs]

    def step(self, actions):
        states_batch, rewards_batch, is_done_batch = [], [], []
        for action, env in zip(actions, self.envs):
            s, r, d, _ = env.step(action)
            if d:
                s = env.reset()
            states_batch.append(s)
            rewards_batch.append(r)
            is_done_batch.append(d)
        return np.array(states_batch), np.array(rewards_batch), np.array(is_done_batch)

def evaluate_performance(env_name, agent, nr_runs=10):
    env = gym.make(env_name)
    rewards = []
    for _ in range(nr_runs):
        state = env.reset()
        is_done = False
        acc_reward = 0.0
        while not is_done:
            action = agent.sample_actions([state])
            nxt_state, reward, is_done, _ = env.step(action[0])
            state = nxt_state
            acc_reward += reward
        rewards.append(acc_reward)
    return np.mean(rewards)

tf.reset_default_graph()
env = EnvBatch("CartPole-v0", 10)
agent = ActorCritic(env.state_dim, env.n_actions, learning_rate=0.001)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
state_batch = env.reset()
writer = SummaryWriter()
for i in range(100000):
    actions = agent.sample_actions(state_batch)
    nxt_state_batch, rewards_batch, is_done_batch = env.step(actions)
    nxt_state_values = agent.predict_state_values(nxt_state_batch).ravel()
    critic_loss, actor_loss, _ = agent.train(state_batch, actions, rewards_batch, is_done_batch, nxt_state_values)
    writer.add_scalar("actor_loss", actor_loss, i)
    writer.add_scalar("critic_loss", critic_loss, i)
    if i % 50 == 0:
        test_reward = evaluate_performance("CartPole-v0", agent)
        writer.add_scalar("test_reward", test_reward, i)
        if test_reward > 195:
            print("Done!")
    states_batch = nxt_state_batch
sess.close()
writer.close()
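Two observations from reading the code above (not stated in the original post) that commonly produce exactly this symptom. First, the training loop assigns to states_batch but keeps sampling actions from state_batch, so the agent acts on stale states. Second, and most relevant to the exploding critic loss: self.state_values comes out of tf.layers.dense with shape (None, 1), while the bootstrapped target has shape (None,), so the subtraction broadcasts into a (None, None) matrix and the squared error is averaged over all N*N entries. A minimal sketch of the shape fix inside __init__:
# Squeeze the value head to rank 1 so the advantage keeps shape (None,)
# instead of (None,) - (None, 1) broadcasting to a (None, None) matrix.
self.state_values = tf.squeeze(tf.layers.dense(net, 1, activation=None), axis=-1)
self.advantage = self.target_state_values - self.state_values  # shape (None,)
self.critic_loss = tf.reduce_mean(self.advantage ** 2.0)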