Is there a way to get tensorflow tf.Print output to appear in Jupyter Notebook output - tensorflow

I'm using the tf.Print op in a Jupyter notebook. It works, but it only prints the output to the console (the terminal where the notebook server was launched), with nothing shown in the notebook. Is there any way to get around this?
An example would be the following (in a notebook):
import tensorflow as tf
a = tf.constant(1.0)
a = tf.Print(a, [a], 'hi')
sess = tf.Session()
a.eval(session=sess)
That code will print 'hi[1]' in the console, but nothing in the notebook.

Update Feb 3, 2017
I've wrapped this into the memory_util package. Example usage:
# install memory util
import urllib.request
response = urllib.request.urlopen("https://raw.githubusercontent.com/yaroslavvb/memory_util/master/memory_util.py")
open("memory_util.py", "wb").write(response.read())
import memory_util
sess = tf.Session()
a = tf.random_uniform((1000,))
b = tf.random_uniform((1000,))
c = a + b
with memory_util.capture_stderr() as stderr:
    sess.run(c.op)
print(stderr.getvalue())
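Since tf.Print messages go to stderr (which is also where the old FD redirector below looks), the same helper should surface them in the notebook. A quick sketch, untested beyond the setup above:
a = tf.constant(1.0)
a = tf.Print(a, [a], 'hi')
with memory_util.capture_stderr() as stderr:
    sess.run(a.op)
print(stderr.getvalue())  # 'hi[1]' should now appear in the notebook cell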
**Old stuff**
You could reuse the FD redirector from IPython core (idea from Mark Sandler):
import os
import sys

STDOUT = 1
STDERR = 2

class FDRedirector(object):
    """ Class to redirect output (stdout or stderr) at the OS level using
        file descriptors.
    """

    def __init__(self, fd=STDOUT):
        """ fd is the file descriptor of the output you want to capture.
            It can be STDOUT or STDERR.
        """
        self.fd = fd
        self.started = False
        self.piper = None
        self.pipew = None

    def start(self):
        """ Setup the redirection.
        """
        if not self.started:
            self.oldhandle = os.dup(self.fd)
            self.piper, self.pipew = os.pipe()
            os.dup2(self.pipew, self.fd)
            os.close(self.pipew)
            self.started = True

    def flush(self):
        """ Flush the captured output, similar to the flush method of any
            stream.
        """
        if self.fd == STDOUT:
            sys.stdout.flush()
        elif self.fd == STDERR:
            sys.stderr.flush()

    def stop(self):
        """ Unset the redirection and return the captured output.
        """
        if self.started:
            self.flush()
            os.dup2(self.oldhandle, self.fd)
            os.close(self.oldhandle)
            f = os.fdopen(self.piper, 'r')
            output = f.read()
            f.close()
            self.started = False
            return output
        else:
            return ''

    def getvalue(self):
        """ Return the output captured since the last getvalue, or the
            start of the redirection.
        """
        output = self.stop()
        self.start()
        return output

import tensorflow as tf

x = tf.constant([1, 2, 3])
a = tf.Print(x, [x])

redirect = FDRedirector(STDERR)

sess = tf.InteractiveSession()
redirect.start()
a.eval()
print("Result")
print(redirect.stop())

I ran into the same problem and got around it by using a function like this in my notebooks:
import numpy as np
import tensorflow as tf

def tf_print(tensor, transform=None):

    # Insert a custom python operation into the graph that does nothing but print a tensor's value
    def print_tensor(x):
        # x is typically a numpy array here, so you could do anything you want with it,
        # but adding a transformation of some kind usually makes the output more digestible
        print(x if transform is None else transform(x))
        return x

    log_op = tf.py_func(print_tensor, [tensor], [tensor.dtype])[0]
    with tf.control_dependencies([log_op]):
        res = tf.identity(tensor)

    # Return the given tensor
    return res

# Now define a tensor and use the tf_print function much like the tf.identity function
tensor = tf_print(tf.random_normal([100, 100]),
                  transform=lambda x: [np.min(x), np.max(x)])

# This will print the transformed version of the tensor's actual value
# (which was summarized to just the min and max for brevity)
sess = tf.InteractiveSession()
sess.run([tensor])
sess.close()
FYI, using a logger instead of calling print in my custom function worked wonders for me, since stdout is often buffered by Jupyter and not flushed before "Loss is NaN"-style errors appear, which was the whole point of using that function in the first place in my case.
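A minimal sketch of that logger variant (assuming the standard logging module; handler and level choices are illustrative). Inside tf_print, the inner print_tensor logs instead of printing:
import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger('tf_print')

def print_tensor(x):
    # Routed through the logging handler rather than notebook-buffered stdout.
    log.info(x if transform is None else transform(x))
    return x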

You can check the terminal where you launched the Jupyter notebook to see the message:
import tensorflow as tf
tf.InteractiveSession()
a = tf.constant(1)
b = tf.constant(2)
opt = a + b
opt = tf.Print(opt, [opt], message="1 + 2 = ")
opt.eval()
In the terminal, I can see:
2018-01-02 23:38:07.691808: I tensorflow/core/kernels/logging_ops.cc:79] 1 + 2 = [3]

A simple approach; I tried it in regular Python, but not in Jupyter yet:
import os
import sys

os.dup2(sys.stdout.fileno(), 1)
os.dup2(sys.stdout.fileno(), 2)
The explanation is here: In python, how to capture the stdout from a c++ shared library to a variable

The issue I faced was that you can't run a session inside a TensorFlow graph, as happens during training or evaluation. That's why using sess.run(opt) or opt.eval() was not a solution for me. The best option was to use tf.Print() and redirect the logging to an external file. I did this with a temporary file, which I then transferred to a regular log file like this:
STDERR = 2

import os
import sys
import tempfile

class captured:
    def __init__(self, fd=STDERR):
        self.fd = fd
        self.prevfd = None

    def __enter__(self):
        t = tempfile.NamedTemporaryFile()
        self.prevfd = os.dup(self.fd)
        os.dup2(t.fileno(), self.fd)
        return t

    def __exit__(self, exc_type, exc_value, traceback):
        os.dup2(self.prevfd, self.fd)

with captured(fd=STDERR) as tmp:
    ...
    classifier.evaluate(input_fn=input_fn, steps=100)

with open('log.txt', 'w') as f:
    print(open(tmp.name).read(), file=f)
And then in my evaluation I do:
a = tf.constant(1)
a = tf.Print(a, [a], message="a: ")
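The captured messages can then be shown inside the notebook by reading that file back (log.txt from the snippet above):
print(open('log.txt').read())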

Related

Using BatchedPyEnvironment in tf_agents

I am trying to create a batched environment version of an SAC agent example from the TensorFlow Agents library; the original code can be found here. I am also using a custom environment.
I am pursuing a batched environment setup in order to better leverage GPU resources and speed up training. My understanding is that by passing batches of trajectories to the GPU, there will be less overhead incurred when passing data from the host (CPU) to the device (GPU).
My custom environment is called SacEnv, and I attempt to create a batched environment like so:
py_envs = [SacEnv() for _ in range(0, batch_size)]
batched_env = batched_py_environment.BatchedPyEnvironment(envs=py_envs)
tf_env = tf_py_environment.TFPyEnvironment(batched_env)
My hope is that this will create a batched environment consisting of a 'batch' of non-batched environments. However, I am receiving the following error when running the code:
ValueError: Cannot assign value to variable ' Accumulator:0': Shape mismatch.The variable shape (1,), and the assigned value shape (32,) are incompatible.
with the stack trace:
Traceback (most recent call last):
File "/home/gary/Desktop/code/sac_test/sac_main2.py", line 370, in <module>
app.run(main)
File "/home/gary/anaconda3/envs/py39/lib/python3.9/site-packages/absl/app.py", line 312, in run
_run_main(main, args)
File "/home/gary/anaconda3/envs/py39/lib/python3.9/site-packages/absl/app.py", line 258, in _run_main
sys.exit(main(argv))
File "/home/gary/Desktop/code/sac_test/sac_main2.py", line 366, in main
train_eval(FLAGS.root_dir)
File "/home/gary/anaconda3/envs/py39/lib/python3.9/site-packages/gin/config.py", line 1605, in gin_wrapper
utils.augment_exception_message_and_reraise(e, err_str)
File "/home/gary/anaconda3/envs/py39/lib/python3.9/site-packages/gin/utils.py", line 41, in augment_exception_message_and_reraise
raise proxy.with_traceback(exception.__traceback__) from None
File "/home/gary/anaconda3/envs/py39/lib/python3.9/site-packages/gin/config.py", line 1582, in gin_wrapper
return fn(*new_args, **new_kwargs)
File "/home/gary/Desktop/code/sac_test/sac_main2.py", line 274, in train_eval
results = metric_utils.eager_compute(
File "/home/gary/anaconda3/envs/py39/lib/python3.9/site-packages/gin/config.py", line 1605, in gin_wrapper
utils.augment_exception_message_and_reraise(e, err_str)
File "/home/gary/anaconda3/envs/py39/lib/python3.9/site-packages/gin/utils.py", line 41, in augment_exception_message_and_reraise
raise proxy.with_traceback(exception.__traceback__) from None
File "/home/gary/anaconda3/envs/py39/lib/python3.9/site-packages/gin/config.py", line 1582, in gin_wrapper
return fn(*new_args, **new_kwargs)
File "/home/gary/anaconda3/envs/py39/lib/python3.9/site-packages/tf_agents/eval/metric_utils.py", line 163, in eager_compute
common.function(driver.run)(time_step, policy_state)
File "/home/gary/anaconda3/envs/py39/lib/python3.9/site-packages/tensorflow/python/util/traceback_utils.py", line 153, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/home/gary/anaconda3/envs/py39/lib/python3.9/site-packages/tf_agents/drivers/dynamic_episode_driver.py", line 211, in run
return self._run_fn(
File "/home/gary/anaconda3/envs/py39/lib/python3.9/site-packages/tf_agents/utils/common.py", line 188, in with_check_resource_vars
return fn(*fn_args, **fn_kwargs)
File "/home/gary/anaconda3/envs/py39/lib/python3.9/site-packages/tf_agents/drivers/dynamic_episode_driver.py", line 238, in _run
tf.while_loop(
File "/home/gary/anaconda3/envs/py39/lib/python3.9/site-packages/tf_agents/drivers/dynamic_episode_driver.py", line 154, in loop_body
observer_ops = [observer(traj) for observer in self._observers]
File "/home/gary/anaconda3/envs/py39/lib/python3.9/site-packages/tf_agents/drivers/dynamic_episode_driver.py", line 154, in <listcomp>
observer_ops = [observer(traj) for observer in self._observers]
File "/home/gary/anaconda3/envs/py39/lib/python3.9/site-packages/tf_agents/metrics/tf_metric.py", line 93, in __call__
return self._update_state(*args, **kwargs)
File "/home/gary/anaconda3/envs/py39/lib/python3.9/site-packages/tf_agents/metrics/tf_metric.py", line 81, in _update_state
return self.call(*arg, **kwargs)
ValueError: in user code:
File "/home/gary/anaconda3/envs/py39/lib/python3.9/site-packages/tf_agents/metrics/tf_metrics.py", line 176, in call *
self._return_accumulator.assign(
ValueError: Cannot assign value to variable ' Accumulator:0': Shape mismatch.The variable shape (1,), and the assigned value shape (32,) are incompatible.
In call to configurable 'eager_compute' (<function eager_compute at 0x7fa4d6e5e040>)
In call to configurable 'train_eval' (<function train_eval at 0x7fa4c8622dc0>)
I have dug through the tf_metric.py code to try to understand the error, but I have been unsuccessful. A related issue was solved by adding the batch size (32) to the initializer of the AverageReturnMetric instance, and this issue seems related.
The full code is:
# coding=utf-8
# Copyright 2020 The TF-Agents Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python2, python3
r"""Train and Eval SAC.
All hyperparameters come from the SAC paper
https://arxiv.org/pdf/1812.05905.pdf
To run:
```bash
tensorboard --logdir $HOME/tmp/sac/gym/HalfCheetah-v2/ --port 2223 &
python tf_agents/agents/sac/examples/v2/train_eval.py \
--root_dir=$HOME/tmp/sac/gym/HalfCheetah-v2/ \
--alsologtostderr
\```
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from sac_env import SacEnv
import os
import time
from absl import app
from absl import flags
from absl import logging
import gin
from six.moves import range
import tensorflow as tf # pylint: disable=g-explicit-tensorflow-version-import
from tf_agents.agents.ddpg import critic_network
from tf_agents.agents.sac import sac_agent
from tf_agents.agents.sac import tanh_normal_projection_network
from tf_agents.drivers import dynamic_step_driver
#from tf_agents.environments import suite_mujoco
from tf_agents.environments import tf_py_environment
from tf_agents.environments import batched_py_environment
from tf_agents.eval import metric_utils
from tf_agents.metrics import tf_metrics
from tf_agents.networks import actor_distribution_network
from tf_agents.policies import greedy_policy
from tf_agents.policies import random_tf_policy
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.utils import common
from tf_agents.train.utils import strategy_utils
flags.DEFINE_string('root_dir', os.getenv('TEST_UNDECLARED_OUTPUTS_DIR'),
'Root directory for writing logs/summaries/checkpoints.')
flags.DEFINE_multi_string('gin_file', None, 'Path to the trainer config files.')
flags.DEFINE_multi_string('gin_param', None, 'Gin binding to pass through.')
FLAGS = flags.FLAGS
gpus = tf.config.list_physical_devices('GPU')
if gpus:
try:
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
logical_gpus = tf.config.experimental.list_logical_devices('GPU')
print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
except RuntimeError as e:
print(e)
#gin.configurable
def train_eval(
root_dir,
env_name='SacEnv',
# The SAC paper reported:
# Hopper and Cartpole results up to 1000000 iters,
# Humanoid results up to 10000000 iters,
# Other mujoco tasks up to 3000000 iters.
num_iterations=3000000,
actor_fc_layers=(256, 256),
critic_obs_fc_layers=None,
critic_action_fc_layers=None,
critic_joint_fc_layers=(256, 256),
# Params for collect
# Follow https://github.com/haarnoja/sac/blob/master/examples/variants.py
# HalfCheetah and Ant take 10000 initial collection steps.
# Other mujoco tasks take 1000.
# Different choices roughly keep the initial episodes about the same.
#initial_collect_steps=10000,
initial_collect_steps=2000,
collect_steps_per_iteration=1,
replay_buffer_capacity=31250, # 1000000 / 32
# Params for target update
target_update_tau=0.005,
target_update_period=1,
# Params for train
train_steps_per_iteration=1,
#batch_size=256,
batch_size=32,
actor_learning_rate=3e-4,
critic_learning_rate=3e-4,
alpha_learning_rate=3e-4,
td_errors_loss_fn=tf.math.squared_difference,
gamma=0.99,
reward_scale_factor=0.1,
gradient_clipping=None,
use_tf_functions=True,
# Params for eval
num_eval_episodes=30,
eval_interval=10000,
# Params for summaries and logging
train_checkpoint_interval=50000,
policy_checkpoint_interval=50000,
rb_checkpoint_interval=50000,
log_interval=1000,
summary_interval=1000,
summaries_flush_secs=10,
debug_summaries=False,
summarize_grads_and_vars=False,
eval_metrics_callback=None):
"""A simple train and eval for SAC."""
root_dir = os.path.expanduser(root_dir)
train_dir = os.path.join(root_dir, 'train')
eval_dir = os.path.join(root_dir, 'eval')
train_summary_writer = tf.compat.v2.summary.create_file_writer(
train_dir, flush_millis=summaries_flush_secs * 1000)
train_summary_writer.set_as_default()
eval_summary_writer = tf.compat.v2.summary.create_file_writer(
eval_dir, flush_millis=summaries_flush_secs * 1000)
eval_metrics = [
tf_metrics.AverageReturnMetric(buffer_size=num_eval_episodes),
tf_metrics.AverageEpisodeLengthMetric(buffer_size=num_eval_episodes)
]
global_step = tf.compat.v1.train.get_or_create_global_step()
with tf.compat.v2.summary.record_if(
lambda: tf.math.equal(global_step % summary_interval, 0)):
py_envs = [SacEnv() for _ in range(0, batch_size)]
batched_env = batched_py_environment.BatchedPyEnvironment(envs=py_envs)
tf_env = tf_py_environment.TFPyEnvironment(batched_env)
eval_py_envs = [SacEnv() for _ in range(0, batch_size)]
eval_batched_env = batched_py_environment.BatchedPyEnvironment(envs=eval_py_envs)
eval_tf_env = tf_py_environment.TFPyEnvironment(eval_batched_env)
time_step_spec = tf_env.time_step_spec()
observation_spec = time_step_spec.observation
action_spec = tf_env.action_spec()
strategy = strategy_utils.get_strategy(tpu=False, use_gpu=True)
with strategy.scope():
actor_net = actor_distribution_network.ActorDistributionNetwork(
observation_spec,
action_spec,
fc_layer_params=actor_fc_layers,
continuous_projection_net=tanh_normal_projection_network
.TanhNormalProjectionNetwork)
critic_net = critic_network.CriticNetwork(
(observation_spec, action_spec),
observation_fc_layer_params=critic_obs_fc_layers,
action_fc_layer_params=critic_action_fc_layers,
joint_fc_layer_params=critic_joint_fc_layers,
kernel_initializer='glorot_uniform',
last_kernel_initializer='glorot_uniform')
tf_agent = sac_agent.SacAgent(
time_step_spec,
action_spec,
actor_network=actor_net,
critic_network=critic_net,
actor_optimizer=tf.compat.v1.train.AdamOptimizer(
learning_rate=actor_learning_rate),
critic_optimizer=tf.compat.v1.train.AdamOptimizer(
learning_rate=critic_learning_rate),
alpha_optimizer=tf.compat.v1.train.AdamOptimizer(
learning_rate=alpha_learning_rate),
target_update_tau=target_update_tau,
target_update_period=target_update_period,
td_errors_loss_fn=td_errors_loss_fn,
gamma=gamma,
reward_scale_factor=reward_scale_factor,
gradient_clipping=gradient_clipping,
debug_summaries=debug_summaries,
summarize_grads_and_vars=summarize_grads_and_vars,
train_step_counter=global_step)
tf_agent.initialize()
# Make the replay buffer.
replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
data_spec=tf_agent.collect_data_spec,
batch_size=batch_size,
max_length=replay_buffer_capacity,
device="/device:GPU:0")
replay_observer = [replay_buffer.add_batch]
train_metrics = [
tf_metrics.NumberOfEpisodes(),
tf_metrics.EnvironmentSteps(),
tf_metrics.AverageReturnMetric(
buffer_size=num_eval_episodes, batch_size=tf_env.batch_size),
tf_metrics.AverageEpisodeLengthMetric(
buffer_size=num_eval_episodes, batch_size=tf_env.batch_size),
]
eval_policy = greedy_policy.GreedyPolicy(tf_agent.policy)
initial_collect_policy = random_tf_policy.RandomTFPolicy(
tf_env.time_step_spec(), tf_env.action_spec())
collect_policy = tf_agent.collect_policy
train_checkpointer = common.Checkpointer(
ckpt_dir=train_dir,
agent=tf_agent,
global_step=global_step,
metrics=metric_utils.MetricsGroup(train_metrics, 'train_metrics'))
policy_checkpointer = common.Checkpointer(
ckpt_dir=os.path.join(train_dir, 'policy'),
policy=eval_policy,
global_step=global_step)
rb_checkpointer = common.Checkpointer(
ckpt_dir=os.path.join(train_dir, 'replay_buffer'),
max_to_keep=1,
replay_buffer=replay_buffer)
train_checkpointer.initialize_or_restore()
rb_checkpointer.initialize_or_restore()
initial_collect_driver = dynamic_step_driver.DynamicStepDriver(
tf_env,
initial_collect_policy,
observers=replay_observer + train_metrics,
num_steps=initial_collect_steps)
collect_driver = dynamic_step_driver.DynamicStepDriver(
tf_env,
collect_policy,
observers=replay_observer + train_metrics,
num_steps=collect_steps_per_iteration)
if use_tf_functions:
initial_collect_driver.run = common.function(initial_collect_driver.run)
collect_driver.run = common.function(collect_driver.run)
tf_agent.train = common.function(tf_agent.train)
if replay_buffer.num_frames() == 0:
# Collect initial replay data.
logging.info(
'Initializing replay buffer by collecting experience for %d steps '
'with a random policy.', initial_collect_steps)
initial_collect_driver.run()
results = metric_utils.eager_compute(
eval_metrics,
eval_tf_env,
eval_policy,
num_episodes=num_eval_episodes,
train_step=global_step,
summary_writer=eval_summary_writer,
summary_prefix='Metrics',
)
if eval_metrics_callback is not None:
eval_metrics_callback(results, global_step.numpy())
metric_utils.log_metrics(eval_metrics)
time_step = None
policy_state = collect_policy.get_initial_state(tf_env.batch_size)
timed_at_step = global_step.numpy()
time_acc = 0
# Prepare replay buffer as dataset with invalid transitions filtered.
def _filter_invalid_transition(trajectories, unused_arg1):
return ~trajectories.is_boundary()[0]
dataset = replay_buffer.as_dataset(
sample_batch_size=batch_size,
num_steps=2).unbatch().filter(
_filter_invalid_transition).batch(batch_size).prefetch(5)
# Dataset generates trajectories with shape [Bx2x...]
iterator = iter(dataset)
def train_step():
experience, _ = next(iterator)
return tf_agent.train(experience)
if use_tf_functions:
train_step = common.function(train_step)
global_step_val = global_step.numpy()
while global_step_val < num_iterations:
start_time = time.time()
time_step, policy_state = collect_driver.run(
time_step=time_step,
policy_state=policy_state,
)
for _ in range(train_steps_per_iteration):
train_loss = train_step()
time_acc += time.time() - start_time
global_step_val = global_step.numpy()
if global_step_val % log_interval == 0:
logging.info('step = %d, loss = %f', global_step_val,
train_loss.loss)
steps_per_sec = (global_step_val - timed_at_step) / time_acc
logging.info('%.3f steps/sec', steps_per_sec)
tf.compat.v2.summary.scalar(
name='global_steps_per_sec', data=steps_per_sec, step=global_step)
timed_at_step = global_step_val
time_acc = 0
for train_metric in train_metrics:
train_metric.tf_summaries(
train_step=global_step, step_metrics=train_metrics[:2])
if global_step_val % eval_interval == 0:
results = metric_utils.eager_compute(
eval_metrics,
eval_tf_env,
eval_policy,
num_episodes=num_eval_episodes,
train_step=global_step,
summary_writer=eval_summary_writer,
summary_prefix='Metrics',
)
if eval_metrics_callback is not None:
eval_metrics_callback(results, global_step_val)
metric_utils.log_metrics(eval_metrics)
if global_step_val % train_checkpoint_interval == 0:
train_checkpointer.save(global_step=global_step_val)
if global_step_val % policy_checkpoint_interval == 0:
policy_checkpointer.save(global_step=global_step_val)
if global_step_val % rb_checkpoint_interval == 0:
rb_checkpointer.save(global_step=global_step_val)
return train_loss
def main(_):
tf.compat.v1.enable_v2_behavior()
logging.set_verbosity(logging.INFO)
gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_param)
train_eval(FLAGS.root_dir)
if __name__ == '__main__':
flags.mark_flag_as_required('root_dir')
app.run(main)
What is the appropriate way to create a batched environment for a custom, non-batched environment? I can share my custom environment, but I don't believe the issue lies there, as the code works fine with a batch size of 1.
Also, any tips on increasing GPU utilization in reinforcement learning scenarios would be greatly appreciated. I have examined examples of using the tensorboard profiler to profile GPU utilization, but those seem to require callbacks and a fit function, which don't seem applicable to RL use cases.
It turns out I neglected to pass batch_size when initializing the AverageReturnMetric and AverageEpisodeLengthMetric instances.
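A sketch of the fix (the keyword arguments follow the tf_metrics usage already shown for the train metrics above; batch_size here is the same 32 used for the environments):
eval_metrics = [
    tf_metrics.AverageReturnMetric(
        buffer_size=num_eval_episodes, batch_size=batch_size),
    tf_metrics.AverageEpisodeLengthMetric(
        buffer_size=num_eval_episodes, batch_size=batch_size),
]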

Two models of the same architecture with same weights giving different results

Problem
After copying weights from a pretrained model, I do not get the same output.
Description
tf2cv repository provides pretrained models in TF2 for various backbones. Unfortunately the codebase is of limited use to me because they use subclassing via tf.keras.Model which makes it very hard to extract intermediate outputs and gradients at will. I therefore embarked upon rewriting the codes for the backbones using the functional API. After rewriting the resnet architecture codes, I copied their weights into my model and saved them in SavedModel format. In order to test if it is correctly done, I gave an input to my model instance and theirs and the results were different.
My approaches to debugging the problem
I checked the number of trainable and non-trainable parameters and they are the same between my model instance and theirs.
I checked whether all trainable weights have been copied, which they have.
My present line of thinking
I think it might be possible that weights have not been copied to the correct layers. For example, Layer X and Layer Y might have weights of the same shape, but during weight copying the weights of Layer Y might have gone into Layer X and vice versa. This is only possible if I have not mapped the layer names between the two models properly.
However, I have checked exhaustively and have not found any error so far.
The Code
My code is attached below. Their (tf2cv) code for resnet can be found here
Please note that resnet_orig in the following snippet is the same as here
My converted code can be found here
from vision.image import resnet as myresnet
from glob import glob
from loguru import logger
import tensorflow as tf
import resnet_orig
import re
import os
import numpy as np
from time import time
from copy import deepcopy

tf.random.set_seed(time())

models = [
    'resnet10',
    'resnet12',
    'resnet14',
    'resnetbc14b',
    'resnet16',
    'resnet18_wd4',
    'resnet18_wd2',
    'resnet18_w3d4',
    'resnet18',
    'resnet26',
    'resnetbc26b',
    'resnet34',
    'resnetbc38b',
    'resnet50',
    'resnet50b',
    'resnet101',
    'resnet101b',
    'resnet152',
    'resnet152b',
    'resnet200',
    'resnet200b',
]

def zipdir(path, ziph):
    # ziph is zipfile handle
    for root, dirs, files in os.walk(path):
        for file in files:
            ziph.write(os.path.join(root, file),
                       os.path.relpath(os.path.join(root, file),
                                       os.path.join(path, '..')))

def find_model_file(model_type):
    model_files = glob('*.h5')
    for m in model_files:
        if '{}-'.format(model_type) in m:
            return m
    return None

def remap_our_model_variables(our_variables, model_name):
    remapped = list()
    reg = re.compile(r'(stage\d+)')
    for var in our_variables:
        newvar = var.replace(model_name, 'features/features')
        stage_search = re.search(reg, newvar)
        if stage_search is not None:
            stage_search = stage_search[0]
            newvar = newvar.replace(stage_search, '{}/{}'.format(stage_search,
                                                                 stage_search))
        newvar = newvar.replace('conv_preact', 'conv/conv')
        newvar = newvar.replace('conv_bn', 'bn')
        newvar = newvar.replace('logits', 'output1')
        remapped.append(newvar)
    remap_dict = dict([(x, y) for x, y in zip(our_variables, remapped)])
    return remap_dict

def get_correct_variable(variable_name, trainable_variable_names):
    for i, var in enumerate(trainable_variable_names):
        if variable_name == var:
            return i
    logger.info('Uffff.....')
    return None

layer_regexp_compiled = re.compile(r'(.*)\/.*')
model_files = glob('*.h5')

a = np.ones(shape=(1, 224, 224, 3), dtype=np.float32)
inp = tf.constant(a, dtype=tf.float32)

for model_type in models:
    logger.info('Model is {}.'.format(model_type))
    model = eval('myresnet.{}(input_height=224,input_width=224,'
                 'num_classes=1000,data_format="channels_last")'.format(model_type))
    model2 = eval('resnet_orig.{}(data_format="channels_last")'.format(model_type))
    model2.build(input_shape=(None, 224, 224, 3))
    model_name = find_model_file(model_type)
    logger.info('Model file is {}.'.format(model_name))
    original_weights = deepcopy(model2.weights)
    if model_name is not None:
        e = model2.load_weights(model_name, by_name=True, skip_mismatch=False)
        print(e)
        loaded_weights = deepcopy(model2.weights)
    else:
        logger.info('Pretrained model is not available for {}.'.format(model_type))
        continue
    diff = [np.mean(x.numpy() - y.numpy()) for x, y in zip(original_weights,
                                                           loaded_weights)]
    our_model_weights = model.weights
    their_model_weights = model2.weights
    assert (len(our_model_weights) == len(their_model_weights))
    our_variable_names = [x.name for x in model.weights]
    their_variable_names = [x.name for x in model2.weights]
    remap_dict = remap_our_model_variables(our_variable_names, model_type)
    new_weights = list()
    for i in range(len(our_model_weights)):
        our_name = model.weights[i].name
        remapped_name = remap_dict[our_name]
        source_index = get_correct_variable(remapped_name, their_variable_names)
        new_weights.append(model2.weights[source_index].value())
        logger.debug('Copying from {} ({}) to {} ({}).'.format(
            model2.weights[source_index].name,
            model2.weights[source_index].value().shape,
            model.weights[i].name,
            model.weights[i].value().shape))
    logger.info(len(new_weights))
    logger.info('Setting new weights')
    model.set_weights(new_weights)
    logger.info('Finished setting new weights.')
    their_output = model2(inp)
    our_output = model(inp)
    logger.info(np.max(their_output.numpy() - our_output.numpy()))
    logger.info(diff)  # This must be 0.0
    break
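A per-variable sanity check along these lines (just a sketch, reusing the names defined in the snippet above) can confirm that each weight actually landed on its intended counterpart, not merely that the counts match:
for i, var in enumerate(model.weights):
    source_index = get_correct_variable(remap_dict[var.name], their_variable_names)
    if not np.allclose(var.numpy(), model2.weights[source_index].numpy()):
        logger.warning('Mismatch after copy: {} vs {}'.format(
            var.name, model2.weights[source_index].name))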

TensorFlow-Keras generator: Turn off auto-sharding or switch auto_shard_policy to DATA

While training my model I ran into the issue described in the post Tensorflow - Keras: Consider either turning off auto-sharding or switching the auto_shard_policy to DATA to shard this dataset. My question now is: Does the solution mentioned by #Graham501617 work with generators as well? Here is some dummy code for what I use so far:
class BatchGenerator(Sequence):

    def __init__(self, some_args):
        ...

    def __len__(self):
        num_batches_in_sequence = ...

    def __getitem__(self, _):
        data, labels = get_one_batch(self.some_args)
        return data, labels
In the main script I do something like:
train_generator = BatchGenerator(some_args)
valid_generator = BatchGenerator(some_args)

cross_device_ops = tf.distribute.HierarchicalCopyAllReduce(num_packs=2)
strategy = tf.distribute.MirroredStrategy(cross_device_ops=cross_device_ops)

with strategy.scope():
    model = some_model
    model.compile(some_args)

history = model.fit(
    x=train_generator,
    validation_data=valid_generator,
    ...
)
I would probably have to modify the __getitem__ function somehow, wouldn't I?
I appreciate your support!
You'd have to wrap your generator into a single function...
The example below assumes your data is stored as numpy arrays (.npy), each file already contains one mini-batch, the files are labeled 0_x.npy, 1_x.npy, 2_x.npy, etc., and both the data and label arrays are float64.
from pathlib import Path
import os
import tensorflow as tf
import numpy as np

# Your new generator as a function rather than an object you need to instantiate
def getNextBatch(stop, data_dir):
    i = 0
    data_dir = data_dir.decode('ascii')
    while True:
        while i < stop:
            x = np.load(str(Path(data_dir + "/" + str(i) + "_x.npy")))
            y = np.load(str(Path(data_dir + "/" + str(i) + "_y.npy")))
            yield x, y
            i += 1
        i = 0

# Make a dataset given the directory and strategy
def makeDataset(generator_func, dir, strategy=None):
    # Get the number of files (each batch index has an _x and a _y file)
    data_size = int(len([name for name in os.listdir(dir) if os.path.isfile(os.path.join(dir, name))]) / 2)
    # Make a dataset from the generator. MAKE SURE TO SPECIFY THE DATA TYPE!!!
    ds = tf.data.Dataset.from_generator(generator_func, args=[data_size, dir],
                                        output_types=(tf.float64, tf.float64))
    options = tf.data.Options()
    options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
    ds = ds.with_options(options)
    # Optional: Make it a distributed dataset if you're using a strategy
    if strategy is not None:
        ds = strategy.experimental_distribute_dataset(ds)
    return ds

training_ds = makeDataset(getNextBatch, str(Path(data_dir + "/training")), None)
validation_ds = makeDataset(getNextBatch, str(Path(data_dir + "/validation")), None)

model.fit(training_ds,
          epochs=epochs,
          callbacks=callbacks,
          validation_data=validation_ds)
You might need to pass the number of steps per epoch in your fit() call, in which case you can use the generator you've already made.
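For example, the Sequence subclass from the question already knows its batch count through __len__, so (as a sketch, assuming one epoch is one full pass over the data) it can supply those step counts:
train_generator = BatchGenerator(some_args)
valid_generator = BatchGenerator(some_args)

model.fit(training_ds,
          epochs=epochs,
          steps_per_epoch=len(train_generator),
          callbacks=callbacks,
          validation_data=validation_ds,
          validation_steps=len(valid_generator))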

Extract Tensorboard Histogram data

Following the tutorial, I generated a histogram with TensorFlow:
import tensorflow as tf
k = tf.placeholder(tf.float32)
# Make a normal distribution, with a shifting mean
mean_moving_normal = tf.random_normal(shape=[1000], mean=(5*k), stddev=1)
# Record that distribution into a histogram summary
tf.summary.histogram("normal/moving_mean", mean_moving_normal)
# Setup a session and summary writer
sess = tf.Session()
writer = tf.summary.FileWriter("/tmp/histogram_example")
summaries = tf.summary.merge_all()
# Setup a loop and write the summaries to disk
N = 400
for step in range(N):
    k_val = step / float(N)
    summ = sess.run(summaries, feed_dict={k: k_val})
    writer.add_summary(summ, global_step=step)
As a next step, I want to extract the histogram data using the TensorBoard API; my code is here:
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
event_acc = EventAccumulator(summary_path)
event_acc.Reload()
# Show all tags in the log file
tags = event_acc.Tags()
hist_dict = {}
for hist_event in event_acc.Histograms('normal/moving_mean'):
    hist_dict.update({hist_event.step: (hist_event.histogram_value.bucket_limit,
                                        hist_event.histogram_value.bucket)})
However, it only returned the most recent histogram events, not the full history. How can I get all the data?
When passing a "size_guidance" to the EventAccumulator constructor, you are good to go. Something like:
event_acc = EventAccumulator(path, size_guidance={
    'histograms': REAL_STEP_COUNT,
})
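As a concrete sketch (relying on the EventAccumulator convention that a size guidance of 0 keeps all events for a category), rerunning the extraction from the question with it should return the full history:
event_acc = EventAccumulator(summary_path, size_guidance={'histograms': 0})
event_acc.Reload()

hist_dict = {}
for hist_event in event_acc.Histograms('normal/moving_mean'):
    hist_dict.update({hist_event.step: (hist_event.histogram_value.bucket_limit,
                                        hist_event.histogram_value.bucket)})
print(len(hist_dict))  # with the writer loop above, this should cover all 400 steps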

How can I update tensor (weight value) trying to use two separate network?

I've been trying to build an AI for blackjack using RL. Now I'm trying to use two separate networks, which is one approach to DQN. I searched the web, found an approach, and tried to use it, but failed.
This error occurred:
TypeError: Using a tf.Tensor as a Python bool is not allowed. Use if t is not None: instead of if t: to test if a tensor is defined, and use TensorFlow ops such as tf.cond to execute subgraphs conditioned on the value of a tensor.
Code:
import gym
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
def one_hot(x):
    s = np.identity(600)
    b = s[x[0] * 20 + x[1] * 2 + x[2]]
    return b.reshape(1, 600)

def boolstr_to_floatstr(v):
    if v == True:
        return 1
    elif v == False:
        return 0

env = gym.make('Blackjack-v0')

learning_rate = 0.5
state_number = 600
action_number = 2

########################################
X = tf.placeholder(tf.float32, shape=[1, state_number], name='input_data')
W1 = tf.Variable(tf.random_uniform([state_number, 128], 0, 0.01))  # network for update
layer1 = tf.nn.tanh(tf.matmul(X, W1))
W2 = tf.Variable(tf.random_uniform([128, 256], 0, 0.01))
layer2 = tf.nn.tanh(tf.matmul(layer1, W2))
W3 = tf.Variable(tf.random_uniform([256, action_number], 0, 0.01))
Qpred = tf.matmul(layer2, W3)  # Q prediction
########################################
X1 = tf.placeholder(shape=[1, state_number], dtype=tf.float32)
W4 = tf.Variable(tf.random_uniform([state_number, 128], 0, 0.01))  # network for target
layer3 = tf.nn.tanh(tf.matmul(X1, W4))
W5 = tf.Variable(tf.random_uniform([128, 256], 0, 0.01))
layer4 = tf.nn.tanh(tf.matmul(layer3, W5))
W6 = tf.Variable(tf.random_uniform([256, action_number], 0, 0.01))
target = tf.matmul(layer4, W6)  # target
########################################

update1 = W4.assign(W1)
update2 = W5.assign(W2)
update3 = W6.assign(W3)

Y = tf.placeholder(shape=[1, action_number], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(Y - Qpred))  # cost(W) = (Ws - y)^2
train = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss)

num_episodes = 1000
dis = 0.99  # discount factor
rList = []  # record the reward

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for i in range(num_episodes):  # loop over episodes
        s = env.reset()
        rALL = 0
        done = False
        e = 1. / ((i / 100) + 1)  # constant for the explore/exploit trade-off
        total_loss = []
        while not done:
            s = np.asarray(s)
            s[2] = boolstr_to_floatstr(s[2])
            #print(np.shape(one_hot(s)))
            #print(one_hot(s))
            Qs = sess.run(Qpred, feed_dict={X: one_hot(s).astype(np.float32)})
            if np.random.rand(1) < e:  # explore a new action
                a = env.action_space.sample()
            else:
                a = np.argmax(Qs)  # otherwise pick the best action I know of
            s1, reward, done, _ = env.step(a)
            s1 = np.asarray(s1)
            s1[2] = boolstr_to_floatstr(s1[2])
            if done:
                Qs[0, a] = reward
            else:
                Qs1 = sess.run(target, feed_dict={X1: one_hot(s1)})
                Qs[0, a] = reward + dis * np.max(Qs1)  # optimal Q
            sess.run(train, feed_dict={X: one_hot(s), Y: Qs})
            if i % 10 == 0:  # update the target network from the Q-prediction network
                sess.run(update1, update2, update3)
            if reward == 1:
                rALL += reward
            else:
                rALL += 0
            s = s1
        rList.append(rALL)

print('success rate: ' + str(sum(rList) / num_episodes))
print("Final Q-table values")
I need to print the success rate at the end. Before DQN it was around 38%. If there is something wrong in my code with respect to the DQN algorithm, please tell me.
If you want to share the weights between different networks, simply create the layers with the same names inside a scope like with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):, and the weights between the networks will then be shared automatically.
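A minimal sketch of that mechanism (the network function and layer sizes below are illustrative, loosely mirroring the question's architecture): both calls end up using the same variables because of reuse=tf.AUTO_REUSE.
import tensorflow as tf

def q_network(x, name='q_net'):
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        h1 = tf.layers.dense(x, 128, activation=tf.nn.tanh, name='fc1')
        h2 = tf.layers.dense(h1, 256, activation=tf.nn.tanh, name='fc2')
        return tf.layers.dense(h2, 2, name='q')

X = tf.placeholder(tf.float32, shape=[1, 600])
X1 = tf.placeholder(tf.float32, shape=[1, 600])

Qpred = q_network(X)    # first call creates fc1/fc2/q under 'q_net'
target = q_network(X1)  # second call reuses exactly the same weights
Note that if the target network is meant to be refreshed only periodically, as with the update1/update2/update3 assign ops in the question, fully sharing the weights defeats that purpose; this sketch only illustrates the sharing mechanism described above.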