How to split your own data set into training and validation sets in a TensorFlow CNN

I'm using the CNN TensorFlow code from https://www.tensorflow.org/tutorials/layers and trying to run it on my own data instead of the MNIST dataset. Since I'm new to this area, I'm struggling with the code and with errors :(
I made a file.txt that contains the path of each image on my computer and its label. I have 400 grayscale images, 16x16.
Here is the code:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
...
from PIL import Image
import PIL.Image
#import imageflow
import os
import cv2
#import glob
import __main__ as _main_module
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
from sklearn.model_selection import train_test_split
...
from tensorflow.contrib import learn
from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib

#tf.logging.set_verbosity(tf.logging.INFO)

#%%%%%%%%%%%%%%%%%%%%%% MY DATA %%%%%%%%%%%%%%%%%%%%%%%
def main(unused_argv):
    path = 'C:/Users/.../ImageDir-Lables-01.txt'
    filenames = []
    labels = []

    # Reading file and extracting paths and labels
    with open(path, 'r') as File:
        infoFile = File.readlines()  # Reading all the lines from File
        for line in infoFile:  # Reading line-by-line
            words = line.split()  # Splitting lines in words using space character as separator
            filenames.append(words[0])
            labels.append(int(words[1]))

    NumFiles = len(filenames)
    print(NumFiles)

    # Converting filenames and labels into tensors
    tfilenames = ops.convert_to_tensor(filenames, dtype=dtypes.string)
    tlabels = ops.convert_to_tensor(labels, dtype=dtypes.int32)

    # Creating a queue which contains the list of files to read and the value of the labels
    filename_queue = tf.train.slice_input_producer([tfilenames, tlabels],
                                                   num_epochs=10,
                                                   shuffle=True,
                                                   capacity=NumFiles)

    # Reading the image files and decoding them
    rawIm = tf.read_file(filename_queue[0])
    decodedIm = tf.image.decode_image(rawIm)  # png or jpg decoder

    # Extracting the labels queue
    label_queue = filename_queue[1]

    # Initializing Global and Local Variables so we avoid warnings and errors
    init_op = tf.group(tf.local_variables_initializer(), tf.global_variables_initializer())

    # Creating an InteractiveSession so we can run in iPython
    sess = tf.InteractiveSession()
    with sess.as_default():
        sess.run(init_op)
        # Start populating the filename queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        for i in range(NumFiles):  # length of your filenames list
            nm, image, lb = sess.run([filename_queue[0], decodedIm, label_queue])
            print(image.shape)
            print(nm)
            print(lb)
            # Showing the current image
            jpgfile = Image.open(nm)
            jpgfile.show()
        coord.request_stop()
        coord.join(threads)

    train_data, train_labels, eval_data, eval_labels = \
        tf.train_split([filename_queue[0], filename_queue[1]], frac=.1)
    # train_data, eval_data, train_labels, eval_labels = \
    #     train_test_split([filename_queue[0], filename_queue[1]], frac=0.2)
    # train_data, train_labels, eval_data, eval_labels = \
    #     tf.split(tf.random_shuffle(filename_queue[0], filename_queue[1], frac=0.25))
    return train_data, train_labels, eval_data, eval_labels
    print(train_data.shape)

    ###########################################
    # Create the Estimator
    Xray_classifier = learn.Estimator(model_fn=cnn_model_fn, model_dir="/tmp/Xray_convnet_model")
    ###########################################
    # Set up logging for predictions
    # Log the values in the "Softmax" tensor with label "probabilities"
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=50)
    # Train the model
    Xray_classifier.fit(
        x=train_data,
        y=train_labels,
        batch_size=10,
        steps=20000,
        monitors=[logging_hook])
    # Configure the accuracy metric for evaluation
    metrics = {
        "accuracy":
            learn.MetricSpec(
                metric_fn=tf.metrics.accuracy, prediction_key="classes"),
    }
    # Evaluate the model and print results
    eval_results = Xray_classifier.evaluate(
        x=eval_data, y=eval_labels, metrics=metrics)
    print(eval_results)
    # Our application logic will be added here

if __name__ == "__main__":
    tf.app.run()
I used three different snippets to divide my dataset.
When I used train_data, train_labels, eval_data, eval_labels = tf.train_split(image, lb, frac=.1), it gave this error: AttributeError: module 'tensorflow' has no attribute 'train_split'.
When I used train_data, eval_data, train_labels, eval_labels = train_test_split([filename_queue[0], filename_queue[1]], frac=0.2), it gave this error: TypeError: Invalid parameters passed: {'frac': 0.2}.
When I used train_data, train_labels, eval_data, eval_labels = tf.split(tf.random_shuffle(filename_queue[0], filename_queue[1], frac=0.25)), it gave this error: TypeError: random_shuffle() got an unexpected keyword argument 'frac'.
Does anyone have an idea what I should write to split the data?
Any help would be appreciated. Thank you.

You can use scikit-learn's train_test_split function: http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html
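For example, a minimal sketch, assuming you split the plain Python lists filenames and labels from your script before building any TensorFlow queues. Note that train_test_split takes test_size (a fraction of the data), not frac, which is why your second attempt failed:

from sklearn.model_selection import train_test_split

# 80% train / 20% eval; random_state makes the split reproducible.
train_data, eval_data, train_labels, eval_labels = train_test_split(
    filenames, labels, test_size=0.2, random_state=42)

You can then build a separate input queue (or input function) from each of the four resulting lists.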

Related

Loading and testing a Tensorflow 2 trained model

I was already able to train a custom TF2 model using this tutorial:
https://neptune.ai/blog/how-to-train-your-own-object-detector-using-tensorflow-object-detection-api
Now I'm getting stuck testing this model. The script I use for this is also from a tutorial; I changed the paths etc., but it still doesn't work. I've tried for many hours now and got demotivated at times.
I can resolve many errors, but not the current one; maybe someone can help me. I'm quite new to object detection.
import numpy as np
import os
import six as urllib  # import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
import cv2

cap = cv2.VideoCapture(1)

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")

# ## Object detection imports
# Here are the imports from the object detection module.
# In[3]:
from object_detection.utils import label_map_util  # from utils import label_map_util
from object_detection.utils import visualization_utils as vis_util  # from utils import visualization_utils as vis_util

# # Model preparation
# ## Variables
#
# Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file.
#
# By default we use an "SSD with Mobilenet" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.
# In[4]:
# What model to download.
MODEL_NAME = 'D:/VSCode/Machine_Learning_Tests/Tensorflow/workspace/exported_models/first_model/saved_model'  # MODEL_NAME = 'inference_graph'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/saved_model.pb'  # PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = 'D:/VSCode/Machine_Learning_Tests/Tensorflow/workspace/data/label_map.pbtxt'  # PATH_TO_LABELS = 'training/labelmap.pbtxt'
NUM_CLASSES = 1

# ## Load a (frozen) Tensorflow model into memory.
# In[6]:
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.compat.v1.GraphDef()  # od_graph_def = tf.GraphDef()
    with tf.compat.v2.io.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:  # with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

# ## Loading label map
# Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine
# In[7]:
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# ## Helper code
# In[8]:
def load_image_into_numpy_array(image):
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)

# # Detection
# In[9]:
# For the sake of simplicity we will use only 2 images:
# image1.jpg
# image2.jpg
# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'images/test/'
TEST_IMAGE_PATHS = [os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(3, 8)]
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

# In[10]:
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        while True:
            image_np = np.array(cv2.imread('Test.jpg'))
            cv2.imshow('image', image_np)
            cv2.waitKey(1)
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score represents the level of confidence for each of the objects.
            # The score is shown on the result image, together with the class label.
            scores = detection_graph.get_tensor_by_name('detection_scores:0')
            classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            # Actual detection.
            (boxes, scores, classes, num_detections) = sess.run(
                [boxes, scores, classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8)
            cv2.imshow('object detection', cv2.resize(image_np, (800, 600)))
            if cv2.waitKey(25) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                break
That's the code I use to try to test the model, and this is the current error:
Traceback (most recent call last):
  File "d:\VSCode\Machine_Learning_Tests\Tensorflow\test\object_detection_tutorial_wwwPythonProgrammingNet__mitBild.py", line 65, in <module>
    od_graph_def.ParseFromString(serialized_graph)
google.protobuf.message.DecodeError: Error parsing message with type 'tensorflow.GraphDef'
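For context, a TF2 export produces a SavedModel directory whose saved_model.pb is not a frozen GraphDef, which is typically why ParseFromString raises this DecodeError. A minimal sketch of loading such an export directly instead, assuming TensorFlow 2.x and the path from the script above (detect_fn is then called on a [1, H, W, 3] uint8 tensor):

import tensorflow as tf

# Point at the SavedModel *directory*, not the saved_model.pb file inside it.
SAVED_MODEL_DIR = 'D:/VSCode/Machine_Learning_Tests/Tensorflow/workspace/exported_models/first_model/saved_model'
model = tf.saved_model.load(SAVED_MODEL_DIR)
detect_fn = model.signatures['serving_default']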

Tensorflow object detection API tutorial error

After struggling with compatibility issues between TensorFlow 2.0 and the Object Detection API, I downgraded to TensorFlow 1.15 to be able to train my own model. After completing the training, I modified the Jupyter notebook included in the TensorFlow Object Detection API repo to test on my own images, but I keep getting this error:
Traceback (most recent call last):
  File "object_detection_tutorial_converted.py", line 254, in <module>
    show_inference(detection_model, image_path)
  File "object_detection_tutorial_converted.py", line 235, in show_inference
    output_dict = run_inference_for_single_image(model, image_np)
  File "object_detection_tutorial_converted.py", line 203, in run_inference_for_single_image
    num_detections = int(output_dict.pop('num_detections'))
TypeError: int() argument must be a string, a bytes-like object or a number, not 'Tensor'
Here's my modified Jupyter notebook:
import os
import pathlib
import numpy as np
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from IPython.display import display
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# patch tf1 into `utils.ops`
utils_ops.tf = tf.compat.v1
# Patch the location of gfile
tf.gfile = tf.io.gfile

def load_model(model_name):
    model_dir = pathlib.Path(model_name)/"saved_model"
    model = tf.compat.v2.saved_model.load(str(model_dir), None)
    model = model.signatures['serving_default']
    return model

# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = 'training/label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

# If you want to test the code with your images, just add the paths to TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = pathlib.Path('test_images')
TEST_IMAGE_PATHS = sorted(list(PATH_TO_TEST_IMAGES_DIR.glob("*.jpg")))
TEST_IMAGE_PATHS

model_name = 'devices_graph'
detection_model = load_model(model_name)
print(detection_model.inputs)
detection_model.output_dtypes
detection_model.output_shapes

def run_inference_for_single_image(model, image):
    image = np.asarray(image)
    # The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
    input_tensor = tf.convert_to_tensor(image)
    # The model expects a batch of images, so add an axis with `tf.newaxis`.
    input_tensor = input_tensor[tf.newaxis, ...]
    # Run inference
    output_dict = model(input_tensor)
    # All outputs are batch tensors.
    # Convert to numpy arrays, and take index [0] to remove the batch dimension.
    # We're only interested in the first num_detections.
    num_detections = int(output_dict.pop('num_detections'))
    output_dict = {key: value[0, :num_detections].numpy()
                   for key, value in output_dict.items()}
    output_dict['num_detections'] = num_detections
    # detection_classes should be ints.
    output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)
    # Handle models with masks:
    if 'detection_masks' in output_dict:
        # Reframe the bbox mask to the image size.
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            output_dict['detection_masks'], output_dict['detection_boxes'],
            image.shape[0], image.shape[1])
        detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
                                           tf.uint8)
        output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy()
    return output_dict

# Run it on each test image and show the results:
def show_inference(model, image_path):
    # the array based representation of the image will be used later in order to prepare the
    # result image with boxes and labels on it.
    image_np = np.array(Image.open(image_path))
    # Actual detection.
    output_dict = run_inference_for_single_image(model, image_np)
    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks_reframed', None),
        use_normalized_coordinates=True,
        line_thickness=8)
    display(Image.fromarray(image_np))

for image_path in TEST_IMAGE_PATHS:
    show_inference(detection_model, image_path)
First, you need to export an inference graph of the model using the script linked below, and then load the "frozen_inference_graph.pb" file/model. You need to give the full path to the file, not just the folder path.
https://github.com/tensorflow/models/blob/master/research/object_detection/export_inference_graph.py
Example path:
MODEL_PATH = '/home/sumanh/tf_models/Archive/model/ssd_inception_v2_coco_2018_01_28/190719/frozen_inference_graph.pb'
That's strange; this worked with TensorFlow 2.0.0 for me. Can you send the console log?

No module named 'object_detection' on Spyder with W10

I use Python 3.6 with Anaconda and the Spyder editor on my system, which is a standard desktop with Windows 10. I set up the TensorFlow Object Detection API as instructed in
https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md.
Since the official installation instructions are Linux-oriented, I also got help from
https://medium.com/@rohitrpatil/how-to-use-tensorflow-object-detection-api-on-windows-102ec8097699.
In the end, I wanted to test the system I had set up by running the supplied test file "object_detection_tutorial.ipynb" in a Jupyter notebook. It immediately gave the error:
ModuleNotFoundError Traceback (most recent call last)
<ipython-input-10-34f5cdda911a> in <module>
15 # This is needed since the notebook is stored in the object_detection folder.
16 sys.path.append("..")
---> 17 from object_detection.utils import ops as utils_ops
18
19 if StrictVersion(tf.__version__) < StrictVersion('1.12.0'):
ModuleNotFoundError: No module named 'object_detection'
I couldn't find a solution for the error, even though it has been discussed many times on GitHub and here. I decided to go with Spyder and test the code right there. It gave an error for the line
%matplotlib inline
in the code. After some research, I found that this is a Jupyter-specific command, so I commented it out. Instead I added
matplotlib.use('TkAgg')
plt.show()
The final structure of the official test code I've been testing on Spyder is:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import matplotlib
from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

if StrictVersion(tf.__version__) < StrictVersion('1.12.0'):
    raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.')

# This is needed to display the images.
# %matplotlib inline

from utils import label_map_util
from utils import visualization_utils as vis_util

# What model to download.
MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')

opener = urllib.request.URLopener()
opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
tar_file = tarfile.open(MODEL_FILE)
for file in tar_file.getmembers():
    file_name = os.path.basename(file.name)
    if 'frozen_inference_graph.pb' in file_name:
        tar_file.extract(file, os.getcwd())

detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

def load_image_into_numpy_array(image):
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)

# For the sake of simplicity we will use only 2 images:
# image1.jpg
# image2.jpg
# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'test_images'
TEST_IMAGE_PATHS = [os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 3)]
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

def run_inference_for_single_image(image, graph):
    with graph.as_default():
        with tf.Session() as sess:
            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                        tensor_name)
            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[1], image.shape[2])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')
            # Run inference
            output_dict = sess.run(tensor_dict,
                                   feed_dict={image_tensor: image})
            # all outputs are float32 numpy arrays, so convert types as appropriate
            output_dict['num_detections'] = int(output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict[
                'detection_classes'][0].astype(np.int64)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict

for image_path in TEST_IMAGE_PATHS:
    image = Image.open(image_path)
    # the array based representation of the image will be used later in order to prepare the
    # result image with boxes and labels on it.
    image_np = load_image_into_numpy_array(image)
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    # Actual detection.
    output_dict = run_inference_for_single_image(image_np_expanded, detection_graph)
    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=8)
    plt.figure(figsize=IMAGE_SIZE)
    plt.imshow(image_np)
    matplotlib.use('TkAgg')
    plt.show()
You can see the last two lines, which were added by me.
When I run this code it gives no error, but a figure window opens and never shows a figure in it. When I hover the mouse cursor over it, it shows as busy all the time.
I've tried many suggestions, but I couldn't figure things out. I already created a system environment variable
PYTHON_PATH
and added the values
C:\Users\user\models;
C:\Users\user\models\research;
C:\Users\user\models\research\slim;
C:\Users\user\models\research\object_detection;
C:\Users\user\models\research\object_detection\utils;
C:\Neon-ProgramData\Anaconda3;
C:\Neon-ProgramData\Anaconda3\Scripts;
C:\Neon-ProgramData\Anaconda3\Library\bin;
I also correctly compiled the proto files with protoc.exe and confirmed that the .py files are sitting there.
In Anaconda, I've created an environment for my TensorFlow work, and TF itself works normally.
I'm completely lost in this problem. I think I did the installation correctly and have tried all the suggestions the internet gave me. I want to test and use this API and need help with where I got stuck.
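As a side note on the two issues described above: Python reads the PYTHONPATH environment variable (not PYTHON_PATH), and matplotlib.use('TkAgg') only takes effect if it runs before pyplot creates any figures. A minimal sketch of both fixes, assuming the repository paths from the question:

import sys
import matplotlib
matplotlib.use('TkAgg')  # select the backend before pyplot draws anything
from matplotlib import pyplot as plt

# Make the Object Detection API importable inside Spyder without relying
# on the environment variable (paths assumed from the question).
sys.path.append(r'C:\Users\user\models\research')
sys.path.append(r'C:\Users\user\models\research\slim')

from object_detection.utils import ops as utils_ops  # should now resolve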

Tensorflow: Replacing Placeholders With Real Tensors In A Restored Metagraph

(I'm on TF 1.7 right now, in case that matters.)
I'm trying to initialize and then save a model and its associated metagraph in one script (init.py), so that I can load the model and resume training from a second script (train.py). The model is initialized with placeholders for the training example and label, to be replaced with real tensors during training. Yet when I try to create some real tensors in train.py (from a Dataset), I get a stack trace to the effect that my iterator hasn't been initialized. The trace points to the import_meta_graph() call, and it happens the same whether I use a one-shot iterator (which shouldn't require initialization) or an initializable iterator that I am actually initializing.
Am I missing something, conceptually, about how two graphs get spliced together?
I want to believe that this is a common use case for saving and restoring metagraphs, but I can't find any examples of it on the internet. How do others feed their real data into a restored model?
Caused by op 'IteratorGetNext_1', defined at:
  File "src/tictactoe/train.py", line 47, in <module>
    meta_graph, input_map={'example': example, 'label': label})
  File "/home/mason/dev/rust/seraphim/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1927, in import_meta_graph
    **kwargs)
  File "/home/mason/dev/rust/seraphim/lib/python3.5/site-packages/tensorflow/python/framework/meta_graph.py", line 741, in import_scoped_meta_graph
    producer_op_list=producer_op_list)
  File "/home/mason/dev/rust/seraphim/lib/python3.5/site-packages/tensorflow/python/util/deprecation.py", line 432, in new_func
    return func(*args, **kwargs)
  File "/home/mason/dev/rust/seraphim/lib/python3.5/site-packages/tensorflow/python/framework/importer.py", line 577, in import_graph_def
    op_def=op_def)
  File "/home/mason/dev/rust/seraphim/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 3290, in create_op
    op_def=op_def)
  File "/home/mason/dev/rust/seraphim/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1654, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

FailedPreconditionError (see above for traceback): GetNext() failed because the iterator has not been initialized. Ensure that you have run the initializer operation for this iterator before getting the next element.
  [[Node: IteratorGetNext_1 = IteratorGetNext[output_shapes=[[?,19], [?,9]], output_types=[DT_UINT8, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](Iterator_1)]]
The complete code of both scripts is here:
# init.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from pathlib import Path
import argparse
import os
import tensorflow as tf

parser = argparse.ArgumentParser(description='Initialize a TicTacToe expert model.')
parser.add_argument('name', metavar='foo-model', help='Model prefix')
args = parser.parse_args()
model_dir = "src/tictactoe/saved_models/" + args.name + "/" + args.name

with tf.Session() as sess:
    example = tf.placeholder(tf.uint8, shape=[1, 9 * 2 + 1], name='example')
    label = tf.placeholder(tf.float32, shape=[1, 9], name='label')
    dense = tf.layers.dense(tf.cast(example, tf.float32), units=64, activation=tf.nn.relu)
    logits = tf.layers.dense(dense, units=9, activation=tf.nn.relu)
    softmax = tf.nn.softmax(logits, name='softmax')
    tf.add_to_collection('softmax', softmax)
    sess = tf.Session()
    init = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer())
    sess.run(init)
    loss = tf.losses.mean_squared_error(labels=label, predictions=softmax)
    optimizer = tf.train.GradientDescentOptimizer(.01)
    train = optimizer.minimize(loss, name='train')
    tf.add_to_collection('train', train)
    saver = tf.train.Saver()
    saved = saver.save(sess, model_dir, global_step=0)
    print("Model saved in path: %s" % saved)
Here's the training script.
# train.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from pathlib import Path
import argparse
import glob
import os
import tensorflow as tf

parser = argparse.ArgumentParser(description='Initialize a TicTacToe expert model.')
parser.add_argument('name', metavar='foo-model', help='Model prefix')
args = parser.parse_args()
model_dir = "src/tictactoe/saved_models/" + args.name
saver_prefix = "src/tictactoe/saved_models/" + args.name + "/" + args.name
latest_checkpoint = tf.train.latest_checkpoint(model_dir)
meta_graph = ".".join([latest_checkpoint, "meta"])

num_epochs = 100
minibatch_size = 128
dataset_dir = "src/tictactoe/gamedata"

def make_dataset(minibatch_size, dataset_dir):
    files = glob.glob("{}/*.tfrecord".format(dataset_dir))
    print(files)
    dataset = tf.data.TFRecordDataset(files)
    dataset = dataset.map(parse)
    dataset = dataset.shuffle(buffer_size=100000)
    dataset = dataset.batch(minibatch_size)
    return dataset

def parse(bytes):
    features = {"game": tf.FixedLenFeature((), tf.string),
                "choice": tf.FixedLenSequenceFeature((), tf.float32, allow_missing=True)}
    parsed_features = tf.parse_single_example(bytes, features)
    game = tf.decode_raw(parsed_features["game"], tf.uint8)
    choice = parsed_features["choice"]
    return tf.reshape(game, [19]), tf.reshape(choice, [9])

with tf.Session() as sess:
    dataset = make_dataset(minibatch_size, dataset_dir)
    iterator = dataset.make_initializable_iterator()
    sess.run(iterator.initializer)
    example, label = iterator.get_next()
    saver = tf.train.import_meta_graph(
        meta_graph, input_map={'example': example, 'label': label})
    print("{}".format(meta_graph))
    saver.restore(sess, latest_checkpoint)
    print("{}".format(latest_checkpoint))
    train_op = tf.get_collection('train_op')[0]
    for i in range(num_epochs):
        sess.run(iterator.initializer)
        while True:
            try:
                sess.run(train_op)
            except tf.errors.OutOfRangeError:
                break
    print(saver.save(sess, saver_prefix, global_step=step))
I believe I've found the issue. The issue is that the Saver in train.py saves the real input tensors that I've mapped in. When I try to restore, those real input tensors are restored from disk, but not initialized.
So: after running init.py once, the following train.py script trains successfully. But when I run it again, the extra input tensors that were mapped into the graph are restored but not initialized. It's a bit strange, because I map them in again when I restore, so I wouldn't have thought it would be necessary to initialize them. I found that tf.report_uninitialized_variables() was crucial for debugging the issue.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from pathlib import Path
import argparse
import glob
import os
import tensorflow as tf

parser = argparse.ArgumentParser(description='Initialize a TicTacToe expert model.')
parser.add_argument('name', metavar='foo-model', help='Model prefix')
args = parser.parse_args()
model_dir = "src/tictactoe/saved_models/" + args.name
saver_prefix = "src/tictactoe/saved_models/" + args.name + "/" + args.name
latest_checkpoint = tf.train.latest_checkpoint(model_dir)
meta_graph = ".".join([latest_checkpoint, "meta"])

num_epochs = 100
minibatch_size = 128
dataset_dir = "src/tictactoe/gamedata"

def make_dataset(minibatch_size, dataset_dir):
    files = glob.glob("{}/*.tfrecord".format(dataset_dir))
    print(files)
    dataset = tf.data.TFRecordDataset(files)
    dataset = dataset.map(parse)
    dataset = dataset.shuffle(buffer_size=100000)
    dataset = dataset.batch(minibatch_size)
    return dataset

def parse(bytes):
    features = {"game": tf.FixedLenFeature((), tf.string),
                "choice": tf.FixedLenSequenceFeature((), tf.float32, allow_missing=True)}
    parsed_features = tf.parse_single_example(bytes, features)
    game = tf.decode_raw(parsed_features["game"], tf.uint8)
    choice = parsed_features["choice"]
    return tf.reshape(game, [19]), tf.reshape(choice, [9])

with tf.Session() as sess:
    dataset = make_dataset(minibatch_size, dataset_dir)
    iterator = dataset.make_initializable_iterator()
    example, label = iterator.get_next()
    # print("before iterator", sess.run(tf.report_uninitialized_variables()))
    saver = tf.train.import_meta_graph(meta_graph, input_map={'example': example, 'label': label})
    print("{}".format(meta_graph))
    saver.restore(sess, latest_checkpoint)
    print("{}".format(latest_checkpoint))
    train_op = tf.get_collection('train_op')[0]
    init = tf.get_collection('init')[0]
    for i in range(num_epochs):
        sess.run(iterator.initializer)
        while True:
            try:
                sess.run(train_op)
            except tf.errors.OutOfRangeError:
                break
    print(saver.save(sess, saver_prefix))
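For reference, a minimal sketch of the debugging call mentioned above, assuming a live session sess whose graph has already been built:

# Prints the names of any variables that exist in the graph but have not been
# initialized; an empty result means everything has been initialized.
print(sess.run(tf.report_uninitialized_variables()))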

Tensorflow - How to manipulate Saver

I am working through the Boston housing data tutorial for TensorFlow, but am inserting my own data set:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pandas as pd
import tensorflow as tf

tf.logging.set_verbosity(tf.logging.INFO)

COLUMNS = ["crim", "zn", "indus", "nox", "rm", "age",
           "dis", "tax", "ptratio", "medv"]
FEATURES = ["crim", "zn", "indus", "nox", "rm",
            "age", "dis", "tax", "ptratio"]
LABEL = "medv"

def input_fn(data_set):
    feature_cols = {k: tf.constant(data_set[k].values) for k in FEATURES}
    labels = tf.constant(data_set[LABEL].values)
    return feature_cols, labels

def main(unused_argv):
    # Load datasets
    training_set = pd.read_csv("boston_train.csv", skipinitialspace=True,
                               skiprows=1, names=COLUMNS)
    test_set = pd.read_csv("boston_test.csv", skipinitialspace=True,
                           skiprows=1, names=COLUMNS)
    # Set of 6 examples for which to predict median house values
    prediction_set = pd.read_csv("boston_predict.csv", skipinitialspace=True,
                                 skiprows=1, names=COLUMNS)
    # Feature cols
    feature_cols = [tf.contrib.layers.real_valued_column(k)
                    for k in FEATURES]
    # Build 2 layer fully connected DNN with 10, 10 units respectively.
    regressor = tf.contrib.learn.DNNRegressor(
        feature_columns=feature_cols, hidden_units=[10, 10])
    # Fit
    regressor.fit(input_fn=lambda: input_fn(training_set), steps=5000)
    # Score accuracy
    ev = regressor.evaluate(input_fn=lambda: input_fn(test_set), steps=1)
    loss_score = ev["loss"]
    print("Loss: {0:f}".format(loss_score))
    # Print out predictions
    y = regressor.predict(input_fn=lambda: input_fn(prediction_set))
    print("Predictions: {}".format(str(y)))

if __name__ == "__main__":
    tf.app.run()
The issue I am having is that the dataset is so big that saving checkpoint files via tf.train.Saver() is filling up all my disk space.
Is there a way to either disable the saving of checkpoint files, or reduce the number of checkpoints saved, in the script above?
Thanks.
The tf.contrib.learn.DNNRegressor initializer takes a tf.contrib.learn.RunConfig object, which can be used to control the behavior of the internally-created saver. For example, you can do the following to keep only one checkpoint:
config = tf.contrib.learn.RunConfig(keep_checkpoint_max=1)
regressor = tf.contrib.learn.DNNRegressor(
    feature_columns=feature_cols, hidden_units=[10, 10], config=config)
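If the checkpoints are also written too often, RunConfig exposes that as well; a sketch combining both settings, assuming one checkpoint every ten minutes is acceptable (save_checkpoints_secs is part of the same tf.contrib.learn.RunConfig API):

# Keep only the most recent checkpoint and write one at most every 600 seconds.
config = tf.contrib.learn.RunConfig(keep_checkpoint_max=1,
                                    save_checkpoints_secs=600)
regressor = tf.contrib.learn.DNNRegressor(
    feature_columns=feature_cols, hidden_units=[10, 10], config=config)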