Tensorflow model.fit() using a Dataset generator

10,172

While the origin of the errors is still nebulous, I have found a solution that makes the code work. I'll post it here in case it is useful to anyone in a similar situation.

Basically, I turned my_input_fn() into a generator that yields one batch at a time, and passed it to model.fit_generator() as follows:

import tensorflow as tf
import numpy as np
import random


def my_generator(total_items):
    """Yield ``total_items`` random (features, label) training samples.

    Each sample consists of:

    * features: a ``(4, 20, 1)`` float64 array of uniform random values
      produced by ``np.random.rand``;
    * label: a length-12 float32 one-hot vector whose hot index is a random
      integer drawn from 0..11 (both ends inclusive).

    Args:
        total_items: Number of samples to produce (an int). Zero or a
            negative value yields nothing.

    Yields:
        A ``(features, label)`` tuple of NumPy arrays.
    """
    num_classes = 12
    for _ in range(total_items):
        features = np.random.rand(4, 20).reshape(4, 20, 1)
        class_index = random.randint(0, num_classes - 1)
        # Build the one-hot vector in NumPy instead of calling tf.one_hot per
        # sample: the generator then no longer depends on eager execution.
        # float32 matches the default dtype of tf.one_hot.
        label = np.zeros(num_classes, dtype=np.float32)
        label[class_index] = 1.0
        yield features, label

def my_input_fn(total_items, epochs, batch_size=32):
    """Yield batches of (features, labels) for ``model.fit_generator()``.

    Wraps ``my_generator(total_items)`` in a ``tf.data.Dataset``, repeats it
    ``epochs`` times, groups the samples into batches and then hands the
    batches out one at a time through a one-shot iterator.

    Args:
        total_items: Number of samples the underlying generator produces
            per pass over the data.
        epochs: Number of times the dataset is repeated.
        batch_size: Number of samples per batch. Defaults to 32, the value
            that used to be hard-coded here, so existing two-argument calls
            behave as before.

    Yields:
        The ``(batch_features, batch_labels)`` pair obtained from each
        ``iterator.get_next()`` call.
    """
    dataset = tf.data.Dataset.from_generator(lambda: my_generator(total_items),
                                             output_types=(tf.float64, tf.int64))

    dataset = dataset.repeat(epochs)
    dataset = dataset.batch(batch_size)

    iterator = dataset.make_one_shot_iterator()
    # This loop has no exit condition of its own; the caller limits how many
    # batches are pulled (steps_per_epoch * epochs in the driver below).
    while True:
        batch_features, batch_labels = iterator.get_next()
        yield batch_features, batch_labels


if __name__ == "__main__":
    tf.enable_eager_execution()

    model = tf.keras.Sequential([tf.keras.layers.Flatten(input_shape=(4, 20, 1)),
                                 tf.keras.layers.Dense(64, activation=tf.nn.relu),
                                 tf.keras.layers.Dense(12, activation=tf.nn.softmax)])

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    total_items = 200
    batch_size = 32
    epochs = 10
    # Count whole batches only; floor division gives the same result as the
    # previous int(total_items / batch_size) without going through a float.
    num_batches = total_items // batch_size
    # Pass batch_size through so the batching and steps_per_epoch cannot
    # drift apart when one of them is changed.
    train_data_generator = my_input_fn(total_items, epochs, batch_size=batch_size)
    model.fit_generator(generator=train_data_generator, steps_per_epoch=num_batches, epochs=epochs, verbose=1)

EDIT

As implied by giser_yugang in a comment, it is also possible to do it with my_input_fn() as a function returning the dataset instead of the individual batches.

def my_input_fn(total_items, epochs, batch_size=32):
    """Build the repeated, batched training dataset.

    Args:
        total_items: Number of samples ``my_generator`` produces per pass
            over the data.
        epochs: Number of times the dataset is repeated.
        batch_size: Number of samples per batch. Defaults to 32, the value
            that used to be hard-coded here, so existing two-argument calls
            behave as before.

    Returns:
        A ``tf.data.Dataset`` of ``(features, labels)`` batches, created with
        ``from_generator`` and then repeated and batched in that order.
    """
    dataset = tf.data.Dataset.from_generator(lambda: my_generator(total_items),
                                             output_types=(tf.float64, tf.int64))

    return dataset.repeat(epochs).batch(batch_size)


if __name__ == "__main__":
    tf.enable_eager_execution()

    model = tf.keras.Sequential([tf.keras.layers.Flatten(input_shape=(4, 20, 1)),
                                 tf.keras.layers.Dense(64, activation=tf.nn.relu),
                                 tf.keras.layers.Dense(12, activation=tf.nn.softmax)])

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    total_items = 100
    batch_size = 32
    epochs = 10
    # Count whole batches only; floor division gives the same result as the
    # previous int(total_items / batch_size) without going through a float.
    num_batches = total_items // batch_size
    # Pass batch_size through so the batching and steps_per_epoch cannot
    # drift apart when one of them is changed.
    dataset = my_input_fn(total_items, epochs, batch_size=batch_size)
    # NOTE(review): Keras also documents passing a tf.data.Dataset straight to
    # model.fit(); confirm which entry point suits the TF version in use.
    model.fit_generator(dataset, epochs=epochs, steps_per_epoch=num_batches)

There does not appear to be any difference in average performance between the two approaches.

Share:
10,172
berkelem
Author by

berkelem

Updated on June 14, 2022

Comments

  • berkelem
    berkelem almost 2 years

    I am using the Dataset API to generate training data and sort it into batches for a NN.

    Here is a minimum working example of my code:

    import tensorflow as tf
    import numpy as np
    import random
    
    
    def my_generator():
        """Yield an endless stream of random (features, label) samples.

        Each sample pairs a (4, 20, 1) array of np.random.rand values with
        the depth-12 tf.one_hot encoding of a random integer in 0..11.
        The loop never terminates on its own.
        """
        while True:
            x = np.random.rand(4, 20)
            y = random.randint(0, 11)  # randint includes both end points
            label = tf.one_hot(y, depth=12)
            yield x.reshape(4, 20, 1), label
    
    def my_input_fn():
        """Return one batch of (features, labels) taken from my_generator().

        The generator is wrapped in a tf.data.Dataset and batched in groups
        of 32, but only a single iterator.get_next() call is made: the
        return value is that one batch, not the dataset or an iterator.
        """
        dataset = tf.data.Dataset.from_generator(lambda: my_generator(),
                                                 output_types=(tf.float64, tf.int32))
    
        dataset = dataset.batch(32)
        iterator = dataset.make_one_shot_iterator()
        # A single get_next() call: exactly one batch is pulled here.
        batch_features, batch_labels = iterator.get_next()
    
        return batch_features, batch_labels
    
    
    if __name__ == "__main__":
        # Script entry point: build a small dense classifier over (4, 20, 1)
        # inputs with 12 softmax outputs and try to train it.
        tf.enable_eager_execution()
    
        model = tf.keras.Sequential([tf.keras.layers.Flatten(input_shape=(4, 20, 1)),
                                     tf.keras.layers.Dense(128, activation=tf.nn.relu),
                                     tf.keras.layers.Dense(12, activation=tf.nn.softmax)])
    
        model.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
    
        # Despite its name, data_generator holds the single
        # (batch_features, batch_labels) tuple returned by my_input_fn().
        data_generator = my_input_fn()
        # This is the call that the tracebacks quoted below point at.
        model.fit(data_generator)
    

    The code fails using TensorFlow 1.13.1 at the model.fit() call with the following error:

    Traceback (most recent call last):
      File "scripts/min_working_example.py", line 37, in <module>
        model.fit(data_generator)
      File "~/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 880, in fit
        validation_steps=validation_steps)
      File "~/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_arrays.py", line 310, in model_iteration
        ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]]
      File "~/.local/lib/python3.6/site-packages/tensorflow/python/keras/utils/generic_utils.py", line 526, in slice_arrays
        return [None if x is None else x[start] for x in arrays]
      File "~/.local/lib/python3.6/site-packages/tensorflow/python/keras/utils/generic_utils.py", line 526, in <listcomp>
        return [None if x is None else x[start] for x in arrays]
      File "~/.local/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 654, in _slice_helper
        name=name)
      File "~/.local/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 820, in strided_slice
        shrink_axis_mask=shrink_axis_mask)
      File "~/.local/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 9334, in strided_slice
        _six.raise_from(_core._status_to_exception(e.code, message), None)
      File "<string>", line 3, in raise_from
    tensorflow.python.framework.errors_impl.InvalidArgumentError: Attr shrink_axis_mask has value 4294967295 out of range for an int32 [Op:StridedSlice] name: strided_slice/
    

    I tried running the same code on a different machine using TensorFlow 2.0 (after removing the line tf.enable_eager_execution() because it runs eagerly by default) and I got the following error:

    Traceback (most recent call last):
      File "scripts/min_working_example.py", line 37, in <module>
        model.fit(data_generator)
      File "~/.local/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 873, in fit
        steps_name='steps_per_epoch')
      File "~/.local/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_arrays.py", line 352, in model_iteration
        batch_outs = f(ins_batch)
      File "~/.local/lib/python3.7/site-packages/tensorflow/python/keras/backend.py", line 3217, in __call__
        outputs = self._graph_fn(*converted_inputs)
      File "~/.local/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 558, in __call__
        return self._call_flat(args)
      File "~/.local/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 627, in _call_flat
        outputs = self._inference_function.call(ctx, args)
      File "~/.local/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 397, in call
        (len(args), len(list(self.signature.input_arg))))
    ValueError: Arguments and signature arguments do not match: 21 23 
    

    I tried changing model.fit() to model.fit_generator() but this fails on both TensorFlow versions too. On TF 1.13.1 I get the following error:

    Traceback (most recent call last):
      File "scripts/min_working_example.py", line 37, in <module>
        model.fit_generator(data_generator)
      File "~/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 1426, in fit_generator
        initial_epoch=initial_epoch)
      File "~/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_generator.py", line 115, in model_iteration
        shuffle=shuffle)
      File "~/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_generator.py", line 377, in convert_to_generator_like
        num_samples = int(nest.flatten(data)[0].shape[0])
    TypeError: __int__ returned non-int (type NoneType)
    

    and on TF 2.0 I get the following error:

    Traceback (most recent call last):
      File "scripts/min_working_example.py", line 37, in <module>
        model.fit_generator(data_generator)
      File "~/.local/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 1515, in fit_generator
        steps_name='steps_per_epoch')
      File "~/.local/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_generator.py", line 140, in model_iteration
        shuffle=shuffle)
      File "~/.local/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_generator.py", line 477, in convert_to_generator_like
        raise ValueError('You must specify `batch_size`')
    ValueError: You must specify `batch_size`
    

    yet batch_size is not a recognized keyword for fit_generator().

    I am puzzled by these error messages, and I would appreciate it if anyone could shed some light on them or point out what I am doing wrong.