element-wise multiplication with broadcasting in keras custom layer

11,967

Solution 1

Before multiplying, you need to repeat the elements to increase the shape. You can use K.repeat_elements for that. (import keras.backend as K)

class MyLayer(Layer):
    """Scale the input element-wise by a trainable kernel, optionally tiling it first.

    When ``repeat_count`` is greater than 1, the input gains a new axis at
    position 1 and is replicated ``repeat_count`` times along that axis, so
    the kernel holds one weight per replicated element. Otherwise the input
    is multiplied as-is and the output shape equals the input shape.
    """

    # Supporting arbitrary target shapes is awkward, so the layer takes a
    # single 'repeat_count' and grows exactly one dimension.
    def __init__(self, repeat_count, **kwargs):
        self.repeat_count = repeat_count
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the kernel: one weight per output element, batch axis set to 1."""
        # Derive the per-sample dimensions from the output shape so the kernel
        # always matches what `call` produces.
        per_sample_dims = self.compute_output_shape(input_shape)[1:]

        self.kernel = self.add_weight(
            name='kernel',
            shape=(1,) + per_sample_dims,
            initializer='ones',
            trainable=True,
        )

        # Be sure to call the base implementation somewhere in build().
        super(MyLayer, self).build(input_shape)

    def call(self, x):
        """Tile ``x`` along a new axis 1 when requested, then multiply by the kernel."""
        tiled = x
        if self.repeat_count > 1:
            # Add the extra dimension, then replicate the elements along it.
            with_new_axis = K.expand_dims(x, axis=1)
            tiled = K.repeat_elements(with_new_axis, rep=self.repeat_count, axis=1)

        return tiled * self.kernel

    def compute_output_shape(self, input_shape):
        """Return the output shape; this must mirror what ``call`` does."""
        if not self.repeat_count > 1:
            return input_shape

        batch_dim, trailing_dims = input_shape[0], input_shape[1:]
        return (batch_dim, self.repeat_count) + trailing_dims

Solution 2

Here is another solution that is based on the answer by Daniel Möller, but uses tf.multiply like the original code.

class MyLayer(Layer):
    """Broadcast the input against a trainable kernel along a new axis 1.

    For an input of shape ``(batch, d1, ...)`` the layer returns a tensor of
    shape ``(batch, output_dim, d1, ...)`` in which every one of the
    ``output_dim`` slices is the input scaled element-wise by its own row of
    trainable weights (initialised to ones).
    """

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim

        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the kernel with the per-sample output shape and a batch axis of 1."""
        output_shape = self.compute_output_shape(input_shape)
        self.kernel = self.add_weight(name='kernel',
                                      shape=(1,) + output_shape[1:],
                                      initializer='ones',
                                      trainable=True)

        super(MyLayer, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, x):
        """Return ``x`` broadcast over ``output_dim`` and scaled by the kernel."""
        # Broadcasting aligns shapes from the right, so multiplying the raw
        # (batch, d...) input by the (1, output_dim, d...) kernel would pair
        # the batch axis with output_dim: it only works for batch == 1 and
        # contradicts compute_output_shape. Inserting a length-1 axis after
        # the batch axis gives (batch, 1, d...), which broadcasts cleanly to
        # (batch, output_dim, d...).
        expanded = K.expand_dims(x, axis=1)
        return K.tf.multiply(expanded, self.kernel)

    def compute_output_shape(self, input_shape):
        """Insert ``output_dim`` right after the batch axis of ``input_shape``."""
        return (input_shape[0], self.output_dim) + input_shape[1:]
Share:
11,967
buckithed
Author by

buckithed

Updated on June 05, 2022

Comments

  • buckithed
    buckithed almost 2 years

    I am creating a custom layer with weights that need to be multiplied element-wise with the input before activation. I can get it to work when the output and input are the same shape. The problem occurs when I have a first-order array as input and a second-order array as output. tensorflow.multiply supports broadcasting, but when I use it in Layer.call to multiply x by the self.kernel Variable, it complains that they have different shapes:

    ValueError: Dimensions must be equal, but are 4 and 3 for 'my_layer_1/Mul' (op: 'Mul') with input shapes: [?,4], [4,3].
    

    here is my code:

    from keras import backend as K
    from keras.engine.topology import Layer
    import tensorflow as tf
    from keras.models import Sequential
    import numpy as np
    
    # Custom layer from the question: multiplies its input element-wise by a
    # trainable kernel whose shape is exactly the requested `output_dims`.
    # This is the version that raises the ValueError quoted in the question.
    class MyLayer(Layer):
    
        # output_dims: full shape tuple used both for the kernel and as the
        # value returned by compute_output_shape.
        def __init__(self, output_dims, **kwargs):
            self.output_dims = output_dims
    
            super(MyLayer, self).__init__(**kwargs)
    
        def build(self, input_shape):
            # Create a trainable weight variable for this layer.
            # The kernel shape ignores input_shape entirely; it is taken
            # verbatim from output_dims and initialised to ones.
            self.kernel = self.add_weight(name='kernel',
                                          shape=self.output_dims,
                                          initializer='ones',
                                          trainable=True)
    
    
            super(MyLayer, self).build(input_shape)  # Be sure to call this somewhere!
    
        def call(self, x):
            # This is the multiply that fails: per the reported error the
            # operands have shapes [?,4] (x) and [4,3] (kernel), whose
            # trailing dimensions 4 and 3 cannot be broadcast together.
            return K.tf.multiply(x, self.kernel)
    
        def compute_output_shape(self, input_shape):
            # Returns output_dims unchanged; no batch dimension is prepended.
            return (self.output_dims)
    
    # Minimal reproduction: build a one-layer Sequential model around MyLayer
    # and run a single prediction.
    mInput = np.array([[1,2,3,4]])  # one sample with four values, shape (1, 4)
    inShape = (4,)  # per-sample input shape (no batch axis)
    net = Sequential()
    outShape = (4,3)  # passed to MyLayer as output_dims, i.e. the kernel shape
    l1 = MyLayer(outShape, input_shape= inShape)
    net.add(l1)
    net.compile(loss='mean_absolute_error', optimizer='adam', metrics=['accuracy'])
    p = net.predict(x=mInput, batch_size=1)
    print(p)
    

    Edit: Given input shape (4,) and output shape (4,3) the weight matrix should be the same shape as the output and initialized with ones. So in the above code the input is [1,2,3,4], the weight matrix should be [[1,1,1,1],[1,1,1,1],[1,1,1,1]] and the output should look like [[1,2,3,4],[1,2,3,4],[1,2,3,4]]