Why does adding random numbers not break this custom loss function?

Can someone explain why adding random numbers to the loss does not affect the predictions of this Keras model? Every time I run it I get a very similar AUC for both models, but I would expect the AUC of the second model to be close to 0.5. I am running this in Colab.

Any suggestions why this might be happening?


import numpy as np
import pandas as pd
import tensorflow as tf
import keras
from keras import layers
import random
from keras import backend as K
from sklearn import metrics
from sklearn.metrics import roc_auc_score

opt = tf.keras.optimizers.Adam(learning_rate=1e-04)

#resetting seeds to ensure reproducibility

def reset_random_seeds():
  tf.random.set_seed(1)
  np.random.seed(1)
  random.seed(1)

def get_auc(y_test,y_pred):

  fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred)
  auc = metrics.auc(fpr, tpr)
  return auc

#standard loss function with binary cross-entropy

def binary_crossentropy1(y_true, y_pred): 

  bin_cross = tf.keras.losses.BinaryCrossentropy(from_logits=False)
  bce1 = K.mean(bin_cross(y_true, y_pred))
  return bce1

#same loss function but with added random numbers

def binary_crossentropy2(y_true, y_pred): 

  bin_cross = tf.keras.losses.BinaryCrossentropy(from_logits=False)

  bce2 = K.mean(bin_cross(y_true, y_pred))
  penalty = tf.random.normal([], mean=50.0, stddev=100.0)
  bce2 = tf.math.add(bce2, penalty)
  return bce2

#model without randomness

reset_random_seeds()

input1 = keras.Input(shape=(9,))
x = layers.Dense(12, activation="relu", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(input1)
x = layers.Dense(8, activation="relu", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(x)
output = layers.Dense(1, activation="sigmoid", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(x)
model1 = keras.Model(inputs=input1, outputs=output)


model1.compile(optimizer=opt, loss=binary_crossentropy1, metrics=['accuracy'])

model1.fit(x=X_train, y=y_train, epochs=10, batch_size=32)

model1_pred = model1.predict(X_test)

#model with randomness

reset_random_seeds()

input1 = keras.Input(shape=(9,))
x = layers.Dense(12, activation="relu", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(input1)
x = layers.Dense(8, activation="relu", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(x)
output = layers.Dense(1, activation="sigmoid", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(x)
model2 = keras.Model(inputs=input1, outputs=output)


model2.compile(optimizer=opt, loss=binary_crossentropy2, metrics=['accuracy'])

model2.fit(x=X_train, y=y_train, epochs=10, batch_size=32)

model2_pred = model2.predict(X_test)


print(get_auc(y_test, model1_pred))
print(get_auc(y_test, model2_pred))

Result

0.7228943446346893
0.7231896873302319

What the penalty looks like

penalty =  112.050842
penalty =  139.664017
penalty =  152.505341
penalty =  -37.1483
penalty =  -74.08284
penalty =  155.872528
penalty =  42.7903175

Training is guided by the gradient of the loss with respect to the model's trainable weights, not by the value of the loss itself.

The random value that you add to the loss in the second model does not depend on the weights, so it contributes nothing to that gradient and the weight updates are the same as in the first model. And when you run prediction, you take the model's output directly, before any loss is computed, so the predictions are not affected either.
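To see this concretely, here is a small illustrative sketch (a toy one-weight model, not the model from the question) that compares the two gradients with tf.GradientTape. Because the penalty is a constant with respect to the weight, the gradient of the plain loss and the gradient of the loss plus penalty come out identical:

import tensorflow as tf

tf.random.set_seed(1)

# toy "model": y_pred = sigmoid(w * x), with a single trainable weight
w = tf.Variable(0.5)
x = tf.constant([1.0, 2.0, 3.0])
y_true = tf.constant([0.0, 1.0, 1.0])

bce = tf.keras.losses.BinaryCrossentropy(from_logits=False)

with tf.GradientTape(persistent=True) as tape:
  y_pred = tf.sigmoid(w * x)
  loss_plain = bce(y_true, y_pred)
  # the penalty does not depend on w, so it is a constant for differentiation
  penalty = tf.random.normal([], mean=50.0, stddev=100.0)
  loss_noisy = loss_plain + penalty

print(tape.gradient(loss_plain, w).numpy())  # gradient of the plain loss
print(tape.gradient(loss_noisy, w).numpy())  # identical: the penalty adds nothing

The optimizer only ever sees these gradients, so the added penalty has no effect on how the weights are updated, even though the reported loss value is wildly different.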