Linear Regression in TensorFlow Probability#
toc: true
badges: true
comments: true
author: Nipun Batra
categories: [ML]
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import seaborn as sns
import tensorflow_probability as tfp
import pandas as pd
tfd = tfp.distributions
tfl = tfp.layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import Callback
sns.reset_defaults()
sns.set_context(context='talk',font_scale=1)
%matplotlib inline
%config InlineBackend.figure_format='retina'
np.random.seed(42)
x = np.linspace(-0.5, 1, 100)
y = 5*x + 4 + 2*np.multiply(x, np.random.randn(100))
plt.scatter(x, y, s=20, alpha=0.6)
sns.despine()
plt.xlabel("x")
plt.ylabel("y")
(Figure: scatter plot of the noisy linear data, x vs y.)
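The data comes from a linear model with heteroscedastic noise: the noise standard deviation grows with the magnitude of x, which is exactly the structure the later probabilistic models will try to capture. In math, the generating process above is

$$
y = 5x + 4 + 2x\,\epsilon, \qquad \epsilon \sim \mathcal{N}(0, 1)
\quad\Longrightarrow\quad
y \mid x \sim \mathcal{N}\!\left(5x + 4,\; (2|x|)^2\right)
$$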
Model 1: Vanilla Linear Regression#
model = Sequential([
    Dense(input_shape=(1,), units=1, name='D1')])
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
D1 (Dense) (None, 1) 2
=================================================================
Total params: 2
Trainable params: 2
Non-trainable params: 0
_________________________________________________________________
model.compile(loss='mse', optimizer='adam')
model.fit(x, y, epochs=4000, verbose=0)
model.get_layer('D1').weights
[<tf.Variable 'D1/kernel:0' shape=(1, 1) dtype=float32, numpy=array([[4.929521]], dtype=float32)>,
<tf.Variable 'D1/bias:0' shape=(1,) dtype=float32, numpy=array([3.997371], dtype=float32)>]
plt.scatter(x, y, s=20, alpha=0.6)
sns.despine()
plt.xlabel("x")
plt.ylabel("y")
pred_m1 = model.predict(x)
plt.plot(x, pred_m1)
(Figure: data with the fitted Model 1 regression line.)
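As a quick sanity check (not from the original notebook), minimizing MSE with a single Dense unit is ordinary least squares, so the learned kernel and bias should match a closed-form linear fit:

# Degree-1 polyfit returns [slope, intercept]; these should be close to
# the Dense layer's kernel (~4.93) and bias (~4.0) above.
slope, intercept = np.polyfit(x, y, 1)
print(slope, intercept)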
Model 2: Linear Regression with a Fixed-Scale Normal Likelihood#
model_2 = Sequential([
    Dense(input_shape=(1,), units=1, name='M2_D1'),
    tfl.DistributionLambda(lambda loc: tfd.Normal(loc=loc, scale=1.), name='M2_Likelihood')])
model_2.summary()
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
M2_D1 (Dense) (None, 1) 2
M2_Likelihood (DistributionLambda) ((None, 1), (None, 1)) 0
=================================================================
Total params: 2
Trainable params: 2
Non-trainable params: 0
_________________________________________________________________
m2_untrained_weight = model_2.get_layer('M2_D1').weights
plt.scatter(x, y, s=20, alpha=0.6)
sns.despine()
plt.xlabel("x")
plt.ylabel("y")
m_2 = model_2(x)
plt.plot(x, m_2.sample(40).numpy()[:, :, 0].T, color='k', alpha=0.05);
plt.plot(x, m_2.mean().numpy().flatten(), color='k')
(Figure: data with 40 sampled lines and the mean line from the untrained Model 2.)
def plot(model):
    """Plot the data with the model's predictive mean and a +/- 1 std band."""
    plt.scatter(x, y, s=20, alpha=0.6)
    sns.despine()
    plt.xlabel("x")
    plt.ylabel("y")
    m = model(x)  # the model outputs a distribution
    m_s = m.stddev().numpy().flatten()
    m_m = m.mean().numpy().flatten()
    plt.plot(x, m_m, color='k')
    plt.fill_between(x, m_m - m_s, m_m + m_s, color='k', alpha=0.4)
plot(model_2)
(Figure: untrained Model 2: mean line with a constant +/- 1 std band.)
def nll(y_true, y_pred):
    # y_pred is a distribution, so the loss is its negative log-likelihood
    return -y_pred.log_prob(y_true)
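With the scale fixed at 1, this loss is just a scaled and shifted squared error, -log p(y | mu) = 0.5 (y - mu)^2 + 0.5 log(2 pi), so Model 2 should recover essentially the same line as Model 1. A quick check with made-up values (illustrative, not from the original notebook):

# NLL of a unit-scale Normal equals 0.5 * squared error plus a constant.
mu, y_obs = 0.3, 1.2
manual = 0.5 * (y_obs - mu) ** 2 + 0.5 * np.log(2 * np.pi)
print(np.isclose(manual, -tfd.Normal(loc=mu, scale=1.).log_prob(y_obs).numpy()))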
model_2.compile(loss=nll, optimizer='adam')
model_2.fit(x, y, epochs=4000, verbose=0)
plot(model_2)
(Figure: trained Model 2: the band has constant width since the scale is fixed at 1.)
Model 3: Learning the Scale as a Function of x#
model_3 = Sequential([
    Dense(input_shape=(1,), units=2, name='M3_D1'),
    tfl.DistributionLambda(lambda t: tfd.Normal(loc=t[..., 0], scale=tf.exp(t[..., 1])), name='M3_Likelihood')])
model_3.get_layer('M3_D1').weights
[<tf.Variable 'M3_D1/kernel:0' shape=(1, 2) dtype=float32, numpy=array([[0.04476571, 0.55212975]], dtype=float32)>,
<tf.Variable 'M3_D1/bias:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>]
model_3.compile(loss=nll, optimizer='adam')
model_3.fit(x, y, epochs=4000, verbose=0)
plot(model_3)
(Figure: trained Model 3: the width of the +/- 1 std band now varies with x.)
A good reference: https://tensorchiefs.github.io/bbs/files/21052019-bbs-Beate-uncertainty.pdf
At this point, the scale (sigma) is the exponential of a linear function of the input x, so it is log sigma, not sigma itself, that is linear in x.
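To see this, one can read the learned scale straight off the output distribution; a small sketch, reusing the same calling convention as the plot helper above:

# Since scale = exp(w*x + b), log(sigma) plotted against x is a straight line.
m3 = model_3(x)
plt.plot(x, np.log(m3.stddev().numpy().flatten()))
plt.xlabel("x")
plt.ylabel("log sigma")
sns.despine()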
Model 4: A Nonlinear Network with a Softplus Scale#
model_4 = Sequential([
    Dense(input_shape=(1,), units=2, name='M4_D1', activation='relu'),
    Dense(units=2, name='M4_D2', activation='relu'),
    Dense(units=2, name='M4_D3'),
    tfl.DistributionLambda(lambda t: tfd.Normal(loc=t[..., 0], scale=tf.math.softplus(t[..., 1])), name='M4_Likelihood')])
model_4.compile(loss=nll, optimizer='adam')
model_4.fit(x, y, epochs=4000, verbose=0)
plot(model_4)
(Figure: trained Model 4: nonlinear mean with an input-dependent uncertainty band.)
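Model 4 also swaps exp for softplus to keep the scale positive: softplus(t) = log(1 + e^t) behaves like exp for very negative t but grows only linearly for large t, which tends to be numerically better behaved. A quick comparison (illustrative, not from the original notebook):

# Both transforms are positive, but softplus avoids exponential blow-up.
t = np.linspace(-5., 5., 5)
print(tf.math.softplus(t).numpy())  # log(1 + e^t)
print(np.exp(t))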
Model 6: DenseVariational#
def prior(kernel_size, bias_size, dtype=None):
    n = kernel_size + bias_size
    # Prior over all weights: an independent standard Normal per weight
    return lambda t: tfd.Independent(
        tfd.Normal(loc=tf.zeros(n, dtype=dtype), scale=1),
        reinterpreted_batch_ndims=1)

def posterior(kernel_size, bias_size, dtype=None):
    n = kernel_size + bias_size
    # Variational posterior: a learnable independent Normal per weight
    return Sequential([
        tfl.VariableLayer(tfl.IndependentNormal.params_size(n), dtype=dtype),
        tfl.IndependentNormal(n)
    ])
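The posterior above learns a location and a scale for every weight: tfl.IndependentNormal.params_size(n) is 2*n, and the VariableLayer stores these 2*n numbers as the trainable variables that get mapped into a distribution over the n weights:

# One loc plus one (unconstrained) scale parameter per weight, so 2*n total.
print(tfl.IndependentNormal.params_size(3))  # -> 6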
N = len(x)
model_6 = Sequential([
    # DenseVariational requires posterior and prior distributions;
    # kl_weight scales the KL term used for weight regularization
    tfl.DenseVariational(2, posterior, prior, kl_weight=1/N, input_shape=(1,)),
    tfl.IndependentNormal(1)
])
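Training then minimizes (approximately, with a single weight sample per step) the per-example negative ELBO: the NLL loss below plus the KL term that DenseVariational adds as a layer loss, scaled by kl_weight = 1/N:

$$
\mathcal{L} \;=\; \mathbb{E}_{q(w)}\!\left[-\log p(y \mid x, w)\right] \;+\; \frac{1}{N}\,\mathrm{KL}\!\left(q(w)\,\|\,p(w)\right)
$$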
model_6.compile(loss=nll, optimizer='adam')
model_6.fit(x, y, epochs=5000, verbose=0)
plot(model_6)
(Figure: trained model_6: predictive mean with a +/- 1 std band.)
Model 7: A Deeper DenseVariational Network#
N = len(x)
model_7 = Sequential([
    # Same idea as Model 6, but with a hidden variational layer so the
    # mean and scale can be nonlinear in x
    tfl.DenseVariational(16, posterior, prior, kl_weight=1/N, activation='relu', input_shape=(1, )),
    tfl.DenseVariational(2, posterior, prior, kl_weight=1/N),
    tfl.IndependentNormal(1)
])
model_7.compile(loss=nll, optimizer='adam')
model_7.fit(x, y, epochs=5000, verbose=0)
plot(model_7)
(Figure: trained model_7: nonlinear mean with an input-dependent uncertainty band.)
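Because DenseVariational samples fresh weights on every forward pass, repeated predictions trace out the epistemic uncertainty directly; a short sketch (not in the original notebook):

# Each call to model_7 draws new weights from the variational posterior,
# so the predictive mean line itself varies from call to call.
plt.scatter(x, y, s=20, alpha=0.6)
for _ in range(20):
    plt.plot(x, model_7(x).mean().numpy().flatten(), color='k', alpha=0.1)
sns.despine()
plt.xlabel("x")
plt.ylabel("y")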
Further reading: https://livebook.manning.com/book/probabilistic-deep-learning-with-python/chapter-8/123