Bayesian logistic regression#
Author: Nipun Batra
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import seaborn as sns
import pymc3 as pm
from sklearn.datasets import make_blobs
import arviz as az
import theano.tensor as tt
rc('font', size=16)
rc('text', usetex=True)
Let us generate a pseudo-random dataset for logistic regression.
np.random.seed(0)
X, y = make_blobs(n_samples=200, n_features=2,cluster_std=0.5, centers=2)
plt.scatter(X[:, 0], X[:, 1],c=y);
plt.xlabel('$x_1$');
plt.ylabel('$x_2$');
data:image/s3,"s3://crabby-images/a0f2c/a0f2c1b3133aa494150bef776ebf832c0497191b" alt="../../_images/46859058daabcd05ee7ed99e193fd0f2a0b9e6308854c81662dbc60ba08ec63d.png"
Adding extra column of ones to incorporate the bias.
X_concat = np.hstack((np.ones((len(y), 1)), X))
X_concat.shape
(200, 3)
We define the bayesian logistic regression model as the following. Notice that we need to use Bernoulli likelihood as our output is binary.
basic_model = pm.Model()
with basic_model:
# Priors for unknown model parameters
theta = pm.Normal("theta", mu=0, sigma=100, shape=3)
#theta = pm.Uniform("theta", upper=50, lower=-50, shape=3)
X_ = pm.Data('features', X_concat)
# Expected value of outcome
y_hat = pm.math.sigmoid(tt.dot(X_, theta))
# Likelihood (sampling distribution) of observations
Y_obs = pm.Bernoulli("Y_obs", p=y_hat, observed=y)
pm.model_to_graphviz(basic_model.model)
Let us get MAP for the parameter posterior.
map_estimate = pm.find_MAP(model=basic_model)
map_estimate
{'theta': array([20.12421332, 3.69334159, -9.60226941])}
Let us visualize the optimal seperating hyperplane.
#separating hyperplane; X\theta = 0
def hyperplane(x, theta):
return (-theta[1]*x-theta[0]) /(theta[2])
x = np.linspace(X[:, 0].min()-0.1, X[:, 0].max()+0.1, 100)
plt.plot(x, hyperplane(x, map_estimate['theta']), label='optimal hyperplane')
plt.scatter(X[:, 0], X[:, 1],c=y);
plt.xlabel('$x_1$');
plt.ylabel('$x_2$');
plt.legend(bbox_to_anchor=(1,1));
data:image/s3,"s3://crabby-images/10ad9/10ad9d0d1cf52aa0ac04fa20aff15a5b1522313d" alt="../../_images/e0a1626d6d69f13f7c8e98665f252a191c63c2cc4c4626249c0afcc428dcecf0.png"
Let us draw a large number of samples from the posterior.
with basic_model:
# draw 500 posterior samples
trace = pm.sample(2000,return_inferencedata=False,tune=20000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [theta]
Sampling 4 chains for 20_000 tune and 2_000 draw iterations (80_000 + 8_000 draws total) took 45 seconds.
There were 780 divergences after tuning. Increase `target_accept` or reparameterize.
There were 772 divergences after tuning. Increase `target_accept` or reparameterize.
There were 397 divergences after tuning. Increase `target_accept` or reparameterize.
There were 351 divergences after tuning. Increase `target_accept` or reparameterize.
The estimated number of effective samples is smaller than 200 for some parameters.
Let us visualize the parameter posterior.
az.plot_trace(trace)
/home/patel_zeel/anaconda3/lib/python3.8/site-packages/arviz/data/io_pymc3.py:96: FutureWarning: Using `from_pymc3` without the model will be deprecated in a future release. Not using the model will return less accurate and less useful results. Make sure you use the model argument or call from_pymc3 within a model context.
warnings.warn(
array([[<AxesSubplot:title={'center':'theta'}>,
<AxesSubplot:title={'center':'theta'}>]], dtype=object)
data:image/s3,"s3://crabby-images/10a5c/10a5c1423a1c1efc90f6d16baacc07af630a4234" alt="../../_images/f9a6201624e015283a8f677d712eab3361126792baba18f68be223674945ff60.png"
Let us predict for new input locations.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
np.arange(y_min, y_max, 0.1))
X_test = np.c_[xx.ravel(), yy.ravel()]
X_test_concat = np.hstack((np.ones((len(X_test), 1)), X_test))
with basic_model:
pm.set_data({'features': X_test_concat})
posterior = pm.sample_posterior_predictive(trace)
Z = posterior['Y_obs']
Following plot shows the posterior distribution over the hyperplanes.
for i in range(len(Z))[:500]:
plt.contour(xx, yy, Z[i].reshape(xx.shape), alpha=0.01)
plt.scatter(X[:, 0], X[:, 1],c=y, zorder=10);
plt.xlabel('$x_1$');
plt.ylabel('$x_2$');
data:image/s3,"s3://crabby-images/d51fc/d51fca9ca34d2e04043d7ad0ce4c41a5f0010505" alt="../../_images/02251e2c951401df5e61b0ebb67168860fed668d1d1021f77c99b7682474e45e.png"
Following code is inspired from”: https://docs.pymc.io/notebooks/bayesian_neural_network_advi.html
The following plot show probabilities of being in any of the class for any arbitrary sample in the space.
pred = posterior['Y_obs'].mean(axis=0)>0.5
cmap = sns.diverging_palette(250, 12, s=85, l=25, as_cmap=True)
fig, ax = plt.subplots(figsize=(8, 4))
contour = plt.contourf(xx, yy, posterior['Y_obs'].mean(axis=0).reshape(xx.shape),cmap=cmap)
#ax.scatter(X_test[pred == 0, 0], X_test[pred == 0, 1])
#ax.scatter(X_test[pred == 1, 0], X_test[pred == 1, 1], color="r")
cbar = plt.colorbar(contour, ax=ax)
#_ = ax.set(xlim=(-3, 3), ylim=(-3, 3), xlabel="X", ylabel="Y")
#cbar.ax.set_ylabel("Posterior predictive mean probability of class label = 0");
plt.xlabel('$x_1$');
plt.ylabel('$x_2$');
data:image/s3,"s3://crabby-images/19f05/19f053a5df4b45edbf0122ee9ce5742060778986" alt="../../_images/0f4f08eef93de74b3a4e162872b17f1dc9b0d625acf428e3658665c4ae12d9a0.png"
The following plot shows uncertainty in the predictions at any arbitrary location in input space.
pred = posterior['Y_obs'].mean(axis=0)>0.5
cmap = sns.cubehelix_palette(light=1, as_cmap=True)
fig, ax = plt.subplots(figsize=(8, 4))
contour = plt.contourf(xx, yy, posterior['Y_obs'].std(axis=0).reshape(xx.shape),cmap=cmap)
#ax.scatter(X_test[pred == 0, 0], X_test[pred == 0, 1])
#ax.scatter(X_test[pred == 1, 0], X_test[pred == 1, 1], color="r")
cbar = plt.colorbar(contour, ax=ax)
#_ = ax.set(xlim=(-3, 3), ylim=(-3, 3), xlabel="X", ylabel="Y")
#cbar.ax.set_ylabel("Posterior predictive mean probability of class label = 0");
plt.xlabel('$x_1$');
plt.ylabel('$x_2$');
data:image/s3,"s3://crabby-images/481dd/481dd6e9c6fc3a1114364bef1a1230133199581c" alt="../../_images/186819347dd2ffdddaf6f68f211fed8ff6d8d2a7df5faa6fd5a50532e947a09f.png"