{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c1ba8b72",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ruff: noqa: E402\n",
    "# ruff: noqa: E501"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8f61cdce",
   "metadata": {
    "lines_to_next_cell": 0
   },
   "source": [
    "# Fitting a polynomial with Gaussian priors\n",
    "\n",
    "We fit a simple polynomial with Gaussian priors, which is an example of a Gauss-linear\n",
    "problem for which the results obtained using Subspace Iterative Ensemble Smoother\n",
    "(SIES) tend to those obtained using Ensemble Smoother (ES).\n",
    "This notebook illustrated this property."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "53491d90",
   "metadata": {},
   "outputs": [],
   "source": [
    "import itertools\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "np.set_printoptions(suppress=True)\n",
    "rng = np.random.default_rng(12345)\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "COLORS = list(plt.rcParams[\"axes.prop_cycle\"].by_key()[\"color\"])\n",
    "\n",
    "plt.rcParams[\"figure.figsize\"] = (6, 6)\n",
    "plt.rcParams.update({\"font.size\": 10})\n",
    "from ipywidgets import interact  # noqa  # isort:skip\n",
    "import ipywidgets as widgets  # noqa  # isort:skip\n",
    "from p_tqdm import p_map\n",
    "\n",
    "import iterative_ensemble_smoother as ies"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0f50cd88",
   "metadata": {},
   "source": [
    "## Define synthetic truth and use it to create noisy observations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "46e1fd8c",
   "metadata": {},
   "outputs": [],
   "source": [
    "ensemble_size = 200\n",
    "\n",
    "\n",
    "def poly(a, b, c, x):\n",
    "    return a * x**2 + b * x + c\n",
    "\n",
    "\n",
    "# True patameter values\n",
    "a_t = 0.5\n",
    "b_t = 1.0\n",
    "c_t = 3.0\n",
    "\n",
    "noise_scale = 0.1\n",
    "x_observations = [0, 2, 4, 6, 8]\n",
    "observations = [\n",
    "    (\n",
    "        rng.normal(loc=1, scale=noise_scale) * poly(a_t, b_t, c_t, x),\n",
    "        noise_scale * poly(a_t, b_t, c_t, x),\n",
    "        x,\n",
    "    )\n",
    "    for x in x_observations\n",
    "]\n",
    "\n",
    "d = pd.DataFrame(observations, columns=[\"value\", \"sd\", \"x\"])\n",
    "d = d.set_index(\"x\")\n",
    "num_obs = d.shape[0]\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(7, 4))\n",
    "x_plot = np.linspace(0, 10, 2**8)\n",
    "ax.set_title(\"Truth and noisy observations\")\n",
    "ax.set_xlabel(\"Time step\")\n",
    "ax.set_ylabel(\"Response\")\n",
    "ax.plot(x_plot, poly(a_t, b_t, c_t, x_plot))\n",
    "ax.plot(d.index.get_level_values(\"x\"), d.value.values, \"o\")\n",
    "ax.grid()\n",
    "fig.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6584590e",
   "metadata": {},
   "source": [
    "## Assume diagonal observation error covariance matrix and define perturbed observations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "97f53eec",
   "metadata": {},
   "outputs": [],
   "source": [
    "R = np.diag(d.sd.values**2)\n",
    "\n",
    "E = rng.multivariate_normal(mean=np.zeros(len(R)), cov=R, size=ensemble_size).T\n",
    "assert E.shape == (num_obs, ensemble_size)\n",
    "\n",
    "D = d.value.values.reshape(-1, 1) + E"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "49cfcd6b",
   "metadata": {},
   "source": [
    "## Define Gaussian priors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0578cf69",
   "metadata": {},
   "outputs": [],
   "source": [
    "coeff_a = rng.standard_normal(size=ensemble_size)\n",
    "coeff_b = rng.standard_normal(size=ensemble_size)\n",
    "coeff_c = rng.standard_normal(size=ensemble_size)\n",
    "\n",
    "X = np.vstack([coeff_a, coeff_b, coeff_c])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "968e01bf",
   "metadata": {},
   "source": [
    "## Run forward model in parallel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "afa68f9a",
   "metadata": {},
   "outputs": [],
   "source": [
    "fwd_runs = p_map(\n",
    "    poly,\n",
    "    coeff_a,\n",
    "    coeff_b,\n",
    "    coeff_c,\n",
    "    [np.arange(max(x_observations) + 1)] * ensemble_size,\n",
    "    desc=\"Running forward model.\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "01238cd3",
   "metadata": {},
   "source": [
    "## Pick responses where we have observations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e8205a66",
   "metadata": {},
   "outputs": [],
   "source": [
    "Y = np.array(\n",
    "    [fwd_run[d.index.get_level_values(\"x\").to_list()] for fwd_run in fwd_runs]\n",
    ").T\n",
    "\n",
    "assert Y.shape == (\n",
    "    num_obs,\n",
    "    ensemble_size,\n",
    "), \"Measured responses must be a matrix with dimensions (number of observations x number of realisations)\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6cfd1a5e",
   "metadata": {},
   "source": [
    "## Condition on observations to calculate posterior using both `ES` and `SIES`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "14bf2c47",
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "from iterative_ensemble_smoother.utils import steplength_exponential\n",
    "\n",
    "X_ES_ert = X.copy()\n",
    "Y_ES_ert = Y.copy()\n",
    "smoother_es = ies.SIES(\n",
    "    parameters=X_ES_ert,\n",
    "    covariance=d.sd.values**2,\n",
    "    observations=d.value.values,\n",
    "    seed=42,\n",
    ")\n",
    "X_ES_ert = smoother_es.sies_iteration(Y_ES_ert, step_length=1.0)\n",
    "\n",
    "\n",
    "X_IES_ert = X.copy()\n",
    "Y_IES_ert = Y.copy()\n",
    "smoother_ies = ies.SIES(\n",
    "    parameters=X_IES_ert,\n",
    "    covariance=d.sd.values**2,\n",
    "    observations=d.value.values,\n",
    "    seed=42,\n",
    ")\n",
    "n_ies_iter = 7\n",
    "for i in range(n_ies_iter):\n",
    "\n",
    "    step_length = steplength_exponential(i + 1)\n",
    "    X_IES_ert = smoother_ies.sies_iteration(Y_IES_ert, step_length=step_length)\n",
    "\n",
    "    _coeff_a, _coeff_b, _coeff_c = X_IES_ert\n",
    "\n",
    "    _fwd_runs = p_map(\n",
    "        poly,\n",
    "        _coeff_a,\n",
    "        _coeff_b,\n",
    "        _coeff_c,\n",
    "        [np.arange(max(x_observations) + 1)] * ensemble_size,\n",
    "        desc=f\"SIES ert iteration {i}\",\n",
    "    )\n",
    "\n",
    "    Y_IES_ert = np.array(\n",
    "        [fwd_run[d.index.get_level_values(\"x\").to_list()] for fwd_run in _fwd_runs]\n",
    "    ).T"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c0a63880",
   "metadata": {
    "lines_to_next_cell": 2
   },
   "source": [
    "## Plots to compare results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ef0035fe",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_posterior(ax, posterior, method):\n",
    "    for i, param in enumerate(\"abc\"):\n",
    "        ax[i].set_title(param)\n",
    "        ax[i].hist(posterior[i, :], label=f\"{method} posterior\", alpha=0.5, bins=\"fd\")\n",
    "        ax[i].legend()\n",
    "\n",
    "    fig.tight_layout()\n",
    "    return ax\n",
    "\n",
    "\n",
    "fig, ax = plt.subplots(nrows=3, figsize=(7, 8))\n",
    "\n",
    "for i in range(3):\n",
    "    ax[i].hist(X[i, :], label=\"prior\", bins=\"fd\")\n",
    "\n",
    "ax[0].axvline(a_t, color=\"k\", linestyle=\"--\", label=\"truth\")\n",
    "ax[1].axvline(b_t, color=\"k\", linestyle=\"--\", label=\"truth\")\n",
    "ax[2].axvline(c_t, color=\"k\", linestyle=\"--\", label=\"truth\")\n",
    "\n",
    "plot_posterior(ax, X_ES_ert, method=\"ES ert\")\n",
    "_ = plot_posterior(ax, X_IES_ert, method=f\"SIES ert ({n_ies_iter})\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "52a96ac5",
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, axes = plt.subplots(1, 3, figsize=(8, 2.75))\n",
    "axes = axes.ravel()\n",
    "labels = \"abc\"\n",
    "true_parameters = [a_t, b_t, c_t]\n",
    "\n",
    "fig.suptitle(f\"SIES ert ({n_ies_iter}) Posterior distribution\")\n",
    "for k, (i, j) in enumerate(itertools.combinations([0, 1, 2], 2)):\n",
    "    axes[k].scatter(X[i, :], X[j, :], s=15, alpha=0.6)\n",
    "    axes[k].scatter(X_ES_ert[i, :], X_ES_ert[j, :], s=15, alpha=0.2)\n",
    "    axes[k].scatter(X_IES_ert[i, :], X_IES_ert[j, :], s=15, alpha=0.2)\n",
    "    axes[k].scatter(\n",
    "        [true_parameters[i]],\n",
    "        [true_parameters[j]],\n",
    "        color=\"black\",\n",
    "        s=100,\n",
    "        label=\"True value\",\n",
    "    )\n",
    "    axes[k].set_xlabel(labels[i])\n",
    "    axes[k].set_ylabel(labels[j])\n",
    "\n",
    "\n",
    "axes[k].legend()\n",
    "fig.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa8526f7",
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(8, 2.5), sharex=True, sharey=True)\n",
    "x_plot = np.linspace(0, 10, 2**8)\n",
    "\n",
    "# Plot the prior\n",
    "ax1.set_title(\"Prior\")\n",
    "ax1.plot(x_plot, poly(a_t, b_t, c_t, x_plot), zorder=10, lw=4, color=\"black\")\n",
    "for parameter_prior in X.T:\n",
    "    ax1.plot(\n",
    "        x_plot, poly(*parameter_prior, x_plot), color=COLORS[0], alpha=0.1, zorder=5\n",
    "    )\n",
    "\n",
    "# Plot the posterior\n",
    "ax2.set_title(\"ES ert posterior\")\n",
    "ax2.plot(x_plot, poly(a_t, b_t, c_t, x_plot), zorder=10, lw=4, color=\"black\")\n",
    "for parameter_posterior in X_ES_ert.T:\n",
    "    ax2.plot(\n",
    "        x_plot, poly(*parameter_posterior, x_plot), alpha=0.1, zorder=5, color=COLORS[1]\n",
    "    )\n",
    "\n",
    "# Plot the posterior\n",
    "ax3.set_title(f\"SIES ert ({n_ies_iter}) posterior\")\n",
    "ax3.plot(x_plot, poly(a_t, b_t, c_t, x_plot), zorder=10, lw=4, color=\"black\")\n",
    "for parameter_posterior in X_IES_ert.T:\n",
    "    ax3.plot(\n",
    "        x_plot, poly(*parameter_posterior, x_plot), alpha=0.1, zorder=5, color=COLORS[2]\n",
    "    )\n",
    "\n",
    "# Common axes setup\n",
    "for ax in [ax1, ax2, ax3]:\n",
    "    ax.set_ylim([0, 70])\n",
    "    ax.set_xlabel(\"Time step\")\n",
    "    ax.set_ylabel(\"Response\")\n",
    "    ax.grid(zorder=0)\n",
    "\n",
    "fig.tight_layout()\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "jupytext": {
   "formats": "ipynb,py:percent"
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}