From 7f17a26e08f5682552a9ff08e6cd1a72cd0e76cc Mon Sep 17 00:00:00 2001 From: Roger Labbe Date: Wed, 22 Jul 2015 12:55:58 -0700 Subject: [PATCH] Fixed use of ```python blocks. Turns out my problem was the closing ``` needs to be on a line of its own for nbconvert to parse it correctly. --- 01-g-h-filter.ipynb | 17 ++++-- 02-Discrete-Bayes.ipynb | 8 ++- 03-Gaussians.ipynb | 5 +- 04-One-Dimensional-Kalman-Filters.ipynb | 75 ++++++++++++++++-------- 06-Multivariate-Kalman-Filters.ipynb | 33 +++++++---- 07-Kalman-Filter-Math.ipynb | 6 +- 08-Designing-Kalman-Filters.ipynb | 7 ++- 09-Nonlinear-Filtering.ipynb | 22 +++---- 10-Unscented-Kalman-Filter.ipynb | 32 ++++++---- 11-Extended-Kalman-Filters.ipynb | 36 ++++++------ 12-Particle-Filters.ipynb | 64 ++++++++++++-------- 13-Smoothing.ipynb | 3 +- 14-Adaptive-Filtering.ipynb | 3 +- Appendix-E-Ensemble-Kalman-Filters.ipynb | 36 ++++++++---- 14 files changed, 216 insertions(+), 131 deletions(-) diff --git a/01-g-h-filter.ipynb b/01-g-h-filter.ipynb index 68a1a91..86f0d32 100644 --- a/01-g-h-filter.ipynb +++ b/01-g-h-filter.ipynb @@ -968,7 +968,8 @@ "One final point before we go on. In the prediction step I wrote the line\n", "\n", "```python\n", - "gain_rate = gain_rate```\n", + "gain_rate = gain_rate\n", + "```\n", " \n", "This obviously has no effect, and can be removed. I wrote this to emphasize that in the prediction step you need to predict next value for **all** variables, both *weight* and *gain_rate*. In this case we are assuming that the the gain does not vary, but when we generalize this algorithm we will remove that assumption. " ] @@ -1400,7 +1401,8 @@ " 'g' is the g-h's g scale factor\n", " 'h' is the g-h's h scale factor\n", " 'dt' is the length of the time step \n", - " \"\"\"```\n", + " \"\"\"\n", + "```\n", "\n", "Return the data as a NumPy array, not a list. Test it by passing in the same weight data as before, plot the results, and visually determine that it works." ] @@ -2126,19 +2128,22 @@ "\n", "```python\n", "pos = 23*1000\n", - "vel = 15```\n", + "vel = 15\n", + "```\n", "\n", "Now we can compute the position of the train at some future time, *assuming* no change in velocity, with\n", "\n", "```python\n", "def compute_new_position(pos, vel, dt=1):\n", - " return pos + (vel * dt)```\n", + " return pos + (vel * dt)\n", + "```\n", "\n", "We can simulate the measurement by adding in some random noise to the position. Here our error is 500m, so the code might look like:\n", "\n", "```python\n", "def measure_position(pos):\n", - " return pos + random.randn()*500```\n", + " return pos + random.randn()*500\n", + "```\n", " \n", "Let's put that in a cell and plot the results of 100 seconds of simulation. I will use NumPy's `asarray` function to convert the data into an NumPy array. This will allow me to divide all of the elements of the array at once by using the '/' operator." ] @@ -2617,7 +2622,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.4.3" + "version": "3.4.1" } }, "nbformat": 4, diff --git a/02-Discrete-Bayes.ipynb b/02-Discrete-Bayes.ipynb index 99fc7b4..b732ace 100644 --- a/02-Discrete-Bayes.ipynb +++ b/02-Discrete-Bayes.ipynb @@ -1767,7 +1767,8 @@ " if val == z:\n", " belief[i] *= correct_scale\n", " else:\n", - " belief[i] *= 1.```\n", + " belief[i] *= 1.\n", + "```\n", "\n", "I added the `else` here, which has no mathematical effect, to point out that every element in $x$ (called `belief` in the code) is multiplied by a probability. 
You may object that I am multiplying by a scale factor, which I am, but this scale factor is derived from the probability of the measurement being correct vs the probability being incorrect.\n", "\n", @@ -1796,7 +1797,8 @@ "for i in range(N):\n", " for k in range (kN):\n", " index = (i + (width-k) - offset) % N\n", - " result[i] += prob_dist[index] * kernel[k]```" + " result[i] += prob_dist[index] * kernel[k]\n", + "```" ] }, { @@ -1883,7 +1885,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.4.3" + "version": "3.4.1" } }, "nbformat": 4, diff --git a/03-Gaussians.ipynb b/03-Gaussians.ipynb index 788ab03..9f602d4 100644 --- a/03-Gaussians.ipynb +++ b/03-Gaussians.ipynb @@ -915,7 +915,7 @@ " gaussian with the specified mean and variance. \n", " \"\"\"\n", " return np.exp((-0.5*(x-mean)**2)/var) / \\\n", - " np.sqrt(_two_pi*var)```" + "```" ] }, { @@ -1540,7 +1540,8 @@ "The code for rand_student_t is included in `filterpy.stats`. You may use it with\n", "\n", "```python\n", - "from filterpy.stats import rand_student_t```" + "from filterpy.stats import rand_student_t\n", + "```" ] }, { diff --git a/04-One-Dimensional-Kalman-Filters.ipynb b/04-One-Dimensional-Kalman-Filters.ipynb index 2be2551..7b1b910 100644 --- a/04-One-Dimensional-Kalman-Filters.ipynb +++ b/04-One-Dimensional-Kalman-Filters.ipynb @@ -353,13 +353,15 @@ "\n", "```python\n", "noisy_vel = vel + randn()*velocity_std\n", - "x = x + noisy_vel*dt```\n", + "x = x + noisy_vel*dt\n", + "```\n", " \n", "\n", "To model the sensor we need to take the current position of the dog and then add some noise to it proportional to the Gaussian of the measurement noise. This is simply\n", "\n", "```python\n", - "z = x + randn()*measurement_std```\n", + "z = x + randn()*measurement_std\n", + "```\n", " \n", "I don't like the bookkeeping required to keep track of the various values for position, velocity, and standard deviations so I will implement the simulation as a class. " ] @@ -734,19 +736,22 @@ " else:\n", " q[i] = pos[i] * p_miss\n", " normalize(q)\n", - " return q```\n", + " return q\n", + "```\n", " \n", "Note that the algorithm is essentially computing:\n", "\n", "```python\n", - "new_belief = prior_belief * measurement * sensor_error```\n", + "new_belief = prior_belief * measurement * sensor_error\n", + "```\n", " \n", "The measurement term might not be obvious, but recall that measurement in this case was always 1 or 0, and so it was left out for convenience. Here *prior* carries the both the colloquial sense of *previoius*, but also the Bayesian meaning. In Bayesian terms the *prior* is the probability after the prediction and before the measurements have been incorporated.\n", " \n", "If we are implementing this with Gaussians, we might expect it to be implemented as:\n", "\n", "```python\n", - "new_gaussian = measurement * old_gaussian```\n", + "new_gaussian = measurement * old_gaussian\n", + "```\n", " \n", "where measurement is a Gaussian returned from the sensor. But does that make sense? Can we multiply Gaussians? If we multiply a Gaussian with a Gaussian is the result another Gaussian, or something else?" 
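 "As a preview of the next section, here is a minimal sketch of what multiplying two Gaussians produces; the parameter names are illustrative, and the formulas anticipate the derivation that follows rather than replacing it:\n", "\n", "```python\n", "def multiply(mean1, var1, mean2, var2):\n", "    # the product of N(mean1, var1) and N(mean2, var2) is proportional\n", "    # to another Gaussian with this mean and variance (derived later)\n", "    mean = (var1*mean2 + var2*mean1) / (var1 + var2)\n", "    var = (var1 * var2) / (var1 + var2)\n", "    return mean, var\n", "```\n", "\n", "The result is again a mean and a variance, which suggests the answer to the question above is 'another Gaussian'; the next section shows why.\n",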
] @@ -1066,7 +1071,8 @@ "```python\n", "def update_dog(dog_pos, dog_variance, measurement, measurement_variance):\n", " return multiply(dog_pos, dog_variance, \n", - " measurement, measurement_variance)```" + " measurement, measurement_variance)\n", + "```" ] }, { @@ -1161,7 +1167,8 @@ " pos[(i-move) % n] * p_correct + \\\n", " pos[(i-move-1) % n] * p_over + \\\n", " pos[(i-move+1) % n] * p_under \n", - " return result```\n", + " return result\n", + "```\n", "\n", "In a nutshell, we shift the probability vector by the amount we believe the animal moved, and adjust the probability. How do we do that with Gaussians?\n", "\n", @@ -1328,18 +1335,21 @@ "Now let's walk through the code and output.\n", "\n", "```python\n", - "pos = (0., 400.) # gaussian N(0, 400)```\n", + "pos = (0., 400.) # gaussian N(0, 400)\n", + "```\n", " \n", "Here we set the initial position at 0 m. We set the variance to 400 m$^2$, which is a standard deviation of 20 meters. You can think of this as saying \"I believe with 99% accuracy the position is 0 plus or minus 60 meters\". This is because with Gaussians ~99% of values fall within 3$\\sigma$ of the mean.\n", "\n", "```python\n", - "velocity = 1.```\n", + "velocity = 1.\n", + "```\n", " \n", "So how do I know the velocity? Magic? Consider it a prediction, or perhaps we have a secondary velocity sensor. If this is a robot then this would be a control input to the robot. In the next chapter we will learn how to handle situations where you don't have a velocity sensor or input, so please accept this simplification for now.\n", "\n", "```python\n", "process_variance = 1.\n", - "sensor_variance = 2.```\n", + "sensor_variance = 2.\n", + "```\n", " \n", "Here the variance for the predict step is `process_variance` and the variance for the sensor is `sensor_variance`. The meaning of sensor variance should be clear - it is how much variance there is in each sensor reading. The process variance is how much error there is in the prediction model. We are predicting that at each time step the dog moves forward one meter. Dogs rarely do what we expect, and of course things like hills or the whiff of a squirrel will change his progress. If this was a robot responding to digital commands the performance would be better, but not perfect. Perhaps a hand made robot would have a variance of $\\sigma^2=.1$, and an industrial robot might have $\\sigma^2=0.02$. These are not 'magic' numbers; the square root of the express the distance error in meters. It is easy to get a Kalman filter working by just plugging in numbers, but if the numbers do not reflect reality the performance of the filter will be poor." ] @@ -1354,7 +1364,8 @@ "dog = DogSimulation(pos[0], velocity=velocity, \n", " measurement_variance=sensor_variance, \n", " process_variance=process_variance)\n", - "zs = [dog.move_and_sense() for _ in range(N)]```\n", + "zs = [dog.move_and_sense() for _ in range(N)]\n", + "```\n", " \n", "It may seem very 'convenient' to set the simulator to the same position as our guess, and it is. Do not fret. In the next example we will see the effect of a wildly inaccurate guess for the dog's initial position. We move the dog for 10 steps forward and store the measurements in `zs` via a list comprehension.\n", "\n", @@ -1363,7 +1374,8 @@ "The next code allocates an array to store the output of the filtered positions. 
\n", "\n", "```python\n", - "positions = np.zeros(N)```" + "positions = np.zeros(N)\n", + "```" ] }, { @@ -1375,7 +1387,8 @@ "```python\n", "for i in range(N):\n", " pos = predict(pos=pos[0], variance=pos[1], \n", - " movement=vel, movement_variance=process_variance)```\n", + " movement=vel, movement_variance=process_variance)\n", + "```\n", "\n", "We call the `predict()` function with the Gaussian for the dog's position, and another Gaussian for its movement. `pos[0], pos[1]` is the mean and variance for the position, and 'vel, process_variance' is the mean and variance for the movement. The first time through the loop we get\n", "\n", @@ -1387,7 +1400,8 @@ "\n", "```python\n", "pos = update(pos[0], pos[1], z, sensor_variance)\n", - "positions.append(pos[0])```\n", + "positions.append(pos[0])\n", + "```\n", " \n", "For this I get this as the result:\n", "\n", @@ -1408,7 +1422,8 @@ " pos = predict(pos=pos[0], variance=pos[1], \n", " movement=vel, movement_variance=process_variance)\n", " pos = update(mean=pos[0], variance=pos[1],\n", - " measurement=z, measurement_variance=sensor_variance)```" + " measurement=z, measurement_variance=sensor_variance)\n", + "```" ] }, { @@ -1615,7 +1630,8 @@ "For many purposes the code above suffices. However, if you write enough of these filters the functions will become a bit annoying. For example, having to write\n", "\n", "```python\n", - "pos = predict(pos[0], pos[1], movement, movement_variance) ```\n", + "pos = predict(pos[0], pos[1], movement, movement_variance)\n", + "```\n", "\n", "is a bit cumbersome and error prone. Let's investigate how we might implement this in a form that makes our lives easier.\n", "\n", @@ -1626,7 +1642,8 @@ " def __init__(self, initial_state, measurement_variance, movement_variance):\n", " self.state = initial_state\n", " self.measurement_variance = measurement_variance\n", - " self.movement_variance = movement_variance```\n", + " self.movement_variance = movement_variance\n", + "```\n", "\n", "That works, but I am going to use different naming. The Kalman filter literature uses math, not code, so it uses single letter variables, so I will start exposing you to it now. At first it seems impossibly terse, but as you become familiar with the nomenclature you'll see that the math formulas in the textbooks will have an exact one-to-one correspondence with the code. Unfortunately there is not a lot of meaning behind the naming unless you have training in control theory; you will have to memorize them. If you do not make this effort you will never be able to read the literature.\n", "\n", @@ -1638,21 +1655,24 @@ " self.x = x0\n", " self.P = P\n", " self.R = R\n", - " self.Q = Q```\n", + " self.Q = Q\n", + "```\n", " \n", "Now we can implement the `update()` and `predict()` methods. In the literature the measurement is usually named either `z` or `y`; I find `y` is too easy to confuse with the y axis of a plot, so I like `z`. I like to think I can hear a `z` in *measurement*, which helps me remember what `z` stands for. So for the update method we might write:\n", "\n", "```python\n", "def update(z):\n", " self.x = (self.P * z + self.x * self.R) / (self.P + self.R)\n", - " self.P = 1 / (1/self.P + 1/self.R)```\n", + " self.P = 1 / (1/self.P + 1/self.R)\n", + "```\n", "\n", "Finally, the movement is usually called `u`, and so we will use that. 
So for the predict method we might write:\n", "\n", "```python\n", "def predict(self, u):\n", " self.x += u\n", - " self.P += self.Q```\n", + " self.P += self.Q\n", + "```\n", "\n", "That give us the following code. Production code would require significant comments and error checking. However, in the next chapter we will develop Kalman filter code that works for any dimension so this class will never be more than a stepping stone for us." ] @@ -1794,14 +1814,16 @@ "Modify the values of `movement_variance` and `sensor_variance` and note the effect on the filter and on the variance. Which has a larger effect on the variance convergence?. For example, which results in a smaller variance:\n", "\n", "```python\n", - " movement_variance = 40\n", - " sensor_variance = 2```\n", + "movement_variance = 40\n", + "sensor_variance = 2\n", + "```\n", " \n", "or:\n", "\n", "```python\n", - " movement_variance = 2\n", - " sensor_variance = 40```" + "movement_variance = 2\n", + "sensor_variance = 40\n", + "```" ] }, { @@ -2481,7 +2503,8 @@ "\n", "```python\n", "for i in range(100):\n", - " Z = math.sin(i/3.) * 2```\n", + " Z = math.sin(i/3.) * 2\n", + "```\n", " \n", "Adjust the variance and initial positions to see the effect. What is, for example, the result of a very bad initial guess?" ] @@ -2757,7 +2780,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.4.3" + "version": "3.4.1" } }, "nbformat": 4, diff --git a/06-Multivariate-Kalman-Filters.ipynb b/06-Multivariate-Kalman-Filters.ipynb index 8d13d8e..3c28753 100644 --- a/06-Multivariate-Kalman-Filters.ipynb +++ b/06-Multivariate-Kalman-Filters.ipynb @@ -453,7 +453,8 @@ "I have programmed the equations of the Kalman filter into the `KalmanFilter` class in FilterPy. You will import it with\n", "\n", "```python\n", - "from filterpy.kalman import KalmanFilter```" + "from filterpy.kalman import KalmanFilter\n", + "```" ] }, { @@ -681,7 +682,8 @@ "\n", "```python\n", "def predict(pos, variance, movement, movement_variance):\n", - " return (pos + movement, variance + movement_variance)```\n", + " return (pos + movement, variance + movement_variance)\n", + "```\n", " \n", "If the current position is 5.4 meters, and the movement is 1.1 meters over the time step, then of course the new position is 5.4 + 1.1, or 6.5 meters.\n", "\n", @@ -1085,7 +1087,8 @@ "\n", "```python\n", "CELSIUS_TO_VOLTS = 0.21475\n", - "residual = measurement - CELSIUS_TO_VOLTS * predicted_state```\n", + "residual = measurement - CELSIUS_TO_VOLTS * predicted_state\n", + "```\n", " \n", "The Kalman filter generalizes this problem by having you supply a **measurement function** that converts a state into a measurement. \n", "\n", @@ -1334,19 +1337,22 @@ "The function `filter_dog()` implements the filter itself. Lets work through it line by line. The first line creates the simulation of the DogSensor, as we have seen in the previous chapter.\n", "\n", "```python\n", - "dog = DogSensor(velocity=1, noise=noise)```\n", + "dog = DogSensor(velocity=1, noise=noise)\n", + "```\n", "\n", "The next line uses our helper function to create a Kalman filter.\n", "\n", "```python\n", - "dog_filter = dog_tracking_filter(R=R, Q=Q, cov=500.)```\n", + "dog_filter = dog_tracking_filter(R=R, Q=Q, cov=500.)\n", + "```\n", "\n", "We will want to plot the filtered position, the measurements, and the covariance, so we will need to store them in lists. 
The next three lines initialize empty lists of length *count* in a Pythonic way.\n", "\n", "```python\n", "pos = [None] * count\n", "zs = [None] * count\n", - "cov = [None] * count```\n", + "cov = [None] * count\n", + "```\n", "\n", "Finally we get to the filter. All we need to do is perform the update and predict steps of the Kalman filter for each measurement. The `KalmanFilter` class provides the two methods `update()` and `predict()` for this purpose. `update()` performs the measurement update step of the Kalman filter, and so it takes a variable containing the sensor measurement. \n", "\n", @@ -1356,7 +1362,8 @@ "for t in range(count):\n", " z = dog.sense()\n", " dog_filter.update (z)\n", - " dog_filter.predict()```\n", + " dog_filter.predict()\n", + "```\n", "\n", "It really cannot get much simpler than that. As we tackle more complicated problems this code will remain largely the same; all of the work goes into setting up the `KalmanFilter` variables; executing the filter is trivial.\n", "\n", @@ -1364,7 +1371,8 @@ "\n", "The Kalman filter is designed as a recursive algorithm - as new measurements come in we immediately create a new estimate. But it is very common to have a set of data that have been already collected which we want to filter. Kalman filters can always be run in a *batch* mode, where all of the measurements are filtered at once. We have implemented this in `KalmanFilter.batch_filter()`. Internally, all the method does is loop over the measurements and collect the resulting state and covariance estimates in arrays.\n", "\n", - "Here is an alternative form of the last function which uses batch filtering. `batch_filter` returns four NumPy `Array` objects; the first two contain the filtered estimates and covariances (the posteriors), and the last two contain the predicted estimates and covariances (the priors). The priors are useful for smoothing algorithms which we will learn later, but for now they are not useful to us, so I disregard them." + "Here is an alternative form of the last function which uses batch filtering. `batch_filter` returns four NumPy `Array` objects; the first two contain the filtered estimates and covariances (the posteriors), and the las\n", + "t two contain the predicted estimates and covariances (the priors). The priors are useful for smoothing algorithms which we will learn later, but for now they are not useful to us, so I disregard them." ] }, { @@ -2678,17 +2686,20 @@ "First collect your measurements into an array or list. 
Maybe it is in a CSV file, for example.\n", "\n", "```python\n", - "zs = read_altitude_from_csv()```\n", + "zs = read_altitude_from_csv()\n", + "```\n", "\n", "Or maybe you will generate it using a generator:\n", "\n", "```python\n", - "zs = [some_func(i) for i in range(1000)]```\n", + "zs = [some_func(i) for i in range(1000)]\n", + "```\n", "\n", "Then call the `batch_filter()` method.\n", "\n", "```python\n", - "Xs, Ps, Xs_pred, Ps_pred = kfilter.batch_filter(zs)```\n", + "Xs, Ps, Xs_pred, Ps_pred = kfilter.batch_filter(zs)\n", + "```\n", "\n", "The function takes the list/array of measurements, filters it, and returns a list of state estimates (Xs), covariance matrices (Ps), and the predictions for the same (Xs_pred, Ps_pred).\n", "\n", diff --git a/07-Kalman-Filter-Math.ipynb b/07-Kalman-Filter-Math.ipynb index 42bab8a..b0be344 100644 --- a/07-Kalman-Filter-Math.ipynb +++ b/07-Kalman-Filter-Math.ipynb @@ -358,7 +358,8 @@ " else:\n", " pos_belief[i] *= p_miss\n", "\n", - " pos_belief /= sum(pos_belief)```\n", + " pos_belief /= sum(pos_belief)\n", + "```\n", "\n", "Let's rewrite this using our newly learned terminology.\n", "\n", @@ -371,7 +372,8 @@ " else:\n", " posterior_probability[i] = prior_probability[i] * p_miss\n", "\n", - " return posterior_probability / sum(posterior_probability)```\n", + " return posterior_probability / sum(posterior_probability)\n", + "```\n", "\n", "\n", "So what is this doing? It's multiplying the old belief that the dog is at position *i* (prior probability) with the probability that the measurement is correct for that position, and then dividing by the total probability for that new event.\n", diff --git a/08-Designing-Kalman-Filters.ipynb b/08-Designing-Kalman-Filters.ipynb index 704b947..56e779f 100644 --- a/08-Designing-Kalman-Filters.ipynb +++ b/08-Designing-Kalman-Filters.ipynb @@ -2793,7 +2793,8 @@ "So to create a trajectory starting at (0, 15) with a velocity of $100 \\frac{m}{s}$ and an angle of $60^\\circ$ we would write:\n", "\n", "```python\n", - "traj = BallTrajectory2D(x0=0, y0=15, velocity=100, theta_deg=60)```\n", + "traj = BallTrajectory2D(x0=0, y0=15, velocity=100, theta_deg=60)\n", + "```\n", " \n", "and then call `traj.position(t)` for each time step. Let's test this " ] @@ -3058,8 +3059,8 @@ "vx = cos(omega) * v0\n", "vy = sin(omega) * v0\n", "\n", - "f1.x = np.array([[x, vx, y, vy]]).T```\n", - " \n", + "f1.x = np.array([[x, vx, y, vy]]).T\n", + "```\n", " \n", "With all the steps done we are ready to implement our filter and test it. First, the implementation:" ] diff --git a/09-Nonlinear-Filtering.ipynb b/09-Nonlinear-Filtering.ipynb index beac4d4..4f1b5c7 100644 --- a/09-Nonlinear-Filtering.ipynb +++ b/09-Nonlinear-Filtering.ipynb @@ -282,23 +282,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The Kalman filter that we have developed to this point is extremely good, but it is also limited. All of the equations are linear, and so the filter can only handle linear problems. But the world is nonlinear, and so the classic filter that we have been studying to this point can have very limited utility.For example, the state transition function is $\\mathbf{x}^- = \\mathbf{Fx}$. Suppose we wanted to track the motion of a weight on a spring. We might be tracking an automobile's active suspension, for example. The equation for the motion with $m$ being the mass, $k$ the spring constant, and $c$ the damping force is \n", + "The Kalman filter that we have developed to this point is extremely good, but it is also limited. 
All of the equations are linear, and so the filter can only handle linear problems. But the world is nonlinear, and so the classic filter that we have been studying to this point can have very limited utility. \n", + "\n", + "There can be nonlinearity in the process model. Suppose we wanted to track the motion of a weight on a spring, such as an automobile's suspension. The equation for the motion with $m$ being the mass, $k$ the spring constant, and $c$ the damping force is \n", "\n", "$$m\\frac{d^2x}{dt^2} + c\\frac{dx}{dt} +kx = 0$$\n", "\n", - "which is a second order differential equation. It is not possible to write a linear equation for this equation, and therefore we cannot even formulate a design for the Kalman filter. \n", + "It is not possible to write a linear equation for this second order differential equation, and therefore we cannot design a Kalman filter using the theory that we have learned.\n", "\n", - "The early adopters of Kalman filters needed to apply the filters to nonlinear problems and this fact was not lost on them. For example, radars measure the slant range to an object, and we are typically interested in the aircraft's position over the ground. We invoke Pythagoras and get the nonlinear equation:\n", + "A second source of nonlinearity comes from the measurements. For example, radars measure the slant range to an object, and we are typically interested in the aircraft's position over the ground. We invoke Pythagoras and get the nonlinear equation:\n", "\n", - "$$x=\\sqrt{slant^2 - altitude^2}$$\n", + "$$x=\\sqrt{\\mathtt{slant}^2 - \\mathtt{altitude}^2}$$\n", "\n", - "And then, of course, the behavior of the objects being tracked is also nonlinear. So shortly after the idea of the Kalman filter was published by Kalman people began working on how to extend the Kalman filter into nonlinear problems. \n", + "These facts were not lost on the early adopters of the Kalman filter. Shortly after the idea of the Kalman filter was published by Kalman people began working on how to extend the Kalman filter into nonlinear problems. \n", "\n", - "It is almost true to state that the only equation we (anyone) know how to solve is $\\mathbf{Ax}=\\mathbf{b}$. We only really know how to do linear algebra. I can give you a linear equation and you can solve it. If I told you your job depended on knowing how to solve the equation $ax+b3=c$ you would trivially solve it for any values of $a$, $b$, and $c$. There is no linear equation that you cannot either solve or prove has no solution ($x+1=x$).\n", + "It is almost true to state that the only equation we (anyone) know how to solve is $\\mathbf{Ax}=\\mathbf{b}$. We only really know how to do linear algebra. I can give you a linear equation and you can solve it. If I told you your job depended on knowing how to solve the equation $ax+3b=c$ you would trivially solve it for any values of $a$, $b$, and $c$. There is no linear equation that you cannot either solve or prove has no solution ($x+1=x$).\n", "\n", - "Anyone with formal mathematical education has spent years learning various analytic ways to solve integrals, differential equations and so on. It was all a lie, nearly. Even trivial physical systems produce equations that no one in the world know how to solve analytically. I can give you an equation that you are able to integrate, multiply or add in an $\\log$ term, and render it insolvable. 
It is stuff like this that leads to jokes about physicists stating stating the problem \"assume a spherical cow on a frictionless surface in a vacuum...\"\n", + "Anyone with formal education in math or physics has spent years learning various analytic ways to solve integrals, differential equations and so on. Yet even trivial physical systems produce equations that cannot be solved analytically. I can take an equation that you are able to integrate, insert a $\\log$ term, and render it insolvable. It is stuff like this that leads to jokes about physicists stating stating the problem \"assume a spherical cow on a frictionless surface in a vacuum...\"\n", "\n", - "How do we do things like model airflow over an aircraft in a computer, or predict weather, or track missiles with a Kalman filter? We retreat to what we know: $\\mathbf{Ax}=\\mathbf{b}$. We find some way to linearize the problem, turning it into a set of linear equations, and then use linear algebra software packages to grind out a solution. It's an approximation, so the answers are approximate. Linearizing a nonlinear problem gives us inexact answers, and in a recursive algorithm like a Kalman filter or weather tracking system these small errors can sometimes reinforce each other at each step, quickly causing the algorithm to spit out nonsense. \n", + "How do we do things like model airflow over an aircraft in a computer, or predict weather, or track missiles with a Kalman filter? We retreat to what we know: $\\mathbf{Ax}=\\mathbf{b}$. We find some way to linearize the problem, turning it into a set of linear equations, and then use linear algebra software packages to compute an approximate solution. Linearizing a nonlinear problem gives us inexact answers, and in a recursive algorithm like a Kalman filter or weather tracking system these small errors can sometimes reinforce each other at each step, quickly causing the algorithm to spit out nonsense. \n", "\n", "What we are about to embark upon is a difficult problem. There is not one obvious, correct, mathematically optimal solution anymore. We will be using approximations, we will be introducing errors into our computations, and we will forever be battling filters that *diverge*, that is, filters whose numerical errors overwhelm the solution. \n", "\n", @@ -358,7 +360,7 @@ "source": [ "I particularly like the following way of looking at the problem, which I am borrowing from Dan Simon's *Optimal State Estimation* [[1]](#[1]). Consider a tracking problem where we get the range and bearing to a target, and we want to track its position. Reported distance is 50 km, and the reported angle is 90$^\\circ$. Now, given that sensors are imperfect, assume that the errors in both range and angle are distributed in a Gaussian manner. Given an infinite number of measurements what is the expected value of the position?\n", "\n", - "I have been recommending using intuition to solve problems in this book, so let's see how it fares for this problem (hint: nonlinear problems are *not* intuitive). We might reason that since the mean of the range will be 50 km, and the mean of the angle will be 90$^\\circ$, that clearly the answer will be x=0 km, y=90 km.\n", + "I have been recommending using intuition to gain insight, so let's see how it fares for this problem (hint: nonlinear problems are *not* intuitive). 
We might reason that since the mean of the range will be 50 km, and the mean of the angle will be 90$^\\circ$, that clearly the answer will be x=0 km, y=90 km.\n", "\n", "Well, let's plot that and find out. Here are 3000 points plotted with a normal distribution of the distance of 0.4 km, and the angle having a normal distribution of 0.35 radians. We compute the average of the all of the positions, and display it as a star. Our intuition is displayed with a large circle." ] @@ -417,7 +419,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Unfortunately Gaussians are not closed under an arbitrary nonlinear function. Recall the equations of the Kalman filter - at each evolution (prediction) we pass the Gaussian representing the state through the process function to get the Gaussian at time $k$. Our process function was always linear, so the output was always another Gaussian. Let's look at that on a graph. I will take an arbitrary Gaussian and pass it through the function $f(x) = 2x + 1$ and plot the result. We know how to do this analytically, but lets do this with sampling. I will generate 500,000 points with a normal distribution, pass it through the function, and then plot the results. I do it this way because the next example will be nonlinear, and we will have no way to compute this analytically." + "Gaussians are not closed under an arbitrary nonlinear function. Recall the equations of the Kalman filter - at each evolution (prediction) we pass the Gaussian representing the state through the process function to get the Gaussian at time $k$. Our process function was always linear, so the output was always another Gaussian. Let's look at that on a graph. I will take an arbitrary Gaussian and pass it through the function $f(x) = 2x + 1$ and plot the result. We know how to do this analytically, but lets do this with sampling. I will generate 500,000 points with a normal distribution, pass it through the function, and then plot the results. I do it this way because the next example will be nonlinear, and we will have no way to compute this analytically." ] }, { diff --git a/10-Unscented-Kalman-Filter.ipynb b/10-Unscented-Kalman-Filter.ipynb index 76d38f1..1698825 100644 --- a/10-Unscented-Kalman-Filter.ipynb +++ b/10-Unscented-Kalman-Filter.ipynb @@ -1062,7 +1062,8 @@ "\n", "```python\n", "def sigma_points(self, x, P)\n", - "def weights(self)```\n", + "def weights(self)\n", + "```\n", "\n", "FilterPy provides the class `MerweScaledSigmaPoints`, which implements the sigma point algorithm in this chapter." ] @@ -1078,7 +1079,8 @@ "from filterpy.kalman import UnscentedKalmanFilter as UKF\n", "\n", "points = MerweScaledSigmaPoints(n=4, alpha=.1, beta=2., kappa=-1)\n", - "ukf = UKF(dim_x=4, dim_z=2, fx=f_cv, hx=h_cv, dt=dt, points=points)```" + "ukf = UKF(dim_x=4, dim_z=2, fx=f_cv, hx=h_cv, dt=dt, points=points)\n", + "```" ] }, { @@ -1783,7 +1785,8 @@ "\n", "```python\n", "def bearing(sensor, target):\n", - " return math.atan2(target[1] - sensor[1], target[0] - sensor[0])```\n", + " return math.atan2(target[1] - sensor[1], target[0] - sensor[0])\n", + "```\n", "\n", "The filter receives a vector of 2 measurements during each update, one for each sensor. We can implement that as:\n", "\n", @@ -1791,7 +1794,8 @@ "def measurement(A_pos, B_pos, pos):\n", " angle_a = bearing(A_pos, pos)\n", " angle_b = bearing(B_pos, pos)\n", - " return [angle_a, angle_b]```" + " return [angle_a, angle_b]\n", + "```" ] }, { @@ -2153,7 +2157,8 @@ "Wc = np.full(2*n + 1, 1. 
/ (2*(n+lambda_))\n", "Wm = np.full(2*n + 1, 1. / (2*(n+lambda_))\n", "Wc[0] = lambda_ / (n + lambda_) + (1. - alpha**2 + beta)\n", - "Wm[0] = lambda_ / (n + lambda_)```" + "Wm[0] = lambda_ / (n + lambda_)\n", + "```" ] }, { @@ -2205,7 +2210,8 @@ "sigmas[0] = X\n", "for k in range (n):\n", " sigmas[k+1] = X + U[k]\n", - " sigmas[n+k+1] = X - U[k]```" + " sigmas[n+k+1] = X - U[k]\n", + "```" ] }, { @@ -2223,7 +2229,8 @@ "We implement the sum of the means with\n", "\n", "```python\n", - "x = np.dot(Wm, sigmas)```\n", + "x = np.dot(Wm, sigmas)\n", + "```\n", "\n", "If you are not a heavy user of NumPy this may look foreign to you. NumPy is not just a library that make linear algebra possible; under the hood it is written in C to achieve much faster speeds than Python can reach. A typical speedup is 20x to 100x. To get that speedup we must avoid using for loops, and instead use NumPy's built in functions to perform calculations. So, instead of writing a for loop to compute the sum, we call the built in `numpy.dot(x,y)` method. If passed a 1D array and a 2D array it will compute the sum of inner products:" ] @@ -2267,7 +2274,7 @@ " y = sigmas[k] - x\n", " P += Wc[k] * np.outer(y, y) \n", "P += Q\n", - " ```\n", + "```\n", "\n", "This introduces another feature of NumPy. The state variable `x` is one dimensional, as is `sigmas[k]`, so the difference `sigmas[k]-X` is also one dimensional. NumPy will not compute the transpose of a 1-D array; it considers the transpose of `[1,2,3]` to be `[1,2,3]`. So we call the function `np.outer(y,y)` which computes the value of $\\mathbf{yy}^\\mathsf{T}$ for a 1D array $\\mathbf{y}$. An alternative implementation could be:\n", "\n", @@ -2413,12 +2420,14 @@ "Collect your measurements into an array or list.\n", "\n", "```python\n", - "zs = read_altitude_from_csv()```\n", + "zs = read_altitude_from_csv()\n", + "```\n", "\n", "Then call the `batch_filter()` method.\n", "\n", "```python\n", - "Xs, Ps = ukf.batch_filter(zs)```\n", + "Xs, Ps = ukf.batch_filter(zs)\n", + "```\n", "\n", "The function takes the list/array of measurements, filters it, and returns an array of state estimates (Xs) and covariance matrices (Ps) for the entire data set. \n", "\n", @@ -3161,7 +3170,8 @@ "\n", "ukf = UKF(dim_x=3, dim_z=2, fx=fx, hx=Hx, dt=dt, points=points,\n", " x_mean_fn=state_mean, z_mean_fn=z_mean,\n", - " residual_x=residual_x, residual_z=residual_h)```\n", + " residual_x=residual_x, residual_z=residual_h)\n", + "```\n", "\n", "The rest of the code runs the simulation and plots the results, and shouldn't need too much comment by now. I create a variable `landmarks` that contains the coordinates of the landmarks. I update the simulated robot position 10 times a second, but run the EKF only once. This is for two reasons. First, we are not using Runge Kutta to integrate the differental equations of motion, so a narrow time step allows our simulation to be more accurate. Second, it is fairly normal in embedded systems to have limited processing speed. This forces you to run your Kalman filter only as frequently as absolutely needed." ] diff --git a/11-Extended-Kalman-Filters.ipynb b/11-Extended-Kalman-Filters.ipynb index ad22a8e..e504241 100644 --- a/11-Extended-Kalman-Filters.ipynb +++ b/11-Extended-Kalman-Filters.ipynb @@ -275,11 +275,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "At this point in the book we have developed the theory for the linear Kalman filter. 
Then, in the last two chapters we broached the topic of using Kalman filters for nonlinear problems. I chose to start off with the Unscented Kalman filter, which probably felt like quite a departure from the linear Kalman filter math. It was also a departure from the historical development of the Kalman filter.\n", - "\n", - "Shortly after Dr. Kálmán published his paper on the Kalman filter people recognized that his theory was limited to linear problems and immediately began work on extending the theory to work with nonlinear problems. As I have said elsewhere nearly the only equation we really know how to solve is $\\mathbf{Ax} = \\mathbf{b}$. That is a slight exaggeration. Many of us have spent years at school learning sophisticated tricks and techniques to handle equations that are not linear. Yet after all that work the vast majority of equations that arise from simple physical systems remain intractable. Certainly there is no way to find general analytic solutions to the Kalman filter equations for nonlinear systems. \n", - "\n", - "In this chapter we will learn the Extended Kalman filter (EKF). The EKF handles nonlinearity by linearizing the system at the point of the current estimate, and then the usual Kalman filter is used to filter this linearized system. It was one of the very first techniques used for nonlinear problems, and it remains the most common technique. Most filters in real world use are EKFs. \n", + "At this point in the book we have developed the theory for the linear Kalman filter. Then, in the last two chapters we broached the topic of using Kalman filters for nonlinear problems. In this chapter we will learn the Extended Kalman filter (EKF). The EKF handles nonlinearity by linearizing the system at the point of the current estimate, and then the linear Kalman filter is used to filter this linearized system. It was one of the very first techniques used for nonlinear problems, and it remains the most common technique. Most filters in real world use are EKFs. \n", "\n", "The EKF provides significant mathematical challenges to the designer of the filter; this is the most challenging chapter of the book. To be honest, I do everything I can to avoid the EKF in favor of other techniques that have been developed to filter nonlinear problems. However, the topic is unavoidable; all classic papers and a majority of current papers in the field use the EKF. Even if you do not use the EKF in your own work you will need to be familiar with the topic to be able to read the literature. " ] @@ -288,14 +284,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Linearizing the System Model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The extended Kalman filter (EKF) works by linearizing the process model for each evolution. For example, consider the problem of tracking a cannonball in flight. Obviously it follows a curved flight path. However, if our update rate is small enough, say 1/100 second, then the trajectory over that time is nearly linear. If we linearize that short segment we will get an answer very close to the actual value, and we can use that value to perform the prediction step of the filter. More often you will have to perform numeric integration. There are many ways to linearize a set of nonlinear differential equations, and the topic is somewhat beyond the scope of this book. In practice, a Taylor series approximation is frequently used with EKFs, and that is what we will use. 
" + "## Linearizing a System" ] }, { @@ -410,7 +399,11 @@ "\n", "For nonlinear problems our function $f()$ is a set of differential equations. Modeling physical systems with differential equations is well outside the scope of this book. You will need to be reasonably well versed in this branch of applied mathematics to successfully implement the EKF for your problem. If you have not read it yet, please read the section **Modeling Dynamic Systems** in the **Kalman Filter Math** chapter as it contains the math that you will need to complete this chapter.\n", "\n", - "I think the easiest way to understand the EKF is to start off with an example. Perhaps the reason for some of my mathematical choices will not be clear, but trust that the end result will be an EKF." + "I think the easiest way to understand the EKF is to start off with an example. Perhaps the reason for some of my mathematical choices will not be clear, but trust that the end result will be an EKF.\n", + "\n", + "\n", + "**orphan**\n", + "The extended Kalman filter (EKF) linearizing the process model for each evolution. For example, consider the problem of tracking a cannonball in flight. Obviously it follows a curved flight path. However, if our update rate is small enough, say 1/100 second, then the trajectory over that time is nearly linear. If we linearize that short segment we will get an answer very close to the actual value, and we can use that value to perform the prediction step of the filter. More often you will have to perform numeric integration. There are many ways to linearize a set of nonlinear differential equations, and the topic is somewhat beyond the scope of this book. In practice, a Taylor series approximation is frequently used with EKFs, and that is what we will use. " ] }, { @@ -745,12 +738,14 @@ "from filterpy.kalman import ExtendedKalmanFilter\n", "\n", "rk = ExtendedKalmanFilter(dim_x=3, dim_z=1)\n", - "radar = RadarSim(dt, pos=0., vel=100., alt=1000.)```\n", + "radar = RadarSim(dt, pos=0., vel=100., alt=1000.)\n", + "```\n", "\n", "We will initialize the filter near the airplane's actual position\n", "\n", "```python\n", - "rk.x = array([radar.pos, radar.vel-10, radar.alt+100])```\n", + "rk.x = array([radar.pos, radar.vel-10, radar.alt+100])\n", + "```\n", "\n", "We assign the system matrix using the first term of the Taylor series expansion we computed above.\n", "\n", @@ -758,7 +753,8 @@ "dt = 0.05\n", "rk.F = eye(3) + array([[0, 1, 0],\n", " [0, 0, 0],\n", - " [0, 0, 0]])*dt```\n", + " [0, 0, 0]])*dt\n", + "```\n", "\n", "After assigning reasonable values to $\\mathbf{R}$, $\\mathbf{Q}$, and $\\mathbf{P}$ we can run the filter with a simple loop\n", "\n", @@ -766,7 +762,8 @@ "for i in range(int(20/dt)):\n", " z = radar.get_range()\n", " rk.update(array([z]), HJacobian_at, hx)\n", - " rk.predict()```\n", + " rk.predict()\n", + "```\n", "\n", "Putting that all together along with some boilerplate code to save the results and plot them, we get" ] @@ -1445,7 +1442,8 @@ "First, `evalf` uses a dictionary to pass in the values you want to use. For example, if your matrix contains an x and y, you can write\n", "\n", "```python\n", - " M.evalf(subs={x:3, y:17})```\n", + " M.evalf(subs={x:3, y:17})\n", + "```\n", " \n", "to evaluate the matrix for `x=3` and `y=17`. 
\n", "\n", diff --git a/12-Particle-Filters.ipynb b/12-Particle-Filters.ipynb index 1c43bbb..3425050 100644 --- a/12-Particle-Filters.ipynb +++ b/12-Particle-Filters.ipynb @@ -436,18 +436,21 @@ "We start by creating the points.\n", "\n", "```python\n", - "N = 20000\n", - "ps = uniform(-1, 1, (N, 2))```\n", + " N = 20ch000\n", + "ps = uniform(-1, 1, (N, 2))\n", + "```\n", "\n", "A point is inside the circle if it's distance from the center point (0, 0) is less than or equal to one. We can compute the distance by using `numpy.linalg.norm`. If you aren't familiar with this, the norm is the magnitude of a vector. Since vectors start at (0, 0) calling norm will compute the point's distance from the origin.\n", "\n", "```python\n", - "dist = np.linalg.norm(ps, axis=1)```\n", + "dist = np.linalg.norm(ps, axis=1)\n", + "```\n", "\n", "Next we compute which of this distances fit the criteria with this code, which returns a bool array that contains `True` if it meets the condition\n", "\n", "```python\n", - "in_circle = dist <= 1```\n", + "in_circle = dist <= 1\n", + "```\n", "\n", "All that is left is to count the points inside the circle, compute pi, and plot the results. I've put it all in one cell so you can experiment with alternative values for `N`." ] @@ -653,7 +656,8 @@ "\n", " d = u[1]*dt + randn(self.N) * std[1]\n", " self.particles[:, 0] += np.cos(self.particles[:, 2]) * d\n", - " self.particles[:, 1] += np.sin(self.particles[:, 2]) * d```" + " self.particles[:, 1] += np.sin(self.particles[:, 2]) * d\n", + "```" ] }, { @@ -672,7 +676,8 @@ " for i, val in enumerate(map_):\n", " if val == z:\n", " belief[i] *= scale\n", - " normalize(belief)```\n", + " normalize(belief)\n", + "```\n", "\n", "We will want to do the same thing with our particles. Each particle has a position. We can also assign it a weight or probability based on how well it matches the measurement. We will want to multiply that new probability into the current weight of the particle. If we then normalize the weights of the particles those that are closest to the robot will generally have a higher weight than ones far from the robot.\n", "\n", @@ -682,7 +687,8 @@ " for i, landmark in enumerate(self.landmarks):\n", " dist = np.linalg.norm(self.particles[:, 0:2] - landmark, axis=1)\n", " self.weights *= scipy.stats.norm(dist, self.R).pdf(z[i])\n", - " self.weights /= sum(self.weights) # normalize```\n", + " self.weights /= sum(self.weights) # normalize\n", + "```\n", "\n", "In the literature this part of the algorithm is called **Sequential Importance Sampling**, or SIS, and the equation for the weights is called the **importance density**. I will give it theoretical underpinnings in a following section. For now I hope that this makes intuitive sense. If we weight the particles according to how how they match the measurements they *probably* are a good sample for the probability distribution of the system after incorporating the measurements. Theory changes 'probably are a good sample' to 'are a good sample'. Different problems will need to tackle this step in slightly different ways.\n", "\n", @@ -731,7 +737,8 @@ " # resample according to indexes\n", " self.particles = self.particles[indexes]\n", " self.weights = self.weights[indexes]\n", - " self.weights /= np.sum(self.weights) # normalize```\n", + " self.weights /= np.sum(self.weights) # normalize\n", + "```\n", " \n", "There are many resampling algorithms. Each has different properties with respect to which particles are sampled, execution time, and memory required. 
I will share a few of them in a later section.\n", "\n", @@ -743,7 +750,8 @@ "\n", "```python\n", "def neff(self):\n", - " return 1. / np.sum(np.square(self.weights))```\n", + " return 1. / np.sum(np.square(self.weights))\n", + "```\n", "\n", "If $\\hat{N}_{eff}$ falls below some threshold it is time to resample. A useful starting point is $\\frac{1}{2}N$, but this varies by problem. It is also possible for $\\hat{N}_{eff} = N$, which means the particle set has collapsed to one point (each has equal weight). It may not be theoretically pure, but if that happens I create a new distribution of particles in the hopes of generating particles with more diversity. If this happens to you often, you may need to increase the number of particles, or otherwise adjust your filter. We will talk more of this later." ] @@ -866,7 +874,8 @@ " pf.predict(u=(0.00, 1.414), std=(.2, .05))\n", " pf.update(z=zs)\n", " if pf.neff() < N/2:\n", - " pf.resample()```\n", + " pf.resample()\n", + "```\n", "\n", "The first line takes advantage of `numpy.linalg.norm`, which computes the Euclidian distance of a vector. In other words, we compute the distance of the robot to each landmark, and add in sensor noise so that this is a realistic simulation. Uou wouldn't add noise if this was real sensor data, of course!\n", "\n", @@ -972,7 +981,8 @@ "\n", "```python\n", "pf = RobotLocalizationParticleFilter(N, 20, 20, landmarks, sensor_std_err)\n", - "pf.create_gaussian_particles(mean=(0, 0), var=(10,10))```" + "pf.create_gaussian_particles(mean=(0, 0), var=(10,10))\n", + "```" ] }, { @@ -1112,7 +1122,8 @@ " for i, landmark in enumerate(self.landmarks):\n", " dist = np.linalg.norm(self.particles[:, 0:2] - landmark, axis=1)\n", " self.weights *= scipy.stats.norm(dist, self.R).pdf(z[i])\n", - " self.weights /= sum(self.weights) # normalize```\n", + " self.weights /= sum(self.weights) # normalize\n", + "```\n", " \n", "The reason for `self.weights.fill(1.)` might have confused you; it confused me the first time I saw it. In all the Bayesian filters up to this chapter we started with the probability distribution created by the `predict` step, and this appears to discard that information by setting all of the weights to 1. Well, we are discarding the weights, but we do not discard the particles. That is a direct result of applying importance sampling - we draw from the known distribution, but weight by the unknown distribution. \n", "\n", @@ -1141,8 +1152,8 @@ "\n", " self.particles = self.particles[indexes]\n", " self.weights = self.weights[indexes]\n", - " self.weights /= np.sum(self.weights)```\n", - "\n", + " self.weights /= np.sum(self.weights)\n", + "```\n", "\n", "### Multinomial Resampling\n", "\n", @@ -1193,7 +1204,8 @@ "def multinomal_resample(weights):\n", " cumulative_sum = np.cumsum(weights)\n", " cumulative_sum[-1] = 1. 
# avoid round-off errors\n", - " return np.searchsorted(cumulative_sum, random(len(weights)))```\n", + " return np.searchsorted(cumulative_sum, random(len(weights)))\n", + "```\n", " \n", "Here is an example:" ] @@ -1230,7 +1242,8 @@ "You may import this from FilterPy using\n", "\n", "```python\n", - "from filterpy.monte_carlo import multinomal_resample```" + "from filterpy.monte_carlo import multinomal_resample\n", + "```" ] }, { @@ -1256,7 +1269,6 @@ " indexes[k] = i\n", " k += 1\n", "\n", - "\n", " # use multinormial resample on the residual to fill up the rest.\n", " residual = w - num_copies # get fractional part\n", " residual /= sum(residual) # normalize\n", @@ -1264,7 +1276,8 @@ " cumulative_sum[-1] = 1. # ensures sum is exactly one\n", " indexes[k:N] = np.searchsorted(cumulative_sum, random(N-k))\n", "\n", - " return indexes```\n", + " return indexes\n", + "```\n", "\n", "You may be tempted to replace the inner for loop with a slice `indexes[k:k + num_copies[i]] = i`, but very short slices are comparatively slow, and the for loop usually runs faster.\n", "\n", @@ -1301,7 +1314,8 @@ "You may import this from FilterPy using\n", "\n", "```python\n", - " from filterpy.monte_carlo import residual_resample```" + " from filterpy.monte_carlo import residual_resample\n", + "```" ] }, { @@ -1359,12 +1373,14 @@ " i += 1\n", " else:\n", " j += 1\n", - " return indexes```\n", + " return indexes\n", + "```\n", "\n", "Import it from FilterPy with\n", "\n", "```python\n", - "from filterpy.monte_carlo import stratified_resample```" + "from filterpy.monte_carlo import stratified_resample\n", + "```" ] }, { @@ -1421,12 +1437,14 @@ " i += 1\n", " else:\n", " j += 1\n", - " return indexes```\n", + " return indexes\n", + "```\n", " \n", "Import from FilterPy with\n", "\n", "```python\n", - " from filterpy.monte_carlo import systematic_resample```" + " from filterpy.monte_carlo import systematic_resample\n", + " ```" ] }, { diff --git a/13-Smoothing.ipynb b/13-Smoothing.ipynb index 99eb499..5acfadf 100644 --- a/13-Smoothing.ipynb +++ b/13-Smoothing.ipynb @@ -497,7 +497,8 @@ " K[k] = dot(P[k], F.T).dot(inv(P_pred))\n", " x[k] += dot(K[k], x[k+1] - dot(F, x[k]))\n", " P[k] += dot(K[k], P[k+1] - P_pred).dot(K[k].T)\n", - " return (x, P, K)```\n", + " return (x, P, K)\n", + "```\n", " \n", "This implementation mirrors the implementation provided in FilterPy. It assumes that the Kalman filter is being run externally in batch mode, and the results of the state and covariances are passed in via the `Xs` and `Ps` variable.\n", "\n", diff --git a/14-Adaptive-Filtering.ipynb b/14-Adaptive-Filtering.ipynb index 8e3fa5d..ecb898c 100644 --- a/14-Adaptive-Filtering.ipynb +++ b/14-Adaptive-Filtering.ipynb @@ -547,7 +547,8 @@ "\n", "q = Q_discrete_white_noise(dim=2, dt=dt, var=0.020)\n", "cvfilter.Q[0:2, 0:2] = q\n", - "cvfilter.Q[2:4, 2:4] = q```" + "cvfilter.Q[2:4, 2:4] = q\n", + "```" ] }, { diff --git a/Appendix-E-Ensemble-Kalman-Filters.ipynb b/Appendix-E-Ensemble-Kalman-Filters.ipynb index f311d3d..f37f229 100644 --- a/Appendix-E-Ensemble-Kalman-Filters.ipynb +++ b/Appendix-E-Ensemble-Kalman-Filters.ipynb @@ -340,7 +340,7 @@ "source": [ "As I already stated, when the filter is initialized a large number of sigma points are drawn from the initial state ($\\mathbf{x}$) and covariance ($\\mathbf{P}$). From there the algorithm proceeds very similarly to the UKF. 
During the prediction step the sigma points are passed through the state transition function, and then perturbed by adding a bit of noise to account for the process noise. During the update step the sigma points are translated into measurement space by passing them through the measurement function, they are perturbed by a small amount to account for the measurement noise. The Kalman gain is computed from the \n", "\n", - "We already mentioned the main difference between the UKF and EnKF - the UKF choses the sigma points deterministically. There is another difference, implied by the algorithm above. With the UKF we generate new sigma points during each predict step, and after passing the points through the nonlinear function we reconstitute them into a mean and covariance by using the *unscented transform*. The EnKF just keeps propagating the originally created sigma points; we only need to compute a mean and covariance as outputs for the filter! \n", + "We already mentioned the main difference between the UKF and EnKF - the UKF choses the sigma points deterministically. There is another difference, implied by the algorithm above. With the UKF we generate new sigma points during each predict step, and after passing the points through the nonlinear function we reconstitute them into a mean and covariance by using the *unscented transform*. The EnKF keeps propagating the originally created sigma points; we only need to compute a mean and covariance as outputs for the filter! \n", "\n", "Let's look at the equations for the filter. As usual, I will leave out the typical subscripts and superscripts; I am expressing an algorithm, not mathematical functions. Here $N$ is the number of sigma points, $\\chi$ is the set of sigma points." ] @@ -358,11 +358,12 @@ "source": [ "$$\\boldsymbol\\chi \\sim \\mathcal{N}(\\mathbf{x}_0, \\mathbf{P}_0)$$\n", "\n", - "This just says to select the sigma points from the filter's initial mean and covariance. In code this might look like\n", + "This says to select the sigma points from the filter's initial mean and covariance. In code this might look like\n", "\n", "```python\n", "N = 1000\n", - "sigmas = multivariate_normal(mean=x, cov=P, size=N)```" + "sigmas = multivariate_normal(mean=x, cov=P, size=N)\n", + "```" ] }, { @@ -389,12 +390,14 @@ "for i, s in enumerate(sigmas):\n", " sigmas[i] = fx(x=s, dt=0.1, u=0.)\n", "\n", - "sigmas += multivariate_normal(x, Q, N)```\n", + "sigmas += multivariate_normal(x, Q, N)\n", + "```\n", "\n", "The second line computes the mean from the sigmas. In Python we will take advantage of `numpy.mean` to do this very concisely and quickly.\n", "\n", "```python\n", - "x = np.mean(sigmas, axis=0)```" + "x = np.mean(sigmas, axis=0)\n", + "```" ] }, { @@ -411,7 +414,8 @@ " P = 0\n", " for s in sigmas:\n", " P += outer(s-x, s-x)\n", - " P = P / (N-1)```" + " P = P / (N-1)\n", + "```" ] }, { @@ -455,7 +459,8 @@ "just passes the sigma points through the measurement function $h$. We name the resulting points $\\chi_h$ to distinguish them from the sigma points. 
In Python we could write this as\n", "\n", "```python\n", - "sigmas_h = h(sigmas, u)```" + "sigmas_h = h(sigmas, u)\n", + "```" ] }, { @@ -466,10 +471,11 @@ "\n", "$$\\mathbf{z}_{mean} = \\frac{1}{N}\\sum_1^N \\boldsymbol\\chi_h$$\n", "\n", - "In Python we can compute that with\n", + "In Python we write\n", "\n", "```python\n", - "z_mean = np.mean(sigmas_h, axis=0)```\n", + "z_mean = np.mean(sigmas_h, axis=0)\n", + "```\n", " \n", "Now that we have the mean of the measurement sigmas we can compute the covariance for every measurement sigma point, and the *cross variance* for the measurement sigma points vs the sigma points. That is expressed by these two equations\n", "\n", @@ -491,7 +497,8 @@ "P_xz = 0\n", "for i in range(N):\n", " P_xz += outer(self.sigmas[i] - self.x, sigmas_h[i] - z_mean)\n", - "P_xz /= N-1```" + "P_xz /= N-1\n", + "```" ] }, { @@ -514,14 +521,16 @@ "```python\n", "v_r = multivariate_normal([0]*dim_z, R, N)\n", "for i in range(N):\n", - " sigmas[i] += dot(K, z + v_r[i] - sigmas_h[i])```\n", + " sigmas[i] += dot(K, z + v_r[i] - sigmas_h[i])\n", + "```\n", "\n", "\n", "Our final step is recompute the filter's mean and covariance.\n", "\n", "```python\n", " x = np.mean(sigmas, axis=0)\n", - " P = self.P - dot3(K, P_zz, K.T)```" + " P = self.P - dot3(K, P_zz, K.T)\n", + "```" ] }, { @@ -554,7 +563,8 @@ " return np.array([x[0]])\n", "\n", "def fx(x, dt):\n", - " return np.dot(F, x)```\n", + " return np.dot(F, x)\n", + "```\n", "\n", "One final thing: the EnKF code, like the UKF code, uses a single dimension for $\\mathbf{x}$, not a two dimensional column matrix as used by the linear kalman filter code.\n", "\n",