From 1d700ab734aa9b6711eb31796beb25cb7659d8e0 Mon Sep 17 00:00:00 2001
From: Prefetch
Date: Tue, 20 Dec 2022 20:11:25 +0100
Subject: More improvements to knowledge base

---
 source/_layouts/category.html                      |   2 +-
 source/know/concept/alfven-waves/index.md          |   6 +-
 source/know/concept/binomial-distribution/index.md | 105 +++++++++-----
 source/know/concept/central-limit-theorem/index.md |  90 +++++++-----
 .../know/concept/conditional-expectation/index.md  |   3 +-
 source/know/concept/dispersive-broadening/index.md |  19 +--
 source/know/concept/holomorphic-function/index.md  |  46 +++---
 source/know/concept/ion-sound-wave/index.md        |  34 +++--
 source/know/concept/lagrange-multiplier/index.md   |  18 ++-
 source/know/concept/langmuir-waves/index.md        |  12 +-
 .../maxwell-boltzmann-distribution/index.md        |  85 ++++++-----
 .../know/concept/modulational-instability/index.md |  69 +++++----
 source/know/concept/optical-wave-breaking/index.md |  51 ++++---
 source/know/concept/random-variable/index.md       |  17 ++-
 source/know/concept/residue-theorem/index.md       |  11 +-
 source/know/concept/self-phase-modulation/index.md |  15 +-
 source/know/concept/self-steepening/index.md       |  29 ++--
 source/know/concept/sigma-algebra/index.md         |   4 +-
 source/know/concept/step-index-fiber/index.md      | 157 ++++++++++++++-------
 .../step-index-fiber/transcendental-full.png       | Bin 122545 -> 109224 bytes
 .../step-index-fiber/transcendental-half.avif      | Bin 21001 -> 19600 bytes
 .../step-index-fiber/transcendental-half.jpg       | Bin 95385 -> 84184 bytes
 .../step-index-fiber/transcendental-half.png       | Bin 88438 -> 90521 bytes
 .../step-index-fiber/transcendental-half.webp      | Bin 48626 -> 43374 bytes
 24 files changed, 472 insertions(+), 301 deletions(-)

(limited to 'source')

diff --git a/source/_layouts/category.html b/source/_layouts/category.html
index 872c06c..b4802e7 100644
--- a/source/_layouts/category.html
+++ b/source/_layouts/category.html
@@ -11,7 +11,7 @@ This is an alphabetical list of the concepts in this category.
 {% assign pages_by_letter = site.pages
   | where_exp: "item", "item.layout == 'concept'"
   | where_exp: "item", "item.categories contains page.title"
-  | group_by_exp: "item", "item.title | truncate: 1, ''"
+  | group_by_exp: "item", "item.sort_title | truncate: 1, ''"
   | sort: "name"
 %}
 {% include alphlist.html pages_by_letter = pages_by_letter %}
diff --git a/source/know/concept/alfven-waves/index.md b/source/know/concept/alfven-waves/index.md
index 31576f3..0396c7a 100644
--- a/source/know/concept/alfven-waves/index.md
+++ b/source/know/concept/alfven-waves/index.md
@@ -61,12 +61,12 @@ $$\begin{aligned}
     = \frac{1}{\mu_0} \nabla \cross \vb{B}_1
 \end{aligned}$$
 
-Substituting this into the momentum equation,
+Substituting this into the above momentum equation,
 and differentiating with respect to $$t$$:
 
 $$\begin{aligned}
     \rho \pdvn{2}{\vb{u}_1}{t}
-    = \frac{1}{\mu_0} \bigg( \Big( \nabla \cross \pdv{}{\vb{B}1}{t} \Big) \cross \vb{B}_0 \bigg)
+    = \frac{1}{\mu_0} \bigg( \Big( \nabla \cross \pdv{\vb{B}_1}{t} \Big) \cross \vb{B}_0 \bigg)
 \end{aligned}$$
 
 For which we can use Faraday's law to rewrite $$\ipdv{\vb{B}_1}{t}$$,
@@ -78,7 +78,7 @@ $$\begin{aligned}
     = \nabla \cross (\vb{u}_1 \cross \vb{B}_0)
 \end{aligned}$$
 
-Inserting this into the momentum equation for $$\vb{u}_1$$
+Inserting this back into the momentum equation for $$\vb{u}_1$$
 thus yields its final form:
 
 $$\begin{aligned}
diff --git a/source/know/concept/binomial-distribution/index.md b/source/know/concept/binomial-distribution/index.md
index dc75221..9bb32d3 100644
--- a/source/know/concept/binomial-distribution/index.md
+++ b/source/know/concept/binomial-distribution/index.md
@@ -46,19 +46,25 @@ $$\begin{aligned}
 
 
 {% include proof/start.html id="proof-mean" -%}
-The trick is to treat $$p$$ and $$q$$ as independent until the last moment:
+The trick is to treat $$p$$ and $$q$$ as independent and introduce a derivative:
+
+$$\begin{aligned}
+    \mu
+    &= \sum_{n = 0}^N n P_N(n)
+    = \sum_{n = 0}^N n \binom{N}{n} p^n q^{N - n}
+    = \sum_{n = 0}^N \binom{N}{n} \bigg( p \pdv{(p^n)}{p} \bigg) q^{N - n}
+\end{aligned}$$
+
+Then, using the fact that the binomial coefficients appear when writing out $$(p + q)^N$$:
 
 $$\begin{aligned}
     \mu
-    &= \sum_{n = 0}^N n \binom{N}{n} p^n q^{N - n}
-    = \sum_{n = 0}^N \binom{N}{n} \Big( p \pdv{(p^n)}{p} \Big) q^{N - n}
-    \\
     &= p \pdv{}{p}\sum_{n = 0}^N \binom{N}{n} p^n q^{N - n}
     = p \pdv{}{p}(p + q)^N
     = N p (p + q)^{N - 1}
 \end{aligned}$$
 
-Inserting $$q = 1 - p$$ then gives the desired result.
+Finally, inserting $$q = 1 - p$$ gives the desired result.
 {% include proof/end.html id="proof-mean" %}
 
 
@@ -73,18 +79,21 @@ $$\begin{aligned}
 
 
 {% include proof/start.html id="proof-var" -%}
+We reuse the previous trick to find $$\overline{n^2}$$
 (the mean squared number of successes):
 
 $$\begin{aligned}
     \overline{n^2}
     &= \sum_{n = 0}^N n^2 \binom{N}{n} p^n q^{N - n}
-    = \sum_{n = 0}^N n \binom{N}{n} \Big( p \pdv{}{p}\Big)^2 p^n q^{N - n}
+    = \sum_{n = 0}^N n \binom{N}{n} \bigg( p \pdv{}{p} \bigg) p^n q^{N - n}
+    \\
+    &= \sum_{n = 0}^N \binom{N}{n} \bigg( p \pdv{}{p} \bigg)^2 p^n q^{N - n}
+    = \bigg( p \pdv{}{p} \bigg)^2 \sum_{n = 0}^N \binom{N}{n} p^n q^{N - n}
     \\
-    &= \Big( p \pdv{}{p}\Big)^2 \sum_{n = 0}^N \binom{N}{n} p^n q^{N - n}
-    = \Big( p \pdv{}{p}\Big)^2 (p + q)^N
+    &= \bigg( p \pdv{}{p} \bigg)^2 (p + q)^N
+    = N p \pdv{}{p} \Big( p (p + q)^{N - 1} \Big)
     \\
-    &= N p \pdv{}{p}p (p + q)^{N - 1}
-    = N p \big( (p + q)^{N - 1} + (N - 1) p (p + q)^{N - 2} \big)
+    &= N p \big( (p + q)^{N - 1} + (N - 1) p (p + q)^{N - 2} \big)
     \\
     &= N p + N^2 p^2 - N p^2
 \end{aligned}$$
@@ -108,7 +117,7 @@ a fact that is sometimes called the **de Moivre-Laplace theorem**:
 
 $$\begin{aligned}
     \boxed{
-        \lim_{N \to \infty} P_N(n) = \frac{1}{\sqrt{2 \pi \sigma^2}} \exp\!\Big(\!-\!\frac{(n - \mu)^2}{2 \sigma^2} \Big)
+        \lim_{N \to \infty} P_N(n) = \frac{1}{\sqrt{2 \pi \sigma^2}} \exp\!\bigg(\!-\!\frac{(n - \mu)^2}{2 \sigma^2} \bigg)
     }
 \end{aligned}$$
 
@@ -121,73 +130,94 @@ $$\begin{aligned}
     \ln\!\big(P_N(n)\big)
     &= \sum_{m = 0}^\infty \frac{(n - \mu)^m}{m!} D_m(\mu)
     \quad \mathrm{where} \quad
-    D_m(n) = \dvn{m}{\ln\!\big(P_N(n)\big)}{n}
+    D_m(n)
+    \equiv \dvn{m}{\ln\!\big(P_N(n)\big)}{n}
 \end{aligned}$$
 
-We use Stirling's approximation to calculate the factorials in $$D_m$$:
+For future convenience while calculating the $$D_m$$, we write out $$\ln(P_N)$$ now:
 
 $$\begin{aligned}
     \ln\!\big(P_N(n)\big)
-    &= \ln(N!) - \ln(n!) - \ln\!\big((N - n)!\big) + n \ln(p) + (N - n) \ln(q)
-    \\
-    &\approx \ln(N!) - n \big( \ln(n)\!-\!\ln(p)\!-\!1 \big) - (N\!-\!n) \big( \ln(N\!-\!n)\!-\!\ln(q)\!-\!1 \big)
+    &= \ln(N!) - \ln(n!) - \ln\!\big((N \!-\! n)!\big) + n \ln(p) + (N \!-\! n) \ln(q)
 \end{aligned}$$
 
-For $$D_0(\mu)$$, we need to use a stronger version of Stirling's approximation
-to get a non-zero result. We take advantage of $$N - N p = N q$$:
+For $$D_0(\mu)$$ specifically,
+we need to use a stronger version of *Stirling's approximation*
+to arrive at a nonzero result in the end.
+We know that $$N - N p = N q$$:
 
 $$\begin{aligned}
     D_0(\mu)
+    &= \ln\!\big(P_N(n)\big) \big|_{n = \mu}
+    \\
+    &= \ln(N!) - \ln(\mu!) - \ln\!\big((N \!-\! \mu)!\big) + \mu \ln(p) + (N \!-\! \mu) \ln(q)
+    \\
     &= \ln(N!) - \ln\!\big((N p)!\big) - \ln\!\big((N q)!\big) + N p \ln(p) + N q \ln(q)
     \\
-    &= \Big( N \ln(N) - N + \frac{1}{2} \ln(2\pi N) \Big)
+    &\approx \Big( N \ln(N) - N + \frac{1}{2} \ln(2\pi N) \Big)
     - \Big( N p \ln(N p) - N p + \frac{1}{2} \ln(2\pi N p) \Big) \\
     &\qquad - \Big( N q \ln(N q) - N q + \frac{1}{2} \ln(2\pi N q) \Big)
     + N p \ln(p) + N q \ln(q)
     \\
-    &= N \ln(N) - N (p + q) \ln(N) + N (p + q) - N - \frac{1}{2} \ln(2\pi N p q)
+    &= N \ln(N) - N (p \!+\! q) \ln(N) + N (p \!+\! q) - N - \frac{1}{2} \ln(2\pi N p q)
     \\
     &= - \frac{1}{2} \ln(2\pi N p q)
-    = \ln\!\Big( \frac{1}{\sqrt{2\pi \sigma^2}} \Big)
+    = \ln\!\bigg( \frac{1}{\sqrt{2\pi \sigma^2}} \bigg)
 \end{aligned}$$
 
-Next, we expect that $$D_1(\mu) = 0$$, because $$\mu$$ is the maximum.
-This is indeed the case:
+Next, for $$D_m(\mu)$$ with $$m \ge 1$$,
+we can use a weaker version of Stirling's approximation:
+
+$$\begin{aligned}
+    \ln(P_N)
+    &\approx \ln(N!) - n \big( \ln(n) \!-\! 1 \big) - (N \!-\! n) \big( \ln(N \!-\! n) \!-\! 1 \big) + n \ln(p) + (N \!-\! n) \ln(q)
+    \\
+    &= \ln(N!) - n \big( \ln(n) - \ln(p) - 1 \big) - (N\!-\!n) \big( \ln(N\!-\!n) - \ln(q) - 1 \big)
+\end{aligned}$$
+
+We expect that $$D_1(\mu) = 0$$, because $$P_N$$ is maximized at $$\mu$$.
+Indeed it is:
 
 $$\begin{aligned}
     D_1(n)
-    &= - \big( \ln(n)\!-\!\ln(p)\!-\!1 \big) + \big( \ln(N\!-\!n)\!-\!\ln(q)\!-\!1 \big) - 1 + 1
+    &= \dv{}{n} \ln\!\big(P_N(n)\big)
     \\
-    &= - \ln(n) + \ln(N - n) + \ln(p) - \ln(q)
+    &= - \big( \ln(n) - \ln(p) - 1 \big) + \big( \ln(N\!-\!n) - \ln(q) - 1 \big) - \frac{n}{n} + \frac{N \!-\! n}{N \!-\! n}
+    \\
+    &= - \ln(n) + \ln(N \!-\! n) + \ln(p) - \ln(q)
     \\
     D_1(\mu)
-    &= \ln(N q) - \ln(N p) + \ln(p) - \ln(q)
-    = \ln(N p q) - \ln(N p q)
-    = 0
+    &= - \ln(\mu) + \ln(N \!-\! \mu) + \ln(p) - \ln(q)
+    \\
+    &= - \ln(N p q) + \ln(N p q)
+    \\
+    &= 0
 \end{aligned}$$
 
-For the same reason, we expect that $$D_2(\mu)$$ is negative.
+For the same reason, we expect $$D_2(\mu)$$ to be negative.
 We find the following expression:
 
 $$\begin{aligned}
     D_2(n)
-    &= - \frac{1}{n} - \frac{1}{N - n}
-    \qquad
+    &= \dvn{2}{}{n} \ln\!\big(P_N(n)\big)
+    = \dv{}{n} D_1(n)
+    = - \frac{1}{n} - \frac{1}{N - n}
+    \\
     D_2(\mu)
-    = - \frac{1}{Np} - \frac{1}{Nq}
+    &= - \frac{1}{Np} - \frac{1}{Nq}
     = - \frac{p + q}{N p q}
     = - \frac{1}{\sigma^2}
 \end{aligned}$$
 
-The higher-order derivatives tend to zero for $$N \to \infty$$, so we discard them:
+The higher-order derivatives vanish much faster as $$N \to \infty$$, so we discard them:
 
 $$\begin{aligned}
     D_3(n)
     = \frac{1}{n^2} - \frac{1}{(N - n)^2}
-    \qquad
+    \qquad \quad
     D_4(n)
     = - \frac{2}{n^3} - \frac{2}{(N - n)^3}
-    \qquad
+    \qquad \quad
     \cdots
 \end{aligned}$$
 
@@ -197,13 +227,14 @@ the Taylor series approximately becomes:
 $$\begin{aligned}
     \ln\!\big(P_N(n)\big)
     \approx D_0(\mu) + \frac{(n - \mu)^2}{2} D_2(\mu)
-    = \ln\!\Big( \frac{1}{\sqrt{2\pi \sigma^2}} \Big) - \frac{(n - \mu)^2}{2 \sigma^2}
+    = \ln\!\bigg( \frac{1}{\sqrt{2\pi \sigma^2}} \bigg) - \frac{(n - \mu)^2}{2 \sigma^2}
 \end{aligned}$$
 
-Taking $$\exp$$ of this expression then yields a normalized Gaussian distribution.
+Raising $$e$$ to this expression then yields a normalized Gaussian distribution.
 {% include proof/end.html id="proof-normal" %}
 
 
+
 ## References
 1.  H. Gould, J. Tobochnik,
     *Statistical and thermal physics*, 2nd edition,
diff --git a/source/know/concept/central-limit-theorem/index.md b/source/know/concept/central-limit-theorem/index.md
index 595cee7..e933ee7 100644
--- a/source/know/concept/central-limit-theorem/index.md
+++ b/source/know/concept/central-limit-theorem/index.md
@@ -18,24 +18,24 @@ the resulting means $$\mu_m$$ are normally distributed
 across the $$M$$ samples if $$N$$ is sufficiently large.
 
 More formally, for $$N$$ independent variables $$x_n$$ with probability distributions $$p(x_n)$$,
-the central limit theorem states the following,
-where we define the sum $$S$$:
+we define the following totals of all variables, means and variances:
 
 $$\begin{aligned}
-    S = \sum_{n = 1}^N x_n
-    \qquad
-    \mu_S = \sum_{n = 1}^N \mu_n
-    \qquad
-    \sigma_S^2 = \sum_{n = 1}^N \sigma_n^2
+    t \equiv \sum_{n = 1}^N x_n
+    \qquad \qquad
+    \mu_t \equiv \sum_{n = 1}^N \mu_n
+    \qquad \qquad
+    \sigma_t^2 \equiv \sum_{n = 1}^N \sigma_n^2
 \end{aligned}$$
 
-And crucially, it states that the probability distribution $$p_N(S)$$ of $$S$$ for $$N$$ variables
+The central limit theorem then states that
+the probability distribution $$p_N(t)$$ of $$t$$ for $$N$$ variables
 will become a normal distribution when $$N$$ goes to infinity:
 
 $$\begin{aligned}
     \boxed{
-        \lim_{N \to \infty} \!\big(p_N(S)\big)
-        = \frac{1}{\sigma_S \sqrt{2 \pi}} \exp\!\Big( -\frac{(\mu_S - S)^2}{2 \sigma_S^2} \Big)
+        \lim_{N \to \infty} \!\big(p_N(t)\big)
+        = \frac{1}{\sigma_t \sqrt{2 \pi}} \exp\!\bigg( -\frac{(t - \mu_t)^2}{2 \sigma_t^2} \bigg)
     }
 \end{aligned}$$
 
@@ -45,7 +45,8 @@ Given a probability density $$p(x)$$, its [Fourier transform](/know/concept/four
 is called the **characteristic function** $$\phi(k)$$:
 
 $$\begin{aligned}
-    \phi(k) = \int_{-\infty}^\infty p(x) \exp(i k x) \dd{x}
+    \phi(k)
+    \equiv \int_{-\infty}^\infty p(x) \exp(i k x) \dd{x}
 \end{aligned}$$
 
 Note that $$\phi(k)$$ can be interpreted as the average of $$\exp(i k x)$$.
@@ -54,17 +55,19 @@ where an overline denotes the mean:
 
 $$\begin{aligned}
     \phi(k)
-    = \sum_{n = 0}^\infty \frac{k^n}{n!} \: \phi^{(n)}(0)
-    \qquad
+    = \sum_{n = 0}^\infty \frac{k^n}{n!} \bigg( \dvn{n}{\phi}{k} \Big|_{k = 0} \bigg)
+    \qquad \qquad
     \phi(k)
-    = \overline{\exp(i k x)} = \sum_{n = 0}^\infty \frac{(ik)^n}{n!} \overline{x^n}
+    = \overline{\exp(i k x)}
+    = \sum_{n = 0}^\infty \frac{(ik)^n}{n!} \overline{x^n}
 \end{aligned}$$
 
 By comparing the coefficients of these two power series,
 we get a useful relation:
 
 $$\begin{aligned}
-    \phi^{(n)}(0) = i^n \: \overline{x^n}
+    \dvn{n}{\phi}{k} \Big|_{k = 0}
+    = i^n \: \overline{x^n}
 \end{aligned}$$
 
 Next, the **cumulants** $$C^{(n)}$$ are defined from the Taylor expansion of $$\ln\!\big(\phi(k)\big)$$:
@@ -73,73 +76,82 @@ $$\begin{aligned}
     \ln\!\big( \phi(k) \big)
     = \sum_{n = 1}^\infty \frac{(ik)^n}{n!} C^{(n)}
     \quad \mathrm{where} \quad
-    C^{(n)} = \frac{1}{i^n} \: \dvn{n}{}{k} \Big(\ln\!\big(\phi(k)\big)\Big) \Big|_{k = 0}
+    C^{(n)}
+    \equiv \frac{1}{i^n} \: \dvn{n}{}{k} \ln\!\big(\phi(k)\big) \Big|_{k = 0}
 \end{aligned}$$
 
 The first two cumulants $$C^{(1)}$$ and $$C^{(2)}$$ are of particular interest,
-since they turn out to be the mean and the variance respectively,
-using our earlier relation:
+since they turn out to be the mean and the variance respectively.
+Using our earlier relation:
 
 $$\begin{aligned}
     C^{(1)}
-    &= - i \dv{}{k} \Big(\ln\!\big(\phi(k)\big)\Big) \Big|_{k = 0}
+    &= - i \dv{}{k} \ln\!\big(\phi(k)\big) \Big|_{k = 0}
     = - i \frac{\phi'(0)}{\exp(0)}
     = \overline{x}
     \\
     C^{(2)}
-    &= - \dvn{2}{}{k} \Big(\ln\!\big(\phi(k)\big)\Big) \Big|_{k = 0}
+    &= - \dvn{2}{}{k} \ln\!\big(\phi(k)\big) \Big|_{k = 0}
     = \frac{\big(\phi'(0)\big)^2}{\exp(0)^2} - \frac{\phi''(0)}{\exp(0)}
     = - \overline{x}^2 + \overline{x^2} = \sigma^2
 \end{aligned}$$
 
-Let us now define $$S$$ as the sum of $$N$$ independent variables $$x_n$$, in other words:
+Now that we have introduced these tools,
+we define $$t$$ as the sum
+of $$N$$ independent variables $$x_n$$, in other words:
 
 $$\begin{aligned}
-    S = \sum_{n = 1}^N x_n = x_1 + x_2 + ... + x_N
+    t
+    \equiv \sum_{n = 1}^N x_n = x_1 + x_2 + ... + x_N
 \end{aligned}$$
 
-The probability density of $$S$$ is then as follows, where $$p(x_n)$$ are
+The probability density of $$t$$ is then as follows, where $$p(x_n)$$ are
 the densities of all the individual variables and $$\delta$$ is
 the [Dirac delta function](/know/concept/dirac-delta-function/):
 
 $$\begin{aligned}
-    p(S)
-    &= \int\cdots\int_{-\infty}^\infty \Big( \prod_{n = 1}^N p(x_n) \Big) \: \delta\Big( S - \sum_{n = 1}^N x_n \Big) \dd{x_1} \cdots \dd{x_N}
+    p(t)
+    &= \int\cdots\int_{-\infty}^\infty \Big( \prod_{n = 1}^N p(x_n) \Big) \: \delta\Big( t - \sum_{n = 1}^N x_n \Big) \dd{x_1} \cdots \dd{x_N}
     \\
-    &= \Big( p_1 * \big( p_2 * ( ... * (p_N * \delta))\big)\Big)(S)
+    &= \Big( p_1 * \big( p_2 * ( ... * (p_N * \delta))\big)\Big)(t)
 \end{aligned}$$
 
 In other words, the integrals pick out all combinations of $$x_n$$ which
-add up to the desired $$S$$-value, and multiply the probabilities
+add up to the desired $$t$$-value, and multiply the probabilities
 $$p(x_1) p(x_2) \cdots p(x_N)$$ of each such case. This is a convolution,
 so the [convolution theorem](/know/concept/convolution-theorem/)
 states that it is a product in the Fourier domain:
 
 $$\begin{aligned}
-    \phi_S(k) = \prod_{n = 1}^N \phi_n(k)
+    \phi_t(k)
+    = \prod_{n = 1}^N \phi_n(k)
 \end{aligned}$$
 
 By taking the logarithm of both sides, the product becomes a sum,
 which we further expand:
 
 $$\begin{aligned}
-    \ln\!\big(\phi_S(k)\big)
+    \ln\!\big(\phi_t(k)\big)
     = \sum_{n = 1}^N \ln\!\big(\phi_n(k)\big)
     = \sum_{n = 1}^N \sum_{m = 1}^{\infty} \frac{(ik)^m}{m!} C_n^{(m)}
 \end{aligned}$$
 
-Consequently, the cumulants $$C^{(m)}$$ stack additively for the sum $$S$$
+Consequently, the cumulants $$C^{(m)}$$ stack additively for the sum $$t$$
 of independent variables $$x_m$$, and therefore
 the means $$C^{(1)}$$ and variances $$C^{(2)}$$ do too:
 
 $$\begin{aligned}
-    C_S^{(m)} = \sum_{n = 1}^N C_n^{(m)} = C_1^{(m)} + C_2^{(m)} + ... + C_N^{(m)}
+    C_t^{(m)}
+    = \sum_{n = 1}^N C_n^{(m)}
+    = C_1^{(m)} + C_2^{(m)} + ... + C_N^{(m)}
 \end{aligned}$$
 
 We now introduce the scaled sum $$z$$ as the new combined variable:
 
 $$\begin{aligned}
-    z = \frac{S}{\sqrt{N}} = \frac{1}{\sqrt{N}} (x_1 + x_2 + ... + x_N)
+    z
+    \equiv \frac{t}{\sqrt{N}}
+    = \frac{1}{\sqrt{N}} (x_1 + x_2 + ... + x_N)
 \end{aligned}$$
 
 Its characteristic function $$\phi_z(k)$$ is then as follows,
@@ -176,28 +188,30 @@ For sufficiently large $$N$$, we can therefore approximate it using just the fir
 $$\begin{aligned}
     \ln\!\big( \phi_z(k) \big)
     &\approx i k C^{(1)} - \frac{k^2}{2} C^{(2)}
-    = i k \overline{z} - \frac{k^2}{2} \sigma_z^2
+    = i k \mu_z - \frac{k^2}{2} \sigma_z^2
     \\
+    \implies \quad
     \phi_z(k)
-    &\approx \exp(i k \overline{z}) \exp(- k^2 \sigma_z^2 / 2)
+    &\approx \exp(i k \mu_z) \exp(- k^2 \sigma_z^2 / 2)
 \end{aligned}$$
 
 We take its inverse Fourier transform to get the density $$p(z)$$,
-which turns out to be a Gaussian normal distribution,
-which is even already normalized:
+which turns out to be a Gaussian normal distribution
+and is even already normalized:
 
 $$\begin{aligned}
     p(z)
     = \hat{\mathcal{F}}^{-1} \{\phi_z(k)\}
-    &= \frac{1}{2 \pi} \int_{-\infty}^\infty \exp\!\big(\!-\! i k (z - \overline{z})\big) \exp(- k^2 \sigma_z^2 / 2) \dd{k}
+    &= \frac{1}{2 \pi} \int_{-\infty}^\infty \exp\!\big(\!-\! i k (z - \mu_z)\big) \exp(- k^2 \sigma_z^2 / 2) \dd{k}
     \\
-    &= \frac{1}{\sqrt{2 \pi \sigma_z^2}} \exp\!\Big(\!-\! \frac{(z - \overline{z})^2}{2 \sigma_z^2} \Big)
+    &= \frac{1}{\sqrt{2 \pi \sigma_z^2}} \exp\!\Big(\!-\! \frac{(z - \mu_z)^2}{2 \sigma_z^2} \Big)
 \end{aligned}$$
 
 Therefore, the sum of many independent variables tends to a normal distribution,
 regardless of the densities of the individual variables.
 
 
+
 ## References
 1.  H. Gould, J. Tobochnik,
     *Statistical and thermal physics*, 2nd edition,
diff --git a/source/know/concept/conditional-expectation/index.md b/source/know/concept/conditional-expectation/index.md
index f64fa72..cd40315 100644
--- a/source/know/concept/conditional-expectation/index.md
+++ b/source/know/concept/conditional-expectation/index.md
@@ -41,7 +41,7 @@ Where $$Q$$ is a renormalized probability function,
 which assigns zero to all events incompatible with $$Y = y$$.
 If we allow $$\Omega$$ to be continuous,
 then from the definition $$\mathbf{E}[X]$$,
-we know that the following Lebesgue integral can be used,
+we know that the following *Lebesgue integral* can be used,
 which we call $$f(y)$$:
 
 $$\begin{aligned}
@@ -103,6 +103,7 @@ such that $$\mathbf{E}[X | \sigma(Y)] = f(Y)$$,
 then $$Z = \mathbf{E}[X | \sigma(Y)]$$ is unique.
 
 
+
 ## Properties
 
 A conditional expectation defined in this way has many useful properties,
diff --git a/source/know/concept/dispersive-broadening/index.md b/source/know/concept/dispersive-broadening/index.md
index 746eb6d..9642737 100644
--- a/source/know/concept/dispersive-broadening/index.md
+++ b/source/know/concept/dispersive-broadening/index.md
@@ -9,10 +9,10 @@ categories:
 layout: "concept"
 ---
 
-In optical fibers, **dispersive broadening** is a (linear) effect
+In optical fibers, **dispersive broadening** is a linear effect
 where group velocity dispersion (GVD) "smears out" a pulse in the time domain
 due to the different group velocities of its frequencies,
-since pulses always have a non-zero width in the $$\omega$$-domain.
+since pulses always have a nonzero width in the $$\omega$$-domain.
 No new frequencies are created.
 
 A pulse envelope $$A(z, t)$$ inside a fiber must obey the nonlinear Schrödinger equation,
@@ -29,7 +29,7 @@ and consider a Gaussian initial condition:
 
 $$\begin{aligned}
     A(0, t)
-    = \sqrt{P_0} \exp\!\Big(\!-\!\frac{t^2}{2 T_0^2}\Big)
+    = \sqrt{P_0} \exp\!\bigg(\!-\!\frac{t^2}{2 T_0^2}\bigg)
 \end{aligned}$$
 
 By [Fourier transforming](/know/concept/fourier-transform/) in $$t$$,
@@ -38,7 +38,8 @@ where it can be seen that the amplitude
 decreases and the width increases with $$z$$:
 
 $$\begin{aligned}
-    A(z,t) = \sqrt{\frac{P_0}{1 - i \beta_2 z / T_0^2}}
+    A(z,t)
+    = \sqrt{\frac{P_0}{1 - i \beta_2 z / T_0^2}}
     \exp\!\bigg(\! -\!\frac{t^2 / (2 T_0^2)}{1 + \beta_2^2 z^2 / T_0^4} \big( 1 + i \beta_2 z / T_0^2 \big) \bigg)
 \end{aligned}$$
 
@@ -48,10 +49,12 @@ as the distance over which the half-width at $$1/e$$ of maximum power
 (initially $$T_0$$) increases by a factor of $$\sqrt{2}$$:
 
 $$\begin{aligned}
-    T_0 \sqrt{1 + \beta_2^2 L_D^2 / T_0^4} = T_0 \sqrt{2}
+    T_0 \sqrt{1 + \beta_2^2 L_D^2 / T_0^4}
+    = T_0 \sqrt{2}
     \qquad \implies \qquad
     \boxed{
-        L_D = \frac{T_0^2}{|\beta_2|}
+        L_D
+        \equiv \frac{T_0^2}{|\beta_2|}
     }
 \end{aligned}$$
 
@@ -68,7 +71,7 @@ where $$\phi(z, t)$$ is the phase of $$A(z, t) = \sqrt{P(z, t)} \exp(i \phi(z, t
 
 $$\begin{aligned}
     \omega_{\mathrm{GVD}}(z,t)
-    = \pdv{}{t}\Big( \frac{\beta_2 z t^2 / (2 T_0^4)}{1 + \beta_2^2 z^2 / T_0^4} \Big)
+    = \pdv{}{t}\bigg( \frac{\beta_2 z t^2 / (2 T_0^4)}{1 + \beta_2^2 z^2 / T_0^4} \bigg)
     = \frac{\beta_2 z / T_0^2}{1 + \beta_2^2 z^2 / T_0^4} \frac{t}{T_0^2}
 \end{aligned}$$
 
@@ -76,7 +79,7 @@ This expression is linear in time, and depending on the sign of $$\beta_2$$,
 frequencies on one side of the pulse arrive first,
 and those on the other side arrive last.
 The effect is stronger for smaller $$T_0$$:
-this makes sense, since short pulses are spectrally wider.
+this makes sense, since shorter pulses are spectrally wider.
 
 The interaction between dispersion and [self-phase modulation](/know/concept/self-phase-modulation/)
 leads to many interesting effects,
diff --git a/source/know/concept/holomorphic-function/index.md b/source/know/concept/holomorphic-function/index.md
index cf252c0..976758b 100644
--- a/source/know/concept/holomorphic-function/index.md
+++ b/source/know/concept/holomorphic-function/index.md
@@ -9,13 +9,13 @@ layout: "concept"
 ---
 
 In complex analysis, a complex function $$f(z)$$ of a complex variable $$z$$
-is called **holomorphic** or **analytic** if it is complex differentiable in the
-neighbourhood of every point of its domain.
+is called **holomorphic** or **analytic** if it is **complex differentiable**
+in the vicinity of every point of its domain.
 This is a very strong condition.
 
 As a result, holomorphic functions are infinitely differentiable and
 equal their Taylor expansion at every point. In physicists' terms,
-they are extremely "well-behaved" throughout their domain.
+they are very "well-behaved" throughout their domain.
 
 More formally, a given function $$f(z)$$ is holomorphic in a certain region
 if the following limit exists for all $$z$$ in that region,
@@ -23,14 +23,17 @@ and for all directions of $$\Delta z$$:
 
 $$\begin{aligned}
     \boxed{
-        f'(z) = \lim_{\Delta z \to 0} \frac{f(z + \Delta z) - f(z)}{\Delta z}
+        f'(z)
+        = \lim_{\Delta z \to 0} \frac{f(z + \Delta z) - f(z)}{\Delta z}
     }
 \end{aligned}$$
 
 We decompose $$f$$ into the real functions $$u$$ and $$v$$ of real variables $$x$$ and $$y$$:
 
 $$\begin{aligned}
-    f(z) = f(x + i y) = u(x, y) + i v(x, y)
+    f(z)
+    = f(x + i y)
+    = u(x, y) + i v(x, y)
 \end{aligned}$$
 
 Since we are free to choose the direction of $$\Delta z$$, we choose $$\Delta x$$ and $$\Delta y$$:
@@ -56,9 +59,9 @@ $$\begin{aligned}
     }
 \end{aligned}$$
 
-Therefore, a given function $$f(z)$$ is holomorphic if and only if its real
-and imaginary parts satisfy these equations. This gives an idea of how
-strict the criteria are to qualify as holomorphic.
+Therefore, a given function $$f(z)$$ is holomorphic if and only if
+its real and imaginary parts satisfy these equations.
+This gives an idea of how strict the criteria are to qualify as holomorphic.
 
 
 
@@ -70,7 +73,8 @@ provided that $$f(z)$$ is holomorphic for all $$z$$ in the area enclosed by $$C$
 
 $$\begin{aligned}
     \boxed{
-        \oint_C f(z) \dd{z} = 0
+        \oint_C f(z) \dd{z}
+        = 0
     }
 \end{aligned}$$
 
@@ -86,34 +90,36 @@ $$\begin{aligned}
     &= \oint_C u \dd{x} - v \dd{y} + i \oint_C v \dd{x} + u \dd{y}
 \end{aligned}$$
 
-Using Green's theorem, we integrate over the area $$A$$ enclosed by $$C$$:
+Using *Green's theorem*, we integrate over the area $$A$$ enclosed by $$C$$:
 
 $$\begin{aligned}
     \oint_C f(z) \dd{z}
     &= - \iint_A \pdv{v}{x} + \pdv{u}{y} \dd{x} \dd{y} + i \iint_A \pdv{u}{x} - \pdv{v}{y} \dd{x} \dd{y}
 \end{aligned}$$
 
-Since $$f(z)$$ is holomorphic, $$u$$ and $$v$$ satisfy the Cauchy-Riemann
-equations, such that the integrands disappear and the final result is zero.
+Since $$f(z)$$ is holomorphic, $$u$$ and $$v$$ satisfy the Cauchy-Riemann equations,
+such that the integrands disappear and the final result is zero.
 {% include proof/end.html id="proof-int-theorem" %}
 
 
-An interesting consequence is **Cauchy's integral formula**, which
-states that the value of $$f(z)$$ at an arbitrary point $$z_0$$ is
-determined by its values on an arbitrary contour $$C$$ around $$z_0$$:
+An interesting consequence is **Cauchy's integral formula**,
+which states that the value of $$f(z)$$ at an arbitrary point $$z_0$$
+is determined by its values on an arbitrary contour $$C$$ around $$z_0$$:
 
 $$\begin{aligned}
     \boxed{
-        f(z_0) = \frac{1}{2 \pi i} \oint_C \frac{f(z)}{z - z_0} \dd{z}
+        f(z_0)
+        = \frac{1}{2 \pi i} \oint_C \frac{f(z)}{z - z_0} \dd{z}
     }
 \end{aligned}$$
 
 
 {% include proof/start.html id="proof-int-formula" -%}
-Thanks to the integral theorem, we know that the shape and size
-of $$C$$ is irrelevant. Therefore we choose it to be a circle with radius $$r$$,
-such that the integration variable becomes $$z = z_0 + r e^{i \theta}$$. Then
-we integrate by substitution:
+Thanks to the integral theorem, we know that
+the shape and size of $$C$$ are irrelevant.
+Therefore we choose it to be a circle with radius $$r$$,
+such that the integration variable becomes $$z = z_0 + r e^{i \theta}$$.
+Then we integrate by substitution:
 
 $$\begin{aligned}
     \frac{1}{2 \pi i} \oint_C \frac{f(z)}{z - z_0} \dd{z}
diff --git a/source/know/concept/ion-sound-wave/index.md b/source/know/concept/ion-sound-wave/index.md
index 8749f1a..6a9dcff 100644
--- a/source/know/concept/ion-sound-wave/index.md
+++ b/source/know/concept/ion-sound-wave/index.md
@@ -49,7 +49,7 @@ $$\begin{aligned}
 
 Where the perturbations $$n_{i1}$$, $$n_{e1}$$, $$\vb{u}_{i1}$$ and $$\phi_1$$ are tiny,
 and the equilibrium components $$n_{i0}$$, $$n_{e0}$$, $$\vb{u}_{i0}$$ and $$\phi_0$$
-by definition satisfy:
+are assumed to satisfy:
 
 $$\begin{aligned}
     \pdv{n_{i0}}{t} = 0
@@ -63,11 +63,7 @@ $$\begin{aligned}
     \phi_0 = 0
 \end{aligned}$$
 
-Inserting this decomposition into the momentum equations
-yields new equations.
-Note that we will implicitly use $$\vb{u}_{i0} = 0$$
-to pretend that the [material derivative](/know/concept/material-derivative/)
-$$\mathrm{D}/\mathrm{D} t$$ is linear:
+Inserting this decomposition into the momentum equations yields new equations:
 
 $$\begin{aligned}
     m_i (n_{i0} \!+\! n_{i1}) \frac{\mathrm{D} (\vb{u}_{i0} \!+\! \vb{u}_{i1})}{\mathrm{D} t}
@@ -77,17 +73,19 @@ $$\begin{aligned}
     &= - q_e (n_{e0} \!+\! n_{e1}) \nabla (\phi_0 \!+\! \phi_1) - \gamma_e k_B T_e \nabla (n_{e0} \!+\! n_{e1})
 \end{aligned}$$
 
-Using the defined properties of the equilibrium components
-$$n_{i0}$$, $$n_{e0}$$, $$\vb{u}_{i0}$$ and $$\phi_0$$,
-and neglecting all products of perturbations for being small,
-this reduces to:
+Using the assumed properties of $$n_{i0}$$, $$n_{e0}$$, $$\vb{u}_{i0}$$ and $$\phi_0$$,
+and discarding products of perturbations for being too small,
+we arrive at the equations below.
+Our choice $$\vb{u}_{i0} = 0$$ lets us linearize
+the [material derivative](/know/concept/material-derivative/)
+$$\mathrm{D}/\mathrm{D} t \approx \ipdv{}{t}$$ for the ions:
 
 $$\begin{aligned}
     m_i n_{i0} \pdv{\vb{u}_{i1}}{t}
-    &= - q_i n_{i0} \nabla \phi_1 - \gamma_i k_B T_i \nabla n_{i1}
+    &\approx - q_i n_{i0} \nabla \phi_1 - \gamma_i k_B T_i \nabla n_{i1}
     \\
     0
-    &= - q_e n_{e0} \nabla \phi_1 - \gamma_e k_B T_e \nabla n_{e1}
+    &\approx - q_e n_{e0} \nabla \phi_1 - \gamma_e k_B T_e \nabla n_{e1}
 \end{aligned}$$
 
 Because we are interested in linear waves,
@@ -123,7 +121,7 @@ to get a relation between $$n_{e1}$$ and $$n_{e0}$$:
 $$\begin{aligned}
      i \vb{k} \gamma_e k_B T_e n_{e1}
      = - i \vb{k} q_e n_{e0} \phi_1
-     \quad \implies \quad
+     \qquad \implies \qquad
      n_{e1}
      = - \frac{q_e \phi_1}{\gamma_e k_B T_e} n_{e0}
 \end{aligned}$$
@@ -159,13 +157,13 @@ $$\begin{aligned}
     \approx \pdv{n_{i1}}{t} + n_{i0} \nabla \cdot \vb{u}_{i1}
 \end{aligned}$$
 
-Then we insert our plane-wave ansatz,
+Into which we insert our plane-wave ansatz,
 and substitute $$n_{i0} = n_0$$ as before, yielding:
 
 $$\begin{aligned}
     0
     = - i \omega n_{i1} + i n_{i0} \vb{k} \cdot \vb{u}_{i1}
-    \quad \implies \quad
+    \qquad \implies \qquad
     \vb{k} \cdot \vb{u}_{i1}
     = \omega \frac{n_{i1}}{n_{i0}}
     = \omega \frac{q_e n_{i1} \phi_1}{k_B T_e n_{e1}}
@@ -187,9 +185,9 @@ $$\begin{gathered}
 Finally, we would like to find an expression for $$n_{e1} / n_{i1}$$.
 It cannot be $$1$$, because then $$\phi_1$$ could not be nonzero,
 according to [Gauss' law](/know/concept/maxwells-equations/).
-Nevertheless, authors often ignore this fact,
+Nevertheless, some authors tend to ignore this fact,
 thereby making the so-called **plasma approximation**.
-We will not, and therefore turn to Gauss' law:
+We will not, and thus turn to Gauss' law:
 
 $$\begin{aligned}
     \varepsilon_0 \nabla \cdot \vb{E}
@@ -244,7 +242,7 @@ $$\begin{aligned}
     }
 \end{aligned}$$
 
-Curiously, unlike a neutral gas,
+Curiously, unlike in a neutral gas,
 this velocity is nonzero even if $$T_i = 0$$,
 meaning that the waves still exist then.
 In fact, usually the electron temperature $$T_e$$ dominates $$T_e \gg T_i$$,
diff --git a/source/know/concept/lagrange-multiplier/index.md b/source/know/concept/lagrange-multiplier/index.md
index a0b22aa..ce5418f 100644
--- a/source/know/concept/lagrange-multiplier/index.md
+++ b/source/know/concept/lagrange-multiplier/index.md
@@ -127,8 +127,22 @@ about the interdependence of a system of equations
 then $$\lambda$$ is not even given an expression!
 Hence it is sometimes also called an *undetermined multiplier*.
 
-This method generalizes nicely to multiple constraints or more variables.
-Suppose that we want to find the extrema of $$f(x_1, ..., x_N)$$
+This does not imply that $$\lambda$$ is meaningless;
+it often represents a quantity of interest.
+In general, defining $$h \equiv g + c$$ so that the constraint is $$h(x, y) = c$$,
+we see that the Lagrange multiplier represents minus the rate of change of $$\mathcal{L}$$
+with respect to the value being constrained:
+
+$$\begin{aligned}
+    \mathcal{L}(x, y, \lambda)
+    = f(x, y) + \lambda (h(x, y) - c)
+    \qquad \implies \qquad
+    -\pdv{\mathcal{L}}{c} = \lambda
+\end{aligned}$$
+
+The method of Lagrange multipliers
+generalizes nicely to more constraints or more variables.
+Suppose we want to find extrema of $$f(x_1, ..., x_N)$$
 subject to $$M < N$$ conditions:
 
 $$\begin{aligned}
diff --git a/source/know/concept/langmuir-waves/index.md b/source/know/concept/langmuir-waves/index.md
index be47567..2dbce8f 100644
--- a/source/know/concept/langmuir-waves/index.md
+++ b/source/know/concept/langmuir-waves/index.md
@@ -22,7 +22,7 @@ tell us that:
 $$\begin{aligned}
     m_e n_e \frac{\mathrm{D} \vb{u}_e}{\mathrm{D} t}
     = q_e n_e \vb{E} - \nabla p_e
-    \qquad \quad
+    \qquad \qquad
     \pdv{n_e}{t} + \nabla \cdot (n_e \vb{u}_e) = 0
 \end{aligned}$$
 
@@ -50,7 +50,7 @@ $$\begin{aligned}
 
 Where the perturbations $$n_{e1}$$, $$\vb{u}_{e1}$$ and $$\vb{E}_1$$ are very small,
 and the equilibrium components $$n_{e0}$$, $$\vb{u}_{e0}$$ and $$\vb{E}_0$$
-by definition satisfy:
+are assumed to satisfy:
 
 $$\begin{aligned}
     \pdv{n_{e0}}{t} = 0
@@ -64,7 +64,7 @@ $$\begin{aligned}
     \vb{E}_0 = 0
 \end{aligned}$$
 
-We insert this decomposistion into the electron continuity equation,
+We insert this decomposition into the electron continuity equation,
 arguing that $$n_{e1} \vb{u}_{e1}$$ is small enough to neglect, leading to:
 
 $$\begin{aligned}
@@ -114,6 +114,7 @@ However, there are three unknowns $$n_{e1}$$, $$\vb{u}_{e1}$$ and $$\vb{E}_1$$,
 so one more equation is needed.
 
 
+
 ## Cold Langmuir waves
 
 We therefore turn to the electron momentum equation.
@@ -172,7 +173,8 @@ $$\begin{aligned}
 Note that this is a dispersion relation $$\omega(k) = \omega_p$$,
 but that $$\omega_p$$ does not contain $$k$$.
 This means that cold Langmuir waves do not propagate:
-the oscillation is "stationary".
+the oscillation is stationary.
+
 
 
 ## Warm Langmuir waves
@@ -181,7 +183,7 @@ Next, we generalize this result to nonzero $$T_e$$,
 in which case the pressure $$p_e$$ is involved:
 
 $$\begin{aligned}
-    m_e n_{e0} \pdv{}{\vb{u}{e1}}{t}
+    m_e n_{e0} \pdv{\vb{u}_{e1}}{t}
     = q_e n_{e0} \vb{E}_1 - \nabla p_e
 \end{aligned}$$
 
diff --git a/source/know/concept/maxwell-boltzmann-distribution/index.md b/source/know/concept/maxwell-boltzmann-distribution/index.md
index 318e659..946525c 100644
--- a/source/know/concept/maxwell-boltzmann-distribution/index.md
+++ b/source/know/concept/maxwell-boltzmann-distribution/index.md
@@ -13,6 +13,7 @@ The **Maxwell-Boltzmann distributions** are a set of closely related
 probability distributions with applications in classical statistical physics.
 
 
+
 ## Velocity vector distribution
 
 In the [canonical ensemble](/know/concept/canonical-ensemble/)
@@ -24,55 +25,51 @@ $$\begin{aligned}
     \:\propto\: \exp\!\big(\!-\! \beta E\big)
 \end{aligned}$$
 
-Where $$\beta = 1 / k_B T$$.
-We split $$E = K + U$$,
-with $$K$$ and $$U$$ the total kinetic and potential energy contributions.
-If there are $$N$$ particles in the system,
-with positions $$\tilde{r} = (\vec{r}_1, ..., \vec{r}_N)$$
-and momenta $$\tilde{p} = (\vec{p}_1, ..., \vec{p}_N)$$,
-then $$K$$ only depends on $$\tilde{p}$$,
-and $$U$$ only depends on $$\tilde{r}$$,
+Where $$\beta \equiv 1 / k_B T$$. We split $$E = K + U$$,
+where $$K$$ and $$U$$ are the total contributions
+from the kinetic and potential energies of the system.
+For $$N$$ particles
+with positions $$\va{r} \equiv (\vb{r}_1, ..., \vb{r}_N)$$
+and momenta $$\va{p} \equiv (\vb{p}_1, ..., \vb{p}_N)$$,
+$$K$$ only depends on $$\va{p}$$ and $$U$$ only on $$\va{r}$$,
 so the probability of a specific microstate
-$$(\tilde{r}, \tilde{p})$$ is as follows:
+$$(\va{r}, \va{p})$$ is as follows:
 
 $$\begin{aligned}
-    f(\tilde{r}, \tilde{p})
-    \:\propto\: \exp\!\Big(\!-\! \beta \big( K(\tilde{p}) + U(\tilde{r}) \big) \Big)
+    f(\va{r}, \va{p})
+    \:\propto\: \exp\!\Big(\!-\! \beta \big( K(\va{p}) + U(\va{r}) \big) \Big)
 \end{aligned}$$
 
-Since this is classical physics,
-we can split the exponential.
-In quantum mechanics,
-the canonical commutation relation would prevent that.
-Anyway, splitting yields:
+Since this is classical physics, we can split the exponential
+(in quantum mechanics, the canonical commutation relation would prevent that):
 
 $$\begin{aligned}
-    f(\tilde{r}, \tilde{p})
-    \:\propto\: \exp\!\big(\!-\! \beta K(\tilde{p}) \big) \exp\!\big(\!-\! \beta U(\tilde{r}) \big)
+    f(\va{r}, \va{p})
+    \:\propto\: \exp\!\big(\!-\! \beta K(\va{p}) \big) \exp\!\big(\!-\! \beta U(\va{r}) \big)
 \end{aligned}$$
 
 Classically, the probability
 distributions of the momenta and positions are independent:
 
 $$\begin{aligned}
-    f_K(\tilde{p})
-    \:\propto\: \exp\!\big(\!-\! \beta K(\tilde{p}) \big)
+    f_K(\va{p})
+    \:\propto\: \exp\!\big(\!-\! \beta K(\va{p}) \big)
     \qquad \qquad
-    f_U(\tilde{r})
-    \:\propto\: \exp\!\big(\!-\! \beta U(\tilde{r}) \big)
+    f_U(\va{r})
+    \:\propto\: \exp\!\big(\!-\! \beta U(\va{r}) \big)
 \end{aligned}$$
 
-We cannot evaluate $$f_U(\tilde{r})$$ further without knowing $$U(\tilde{r})$$ for a system.
-We thus turn to $$f_K(\tilde{p})$$, and see that the total kinetic
-energy $$K(\tilde{p})$$ is simply the sum of the particles' individual
-kinetic energies $$K_n(\vec{p}_n)$$, which are well-known:
+We cannot evaluate $$f_U(\va{r})$$ further without knowing $$U(\va{r})$$ for a system.
+We thus turn to $$f_K(\va{p})$$, and see that the total kinetic
+energy $$K(\va{p})$$ is simply the sum of the particles' individual
+kinetic energies $$K_n(\vb{p}_n)$$, which are well-known:
 
 $$\begin{aligned}
-    K(\tilde{p})
-    = \sum_{n = 1}^N K_n(\vec{p}_n)
+    K(\va{p})
+    = \sum_{n = 1}^N K_n(\vb{p}_n)
     \qquad \mathrm{where} \qquad
-    K_n(\vec{p}_n)
-    = \frac{|\vec{p}_n|^2}{2 m}
+    K_n(\vb{p}_n)
+    = \frac{|\vb{p}_n|^2}{2 m}
 \end{aligned}$$
 
 Consequently, the probability distribution $$f(p_x, p_y, p_z)$$ for the
@@ -100,10 +97,10 @@ so the velocity in each direction is independent of the others:
 
 $$\begin{aligned}
     f(v_x)
-    = \sqrt{\frac{m}{2 \pi k_B T}} \exp\!\Big( \!-\!\frac{m v_x^2}{2 k_B T} \Big)
+    = \sqrt{\frac{m}{2 \pi k_B T}} \exp\!\bigg( \!-\!\frac{m v_x^2}{2 k_B T} \bigg)
 \end{aligned}$$
 
-The distribution is thus an isotropic gaussian with standard deviations given by:
+The distribution is thus an isotropic Gaussian with standard deviations given by:
 
 $$\begin{aligned}
     \sigma_x = \sigma_y = \sigma_z
@@ -111,17 +108,18 @@ $$\begin{aligned}
 \end{aligned}$$
 
 
+
 ## Speed distribution
 
-We know the distribution of the velocities along each axis,
-but what about the speed $$v = |\vec{v}|$$?
-Because we do not care about the direction of $$\vec{v}$$, only its magnitude,
+That was the distribution of the velocities along each axis,
+but what about the speed $$v = |\vb{v}|$$?
+Because we do not care about the direction of $$\vb{v}$$, only its magnitude,
 the [density of states](/know/concept/density-of-states/) $$g(v)$$ is not constant:
 it is the rate-of-change of the volume of a sphere of radius $$v$$:
 
 $$\begin{aligned}
     g(v)
-    = \dv{}{v}\Big( \frac{4 \pi}{3} v^3 \Big)
+    = \dv{}{v} \bigg( \frac{4 \pi}{3} v^3 \bigg)
     = 4 \pi v^2
 \end{aligned}$$
 
@@ -132,7 +130,7 @@ then gives us the **Maxwell-Boltzmann speed distribution**:
 $$\begin{aligned}
     \boxed{
         f(v)
-        = 4 \pi \Big( \frac{m}{2 \pi k_B T} \Big)^{3/2} v^2 \exp\!\Big( \!-\!\frac{m v^2}{2 k_B T} \Big)
+        = 4 \pi \Big( \frac{m}{2 \pi k_B T} \Big)^{3/2} v^2 \exp\!\bigg( \!-\!\frac{m v^2}{2 k_B T} \bigg)
     }
 \end{aligned}$$
 
@@ -144,10 +142,10 @@ and the root-mean-square speed $$v_{\mathrm{rms}}$$:
 $$\begin{aligned}
     f'(v_\mathrm{mode})
     = 0
-    \qquad
+    \qquad \quad
     v_\mathrm{mean}
     = \int_0^\infty v \: f(v) \dd{v}
-    \qquad
+    \qquad \quad
     v_\mathrm{rms}
     = \bigg( \int_0^\infty v^2 \: f(v) \dd{v} \bigg)^{1/2}
 \end{aligned}$$
@@ -159,12 +157,12 @@ $$\begin{aligned}
         v_{\mathrm{mode}}
         = \sqrt{\frac{2 k_B T}{m}}
     }
-    \qquad
+    \qquad \quad
     \boxed{
         v_{\mathrm{mean}}
         = \sqrt{\frac{8 k_B T}{\pi m}}
     }
-    \qquad
+    \qquad \quad
     \boxed{
         v_{\mathrm{rms}}
         = \sqrt{\frac{3 k_B T}{m}}
@@ -172,6 +170,7 @@ $$\begin{aligned}
 \end{aligned}$$
 
 
+
 ## Kinetic energy distribution
 
 Using the speed distribution,
@@ -194,7 +193,7 @@ so the energy distribution $$f(K)$$ is:
 $$\begin{aligned}
     f(K)
     = \frac{f(v)}{m v}
-    = \sqrt{\frac{2 m}{\pi}} \: \bigg( \frac{1}{k_B T} \bigg)^{3/2} v \exp\!\Big( \!-\!\frac{m v^2}{2 k_B T} \Big)
+    = \sqrt{\frac{2 m}{\pi}} \Big( \frac{1}{k_B T} \Big)^{3/2} v \exp\!\bigg( \!-\!\frac{m v^2}{2 k_B T} \bigg)
 \end{aligned}$$
 
 Substituting $$v = \sqrt{2 K/m}$$ leads to
@@ -203,7 +202,7 @@ the **Maxwell-Boltzmann kinetic energy distribution**:
 $$\begin{aligned}
     \boxed{
         f(K)
-        = 2 \sqrt{\frac{K}{\pi}} \: \bigg( \frac{1}{k_B T} \bigg)^{3/2} \exp\!\Big( \!-\!\frac{K}{k_B T} \Big)
+        = 2 \sqrt{\frac{K}{\pi}} \Big( \frac{1}{k_B T} \Big)^{3/2} \exp\!\bigg( \!-\!\frac{K}{k_B T} \bigg)
     }
 \end{aligned}$$
 
diff --git a/source/know/concept/modulational-instability/index.md b/source/know/concept/modulational-instability/index.md
index e29b2d5..f1c246c 100644
--- a/source/know/concept/modulational-instability/index.md
+++ b/source/know/concept/modulational-instability/index.md
@@ -16,25 +16,28 @@ is a nonlinear effect that leads to the exponential amplification
 of background noise in certain frequency regions.
 It only occurs in the [anomalous dispersion regime](/know/concept/dispersive-broadening/)
 ($$\beta_2 < 0$$), which we will prove shortly.
+The underlying physical process causing it is *degenerate four-wave mixing*.
 
 Consider the following simple solution to the nonlinear Schrödinger equation:
 a time-invariant constant power $$P_0$$ at the carrier frequency $$\omega_0$$,
-which is experiencing [self-phase modulation](/know/concept/self-phase-modulation/):
+experiencing [self-phase modulation](/know/concept/self-phase-modulation/):
 
 $$\begin{aligned}
-    A(z,t) = \sqrt{P_0} \exp( i \gamma P_0 z)
+    A(z,t)
+    = \sqrt{P_0} \exp( i \gamma P_0 z)
 \end{aligned}$$
 
 We add a small perturbation $$\varepsilon(z,t)$$ to this signal,
 representing background noise:
 
 $$\begin{aligned}
-    A(z,t) = \big(\sqrt{P_0} + \varepsilon(z,t)\big) \exp( i \gamma P_0 z)
+    A(z,t)
+    = \big(\sqrt{P_0} + \varepsilon(z,t)\big) \exp( i \gamma P_0 z)
 \end{aligned}$$
 
 We insert this into the nonlinear Schrödinger equation to get a perturbation equation,
 which we linearize by assuming that $$|\varepsilon|^2$$ is negligible compared to $$P_0$$,
-such that all higher-order terms of $$\varepsilon$$ can be dropped, yielding:
+such that all higher-order terms of $$\varepsilon$$ can be dropped, leaving:
 
 $$\begin{aligned}
     0
@@ -52,7 +55,7 @@ $$\begin{aligned}
 
 We split the perturbation into real and imaginary parts
 $$\varepsilon(z,t) = \varepsilon_r(z,t) + i \varepsilon_i(z,t)$$,
-which we fill in in this equation.
+which we put in this equation.
 The point is that $$\varepsilon_r$$ and $$\varepsilon_i$$ are real functions:
 
 $$\begin{aligned}
@@ -66,9 +69,11 @@ Splitting this into its real and imaginary parts gives two PDEs
 relating $$\varepsilon_r$$ and $$\varepsilon_i$$:
 
 $$\begin{aligned}
-    \pdv{\varepsilon_r}{z} = \frac{\beta_2}{2} \pdvn{2}{\varepsilon_i}{t}
-    \qquad \quad
-    \pdv{\varepsilon_i}{z} = - \frac{\beta_2}{2} \pdvn{2}{\varepsilon_r}{t} + 2 \gamma P_0 \varepsilon_r
+    \pdv{\varepsilon_r}{z}
+    = \frac{\beta_2}{2} \pdvn{2}{\varepsilon_i}{t}
+    \qquad \qquad
+    \pdv{\varepsilon_i}{z}
+    = - \frac{\beta_2}{2} \pdvn{2}{\varepsilon_r}{t} + 2 \gamma P_0 \varepsilon_r
 \end{aligned}$$
 
 We [Fourier transform](/know/concept/fourier-transform/)
@@ -76,18 +81,22 @@ these in $$t$$ to turn them into ODEs relating
 $$\tilde{\varepsilon}_r(z,\omega)$$ and $$\tilde{\varepsilon}_i(z,\omega)$$:
 
 $$\begin{aligned}
-    \pdv{\tilde{\varepsilon}_r}{z} = - \frac{\beta_2}{2} \omega^2 \tilde{\varepsilon}_i
-    \qquad \quad
-    \pdv{\tilde{\varepsilon}_i}{z} = \Big(\frac{\beta_2}{2} \omega^2 + 2 \gamma P_0 \Big) \tilde{\varepsilon}_r
+    \pdv{\tilde{\varepsilon}_r}{z}
+    = - \frac{\beta_2}{2} \omega^2 \tilde{\varepsilon}_i
+    \qquad \qquad
+    \pdv{\tilde{\varepsilon}_i}{z}
+    = \Big(\frac{\beta_2}{2} \omega^2 + 2 \gamma P_0 \Big) \tilde{\varepsilon}_r
 \end{aligned}$$
 
 We are interested in exponential growth, so let us make the following ansatz,
 where $$k$$ may be a function of $$\omega$$, as long as it is $$z$$-invariant:
 
 $$\begin{aligned}
-    \tilde{\varepsilon}_r(z, \omega) = \tilde{\varepsilon}_r(0, \omega) \exp(k z)
-    \qquad \quad
-    \tilde{\varepsilon}_i(z, \omega) = \tilde{\varepsilon}_i(0, \omega) \exp(k z)
+    \tilde{\varepsilon}_r(z, \omega)
+    = \tilde{\varepsilon}_r(0, \omega) \exp(k z)
+    \qquad \qquad
+    \tilde{\varepsilon}_i(z, \omega)
+    = \tilde{\varepsilon}_i(0, \omega) \exp(k z)
 \end{aligned}$$
 
 With this, we can write the system of ODEs for
@@ -105,11 +114,12 @@ $$\begin{aligned}
     \begin{bmatrix} 0 \\ 0 \end{bmatrix}
 \end{aligned}$$
 
-This has non-zero solutions if the system matrix' determinant is zero,
+This has nonzero solutions if the system matrix' determinant is zero,
 which is true when:
 
 $$\begin{aligned}
-    k = \pm \sqrt{ - \frac{\beta_2}{2} \omega^2 \Big( \frac{\beta_2}{2} \omega^2 + 2 \gamma P_0 \Big) }
+    k
+    = \pm \sqrt{ - \frac{\beta_2}{2} \omega^2 \Big( \frac{\beta_2}{2} \omega^2 + 2 \gamma P_0 \Big) }
 \end{aligned}$$
 
 To get exponential growth, it is essential that $$\mathrm{Re}\{k\} > 0$$,
@@ -117,18 +127,19 @@ so we discard the negative sign,
 and get the following condition for MI:
 
 $$\begin{aligned}
-    - \frac{\beta_2}{2} \omega^2 \Big( \frac{\beta_2}{2} \omega^2 + 2 \gamma P_0 \Big) > 0
-    \quad \implies \quad
+    -\frac{\beta_2}{2} \omega^2 \Big( \frac{\beta_2}{2} \omega^2 + 2 \gamma P_0 \Big)
+    > 0
+    \qquad \implies \qquad
     \boxed{
-        \omega^2 < -\frac{4 \gamma P_0}{\beta_2}
+        \omega^2
+        < -\frac{4 \gamma P_0}{\beta_2}
     }
 \end{aligned}$$
 
-Since $$\omega^2$$ is positive, $$\beta_2$$ must be negative,
-so MI can only occur in the ADR.
+Since $$\omega^2$$ is positive, MI can only occur when $$\beta_2$$ is negative.
 It is worth noting that $$\beta_2 = \beta_2(\omega_0)$$,
-meaning there can only be exponential
-noise growth when the "parent pulse" is in the anomalous dispersion regime,
+meaning there can only be exponential noise growth
+when the parent pulse is in the anomalous dispersion regime,
 but that growth may appear in areas of normal dispersion,
 as long as the above condition is satisfied by the parent.
 
@@ -153,15 +164,17 @@ The frequencies with maximum gain are then found as extrema of $$g(\omega)$$,
 which satisfy:
 
 $$\begin{aligned}
-    g'(\omega_\mathrm{max}) = 0
+    g'(\omega_\mathrm{max})
+    = 0
     \qquad \implies \qquad
     \boxed{
-        \omega_\mathrm{max} = \pm \sqrt{\frac{2 \gamma P_0}{-\beta_2}}
+        \omega_\mathrm{max}
+        = \pm \sqrt{\frac{2 \gamma P_0}{-\beta_2}}
     }
 \end{aligned}$$
 
 A simulation of MI is illustrated below.
-The pulse considered was a solition of the following form
+The pulse considered was a soliton of the following form
 with settings $$T_0 = 10\:\mathrm{ps}$$, $$P_0 = 10\:\mathrm{kW}$$,
 $$\beta = -10\:\mathrm{ps}^2/\mathrm{m}$$ and $$\gamma = 0.1/\mathrm{W}/\mathrm{m}$$,
 whose peak is approximately flat, so our derivation is valid there,
@@ -176,7 +189,7 @@ $$\begin{aligned}
 
 Where $$L_\mathrm{NL} = 1/(\gamma P_0)$$ is the characteristic length of nonlinear effects.
 Note that no noise was added to the simulation;
-what you are seeing are pure numerical errors getting amplified.
+you are seeing pure numerical errors getting amplified.
 
 If one of the gain peaks accumulates a lot of energy quickly ($$L_\mathrm{NL}$$ is small),
 and that peak is in the anomalous dispersion regime,
@@ -187,7 +200,7 @@ This is seen above for $$z > 30 L_\mathrm{NL}$$.
 What we described is "pure" MI, but there also exists
 a different type caused by Raman scattering.
 In that case, amplification occurs at the strongest peak of the Raman gain $$\tilde{g}_R(\omega)$$,
-even when the parent pulse is in the NDR.
+even when the parent pulse has $$\beta_2 > 0$$.
 This is an example of stimulated Raman scattering (SRS).
 
 
diff --git a/source/know/concept/optical-wave-breaking/index.md b/source/know/concept/optical-wave-breaking/index.md
index 882749f..3509bc2 100644
--- a/source/know/concept/optical-wave-breaking/index.md
+++ b/source/know/concept/optical-wave-breaking/index.md
@@ -54,7 +54,7 @@ Dispersive broadening then continues normally:
 {% include image.html file="spectrograms-full.png" width="100%" alt="Spectrograms of pulse shape evolution" %}
 
 We call the distance at which the wave breaks $$L_\mathrm{WB}$$,
-and would like to analytically predict it.
+and want to predict it analytically.
 We do this using the instantaneous frequency $$\omega_i$$,
 by estimating when the SPM fluctuations overtake their own base,
 as was illustrated earlier.
@@ -84,11 +84,13 @@ and $$N_\mathrm{sol}$$ is the **soliton number**,
 which is defined as:
 
 $$\begin{aligned}
-    N_\mathrm{sol}^2 \equiv \frac{L_D}{L_N} = \frac{\gamma P_0 T_0^2}{|\beta_2|}
+    N_\mathrm{sol}^2
+    \equiv \frac{L_D}{L_N}
+    = \frac{\gamma P_0 T_0^2}{|\beta_2|}
 \end{aligned}$$
 
 This quantity is very important in anomalous dispersion,
-but even in normal dispesion, it is still a useful measure of the relative strengths of GVD and SPM.
+but even in normal dispersion, it is still a useful measure of the relative strengths of GVD and SPM.
 As was illustrated earlier, $$\omega_i$$ overtakes itself at the edges,
 so OWB occurs when $$\omega_i$$ oscillates there,
 which starts when its $$t$$-derivative,
@@ -100,17 +102,19 @@ $$\begin{aligned}
     = \xi_i(z,t)
     = \pdv{\omega_i}{t}
     &= \frac{\beta_2 z}{T_0^4} \bigg( 1 + 2 N_\mathrm{sol}^2 \Big( 1 - \frac{2 t^2}{T_0^2} \Big) \exp\!\Big(\!-\!\frac{t^2}{T_0^2}\Big) \bigg)
-    = \frac{\beta_2 z}{T_0^4} \: f\Big(\frac{t^2}{T_0^2}\Big)
+    \equiv \frac{\beta_2 z}{T_0^4} \: f\Big(\frac{t^2}{T_0^2}\Big)
 \end{aligned}$$
 
 Where the function $$f(x)$$ has been defined for convenience. As it turns
-out, this equation can be solved analytically using the Lambert $$W$$ function,
+out, this equation can be solved analytically using the *Lambert $$W$$ function*,
 leading to the following exact minimum value $$N_\mathrm{min}^2$$ for $$N_\mathrm{sol}^2$$,
 such that OWB can only occur when $$N_\mathrm{sol}^2 > N_\mathrm{min}^2$$:
 
 $$\begin{aligned}
     \boxed{
-        N_\mathrm{min}^2 = \frac{1}{4} \exp\!\Big(\frac{3}{2}\Big) \approx 1.12
+        N_\mathrm{min}^2
+        = \frac{1}{4} \exp\!\Big(\frac{3}{2}\Big)
+        \approx 1.12
     }
 \end{aligned}$$
 
@@ -129,28 +133,33 @@ $$\begin{aligned}
     \Delta t
     &\approx z \Delta\beta_1
     \qquad
-    &&\Delta\beta_1 \equiv \beta_1(\omega_i(z,t_2)) - \beta_1(\omega_i(z,t_1))
+    &&\Delta\beta_1
+    \equiv \beta_1(\omega_i(z,t_2)) - \beta_1(\omega_i(z,t_1))
     \\
     &\approx z \beta_2 \Delta\omega_i
     \qquad
-    &&\Delta\omega_i \equiv \omega_i(z,t_2) - \omega_i(z,t_1)
+    &&\Delta\omega_i
+    \equiv \omega_i(z,t_2) - \omega_i(z,t_1)
     \\
     &\approx z \beta_2 \Delta\xi_i \,(t_2 - t_1)
     \qquad \quad
-    &&\Delta\xi_i \equiv \xi_i(z,t_2) - \xi_i(z,t_1)
+    &&\Delta\xi_i
+    \equiv \xi_i(z,t_2) - \xi_i(z,t_1)
 \end{aligned}$$
 
 Where $$\beta_1(\omega)$$ is the inverse of the group velocity.
-OWB takes place when $$t_2$$ and $$t_1$$ catch up to each other,
+For a certain choice of $$t_1$$ and $$t_2$$,
+OWB occurs when they catch up to each other,
 which is when $$-\Delta t = (t_2 - t_1)$$.
-The distance where this happens first, $$z = L_\mathrm{WB}$$,
-must therefore satisfy the following condition
-for a particular value of $$t$$:
+The distance $$L_\mathrm{WB}$$ at which this happens first
+must satisfy the following condition for some value of $$t$$:
 
 $$\begin{aligned}
-    L_\mathrm{WB} \, \beta_2 \, \xi_i(L_\mathrm{WB}, t) = -1
+    L_\mathrm{WB} \: \beta_2 \: \xi_i(L_\mathrm{WB}, t)
+    = -1
     \qquad \implies \qquad
-    L_\mathrm{WB}^2 = - \frac{T_0^4}{\beta_2^2 \, f(t^2/T_0^2)}
+    L_\mathrm{WB}^2
+    = - \frac{T_0^4}{\beta_2^2 \: f(t^2/T_0^2)}
 \end{aligned}$$
 
 The time $$t$$ of OWB must be where $$\omega_i(t)$$ has its steepest slope,
@@ -158,7 +167,8 @@ which is at the minimum value of $$\xi_i(t)$$, and by extension $$f(x)$$.
 This turns out to be $$f(3/2)$$:
 
 $$\begin{aligned}
-    f_\mathrm{min} = f(3/2)
+    f_\mathrm{min}
+    = f(3/2)
     = 1 - 4 N_\mathrm{sol}^2 \exp(-3/2)
     = 1 - N_\mathrm{sol}^2 / N_\mathrm{min}^2
 \end{aligned}$$
@@ -182,8 +192,9 @@ with the OWB observed in the simulation:
 {% include image.html file="simulation-full.png" width="100%" alt="Optical wave breaking simulation results" %}
 
 Because all spectral broadening up to $$L_\mathrm{WB}$$ is caused by SPM,
-whose frequency behaviour is known, it is in fact possible to draw
-some analytical conclusions about the achieved bandwidth when OWB sets in.
+whose $$\omega$$-domain behaviour is known,
+it is in fact possible to draw some analytical conclusions
+about the achieved bandwidth when OWB sets in.
 Filling $$L_\mathrm{WB}$$ in into $$\omega_\mathrm{SPM}$$ gives:
 
 $$\begin{aligned}
@@ -205,7 +216,8 @@ $$\pm 1 / \sqrt{2 e}$$ at $$x^2 = 1/2$$. The maximum SPM frequency shift
 achieved at $$L_\mathrm{WB}$$ is therefore given by:
 
 $$\begin{aligned}
-    \omega_\mathrm{max} = \sqrt{\frac{2 \gamma P_0}{e \beta_2}}
+    \omega_\mathrm{max}
+    = \sqrt{\frac{2 \gamma P_0}{e \beta_2}}
 \end{aligned}$$
 
 Interestingly, this expression does not contain $$T_0$$ at all,
@@ -214,6 +226,7 @@ is independent of the pulse width,
 for sufficiently large $$N_\mathrm{sol}$$.
 
 
+
 ## References
 1.  D. Anderson, M. Desaix, M. Lisak, M.L. Quiroga-Teixeiro,
     [Wave breaking in nonlinear-optical fibers](https://doi.org/10.1364/JOSAB.9.001358),
diff --git a/source/know/concept/random-variable/index.md b/source/know/concept/random-variable/index.md
index ecb8e96..a6cbc8b 100644
--- a/source/know/concept/random-variable/index.md
+++ b/source/know/concept/random-variable/index.md
@@ -17,6 +17,7 @@ Here, we will describe the formal mathematical definition
 of a random variable.
 
 
+
 ## Probability space
 
 A **probability space** or **probability triple** $$(\Omega, \mathcal{F}, P)$$
@@ -24,7 +25,7 @@ is the formal mathematical model of a given **stochastic experiment**,
 i.e. a process with a random outcome.
 
 The **sample space** $$\Omega$$ is the set
-of all possible outcomes $$\omega$$ of the experimement.
+of all possible outcomes $$\omega$$ of the stochastic experiment.
 Those $$\omega$$ are selected randomly according to certain criteria.
 A subset $$A \subset \Omega$$ is called an **event**,
 and can be regarded as a true statement about all $$\omega$$ in that $$A$$.
@@ -34,7 +35,7 @@ that are interesting to us,
 i.e. we have subjectively chosen $$\mathcal{F}$$
 based on the problem at hand.
 Since events $$A$$ represent statements about outcomes $$\omega$$,
-and we would like to use logic on those statemenets,
+and we would like to use logic on those statements,
 we demand that $$\mathcal{F}$$ is a [$$\sigma$$-algebra](/know/concept/sigma-algebra/).
 
 Finally, the **probability measure** or **probability function** $$P$$
@@ -52,6 +53,7 @@ if $$\Omega$$ is continuous, all $$\omega$$ have zero probability,
 while intervals $$A$$ can have nonzero probability.
 
 
+
 ## Random variable
 
 Once we have a probability space $$(\Omega, \mathcal{F}, P)$$,
@@ -91,7 +93,7 @@ $$X^{-1}$$ can be regarded as the inverse of $$X$$:
 it maps $$B$$ to the event for which $$X \in B$$.
 With this, our earlier requirement that $$X$$ be measurable
 can be written as: $$X^{-1}(B) \in \mathcal{F}$$ for any $$B \in \mathcal{B}(\mathbb{R}^n)$$.
-This is also often stated as "$$X$$ is *$$\mathcal{F}$$-measurable"*.
+This is often stated as "$$X$$ is *$$\mathcal{F}$$-measurable*".
 
 Related to $$\mathcal{F}$$ is the **information**
 obtained by observing a random variable $$X$$.
@@ -111,7 +113,7 @@ then the preimage $$X^{-1}(B)$$ (i.e. the event yielding this $$B$$)
 is known to have occurred.
 
 In general, given any $$\sigma$$-algebra $$\mathcal{H}$$,
-a variable $$Y$$ is said to be *"$$\mathcal{H}$$-measurable"*
+a variable $$Y$$ is said to be *$$\mathcal{H}$$-measurable*
 if $$\sigma(Y) \subseteq \mathcal{H}$$,
 so that $$\mathcal{H}$$ contains at least
 all information extractable from $$Y$$.
@@ -145,11 +147,12 @@ $$\begin{aligned}
 \end{aligned}$$
 
 
+
 ## Expectation value
 
 The **expectation value** $$\mathbf{E}[X]$$ of a random variable $$X$$
 can be defined in the familiar way, as the sum/integral
-of every possible value of $$X$$ mutliplied by the corresponding probability (density).
+of every possible value of $$X$$ multiplied by the corresponding probability (density).
 For continuous and discrete sample spaces $$\Omega$$, respectively:
 
 $$\begin{aligned}
@@ -163,7 +166,7 @@ $$\begin{aligned}
 However, $$f_X(x)$$ is not guaranteed to exist,
 and the distinction between continuous and discrete is cumbersome.
 A more general definition of $$\mathbf{E}[X]$$
-is the following Lebesgue-Stieltjes integral,
+is the following *Lebesgue-Stieltjes integral*,
 since $$F_X(x)$$ always exists:
 
 $$\begin{aligned}
@@ -172,7 +175,7 @@ $$\begin{aligned}
 \end{aligned}$$
 
 This is valid for any sample space $$\Omega$$.
-Or, equivalently, a Lebesgue integral can be used:
+Or, equivalently, a *Lebesgue integral* can be used:
 
 $$\begin{aligned}
     \mathbf{E}[X]
diff --git a/source/know/concept/residue-theorem/index.md b/source/know/concept/residue-theorem/index.md
index a0f515e..49a6f7a 100644
--- a/source/know/concept/residue-theorem/index.md
+++ b/source/know/concept/residue-theorem/index.md
@@ -12,7 +12,7 @@ A function $$f(z)$$ is **meromorphic** if it is
 [holomorphic](/know/concept/holomorphic-function/)
 except in a finite number of **simple poles**,
 which are points $$z_p$$ where $$f(z_p)$$ diverges,
-but where the product $$(z - z_p) f(z)$$ is non-zero
+but where the product $$(z - z_p) f(z)$$ is nonzero
 and still holomorphic close to $$z_p$$.
 In other words, $$f(z)$$ can be approximated close to $$z_p$$:
 
@@ -26,7 +26,8 @@ represents the rate at which $$f(z)$$ diverges close to $$z_p$$:
 
 $$\begin{aligned}
     \boxed{
-        R_p = \lim_{z \to z_p} (z - z_p) f(z)
+        R_p
+        \equiv \lim_{z \to z_p} (z - z_p) f(z)
     }
 \end{aligned}$$
 
@@ -37,7 +38,8 @@ purely depends on the simple poles $$z_p$$ enclosed by $$C$$:
 
 $$\begin{aligned}
     \boxed{
-        \oint_C f(z) \dd{z} = i 2 \pi \sum_{z_p} R_p
+        \oint_C f(z) \dd{z}
+        = i 2 \pi \sum_{z_p} R_p
     }
 \end{aligned}$$
 
@@ -48,7 +50,8 @@ we know that we can decompose $$f(z)$$ like so,
 where $$h(z)$$ is holomorphic and $$z_p$$ are all its poles:
 
 $$\begin{aligned}
-    f(z) = h(z) + \sum_{z_p} \frac{R_p}{z - z_p}
+    f(z)
+    = h(z) + \sum_{z_p} \frac{R_p}{z - z_p}
 \end{aligned}$$
 
 We integrate this over a contour $$C$$ which contains all poles, and apply
diff --git a/source/know/concept/self-phase-modulation/index.md b/source/know/concept/self-phase-modulation/index.md
index 48ea20b..931e10b 100644
--- a/source/know/concept/self-phase-modulation/index.md
+++ b/source/know/concept/self-phase-modulation/index.md
@@ -12,8 +12,8 @@ layout: "concept"
 
 In fiber optics, **self-phase modulation** (SPM) is a nonlinear effect
 that gradually broadens pulses' spectra.
-Unlike dispersion, SPM does create new frequencies: in the $$\omega$$-domain,
-the pulse steadily spreads out with a distinctive "accordion" peak.
+Unlike dispersion, SPM creates new frequencies: in the $$\omega$$-domain,
+the pulse steadily spreads out in a distinctive "accordion" shape.
 Lower frequencies are created at the front of the
 pulse and higher ones at the back, giving S-shaped spectrograms.
 
@@ -32,22 +32,25 @@ For any arbitrary input pulse $$A_0(t) = A(0, t)$$,
 we arrive at the following analytical solution:
 
 $$\begin{aligned}
-    A(z,t) = A_0 \exp\!\big( i \gamma |A_0|^2 z\big)
+    A(z,t)
+    = A_0 \exp\!\big( i \gamma |A_0|^2 z\big)
 \end{aligned}$$
 
 The intensity $$|A|^2$$ in the time domain is thus unchanged,
 and only its phase is modified.
-It is also clear that the largest phase increase occurs at the peak of the pulse,
+Clearly, the largest phase shift occurs at the peak,
 where the intensity is $$P_0$$.
 To quantify this, it is useful to define the **nonlinear length** $$L_N$$,
 which gives the distance after which the phase of the
 peak has increased by exactly 1 radian:
 
 $$\begin{aligned}
-    \gamma P_0 L_N = 1
+    \gamma P_0 L_N
+    = 1
     \qquad \implies \qquad
     \boxed{
-        L_N = \frac{1}{\gamma P_0}
+        L_N
+        \equiv \frac{1}{\gamma P_0}
     }
 \end{aligned}$$
 
diff --git a/source/know/concept/self-steepening/index.md b/source/know/concept/self-steepening/index.md
index e06b0b5..fd48e0f 100644
--- a/source/know/concept/self-steepening/index.md
+++ b/source/know/concept/self-steepening/index.md
@@ -27,7 +27,8 @@ We will use the following ansatz,
 consisting of an arbitrary power profile $$P$$ with a phase $$\phi$$:
 
 $$\begin{aligned}
-    A(z,t) = \sqrt{P(z,t)} \, \exp\!\big(i \phi(z,t)\big)
+    A(z,t)
+    = \sqrt{P(z,t)} \, \exp\!\big(i \phi(z,t)\big)
 \end{aligned}$$
 
 For a long pulse travelling over a short distance, it is reasonable to
@@ -35,16 +36,19 @@ neglect dispersion ($$\beta_2 = 0$$).
 Inserting the ansatz then gives the following, where $$\varepsilon = \gamma / \omega_0$$:
 
 $$\begin{aligned}
-    0 &= i \frac{1}{2} \frac{P_z}{\sqrt{P}} - \sqrt{P} \phi_z + \gamma P \sqrt{P} + i \varepsilon \frac{3}{2} P_t \sqrt{P} - \varepsilon P \sqrt{P} \phi_t
+    0
+    &= i \frac{1}{2} \frac{P_z}{\sqrt{P}} - \sqrt{P} \phi_z + \gamma P \sqrt{P} + i \varepsilon \frac{3}{2} P_t \sqrt{P} - \varepsilon P \sqrt{P} \phi_t
 \end{aligned}$$
 
 This results in two equations, respectively corresponding to the real
 and imaginary parts:
 
 $$\begin{aligned}
-    0 &= - \phi_z - \varepsilon P \phi_t + \gamma P
+    0
+    &= - \phi_z - \varepsilon P \phi_t + \gamma P
     \\
-    0 &= P_z + \varepsilon 3 P_t P
+    0
+    &= P_z + \varepsilon 3 P_t P
 \end{aligned}$$
 
 The phase $$\phi$$ is not so interesting, so we focus on the latter equation for $$P$$.
@@ -53,7 +57,8 @@ which shows that more intense parts of the pulse
 will lag behind compared to the rest:
 
 $$\begin{aligned}
-    P(z,t) = f(t - 3 \varepsilon z P)
+    P(z,t)
+    = f(t - 3 \varepsilon z P)
 \end{aligned}$$
 
 Where $$f$$ is the initial power profile: $$f(t) = P(0,t)$$.
@@ -85,7 +90,8 @@ $$\begin{aligned}
     = 1 + 3 \varepsilon z f_\mathrm{min}'
     \qquad \implies \qquad
     \boxed{
-        L_\mathrm{shock} \equiv -\frac{1}{3 \varepsilon f_\mathrm{min}'}
+        L_\mathrm{shock}
+        \equiv -\frac{1}{3 \varepsilon f_\mathrm{min}'}
     }
 \end{aligned}$$
 
@@ -99,7 +105,8 @@ with $$T_0 = 25\:\mathrm{fs}$$, $$P_0 = 3\:\mathrm{kW}$$,
 $$\beta_2 = 0$$ and $$\gamma = 0.1/\mathrm{W}/\mathrm{m}$$:
 
 $$\begin{aligned}
-    f(t) = P(0,t) = P_0 \exp\!\Big(\! -\!\frac{t^2}{T_0^2} \Big)
+    f(t)
+    = P(0,t) = P_0 \exp\!\Big(\! -\!\frac{t^2}{T_0^2} \Big)
 \end{aligned}$$
 
 
@@ -107,9 +114,11 @@ Its steepest points are found to be at $$2 t^2 = T_0^2$$, so
 $$f_\mathrm{min}'$$ and $$L_\mathrm{shock}$$ are given by:
 
 $$\begin{aligned}
-    f_\mathrm{min}' = - \frac{\sqrt{2} P_0}{T_0} \exp\!\Big(\!-\!\frac{1}{2}\Big)
+    f_\mathrm{min}'
+    = - \frac{\sqrt{2} P_0}{T_0} \exp\!\Big(\!-\!\frac{1}{2}\Big)
     \quad \implies \quad
-    L_\mathrm{shock} = \frac{T_0}{3 \sqrt{2} \varepsilon P_0} \exp\!\Big(\frac{1}{2}\Big)
+    L_\mathrm{shock}
+    = \frac{T_0}{3 \sqrt{2} \varepsilon P_0} \exp\!\Big(\frac{1}{2}\Big)
 \end{aligned}$$
 
 This example Gaussian pulse therefore has a theoretical
@@ -127,6 +136,7 @@ Nevertheless, the general trends are nicely visible:
 the trailing slope becomes extremely steep, and the spectrum
 broadens so much that dispersion cannot be neglected anymore.
 
+{% comment %}
 When self-steepening is added to