From 1d700ab734aa9b6711eb31796beb25cb7659d8e0 Mon Sep 17 00:00:00 2001
From: Prefetch
Date: Tue, 20 Dec 2022 20:11:25 +0100
Subject: More improvements to knowledge base

---
 source/know/concept/binomial-distribution/index.md | 105 +++++++++++++--------
 1 file changed, 68 insertions(+), 37 deletions(-)

(limited to 'source/know/concept/binomial-distribution/index.md')

diff --git a/source/know/concept/binomial-distribution/index.md b/source/know/concept/binomial-distribution/index.md
index dc75221..9bb32d3 100644
--- a/source/know/concept/binomial-distribution/index.md
+++ b/source/know/concept/binomial-distribution/index.md
@@ -46,19 +46,25 @@ $$\begin{aligned}
 
 {% include proof/start.html id="proof-mean" -%}
-The trick is to treat $$p$$ and $$q$$ as independent until the last moment:
+The trick is to treat $$p$$ and $$q$$ as independent and introduce a derivative:
+
+$$\begin{aligned}
+    \mu
+    &= \sum_{n = 0}^N n P_N(n)
+    = \sum_{n = 0}^N n \binom{N}{n} p^n q^{N - n}
+    = \sum_{n = 0}^N \binom{N}{n} \bigg( p \pdv{(p^n)}{p} \bigg) q^{N - n}
+\end{aligned}$$
+
+Then, using the fact that the binomial coefficients appear when writing out $$(p + q)^N$$:
 
 $$\begin{aligned}
     \mu
-    &= \sum_{n = 0}^N n \binom{N}{n} p^n q^{N - n}
-    = \sum_{n = 0}^N \binom{N}{n} \Big( p \pdv{(p^n)}{p} \Big) q^{N - n}
-    \\
     &= p \pdv{}{p}\sum_{n = 0}^N \binom{N}{n} p^n q^{N - n}
     = p \pdv{}{p}(p + q)^N
     = N p (p + q)^{N - 1}
 \end{aligned}$$
 
-Inserting $$q = 1 - p$$ then gives the desired result.
+Finally, inserting $$q = 1 - p$$ gives the desired result.
 {% include proof/end.html id="proof-mean" %}
 
@@ -73,18 +79,21 @@ $$\begin{aligned}
 
 {% include proof/start.html id="proof-var" -%}
+We reuse the previous trick to find $$\overline{n^2}$$ (the mean squared number of successes):
 
 $$\begin{aligned}
     \overline{n^2}
     &= \sum_{n = 0}^N n^2 \binom{N}{n} p^n q^{N - n}
-    = \sum_{n = 0}^N n \binom{N}{n} \Big( p \pdv{}{p}\Big)^2 p^n q^{N - n}
+    = \sum_{n = 0}^N n \binom{N}{n} \bigg( p \pdv{}{p} \bigg) p^n q^{N - n}
+    \\
+    &= \sum_{n = 0}^N \binom{N}{n} \bigg( p \pdv{}{p} \bigg)^2 p^n q^{N - n}
+    = \bigg( p \pdv{}{p} \bigg)^2 \sum_{n = 0}^N \binom{N}{n} p^n q^{N - n}
     \\
-    &= \Big( p \pdv{}{p}\Big)^2 \sum_{n = 0}^N \binom{N}{n} p^n q^{N - n}
-    = \Big( p \pdv{}{p}\Big)^2 (p + q)^N
+    &= \bigg( p \pdv{}{p} \bigg)^2 (p + q)^N
+    = N p \pdv{}{p}p (p + q)^{N - 1}
     \\
-    &= N p \pdv{}{p}p (p + q)^{N - 1}
-    = N p \big( (p + q)^{N - 1} + (N - 1) p (p + q)^{N - 2} \big)
+    &= N p \big( (p + q)^{N - 1} + (N - 1) p (p + q)^{N - 2} \big)
     \\
     &= N p + N^2 p^2 - N p^2
 \end{aligned}$$
@@ -108,7 +117,7 @@ a fact that is sometimes called the **de Moivre-Laplace theorem**:
 
 $$\begin{aligned}
     \boxed{
-        \lim_{N \to \infty} P_N(n) = \frac{1}{\sqrt{2 \pi \sigma^2}} \exp\!\Big(\!-\!\frac{(n - \mu)^2}{2 \sigma^2} \Big)
+        \lim_{N \to \infty} P_N(n) = \frac{1}{\sqrt{2 \pi \sigma^2}} \exp\!\bigg(\!-\!\frac{(n - \mu)^2}{2 \sigma^2} \bigg)
     }
 \end{aligned}$$
 
@@ -121,73 +130,94 @@ $$\begin{aligned}
     \ln\!\big(P_N(n)\big)
     &= \sum_{m = 0}^\infty \frac{(n - \mu)^m}{m!} D_m(\mu)
     \quad \mathrm{where} \quad
-    D_m(n) = \dvn{m}{\ln\!\big(P_N(n)\big)}{n}
+    D_m(n)
+    \equiv \dvn{m}{\ln\!\big(P_N(n)\big)}{n}
 \end{aligned}$$
 
-We use Stirling's approximation to calculate the factorials in $$D_m$$:
+For future convenience while calculating the $$D_m$$, we write out $$\ln(P_N)$$ now:
 
 $$\begin{aligned}
     \ln\!\big(P_N(n)\big)
-    &= \ln(N!) - \ln(n!) - \ln\!\big((N - n)!\big) + n \ln(p) + (N - n) \ln(q)
-    \\
-    &\approx \ln(N!) - n \big( \ln(n)\!-\!\ln(p)\!-\!1 \big) - (N\!-\!n) \big( \ln(N\!-\!n)\!-\!\ln(q)\!-\!1 \big)
+    &= \ln(N!) - \ln(n!) - \ln\!\big((N \!-\! n)!\big) + n \ln(p) + (N \!-\! n) \ln(q)
 \end{aligned}$$
 
-For $$D_0(\mu)$$, we need to use a stronger version of Stirling's approximation
-to get a non-zero result. We take advantage of $$N - N p = N q$$:
+For $$D_0(\mu)$$ specifically,
+we need to use a strong version of *Stirling's approximation*
+to arrive at a nonzero result in the end.
+We know that $$N - N p = N q$$:
 
 $$\begin{aligned}
     D_0(\mu)
+    &= \ln\!\big(P_N(n)\big) \big|_{n = \mu}
+    \\
+    &= \ln(N!) - \ln(\mu!) - \ln\!\big((N \!-\! \mu)!\big) + \mu \ln(p) + (N \!-\! \mu) \ln(q)
+    \\
     &= \ln(N!) - \ln\!\big((N p)!\big) - \ln\!\big((N q)!\big) + N p \ln(p) + N q \ln(q)
     \\
-    &= \Big( N \ln(N) - N + \frac{1}{2} \ln(2\pi N) \Big)
+    &\approx \Big( N \ln(N) - N + \frac{1}{2} \ln(2\pi N) \Big)
     - \Big( N p \ln(N p) - N p + \frac{1}{2} \ln(2\pi N p) \Big)
     \\
     &\qquad - \Big( N q \ln(N q) - N q + \frac{1}{2} \ln(2\pi N q) \Big) + N p \ln(p) + N q \ln(q)
     \\
-    &= N \ln(N) - N (p + q) \ln(N) + N (p + q) - N - \frac{1}{2} \ln(2\pi N p q)
+    &= N \ln(N) - N (p \!+\! q) \ln(N) + N (p \!+\! q) - N - \frac{1}{2} \ln(2\pi N p q)
     \\
     &= - \frac{1}{2} \ln(2\pi N p q)
-    = \ln\!\Big( \frac{1}{\sqrt{2\pi \sigma^2}} \Big)
+    = \ln\!\bigg( \frac{1}{\sqrt{2\pi \sigma^2}} \bigg)
 \end{aligned}$$
 
-Next, we expect that $$D_1(\mu) = 0$$, because $$\mu$$ is the maximum.
-This is indeed the case:
+Next, for $$D_m(\mu)$$ with $$m \ge 1$$,
+we can use a weaker version of Stirling's approximation:
+
+$$\begin{aligned}
+    \ln(P_N)
+    &\approx \ln(N!) - n \big( \ln(n) \!-\! 1 \big) - (N \!-\! n) \big( \ln(N \!-\! n) \!-\! 1 \big) + n \ln(p) + (N \!-\! n) \ln(q)
+    \\
+    &\approx \ln(N!) - n \big( \ln(n) - \ln(p) - 1 \big) - (N\!-\!n) \big( \ln(N\!-\!n) - \ln(q) - 1 \big)
+\end{aligned}$$
+
+We expect that $$D_1(\mu) = 0$$, because $$P_N$$ is maximized at $$\mu$$.
+Indeed it is:
 
 $$\begin{aligned}
     D_1(n)
-    &= - \big( \ln(n)\!-\!\ln(p)\!-\!1 \big) + \big( \ln(N\!-\!n)\!-\!\ln(q)\!-\!1 \big) - 1 + 1
+    &= \dv{}{n} \ln\!\big(P_N(n)\big)
     \\
-    &= - \ln(n) + \ln(N - n) + \ln(p) - \ln(q)
+    &= - \big( \ln(n) - \ln(p) - 1 \big) + \big( \ln(N\!-\!n) - \ln(q) - 1 \big) - \frac{n}{n} + \frac{N \!-\! n}{N \!-\! n}
+    \\
+    &= - \ln(n) + \ln(N \!-\! n) + \ln(p) - \ln(q)
     \\
     D_1(\mu)
-    &= \ln(N q) - \ln(N p) + \ln(p) - \ln(q)
-    = \ln(N p q) - \ln(N p q)
-    = 0
+    &= - \ln(\mu) + \ln(N \!-\! \mu) + \ln(p) - \ln(q)
+    \\
+    &= - \ln(N p q) + \ln(N p q)
+    \\
+    &= 0
 \end{aligned}$$
 
-For the same reason, we expect that $$D_2(\mu)$$ is negative.
+For the same reason, we expect $$D_2(\mu)$$ to be negative.
 We find the following expression:
 
 $$\begin{aligned}
     D_2(n)
-    &= - \frac{1}{n} - \frac{1}{N - n}
-    \qquad
+    &= \dvn{2}{}{n} \ln\!\big(P_N(n)\big)
+    = \dv{}{n} D_1(n)
+    = - \frac{1}{n} - \frac{1}{N - n}
+    \\
     D_2(\mu)
-    = - \frac{1}{Np} - \frac{1}{Nq}
+    &= - \frac{1}{Np} - \frac{1}{Nq}
     = - \frac{p + q}{N p q}
     = - \frac{1}{\sigma^2}
 \end{aligned}$$
 
-The higher-order derivatives tend to zero for $$N \to \infty$$, so we discard them:
+The higher-order derivatives vanish much faster as $$N \to \infty$$, so we discard them:
 
 $$\begin{aligned}
     D_3(n)
     = \frac{1}{n^2} - \frac{1}{(N - n)^2}
-    \qquad
+    \qquad \quad
     D_4(n)
     = - \frac{2}{n^3} - \frac{2}{(N - n)^3}
-    \qquad
+    \qquad \quad
     \cdots
 \end{aligned}$$
@@ -197,13 +227,14 @@ the Taylor series approximately becomes:
 
 $$\begin{aligned}
     \ln\!\big(P_N(n)\big)
     \approx D_0(\mu) + \frac{(n - \mu)^2}{2} D_2(\mu)
-    = \ln\!\Big( \frac{1}{\sqrt{2\pi \sigma^2}} \Big) - \frac{(n - \mu)^2}{2 \sigma^2}
+    = \ln\!\bigg( \frac{1}{\sqrt{2\pi \sigma^2}} \bigg) - \frac{(n - \mu)^2}{2 \sigma^2}
 \end{aligned}$$
 
-Taking $$\exp$$ of this expression then yields a normalized Gaussian distribution.
+Raising $$e$$ to this expression then yields a normalized Gaussian distribution.
 {% include proof/end.html id="proof-normal" %}
 
+
 ## References
 
 1. H. Gould, J. Tobochnik, *Statistical and thermal physics*, 2nd edition,
-- 
cgit v1.2.3
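
For a quick numerical cross-check of the three results this patch touches (the mean $$\mu = N p$$, the variance $$\sigma^2 = N p q$$, and the de Moivre-Laplace Gaussian limit), here is a minimal Python sketch. The values `N = 1000` and `p = 0.3` are arbitrary illustrative choices, not taken from the article, and the script only assumes NumPy plus the standard-library `lgamma`:

```python
# Sanity check of the binomial results discussed above:
# mean N p, variance N p q, and the de Moivre-Laplace (Gaussian) limit.
from math import lgamma

import numpy as np

N, p = 1000, 0.3          # arbitrary illustrative parameters
q = 1.0 - p

n = np.arange(N + 1)

# log P_N(n) = ln C(N, n) + n ln(p) + (N - n) ln(q), computed via log-gamma
# because the binomial coefficients overflow for N this large.
log_binom = np.array([lgamma(N + 1) - lgamma(k + 1) - lgamma(N - k + 1) for k in n])
P = np.exp(log_binom + n * np.log(p) + (N - n) * np.log(q))

mu = np.sum(n * P)               # should equal N p
var = np.sum(n**2 * P) - mu**2   # should equal N p q
print(f"mean:     {mu:.6f}   (N p   = {N * p:.6f})")
print(f"variance: {var:.6f}   (N p q = {N * p * q:.6f})")

# de Moivre-Laplace: compare P_N(n) with a Gaussian of the same mean and variance.
gaussian = np.exp(-((n - N * p) ** 2) / (2 * N * p * q)) / np.sqrt(2 * np.pi * N * p * q)
print(f"max |P_N(n) - Gaussian(n)| = {np.max(np.abs(P - gaussian)):.2e}")
```

The printed mean and variance should match $$N p$$ and $$N p q$$ up to floating-point rounding, since those sums are exact identities rather than approximations, while the gap to the Gaussian shrinks as $$N$$ grows, as the proof above predicts.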