---
title: "Central limit theorem"
date: 2021-03-09
categories:
- Statistics
- Mathematics
layout: "concept"
---

In statistics, the **central limit theorem** states that
the sum of many independent variables tends towards a normal distribution,
even if the individual variables $x_n$ follow different distributions.

For example, by taking $M$ samples of size $N$ from a population
and calculating $M$ averages $\mu_m$ (each involving a sum over $N$ values),
the resulting means $\mu_m$ are approximately normally distributed
across the $M$ samples, provided that $N$ is sufficiently large.

More formally, for $N$ independent variables $x_n$
with probability distributions $p(x_n)$, means $\mu_n$ and variances $\sigma_n^2$,
we define the sum $S$, together with its mean $\mu_S$ and variance $\sigma_S^2$:

$$\begin{aligned}
    S = \sum_{n = 1}^N x_n
    \qquad
    \mu_S = \sum_{n = 1}^N \mu_n
    \qquad
    \sigma_S^2 = \sum_{n = 1}^N \sigma_n^2
\end{aligned}$$

The central limit theorem then states that the probability distribution $p_N(S)$
of $S$ becomes a normal distribution when $N$ goes to infinity:

$$\begin{aligned}
    \boxed{
        \lim_{N \to \infty} \!\big(p_N(S)\big)
        = \frac{1}{\sigma_S \sqrt{2 \pi}} \exp\!\Big( -\frac{(S - \mu_S)^2}{2 \sigma_S^2} \Big)
    }
\end{aligned}$$

We prove this below, but first we need to introduce some tools.
Given a probability density $p(x)$, its [Fourier transform](/know/concept/fourier-transform/)
is called the **characteristic function** $\phi(k)$:

$$\begin{aligned}
    \phi(k) = \int_{-\infty}^\infty p(x) \exp(i k x) \dd{x}
\end{aligned}$$

Note that $\phi(k)$ can be interpreted as the average of $\exp(i k x)$.
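This interpretation lends itself to a quick numerical sanity check.
The minimal Python sketch below (which arbitrarily chooses an exponential
distribution with rate `lam = 1.5` and a sample size of $10^5$; any other
distribution would do) estimates $\phi(k)$ as the sample average of
$\exp(i k x)$ and compares it to the exact characteristic function
$\lambda / (\lambda - i k)$ of the exponential distribution:

```python
import numpy as np

rng = np.random.default_rng(seed=0)
lam = 1.5  # rate of the exponential distribution (arbitrary choice)
x = rng.exponential(scale=1/lam, size=100_000)

def phi_empirical(k):
    # phi(k) interpreted as the average of exp(i k x) over the samples
    return complex(np.mean(np.exp(1j * k * x)))

def phi_exact(k):
    # the known characteristic function of the exponential distribution
    return lam / (lam - 1j * k)

for k in (0.0, 0.5, 2.0):
    print(f"k = {k}: sampled {phi_empirical(k):.4f}, exact {phi_exact(k):.4f}")
```

At $k = 0$ both are exactly $1$, since every probability density is normalized;
at the other values they agree up to sampling noise.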
We now take the Taylor expansion of $\phi(k)$ in two separate ways,
where an overline denotes the mean:

$$\begin{aligned}
    \phi(k)
    = \sum_{n = 0}^\infty \frac{k^n}{n!} \: \phi^{(n)}(0)
    \qquad
    \phi(k)
    = \overline{\exp(i k x)} = \sum_{n = 0}^\infty \frac{(ik)^n}{n!} \overline{x^n}
\end{aligned}$$

By comparing the coefficients of these two power series,
we get a useful relation:

$$\begin{aligned}
    \phi^{(n)}(0) = i^n \: \overline{x^n}
\end{aligned}$$

Next, the **cumulants** $C^{(n)}$ are defined from the Taylor expansion of $\ln\!\big(\phi(k)\big)$:

$$\begin{aligned}
    \ln\!\big( \phi(k) \big)
    = \sum_{n = 1}^\infty \frac{(ik)^n}{n!} C^{(n)}
    \quad \mathrm{where} \quad
    C^{(n)} = \frac{1}{i^n} \: \dvn{n}{}{k} \Big(\ln\!\big(\phi(k)\big)\Big) \Big|_{k = 0}
\end{aligned}$$

The first two cumulants $C^{(1)}$ and $C^{(2)}$ are of particular interest,
since they turn out to be the mean and the variance, respectively,
as follows from our earlier relation:

$$\begin{aligned}
    C^{(1)}
    &= - i \dv{}{k} \Big(\ln\!\big(\phi(k)\big)\Big) \Big|_{k = 0}
    = - i \frac{\phi'(0)}{\exp(0)}
    = \overline{x}
    \\
    C^{(2)}
    &= - \dvn{2}{}{k} \Big(\ln\!\big(\phi(k)\big)\Big) \Big|_{k = 0}
    = \frac{\big(\phi'(0)\big)^2}{\exp(0)^2} - \frac{\phi''(0)}{\exp(0)}
    = - \overline{x}^2 + \overline{x^2}
    = \sigma^2
\end{aligned}$$

Let us now define $S$ as the sum of $N$ independent variables $x_n$, in other words:

$$\begin{aligned}
    S = \sum_{n = 1}^N x_n = x_1 + x_2 + ... + x_N
\end{aligned}$$

The probability density of $S$ is then as follows, where $p(x_n)$ are
the densities of the individual variables and $\delta$ is
the [Dirac delta function](/know/concept/dirac-delta-function/):

$$\begin{aligned}
    p(S)
    &= \int\cdots\int_{-\infty}^\infty \Big( \prod_{n = 1}^N p(x_n) \Big) \: \delta\Big( S - \sum_{n = 1}^N x_n \Big) \dd{x_1} \cdots \dd{x_N}
    \\
    &= \Big( p_1 * \big( p_2 * ( ... * (p_N * \delta))\big)\Big)(S)
\end{aligned}$$

In other words, the integrals pick out all combinations of $x_n$ that
add up to the desired $S$-value, and multiply the probabilities
$p(x_1) p(x_2) \cdots p(x_N)$ of each such combination. This is a convolution,
so the [convolution theorem](/know/concept/convolution-theorem/)
states that it becomes a product in the Fourier domain:

$$\begin{aligned}
    \phi_S(k) = \prod_{n = 1}^N \phi_n(k)
\end{aligned}$$

By taking the logarithm of both sides, the product becomes a sum,
which we further expand:

$$\begin{aligned}
    \ln\!\big(\phi_S(k)\big)
    = \sum_{n = 1}^N \ln\!\big(\phi_n(k)\big)
    = \sum_{n = 1}^N \sum_{m = 1}^{\infty} \frac{(ik)^m}{m!} C_n^{(m)}
\end{aligned}$$

Consequently, the cumulants $C^{(m)}$ stack additively for the sum $S$
of independent variables $x_n$, and therefore
the means $C^{(1)}$ and variances $C^{(2)}$ do too:

$$\begin{aligned}
    C_S^{(m)} = \sum_{n = 1}^N C_n^{(m)} = C_1^{(m)} + C_2^{(m)} + ... + C_N^{(m)}
\end{aligned}$$

We now introduce the scaled sum $z$ as the new combined variable,
where the factor $1 / \sqrt{N}$ keeps the variance of $z$ bounded as $N$ grows:

$$\begin{aligned}
    z = \frac{S}{\sqrt{N}} = \frac{1}{\sqrt{N}} (x_1 + x_2 + ... + x_N)
\end{aligned}$$
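At this point we can verify the additivity of the cumulants, and preview
the end result, with another minimal Python sketch (again an arbitrary
illustrative setup: $N = 50$ uniform variables and $M = 200\,000$
realizations of $S$). It confirms that the mean and variance of $S$
are the sums of the individual means and variances, and that the
standardized sum already closely follows a standard normal distribution:

```python
import numpy as np

rng = np.random.default_rng(seed=1)
N, M = 50, 200_000  # N variables per sum, M realizations (arbitrary)

# each x_n ~ Uniform(0, 1), which has mean 1/2 and variance 1/12
S = rng.uniform(0.0, 1.0, size=(M, N)).sum(axis=1)

mu_S = N * 0.5     # sum of the individual means
var_S = N / 12.0   # sum of the individual variances
print(S.mean(), mu_S)  # both are ~25.0
print(S.var(), var_S)  # both are ~4.17

# standardize S and compare some quantiles to those of N(0, 1)
z = (S - mu_S) / np.sqrt(var_S)
print(np.quantile(z, [0.16, 0.50, 0.84]))  # roughly [-1, 0, 1]
```

Even at this modest $N$, the quantiles of the standardized sum are close
to those of a standard Gaussian, despite the underlying variables being uniform.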
The characteristic function $\phi_z(k)$ of $z$ is then as follows,
with $\sqrt{N}$ appearing in the arguments of $\phi_n$:

$$\begin{aligned}
    \phi_z(k)
    &= \int\cdots\int
    \Big( \prod_{n = 1}^N p(x_n) \Big) \: \delta\Big( z - \frac{1}{\sqrt{N}} \sum_{n = 1}^N x_n \Big) \exp(i k z)
    \dd{x_1} \cdots \dd{x_N} \dd{z}
    \\
    &= \int\cdots\int
    \Big( \prod_{n = 1}^N p(x_n) \Big) \exp\!\Big( i \frac{k}{\sqrt{N}} \sum_{n = 1}^N x_n \Big)
    \dd{x_1} \cdots \dd{x_N}
    \\
    &= \prod_{n = 1}^N \phi_n\Big(\frac{k}{\sqrt{N}}\Big)
\end{aligned}$$

By expanding $\ln\!\big(\phi_z(k)\big)$ in terms of its cumulants $C^{(m)}$
and introducing $\kappa = k / \sqrt{N}$, we see that the higher-order terms
become smaller for larger $N$:

$$\begin{gathered}
    \ln\!\big( \phi_z(k) \big)
    = \sum_{m = 1}^\infty \frac{(ik)^m}{m!} C^{(m)}
    \\
    C^{(m)}
    = \frac{1}{i^m} \dvn{m}{}{k} \sum_{n = 1}^N \ln\!\bigg( \phi_n\Big(\frac{k}{\sqrt{N}}\Big) \bigg) \bigg|_{k = 0}
    = \frac{1}{i^m N^{m/2}} \dvn{m}{}{\kappa} \sum_{n = 1}^N \ln\!\big( \phi_n(\kappa) \big) \bigg|_{\kappa = 0}
\end{gathered}$$

Since the sum over $n$ contains only $N$ terms, $C^{(m)}$ scales as $N^{1 - m/2}$,
which vanishes as $N \to \infty$ for all $m > 2$.
For sufficiently large $N$, we can therefore approximate $\ln\!\big(\phi_z(k)\big)$
using just the first two terms:

$$\begin{aligned}
    \ln\!\big( \phi_z(k) \big)
    &\approx i k C^{(1)} - \frac{k^2}{2} C^{(2)}
    = i k \overline{z} - \frac{k^2}{2} \sigma_z^2
    \\
    \phi_z(k)
    &\approx \exp(i k \overline{z}) \exp(- k^2 \sigma_z^2 / 2)
\end{aligned}$$

We take its inverse Fourier transform to get the density $p(z)$,
which turns out to be a normal (Gaussian) distribution
that even comes out correctly normalized:

$$\begin{aligned}
    p(z)
    = \hat{\mathcal{F}}^{-1} \{\phi_z(k)\}
    &= \frac{1}{2 \pi} \int_{-\infty}^\infty \exp\!\big(\!-\! i k (z - \overline{z})\big) \exp(- k^2 \sigma_z^2 / 2) \dd{k}
    \\
    &= \frac{1}{\sqrt{2 \pi \sigma_z^2}} \exp\!\Big(\!-\! \frac{(z - \overline{z})^2}{2 \sigma_z^2} \Big)
\end{aligned}$$

And since $S = \sqrt{N} z$ is just a rescaled version of $z$,
its density $p_N(S)$ is Gaussian too, with mean $\mu_S$ and variance $\sigma_S^2$,
as claimed at the beginning.
Therefore, the sum of many independent variables tends to a normal distribution,
regardless of the densities of the individual variables.


## References
1.  H. Gould, J. Tobochnik,
    *Statistical and thermal physics*, 2nd edition,
    Princeton.