The central limit theorem

The sum (or mean) of many independent random variables tends towards a normal (Gaussian) distribution, even if the individual variables are not normally distributed.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
In [2]:
# Combine two non-Gaussian signals (a sine wave and uniform noise) and compare
# the distribution of their sum with the distributions of the inputs.
n = 2000

# Seeded generator so the figure is identical on every Restart & Run All.
rng = np.random.default_rng(0)

sig1 = np.sin(np.linspace(0,6*np.pi,n)) # sine wave
sig2 = rng.random(n)*2-1 # uniform numbers in [-1, 1)

combi = sig1 + sig2

# Top row: the signals themselves; bottom row: 100-bin histograms of their values.
fig,ax = plt.subplots(2,3,figsize=(10,4))

ax[0,0].plot(sig1)
ax[0,0].set_title('Sine wave')
ax[1,0].hist(sig1,100)

ax[0,1].plot(sig2,'.',alpha=.3)
ax[0,1].set_title('Uniform numbers')
ax[1,1].hist(sig2,100)

ax[0,2].plot(combi)
ax[0,2].set_title('Combined signal')
ax[1,2].hist(combi,100)

# Remove the ticks from every panel. The loop variable is deliberately not
# named `ax`, so `ax` still refers to the 2x3 array of axes after the loop.
for panel in ax.flat:
    panel.set_xticks([])
    panel.set_yticks([])

plt.show()
In [3]:
import seaborn as sns

# Apply seaborn's default theme to this and all later figures.
sns.set_theme()

# Histogram of the combined signal (100 bins, normalised to a density) with a
# kernel density estimate drawn on top. `histplot` is used because seaborn has
# deprecated `distplot`; `cut=3` lets the density curve extend past the data
# range the way `distplot` drew it.
sns.histplot(combi, bins=100, stat='density', kde=True, kde_kws=dict(cut=3))
plt.show()

Exercise

Repeat the Python exercise (sine + uniform noise) for sample sizes $N \in [50, 10000]$.

In [4]:
# Repeat the sine + uniform-noise experiment for a range of sample sizes and
# show each sample's histogram as one row of a heat map.
ns = np.arange(50,10000,100) # sample sizes 50, 150, ..., 9950
nbins = 50

# Seeded generator so the heat map is identical on every Restart & Run All.
rng = np.random.default_rng(0)

allhist = np.zeros((len(ns),nbins)) # one row per sample size, one column per bin

for i, npnts in enumerate(ns):
    sig1 = np.sin(np.linspace(0, 6*np.pi, npnts)) # sine wave, values in [-1, 1]
    sig2 = rng.random(npnts)*2-1 # uniform numbers in [-1, 1)

    # The sum always lies within [-2, 2], so a fixed range loses no data and
    # gives every row identical bin edges. Without it each histogram would use
    # its own min/max, and the shared x-axis below would only match the last row.
    allhist[i,:],x = np.histogram(sig1+sig2,nbins,range=(-2,2),density=True)

# pcolor draws each cell between consecutive edge coordinates, so pass the
# nbins+1 bin edges in x and len(ns)+1 edges in y, placing each sample size at
# the centre of its row.
step = ns[1] - ns[0]
ns_edges = np.append(ns - step/2, ns[-1] + step/2)

plt.pcolor(x, ns_edges, allhist, vmax=.6,cmap='viridis')
plt.xlabel('Data value')
plt.ylabel('Points in the sample')
plt.colorbar()
plt.show()