In [1]:
import numpy as np
import matplotlib.pyplot as plt

Correlation Coefficient

$r_{xy} = \frac{\sum_{i=1}^n(x_i- \bar x)(y_i-\bar y)}{\sqrt{\sum_{i=1}^n(x_i-\bar x)^2}\sqrt{\sum_{i=1}^n(y_i-\bar y)^2}}$

In [2]:
# Data: two paired samples (the same index in each list is one observation).
num_of_courses = [4,5,8,2,9,9,1,3,7]
life_happiness = [6,7,9,3,9,3,1,6,7]

# Mean-center each variable. Converting to an array first makes the
# element-wise subtraction explicit instead of relying on NumPy coercing a
# plain Python list through the scalar's reflected operator.
n = np.asarray(num_of_courses) - np.mean(num_of_courses)
l = np.asarray(life_happiness) - np.mean(life_happiness)

# Pearson r by hand: for mean-centered vectors the coefficient is their dot
# product divided by the product of the two vector lengths.
numer = np.dot(n,l)
denom = np.sqrt(np.dot(n,n)) * np.sqrt(np.dot(l,l))
r1 = numer/denom
print(r1)

# The same quantity from NumPy. r2 is the 2x2 correlation matrix; the
# coefficient between the two variables sits in the off-diagonal entries.
r2 = np.corrcoef(n,l)
print(r2)
print(r2[1, 0])  # reuse r2 rather than computing the matrix a second time

# Sanity check: the hand-rolled value and the library value must agree.
np.testing.assert_allclose(r1, r2[1, 0])

# Scatter the raw (uncentered) pairs as magenta squares on an explicit Axes,
# rather than through pyplot's implicit "current figure" state. The legend
# entry carries the hand-computed coefficient rounded to two decimals.
fig, ax = plt.subplots()
ax.plot(num_of_courses, life_happiness, 'ms', label='r=%g' % np.round(r1, 2))

# Same 0-10 range on both axes, drawn with an equal aspect ratio so one unit
# on x has the same on-screen length as one unit on y.
ax.axis([0, 10, 0, 10])
ax.legend()
ax.set_aspect('equal')

# Labels and title so the figure can be read on its own.
ax.set_xlabel('Number of courses taken')
ax.set_ylabel('Life happiness')
ax.set_title('Courses taken vs. life happiness')
plt.show()
0.60530206024143
[[1.         0.60530206]
 [0.60530206 1.        ]]
0.60530206024143