'''
when using n samples to estimate population std,
we will put (n-1) in the denominator:
population_std = sqrt( sample_sum_of_err_sq / (n-1))
i.e.,
population_var * (n-1) = sample_sum_of_err_sq
i.e.,
-1 = sample_sum_of_err_sq / population_var - n
This simulation is to evidence the -1 in the statistic sense
'''
import random
N = 10000 # population size
n = 30 # sample size
num_of_simulation = 500
def sum_of_err_sq(samples):
N = len(samples)
mean = sum(samples) / N
errsq = [(e - mean) ** 2 for e in samples]
return sum(errsq)
def get_bias():
samples = random.sample(population, n)
sample_sum_of_err_sq = sum_of_err_sq(samples)
# true_var * (n-1) = sample_sum_of_err_sq
# n-1 = sample_sum_of_err_sq / true_var
# -1 = sample_sum_of_err_sq / true_var - n
return sample_sum_of_err_sq / population_var - n # ~= -1
def bias_avg_gen(iteration: int):
n = 0
bias_avg = 0
while n < iteration:
bias_new = get_bias()
bias_sum = (bias_avg * n + bias_new)
n += 1
bias_avg = bias_sum / n
yield bias_avg
if __name__ == '__main__':
population = [random.random() for _ in range(N)] # any population works
population_var = sum_of_err_sq(population) / N
for i, bias_avg in enumerate(bias_avg_gen(num_of_simulation)):
print(i, bias_avg)