<

## 6e91e647eaff "statistical bias -1 proven by simulation"

``````'''
when using n samples to estimate population std,
we will put (n-1) in the denominator:

population_std = sqrt( sample_sum_of_err_sq / (n-1))

i.e.,

population_var * (n-1) = sample_sum_of_err_sq

i.e.,

-1 = sample_sum_of_err_sq / population_var - n

This simulation is to evidence the -1 in the statistic sense

'''
import random

N = 10000 # population size
n = 30 # sample size
num_of_simulation = 500

def sum_of_err_sq(samples):
N = len(samples)
mean = sum(samples) / N
errsq = [(e - mean) ** 2 for e in samples]
return sum(errsq)

def get_bias():
samples = random.sample(population, n)
sample_sum_of_err_sq = sum_of_err_sq(samples)

# true_var * (n-1) = sample_sum_of_err_sq
# n-1 = sample_sum_of_err_sq / true_var
# -1 = sample_sum_of_err_sq / true_var - n

return sample_sum_of_err_sq / population_var - n  # ~= -1

def bias_avg_gen(iteration: int):
n = 0
bias_avg = 0
while n < iteration:
bias_new = get_bias()
bias_sum = (bias_avg * n + bias_new)
n += 1
bias_avg = bias_sum / n
yield bias_avg

if __name__ == '__main__':

population = [random.random() for _ in range(N)]  # any population works
population_var = sum_of_err_sq(population) / N
for i, bias_avg in enumerate(bias_avg_gen(num_of_simulation)):
print(i, bias_avg)``````