<

6e91e647eaff "statistical bias -1 proven by simulation"

'''
When using n samples to estimate the population std,
we put (n-1) in the denominator:

population_std = sqrt( sample_sum_of_err_sq / (n-1))

i.e.,

population_var * (n-1) = sample_sum_of_err_sq    (on average, over many samples)

i.e.,

-1 = sample_sum_of_err_sq / population_var - n

This simulation provides empirical evidence for the -1: averaged over many
samples, sample_sum_of_err_sq / population_var - n settles close to -1.

'''
import random

# Number of values in the simulated population.
N = 10_000
# Number of values drawn from the population for each sample.
n = 30
# Number of samples drawn; the running average is printed after each one.
num_of_simulation = 500


def sum_of_err_sq(samples):
    """Return the sum of squared deviations of ``samples`` from their mean.

    The result is the numerator of a variance; callers choose the
    denominator themselves.

    ``samples`` must be a non-empty, sized collection of numbers. An empty
    collection raises ``ZeroDivisionError`` while computing the mean.
    """
    center = sum(samples) / len(samples)
    return sum((value - center) ** 2 for value in samples)


def get_bias():
    """Draw one sample and return ``sum_of_err_sq(sample) / population_var - n``.

    Reads the module-level ``population``, ``population_var`` and ``n``.
    ``population`` and ``population_var`` are only assigned by the
    ``__main__`` block, so calling this before they exist raises
    ``NameError``.

    Reasoning behind the returned quantity:

        true_var * (n - 1) = sample_sum_of_err_sq      (in expectation)
        n - 1              = sample_sum_of_err_sq / true_var
        -1                 = sample_sum_of_err_sq / true_var - n

    A single call is noisy; only the average over many calls approaches -1.
    """
    # Draw WITH replacement so the n values are independent draws from the
    # population's own distribution, whose variance is exactly
    # population_var (sum of squared errors / N). Drawing without
    # replacement (random.sample) would shift the expected result to
    # -(N - n) / (N - 1), which only approximates -1 when N >> n.
    samples = random.choices(population, k=n)
    sample_sum_of_err_sq = sum_of_err_sq(samples)

    return sample_sum_of_err_sq / population_var - n  # ~= -1


def bias_avg_gen(iteration: int):
    """Yield the running mean of ``get_bias()`` after each new draw.

    Produces ``iteration`` values in total; the k-th value (1-based) is the
    average of the first k results of ``get_bias()``. Nothing is yielded
    when ``iteration`` is zero or negative.
    """
    running_avg = 0
    for completed in range(iteration):
        latest = get_bias()
        # Rebuild the previous total from the previous mean, add the new
        # observation, then divide by the updated count.
        running_avg = (running_avg * completed + latest) / (completed + 1)
        yield running_avg


if __name__ == '__main__':
    # Build the population as module-level globals, because get_bias()
    # reads `population` and `population_var` directly. Uniform values are
    # used here, but any distribution would do.
    population = [random.random() for _unused in range(N)]

    # Variance of the whole population: denominator is N, not N - 1.
    population_sse = sum_of_err_sq(population)
    population_var = population_sse / N

    # Print the simulation index and the running average after every draw.
    for step, running_avg in enumerate(bias_avg_gen(num_of_simulation)):
        print(step, running_avg)