双样本T检验代码

发布时间: 2023-10-11 16:23:18  作者: bregman

双样本T检验代码, 对应数学原理 https://online.stat.psu.edu/stat415/lesson/11/11.2

import numpy as np  # noqa
from scipy import stats
from scipy.stats import beta


# Baseline arm as (clicks, non-clicks).
base_param = (73, 11737 - 73)

# Experiment arms as (clicks, non-clicks), derived from (clicks, impressions).
exp_params = [
    (clicks, impressions - clicks)
    for clicks, impressions in (
        (56, 11362),
        (73, 11393),
        (73, 11693),
        (82, 11451),
        (73, 11737),   # identical to the baseline
        (173, 11737),  # clearly different from the baseline
    )
]

def confidence(n_bad, n_good, tol=2):
    """Summarise a Beta(n_bad + 1, n_good + 1) distribution.

    Returns a 3-tuple ``(p, v, d)``:

    * ``p`` -- the estimated rate, ``(n_bad + 1) / (n_bad + n_good + 2)``;
    * ``v`` -- the standard deviation of that Beta distribution;
    * ``d`` -- the probability mass the distribution puts on the interval
      ``[p - tol * v, p + tol * v]`` after clipping it to ``[0, 1]``.
    """
    alpha = n_bad + 1
    beta_param = n_good + 1

    p = alpha / (alpha + beta_param)
    v = beta.std(alpha, beta_param)

    # Interval of +/- tol standard deviations around p, kept inside [0, 1].
    upper = min(1, p + v*tol)
    lower = max(0, p - v*tol)

    mass_below_upper = beta.cdf(upper, alpha, beta_param)
    mass_below_lower = beta.cdf(lower, alpha, beta_param)
    d = mass_below_upper - mass_below_lower
    return p, v, d


# Welch's two-sample t-test of every experiment arm against the baseline.
#
# confidence() returns the standard deviation of the Beta distribution of the
# click *rate*, which is already on the standard-error scale
# (about sqrt(p * (1 - p) / n)).  ttest_ind_from_stats expects the
# per-observation sample standard deviation and divides by sqrt(nobs) itself,
# so the rate's std is scaled back up by sqrt(nobs) before the call.  Passing
# it unscaled inflates the t statistic by roughly sqrt(nobs).
nobs1 = sum(base_param)
mean1, rate_std1 = confidence(*base_param)[:2]
std1 = rate_std1 * nobs1 ** 0.5
for exp_param in exp_params:
    nobs2 = sum(exp_param)
    mean2, rate_std2 = confidence(*exp_param)[:2]
    std2 = rate_std2 * nobs2 ** 0.5
    # equal_var=False selects Welch's test (unequal population variances).
    t, p = stats.ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2, equal_var=False)
    print(f"基线{base_param}, 实验{exp_param} t-value: {t:.5f}, p-value: {p:.5f}")


# Sanity check against the worked example from the referenced lesson:
# https://online.stat.psu.edu/stat415/lesson/11/11.2
mean1, std1, nobs1 = 105.5, 20.1, 34
mean2, std2, nobs2 = 90.9, 12.2, 29
t, p = stats.ttest_ind_from_stats(
    mean1=mean1, std1=std1, nobs1=nobs1,
    mean2=mean2, std2=std2, nobs2=nobs2,
    equal_var=False,
)
print(f"t-value: {t:.5f}, p-value: {p:.5f}")

# 以下是 chatgpt 提供的代码例子, 只修改了输入
# # 生成两个服从beta分布的样本
# sample1 = np.random.beta(73 + 1, 11737-73 + 1, size=11737)
# sample2 = np.random.beta(82 + 1, 11451-82 + 1, size=11451)

# # 计算样本均值和标准差

# mean1, mean2 = np.mean(sample1), np.mean(sample2)
# std1, std2 = np.std(sample1, ddof=1), np.std(sample2, ddof=1)

# # 计算t值和p值
# t, p = stats.ttest_ind(sample1, sample2, equal_var=False)

# # 输出结果
# print("Sample 1 mean: {:.5f}, standard deviation: {:.5f}".format(mean1, std1))
# print("Sample 2 mean: {:.5f}, standard deviation: {:.5f}".format(mean2, std2))
# print("t-value: {:.5f}, p-value: {:.5f}".format(t, p))

# 在这个例子中,我们生成了两个服从beta分布的样本,并使用scipy.stats模块中的ttest_ind函数来计算t值和p值。注意,我们还计算了样本均值和标准差,这些值可以用于检查假设条件是否成立。