用样本均值估计总体均值,用样本比例估计总体比例。
import numpy as np
# Observed outcomes: 1 = success, 0 = failure.
# Goal: estimate the success-to-failure ratio (odds).
x = [1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0]

# The sample mean of the 0/1 outcomes is the sample proportion of successes.
theta = np.mean(x)

# Plug the estimated proportion into theta / (1 - theta).
failure_share = 1 - theta
h = theta / failure_share
print(h)
1.2500000000000002
import numpy as np
import scipy.stats as ss
# Two-sided confidence interval for the mean when the standard deviation
# is given (sigma = sqrt(0.6)); p is the probability left in each tail.
n = 6
p = 0.025
sigma = np.sqrt(0.6)
x = [14.6, 15.1, 14.9, 14.8, 15.2, 15.1]
xbar = np.mean(x)

# Standard-normal quantile times the standard error of the mean.
z_crit = ss.norm.ppf(q=1 - p)
half_width = z_crit * (sigma / np.sqrt(n))

low = xbar - half_width
up = xbar + half_width
print([low, up])
[14.330204967695439, 15.569795032304564]
import numpy as np
import scipy.stats as ss
from scipy.stats import t
# Two-sided confidence interval for the mean when the variance is estimated
# from the sample, using Student's t with n - 1 degrees of freedom.
n = 9
p = 0.025  # probability left in each tail
x = [99.3, 98.7, 100.5, 101.2, 98.3, 99.7, 99.5, 102.1, 100.5]
xbar = np.mean(x)

# ddof=1 -> sample variance (divides by n - 1).
s2 = np.var(x, ddof=1)
s = np.sqrt(s2)

t_crit = ss.t.ppf(1 - p, n - 1)
half_width = t_crit * (s / np.sqrt(n))

low = xbar - half_width
up = xbar + half_width
print([low, up])
[99.04599342616191, 100.90956212939363]
from scipy.stats import chi2
# Two-sided confidence interval for a variance, from a sample variance s2
# and sample size n, using chi-square quantiles with n - 1 degrees of freedom.
n = 16
s2 = 0.0023
p = 0.025  # probability left in each tail

dof = n - 1
scaled_var = dof * s2

# The upper chi-square quantile gives the lower bound, and vice versa.
low = scaled_var / chi2.ppf(1 - p, dof)
up = scaled_var / chi2.ppf(p, dof)
print([low, up])
[0.0012550751937877684, 0.005509300678006194]
import numpy as np
import scipy.stats as ss
# Two-sided confidence interval for the difference of two means, using
# hard-coded variance values (x_s2, y_s2) and a standard-normal quantile.
x = [628, 583, 510, 554, 612, 523, 530, 615]
y = [535, 433, 398, 470, 567, 480, 498, 560, 503, 426]
n1 = len(x)
n2 = len(y)
xbar = np.mean(x)
ybar = np.mean(y)
x_s2 = 2140
y_s2 = 3250
p = 0.025  # probability left in each tail

mean_gap = xbar - ybar
# Standard error of the difference: sqrt(var_x / n1 + var_y / n2).
half_width = ss.norm.ppf(q=1 - p) * np.sqrt(x_s2 / n1 + y_s2 / n2)

low = mean_gap - half_width
up = mean_gap + half_width
print([low, up])
[34.66688380095825, 130.08311619904174]
import numpy as np
import scipy.stats as ss
# Two-sided confidence interval for the difference of two means when the
# variances are estimated from the samples and pooled (equal-variance
# Student's t interval).
x = [628, 583, 510, 554, 612, 523, 530, 615]
y = [535, 433, 398, 470, 567, 480, 498, 560, 503, 426]
n1 = len(x); n2 = len(y)
xbar = np.mean(x); ybar = np.mean(y)
p = 0.025  # probability left in each tail

# The pooled formula weights each *sample* variance by (n - 1), so the
# variances must be computed with ddof=1 (np.var defaults to ddof=0).
x_s2 = np.var(x, ddof=1)
y_s2 = np.var(y, ddof=1)

# The pooled variance estimate has n1 + n2 - 2 degrees of freedom; the same
# value is the divisor of s2 and the df of the t quantile.
dof = n1 + n2 - 2
s2 = ((n1 - 1) * x_s2 + (n2 - 1) * y_s2) / dof

half_width = ss.t.ppf(1 - p, dof) * np.sqrt(s2 * (1 / n1 + 1 / n2))
low = xbar - ybar - half_width
up = xbar - ybar + half_width
print([low, up])
# Output (rounded): [29.47, 135.28]
import numpy as np
from scipy.stats import f
# Two-sided confidence interval for the ratio of two variances
# (variance of x over variance of y), using F quantiles with
# (n1 - 1, n2 - 1) degrees of freedom.
x = [20.5, 19.8, 19.7, 20.4, 20.1, 20.0, 19.0, 19.9]
y = [20.7, 19.8, 19.5, 20.8, 20.4, 19.6, 20.2]
n1 = len(x); n2 = len(y); p = 0.025

# The F-based interval is built on *sample* variances, so ddof=1 is required
# (np.var defaults to ddof=0, which skews the ratio when n1 != n2).
x_s2 = np.var(x, ddof=1)
y_s2 = np.var(y, ddof=1)
ratio = x_s2 / y_s2

# Dividing by the upper F quantile gives the lower bound, and vice versa.
low = ratio / f.ppf(1 - p, n1 - 1, n2 - 1)
up = ratio / f.ppf(p, n1 - 1, n2 - 1)
print([low, up])
# Output (rounded): [0.1393, 4.0600]
Exp:样本转化率为15%,样本量为60,求总体转化率的95%置信区间
import numpy as np
import scipy.stats as ss
# Two-sided confidence interval for a proportion using the normal
# approximation: tr +/- z * sqrt(tr * (1 - tr) / n).
n = 60
p = 0.025  # probability left in each tail
tr = 0.15  # observed conversion rate

z_crit = ss.norm.ppf(q=1 - p)
half_width = z_crit * np.sqrt(tr * (1 - tr) / n)

low = tr - half_width
up = tr + half_width
print([low, up])
[0.05965012454920031, 0.24034987545079967]
最常见的情形是:总体方差未知时,估计总体的均值μ;总体服从二项分布时,估计总体的比例p。如果遇到其他情形下的参数估计,同样只需要按照给定公式计算即可。
共勉~