
Hypothesis testing - A/B testing


In [2]:
%config InlineBackend.figure_format = 'retina'
import numpy as np
import pandas as pd
from scipy.stats import chi2, chi2_contingency

import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="whitegrid", font_scale=1.9, palette="tab10")
In [4]:
# contingency table
#                     click   no click
# ------------------------------------
#        ad A     |     a        b
#        ad B     |     c        d
#
# chi^2 = (ad - bc)^2 (a + b + c + d) / [ (a + b)(c + d)(a + c)(b + d) ]
# degrees of freedom = (#cols - 1) x (#rows - 1) = (2 - 1)(2 - 1) = 1

# short example

# T = np.array([[36, 14], [30, 25]])
# c2 = np.linalg.det(T)**2 * T.sum() / ( T[0].sum()*T[1].sum()*T[:,0].sum()*T[:,1].sum() )
# p_value = 1 - chi2.cdf(x=c2, df=1)

# equivalent, using observed vs. expected counts:
# (36-31.429)**2/31.429 + (14-18.571)**2/18.571 + (30-34.571)**2/34.571 + (25-20.429)**2/20.429

                  Click   No Click
Advertisement A     36        14
Advertisement B     30        25
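
As a sanity check, the determinant form of the statistic from the comment cell above can be evaluated directly and compared with the observed-vs-expected form; a minimal sketch using the table above (reusing the imports from the first cell):

T = np.array([[36, 14], [30, 25]])

# determinant form: chi^2 = (ad - bc)^2 (a+b+c+d) / [(a+b)(c+d)(a+c)(b+d)]
det = T[0, 0] * T[1, 1] - T[0, 1] * T[1, 0]
c2_det = det**2 * T.sum() / (T[0].sum() * T[1].sum() * T[:, 0].sum() * T[:, 1].sum())

# observed-vs-expected form: sum of (O - E)^2 / E over the four cells
expected = np.outer(T.sum(axis=1), T.sum(axis=0)) / T.sum()
c2_exp = ((T - expected)**2 / expected).sum()

print(c2_det, c2_exp, 1 - chi2.cdf(x=c2_det, df=1))  # both statistics ~3.4177, p ~0.065
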
In [3]:
T = np.array([[36, 14], [30, 25]])
c2, p, df, observed = chi2_contingency(T, correction=False)
print("Chi-square test statistic {0:.4f} p-value {1:.3f} degrees of freedom {2:3d}".format(c2, p, df))

Chi-square test statistic 3.4177 p-value 0.065 degrees of freedom 1
In [4]:
# do the same with pandas
data = pd.DataFrame({"Ad": ["A", "B"], "Click": [36, 30], "No click": [14, 25]})
display(data)
contingency_table = data[["Click", "No click"]].values
display(contingency_table)
c2, p, df, observed = chi2_contingency(contingency_table, correction=False)
print("Chi-square test statistic {0:.4f} p-value {1:.3f} degrees of freedom {2:3d}".format(c2, p, df))

  Ad  Click  No click
0  A     36        14
1  B     30        25

array([[36, 14],
       [30, 25]])

Chi-square test statistic 3.4177 p-value 0.065 degrees of freedom 1
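
If the raw per-impression records were available rather than the aggregated counts, pd.crosstab would build the same table; a hypothetical sketch (the column names ad and click, and the reconstructed raw rows, are made up to match the counts above):

raw = pd.DataFrame({"ad":    ["A"] * 50 + ["B"] * 55,
                    "click": [1] * 36 + [0] * 14 + [1] * 30 + [0] * 25})
ct = pd.crosstab(raw["ad"], raw["click"])   # rows: ad A/B, columns: click 0/1
c2, p, df, observed = chi2_contingency(ct, correction=False)
print(c2, p)  # same statistic (3.4177) and p-value (0.065); column order does not matter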

(10,000 impressions)   not converted   converted
Advertisement A             4514          486
Advertisement B             4473          527

(40,000 impressions)   not converted   converted
Advertisement A            17998         2002
Advertisement B            17742         2258
In [5]:
T1 = np.array([[4514, 486], [4473, 527]])
T2 = np.array([[17998, 2002], [17742, 2258]])

# for a 2x2 table the Yates continuity correction is commonly used (the default correction=True)
c2, p, df, observed = chi2_contingency(T1)
print("T1 - Chi-square test statistic {0:7.4f} p-value {1:.7f} degrees of freedom {2:3d}".format(c2, p, df))

c2, p, df, observed = chi2_contingency(T2)
print("T2 - Chi-square test statistic {0:7.4f} p-value {1:.7f} degrees of freedom {2:3d}".format(c2, p, df))

T1 - Chi-square test statistic 1.7575 p-value 0.1849364 degrees of freedom 1
T2 - Chi-square test statistic 17.0835 p-value 0.0000358 degrees of freedom 1
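
To see what the correction changes here, a small sketch comparing both p-values for each table (the correction mainly matters when cell counts are small, so the difference is modest at these sample sizes):

for name, table in [("T1", T1), ("T2", T2)]:
    _, p_corr, _, _ = chi2_contingency(table)                    # with Yates correction (default)
    _, p_raw,  _, _ = chi2_contingency(table, correction=False)  # without correction
    print("{}: corrected p = {:.7f}, uncorrected p = {:.7f}".format(name, p_corr, p_raw))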

A discussion on p-values - p-hacking
In [6]:
class DataGenerator:
    def __init__(self, p1, p2):
        self.p1 = p1
        self.p2 = p2

    def next(self):
        click1 = 1 if (np.random.random() < self.p1) else 0
        click2 = 1 if (np.random.random() < self.p2) else 0
        return click1, click2

def get_p_value(T):
    # same as scipy.stats.chi2_contingency(T, correction=False)
    # det = T[0,0]*T[1,1] - T[0,1]*T[1,0]
    # c2 = float(det) / T[0].sum() * det / T[1].sum() * T.sum() / T[:,0].sum() / T[:,1].sum()
    # p = 1 - chi2.cdf(x=c2, df=1)
    c2, p, df, observed = chi2_contingency(T, correction=False)
    return p

def run_experiment(p1, p2, N):
    data = DataGenerator(p1, p2)
    p_values = np.empty(N)
    T = np.zeros((2, 2)).astype(np.float32)
    for i in range(N):
        c1, c2 = data.next()
        T[0, c1] += 1
        T[1, c2] += 1
        # ignore the first 20 values
        if i < 20:
            p_values[i] = None
        else:
            p_values[i] = get_p_value(T)
    plt.figure(figsize=(13, 11))
    plt.title("p-values", fontsize=30)
    sns.lineplot(data=[p_values, np.ones(N)*0.05], linewidth=2.5, legend=False)
    # sns.lineplot(data=p_values, linewidth=3.5)
    # plt.plot(p_values)
    # plt.plot(np.ones(N)*0.05)
    # plt.show()
In [7]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}
In [8]:
for _i in range(5):
    run_experiment(0.1, 0.11, 10000)

[output: five plots of the running p-value with the 0.05 reference line]

In [10]:
for _i in range(5):
    run_experiment(0.1, 0.1, 10000)

[output: five plots of the running p-value with the 0.05 reference line]
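
To quantify the p-hacking effect suggested by the plots above, one can count how often "peeking" at the running p-value would declare significance even when p1 == p2. A rough sketch reusing DataGenerator and get_p_value (peeking every 50 impressions is an arbitrary choice):

def fraction_of_significant_peeks(p1, p2, N=10000, n_runs=20):
    # fraction of runs in which some peek at the running p-value falls below 0.05
    hits = 0
    for _ in range(n_runs):
        data = DataGenerator(p1, p2)
        T = np.zeros((2, 2))
        for i in range(N):
            c1, c2 = data.next()
            T[0, c1] += 1
            T[1, c2] += 1
            if i >= 50 and i % 50 == 0 and get_p_value(T) < 0.05:
                hits += 1
                break
    return hits / n_runs

# fraction_of_significant_peeks(0.1, 0.1)  # typically well above 0.05 under the null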

Mission 1

Size = 294,478 rows x 5 columns

a) Using the data in the file ab_data.csv, find out whether the differences are significant or not (with and without the Fisher correction); see the sketch below.

b) Incrementally calculate the p-value, displaying its evolution (as run_experiment does above).
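
A possible starting point for part a); the column names group and converted are assumptions (they match a widely used version of ab_data.csv but are not given here):

df_ab = pd.read_csv("ab_data.csv")                    # expected shape: 294,478 rows x 5 columns
ct = pd.crosstab(df_ab["group"], df_ab["converted"])  # 2x2 table (assumed column names)

from scipy.stats import fisher_exact
print(chi2_contingency(ct, correction=False)[1])      # chi-square p-value, no correction
print(chi2_contingency(ct, correction=True)[1])       # chi-square p-value, Yates correction
print(fisher_exact(ct.values)[1])                     # Fisher's exact test p-value

For part b), get_p_value from the p-hacking section can be reused, updating a 2x2 table row by row as run_experiment does.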



In [ ]:

In [ ]: