## 1. Creating some data

```python
bcr = 0.1 # Baseline conversion rate
lift = 0.02 # Difference between the 2 groups
size = 4000 # Total size of data, each group will have 2000
df, summary = generate_data_equal_size(size, bcr, lift)
```

## 2. Binomial distribution

```python
fig, ax = plt.subplots(figsize=(8,5))
data_a = binom(n_A, p_A).pmf(xx)
data_b = binom(n_B, p_B).pmf(xx)
ax.bar(xx, data_a, alpha=0.5)
ax.bar(xx, data_b, alpha=0.5)
plt.xlabel('converted')
plt.ylabel('probability')
```
```python
outcomes = []
for i in range(100000):
    p = 0.1
    outcome = np.random.binomial(1, p)
    outcomes.append(outcome)
sns.countplot(outcomes)
```
```python
sample_means = []
# Simulating 1000 samples
for i in range(1000):
    # For each sample, we simulate 2000 users
    sample = random.choices(outcomes, k=2000)
    mean = np.mean(sample)
    sample_means.append(mean)
sns.distplot(sample_means)
```

## 3. Forming the hypothesis

```python
fig, ax = plt.subplots()
xx = np.linspace(0.07, 0.17, 1000)
data_a_norm = norm.pdf(xx, p_A, np.sqrt(p_A*(1-p_A) / n_A))
data_b_norm = norm.pdf(xx, p_B, np.sqrt(p_B*(1-p_B) / n_B))
sns.lineplot(xx, data_a_norm, color='blue', ax=ax)
sns.lineplot(xx, data_b_norm, color='red', ax=ax)
ax.axvline(p_A, color='cyan', linestyle='--')
ax.axvline(p_B, color='orange', linestyle='--')
plt.xlabel('converted')
plt.ylabel('probability')
```

## 4. Calculating the sample size

```python
plot_null(p_A=0.1, p_B=0.12, n_A=2000, n_B=2000)
```
```python
# Since we draw the graph for 3, let's just assume pooled SE is calculated with bcr=0.1 and lift=0.02
SE_0 = calculate_SE(0.1, 0.12, 2000, 2000)
SE_1 = calculate_SE(0.1, 0.12, 2000, 2000)
SE_2 = calculate_SE(0.1, 0.12, 2000, 2000)
plot_multiple_alt(0, 0.01, 0.02, SE_0, SE_1, SE_2)
```
```python
# Since we draw the graph for 3, let's just assume pooled SE is calculated with bcr=0.1 and lift=0.02
SE_0 = calculate_SE(0.1, 0.12, 4000, 4000)
SE_1 = calculate_SE(0.1, 0.12, 4000, 4000)
SE_2 = calculate_SE(0.1, 0.12, 4000, 4000)
plot_multiple_alt(0, 0.01, 0.02, SE_0, SE_1, SE_2)
```