Tutorial_01_task_2_solution
In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the customer dataset. The path is relative, so it is resolved against
# the kernel's current working directory.
df = pd.read_excel("DirectMarketing.xlsx")

# Keep the Salary column as its own Series; it feeds the first histogram.
salary = df['Salary']

# Last expression of the cell: preview the first rows as a sanity check.
salary.head()
Out[1]:
0 47500
1 63600
2 13500
3 85600
4 68400
Name: Salary, dtype: int64
In [2]:
# Part 1: histogram of salary across all customers.
# An explicit Figure/Axes pair is used so that every call below targets this
# figure, regardless of what pyplot currently considers the "active" one.
fig, ax = plt.subplots()
ax.hist(salary)
ax.set_xlabel("Salary")
ax.set_ylabel("Number of Customers")
ax.set_title("Customer Salary Distribution")

# Persist the chart to disk, then leave `fig` as the cell's last expression
# so the notebook renders it.
fig.savefig("part1.jpg")
fig
Out[2]:
In [3]:
# Part 2: salary distribution restricted to customers who spent more than
# the threshold below (strictly greater than, so exactly 500 is excluded).
spend_threshold = 500
good_spenders = df[df['AmountSpent'] > spend_threshold]

fig, ax = plt.subplots()
ax.hist(good_spenders['Salary'])
ax.set_xlabel("Salary")
ax.set_ylabel("Number of Customers")
ax.set_title("Big Spenders – Salary Distribution")

# Save the chart, then display it as the cell output.
fig.savefig("part2.jpg")
fig
Out[3]:
In [4]:
# Part 3: compare the amount spent by male and female customers.
# Split the full frame by the Gender column; both subsets keep every column.
male_cust_spent = df[df['Gender'] == "Male"]
female_cust_spent = df[df['Gender'] == "Female"]

fig, ax = plt.subplots()
# alpha=0.5 keeps both histograms visible where they overlap.
# NOTE: no `bins` argument is passed, so each call derives its own bin edges
# from its own data range; the two sets of bars are not guaranteed to align.
ax.hist(male_cust_spent['AmountSpent'], alpha=0.5, label="Male")
ax.hist(female_cust_spent['AmountSpent'], alpha=0.5, label="Female")
ax.legend()
ax.set_xlabel("Amount Spent")
ax.set_ylabel("Count")
ax.set_title("Spending Distributions of Male and Female Cohorts")

# Save the chart, then display it as the cell output.
fig.savefig("part3.jpg")
fig
Out[4]:
In [5]:
# Part 4: mean and variance of AmountSpent for each gender cohort.
# pandas' Series.var() defaults to ddof=1, i.e. the sample variance.
avg_male_spent = male_cust_spent['AmountSpent'].mean()
var_male_spent = male_cust_spent['AmountSpent'].var()
print("MALE – ${0:.2f} mean spending with variance {1:.2f}".format(avg_male_spent, var_male_spent))

avg_female_spent = female_cust_spent['AmountSpent'].mean()
var_female_spent = female_cust_spent['AmountSpent'].var()
print("FEMALE – ${0:.2f} mean spending with variance {1:.2f}".format(avg_female_spent, var_female_spent))
MALE – $1412.85 mean spending with variance 946281.38
FEMALE – $1025.34 mean spending with variance 829063.08