In [1]:
import numpy as np
import pandas as pd
In [2]:
# Load train.csv (path is relative to the notebook's working directory) into
# the DataFrame that the cells below read from.
df = pd.read_csv('train.csv')
In [ ]:
import matplotlib.pyplot as plt
%matplotlib inline
In [ ]:
# One box per neighborhood. Sorting the names keeps the box order stable
# between runs (iterating a plain set gives no guaranteed order).
neighborhoodNames = sorted(df['Neighborhood'].dropna().unique())
# A single .loc[row_mask, column] lookup avoids chained indexing.
neiPrices = [df.loc[df['Neighborhood'] == name, 'SalePrice'] for name in neighborhoodNames]

# figsize is measured in inches, so keep it modest; (16, 8) gives a 2:1 canvas.
plt.figure(figsize=(16, 8))
plt.boxplot(neiPrices, widths=1)
# Boxes are drawn at x = 1..n; label them explicitly and rotate the names so
# they do not overlap.
plt.xticks(list(range(1, len(neiPrices) + 1)), neighborhoodNames, rotation=90)
plt.xlabel('Neighborhood')
plt.ylabel('SalePrice')
plt.show()
In [8]:
# Density-normalized histogram of SalePrice over 100 bins.
# `density=True` is the replacement for the `normed` keyword, which Matplotlib
# no longer accepts.
plt.hist(df['SalePrice'], bins=100, density=True, facecolor='green', alpha=0.5)
Out[8]:
(array([ 4.75580827e-07, 0.00000000e+00, 4.75580827e-07,
5.70696992e-07, 5.70696992e-07, 6.65813158e-07,
3.04371729e-06, 2.75836880e-06, 2.18767180e-06,
3.32906579e-06, 5.61185376e-06, 6.37278308e-06,
6.94348007e-06, 8.75068721e-06, 8.46533872e-06,
8.65557105e-06, 5.89720225e-06, 5.70696992e-06,
5.42162143e-06, 6.94348007e-06, 6.37278308e-06,
5.13627293e-06, 4.28022744e-06, 3.13883346e-06,
2.85348496e-06, 2.94860113e-06, 2.94860113e-06,
3.13883346e-06, 2.47302030e-06, 2.18767180e-06,
1.61697481e-06, 1.80720714e-06, 1.71209098e-06,
1.71209098e-06, 1.33162632e-06, 1.04627782e-06,
4.75580827e-07, 7.60929323e-07, 1.23651015e-06,
1.14139398e-06, 9.51161654e-07, 5.70696992e-07,
5.70696992e-07, 4.75580827e-07, 9.51161654e-08,
2.85348496e-07, 4.75580827e-07, 5.70696992e-07,
4.75580827e-07, 3.80464661e-07, 2.85348496e-07,
1.90232331e-07, 2.85348496e-07, 9.51161654e-08,
2.85348496e-07, 9.51161654e-08, 1.90232331e-07,
1.90232331e-07, 0.00000000e+00, 1.90232331e-07,
0.00000000e+00, 9.51161654e-08, 9.51161654e-08,
0.00000000e+00, 9.51161654e-08, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
9.51161654e-08, 0.00000000e+00, 0.00000000e+00,
1.90232331e-07, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 9.51161654e-08, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 9.51161654e-08,
9.51161654e-08, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 9.51161654e-08,
9.51161654e-08]),
array([ 34900., 42101., 49302., 56503., 63704., 70905.,
78106., 85307., 92508., 99709., 106910., 114111.,
121312., 128513., 135714., 142915., 150116., 157317.,
164518., 171719., 178920., 186121., 193322., 200523.,
207724., 214925., 222126., 229327., 236528., 243729.,
250930., 258131., 265332., 272533., 279734., 286935.,
294136., 301337., 308538., 315739., 322940., 330141.,
337342., 344543., 351744., 358945., 366146., 373347.,
380548., 387749., 394950., 402151., 409352., 416553.,
423754., 430955., 438156., 445357., 452558., 459759.,
466960., 474161., 481362., 488563., 495764., 502965.,
510166., 517367., 524568., 531769., 538970., 546171.,
553372., 560573., 567774., 574975., 582176., 589377.,
596578., 603779., 610980., 618181., 625382., 632583.,
639784., 646985., 654186., 661387., 668588., 675789.,
682990., 690191., 697392., 704593., 711794., 718995.,
726196., 733397., 740598., 747799., 755000.]),
)

In [ ]:
# Print the describe() summary of SalePrice, followed by its median.
print("Some Statistics of the Housing Price:\n")
print(df['SalePrice'].describe())
print("\nThe median of the Housing Price is: ", df['SalePrice'].median(axis=0))
In [9]:
# Single box plot of the SalePrice column.
plt.boxplot(df['SalePrice'])
Out[9]:
{‘boxes’: [
‘caps’: [
‘fliers’: [
‘means’: [],
‘medians’: [
‘whiskers’: [

In [11]:
# Two series are plotted, so boxes appear at x = 1 and x = 2; supply exactly
# one tick label per box.
plt.boxplot([df['SalePrice'], df['SalePrice']])
plt.xticks([1, 2], ['mon', 'tue'])
Out[11]:
([
)

In [12]:
# Display the Neighborhood column (pandas truncates the middle of long Series).
df['Neighborhood']
Out[12]:
0 CollgCr
1 Veenker
2 CollgCr
3 Crawfor
4 NoRidge
5 Mitchel
6 Somerst
7 NWAmes
8 OldTown
9 BrkSide
10 Sawyer
11 NridgHt
12 Sawyer
13 CollgCr
14 NAmes
15 BrkSide
16 NAmes
17 Sawyer
18 SawyerW
19 NAmes
20 NridgHt
21 IDOTRR
22 CollgCr
23 MeadowV
24 Sawyer
25 NridgHt
26 NAmes
27 NridgHt
28 NAmes
29 BrkSide
…
1430 Gilbert
1431 NPkVill
1432 OldTown
1433 Gilbert
1434 Mitchel
1435 NAmes
1436 NAmes
1437 NridgHt
1438 OldTown
1439 NWAmes
1440 Crawfor
1441 CollgCr
1442 Somerst
1443 BrkSide
1444 CollgCr
1445 Sawyer
1446 Mitchel
1447 CollgCr
1448 Edwards
1449 MeadowV
1450 NAmes
1451 Somerst
1452 Edwards
1453 Mitchel
1454 Somerst
1455 Gilbert
1456 NWAmes
1457 Crawfor
1458 NAmes
1459 Edwards
Name: Neighborhood, dtype: object
In [2]:
# Distinct neighborhood names. Requires the cell that defines `df` to have
# been run in the current kernel first.
set(df['Neighborhood'])
—————————————————————————
NameError Traceback (most recent call last)
—-> 1 set(df[‘Neighborhood’])
NameError: name ‘df’ is not defined
In [15]:
In [24]:
In [1]:
In [10]:
In [ ]: