In [1]:
import numpy as np
import pandas as pd
In [2]:
# Load the housing training set from the working directory and preview
# the first rows. Plain ASCII quotes are required: the typographic quotes
# previously used here are not valid Python string delimiters.
df = pd.read_csv('train.csv')
df.head()
Out[2]:
Id
MSSubClass
MSZoning
LotFrontage
LotArea
Street
Alley
LotShape
LandContour
Utilities
…
PoolArea
PoolQC
Fence
MiscFeature
MiscVal
MoSold
YrSold
SaleType
SaleCondition
SalePrice
0
1
60
RL
65.0
8450
Pave
NaN
Reg
Lvl
AllPub
…
0
NaN
NaN
NaN
0
2
2008
WD
Normal
208500
1
2
20
RL
80.0
9600
Pave
NaN
Reg
Lvl
AllPub
…
0
NaN
NaN
NaN
0
5
2007
WD
Normal
181500
2
3
60
RL
68.0
11250
Pave
NaN
IR1
Lvl
AllPub
…
0
NaN
NaN
NaN
0
9
2008
WD
Normal
223500
3
4
70
RL
60.0
9550
Pave
NaN
IR1
Lvl
AllPub
…
0
NaN
NaN
NaN
0
2
2006
WD
Abnorml
140000
4
5
60
RL
84.0
14260
Pave
NaN
IR1
Lvl
AllPub
…
0
NaN
NaN
NaN
0
12
2008
WD
Normal
250000
5 rows × 81 columns
In [5]:
import matplotlib.pyplot as plt
%matplotlib inline
In [8]:
# Normalised histogram of SalePrice using 100 equal-width bins.
# `density=True` replaces the `normed` keyword, which was deprecated in
# matplotlib 2.1 and removed in 3.1; the bar heights are still scaled so
# that the histogram integrates to 1.
plt.hist(df['SalePrice'], bins=100, density=True, facecolor='green', alpha=0.5)
Out[8]:
(array([ 4.75580827e-07, 0.00000000e+00, 4.75580827e-07,
5.70696992e-07, 5.70696992e-07, 6.65813158e-07,
3.04371729e-06, 2.75836880e-06, 2.18767180e-06,
3.32906579e-06, 5.61185376e-06, 6.37278308e-06,
6.94348007e-06, 8.75068721e-06, 8.46533872e-06,
8.65557105e-06, 5.89720225e-06, 5.70696992e-06,
5.42162143e-06, 6.94348007e-06, 6.37278308e-06,
5.13627293e-06, 4.28022744e-06, 3.13883346e-06,
2.85348496e-06, 2.94860113e-06, 2.94860113e-06,
3.13883346e-06, 2.47302030e-06, 2.18767180e-06,
1.61697481e-06, 1.80720714e-06, 1.71209098e-06,
1.71209098e-06, 1.33162632e-06, 1.04627782e-06,
4.75580827e-07, 7.60929323e-07, 1.23651015e-06,
1.14139398e-06, 9.51161654e-07, 5.70696992e-07,
5.70696992e-07, 4.75580827e-07, 9.51161654e-08,
2.85348496e-07, 4.75580827e-07, 5.70696992e-07,
4.75580827e-07, 3.80464661e-07, 2.85348496e-07,
1.90232331e-07, 2.85348496e-07, 9.51161654e-08,
2.85348496e-07, 9.51161654e-08, 1.90232331e-07,
1.90232331e-07, 0.00000000e+00, 1.90232331e-07,
0.00000000e+00, 9.51161654e-08, 9.51161654e-08,
0.00000000e+00, 9.51161654e-08, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
9.51161654e-08, 0.00000000e+00, 0.00000000e+00,
1.90232331e-07, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 9.51161654e-08, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 9.51161654e-08,
9.51161654e-08, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 9.51161654e-08,
9.51161654e-08]),
array([ 34900., 42101., 49302., 56503., 63704., 70905.,
78106., 85307., 92508., 99709., 106910., 114111.,
121312., 128513., 135714., 142915., 150116., 157317.,
164518., 171719., 178920., 186121., 193322., 200523.,
207724., 214925., 222126., 229327., 236528., 243729.,
250930., 258131., 265332., 272533., 279734., 286935.,
294136., 301337., 308538., 315739., 322940., 330141.,
337342., 344543., 351744., 358945., 366146., 373347.,
380548., 387749., 394950., 402151., 409352., 416553.,
423754., 430955., 438156., 445357., 452558., 459759.,
466960., 474161., 481362., 488563., 495764., 502965.,
510166., 517367., 524568., 531769., 538970., 546171.,
553372., 560573., 567774., 574975., 582176., 589377.,
596578., 603779., 610980., 618181., 625382., 632583.,
639784., 646985., 654186., 661387., 668588., 675789.,
682990., 690191., 697392., 704593., 711794., 718995.,
726196., 733397., 740598., 747799., 755000.]),
 <a list of 100 Patch objects>)

In [ ]:
# Summary statistics for SalePrice, followed by the median (shown separately
# from the describe() table so it stands out).
print("Some Statistics of the Housing Price:\n")
print(df['SalePrice'].describe())
print("\nThe median of the Housing Price is: ", df['SalePrice'].median(axis=0))
In [9]:
# Box plot of SalePrice; points drawn beyond the whiskers are the fliers.
plt.boxplot(df['SalePrice'])
Out[9]:
{‘boxes’: [
‘caps’: [
‘fliers’: [
‘means’: [],
‘medians’: [
‘whiskers’: [

In [11]:
# Two box plots side by side (the same SalePrice series twice), with custom
# x-axis labels. boxplot() places box i at x = i (1-based), so exactly one
# tick position and label is supplied per box; the previous third tick had
# no box to label.
plt.boxplot([df['SalePrice'], df['SalePrice']])
plt.xticks([1, 2], ['mon', 'tue'])
Out[11]:
([
)

In [12]:
# Inspect the Neighborhood column (pandas truncates the display of long Series).
df['Neighborhood']
Out[12]:
0 CollgCr
1 Veenker
2 CollgCr
3 Crawfor
4 NoRidge
5 Mitchel
6 Somerst
7 NWAmes
8 OldTown
9 BrkSide
10 Sawyer
11 NridgHt
12 Sawyer
13 CollgCr
14 NAmes
15 BrkSide
16 NAmes
17 Sawyer
18 SawyerW
19 NAmes
20 NridgHt
21 IDOTRR
22 CollgCr
23 MeadowV
24 Sawyer
25 NridgHt
26 NAmes
27 NridgHt
28 NAmes
29 BrkSide
…
1430 Gilbert
1431 NPkVill
1432 OldTown
1433 Gilbert
1434 Mitchel
1435 NAmes
1436 NAmes
1437 NridgHt
1438 OldTown
1439 NWAmes
1440 Crawfor
1441 CollgCr
1442 Somerst
1443 BrkSide
1444 CollgCr
1445 Sawyer
1446 Mitchel
1447 CollgCr
1448 Edwards
1449 MeadowV
1450 NAmes
1451 Somerst
1452 Edwards
1453 Mitchel
1454 Somerst
1455 Gilbert
1456 NWAmes
1457 Crawfor
1458 NAmes
1459 Edwards
Name: Neighborhood, dtype: object
In [ ]: