程序代写代做代考 python Untitled2

Untitled2

In [2]:

import pandas as pd
import numpy as np
from scipy import interp
import matplotlib.pyplot as plt

from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc
from sklearn.cross_validation import StratifiedKFold, KFold
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB

from sklearn import tree

from sklearn import linear_model

# In[4]:

# Load the training and test sets from CSV files in the working directory.
# (Straight ASCII quotes are required; typographic quotes are a SyntaxError.)
trainData = pd.read_csv("ds_training.csv")

testData = pd.read_csv("ds_test.csv")

In [3]:

# Dimensions of the training frame as a (rows, columns) tuple.
trainData.shape

Out[3]:

(38010, 371)

In [4]:

# Names of the 17 feature columns examined in the cells that follow.
# Written with straight ASCII quotes so the list is valid Python.
column = [
    'saldo_var30',
    'num_var4',
    'num_var35',
    'ind_var30',
    'num_var30',
    'saldo_medio_var5_hace2',
    'saldo_var42',
    'num_meses_var5_ult3',
    'saldo_medio_var5_ult1',
    'saldo_var5',
    'saldo_medio_var5_ult3',
    'num_var42',
    'ind_var5',
    'num_var5',
    'var15',
    'saldo_medio_var5_hace3',
    'var36',
]

In [5]:

# Display only the feature columns listed in `column`.
trainData[column]

Out[5]:

saldo_var30 num_var4 num_var35 ind_var30 num_var30 saldo_medio_var5_hace2 saldo_var42 num_meses_var5_ult3 saldo_medio_var5_ult1 saldo_var5 saldo_medio_var5_ult3 num_var42 ind_var5 num_var5 var15 saldo_medio_var5_hace3 var36
0 90.00 1 3 1 3 90.00 90.00 3 90.00 90.00 62.91 3 1 3 57 8.70 3
1 0.00 0 0 0 0 0.00 0.00 0 0.00 0.00 0.00 0 0 0 64 0.00 99
2 0.00 0 0 0 0 0.00 0.00 0 0.00 0.00 0.00 0 0 0 26 0.00 99
3 0.00 0 0 0 0 0.00 0.00 0 0.00 0.00 0.00 0 0 0 23 0.00 99
4 12.39 4 12 1 3 246.39 12.39 2 0.00 0.00 0.00 3 0 0 40 0.99 2
5 937.59 3 9 1 6 12.54 937.59 2 91.95 211.71 52.26 6 1 3 23 0.00 1
6 3.00 1 3 1 3 3.00 3.00 3 3.00 3.00 2.61 3 1 3 24 1.86 3
7 0.00 0 0 0 0 0.00 0.00 0 0.00 0.00 0.00 0 0 0 30 0.00 99
8 0.00 0 0 0 0 0.00 0.00 0 0.00 0.00 0.00 0 0 0 39 0.00 99
9 90.00 1 3 1 3 90.00 90.00 3 90.00 90.00 69.63 3 1 3 66 28.92 3
10 3.00 1 3 1 3 3.00 3.00 3 3.00 3.00 2.61 3 1 3 23 1.83 99
11 300.00 2 6 1 3 560.28 300.00 3 763.92 300.00 484.95 3 1 3 36 130.68 1
12 150.00 1 3 1 3 0.00 150.00 0 0.00 0.00 0.00 3 0 0 44 0.00 2
13 153.00 1 3 1 3 114.30 153.00 3 153.00 153.00 89.97 3 1 3 23 2.61 2
14 3.00 1 3 1 3 3.00 3.00 3 3.00 3.00 2.85 3 1 3 28 2.55 3
15 6.00 1 3 1 3 6.00 6.00 3 6.00 6.00 5.58 3 1 3 23 4.77 3
16 0.00 0 0 0 0 0.00 0.00 0 0.00 0.00 0.00 0 0 0 36 0.00 99
17 15.00 1 3 1 3 15.00 15.00 3 15.00 15.00 12.66 3 1 3 23 8.01 3
18 0.00 0 0 0 0 0.00 0.00 0 0.00 0.00 0.00 0 0 0 66 0.00 99
19 17365.20 3 9 1 6 2321.76 865.20 3 1064.91 865.20 1282.95 3 1 3 23 462.21 1
20 330.00 2 6 1 3 90.00 330.00 2 387.99 330.00 239.01 3 1 3 23 0.00 99
21 120.00 1 3 1 3 120.00 120.00 3 120.00 120.00 82.68 3 1 3 23 8.01 3
22 4309.35 3 9 1 3 4042.50 4309.35 3 5296.59 4309.35 3838.02 3 1 3 46 2174.97 1
23 0.00 0 0 0 0 0.00 0.00 0 0.00 0.00 0.00 0 0 0 25 0.00 99
24 90.00 1 3 1 3 90.00 90.00 3 90.00 90.00 69.30 3 1 3 28 27.90 3
25 557.88 4 12 1 3 205.65 557.88 2 0.00 0.00 0.00 3 0 0 40 1.47 1
26 0.00 0 0 0 0 0.00 0.00 0 0.00 0.00 0.00 0 0 0 23 0.00 99
27 3.00 1 3 1 3 3.00 3.00 3 3.00 3.00 2.37 3 1 3 28 1.14 3
28 0.00 0 0 0 0 0.00 0.00 0 0.00 0.00 0.00 0 0 0 48 0.00 99
29 0.00 0 0 0 0 0.00 0.00 0 0.00 0.00 0.00 0 0 0 36 0.00 99
… … … … … … … … … … … … … … … … … …
37980 3.00 1 3 1 3 3.00 3.00 3 3.00 3.00 2.52 3 1 3 23 1.59 3
37981 6.00 1 6 1 6 6.00 6.00 3 6.00 6.00 4.44 6 1 6 24 1.35 3
37982 5370.18 2 6 1 3 1240.32 5370.18 3 3342.33 5370.18 1617.54 3 1 3 31 270.00 1
37983 0.00 0 0 0 0 0.00 0.00 0 0.00 0.00 0.00 0 0 0 40 0.00 99
37984 3.00 1 3 1 3 3.00 3.00 3 3.00 3.00 2.58 3 1 3 25 1.74 99
37985 3.00 1 3 1 3 3.00 3.00 3 3.00 3.00 2.91 3 1 3 25 2.70 3
37986 3.00 1 3 1 3 3.00 3.00 3 3.00 3.00 2.94 3 1 3 23 2.79 3
37987 3.00 1 3 1 3 3.00 3.00 3 3.00 3.00 2.52 3 1 3 23 1.59 99
37988 3.00 1 3 1 3 3.00 3.00 3 3.00 3.00 2.40 3 1 3 27 1.17 99
37989 3.00 1 3 1 3 3.00 3.00 3 3.00 3.00 2.55 3 1 3 25 1.68 3
37990 240.00 1 3 1 3 90.00 240.00 3 185.07 240.00 106.20 3 1 3 22 43.56 1
37991 90.00 1 3 1 3 90.00 90.00 3 90.00 90.00 68.01 3 1 3 23 24.03 3
37992 0.00 0 0 0 0 0.00 0.00 0 0.00 0.00 0.00 0 0 0 52 0.00 99
37993 60.00 1 3 1 3 6.78 60.00 2 36.96 60.00 21.87 3 1 3 59 0.00 1
37994 0.00 0 0 0 0 0.00 0.00 0 0.00 0.00 0.00 0 0 0 43 0.00 99
37995 390003.00 1 3 1 3 13449.00 0.00 2 0.00 0.00 0.00 0 0 0 41 0.27 2
37996 136800.00 2 6 1 6 5819.97 1800.00 2 1800.00 1800.00 3810.00 3 1 3 50 0.00 2
37997 3488.52 2 6 1 3 1296.00 3488.52 3 2606.28 3488.52 1308.18 3 1 3 53 22.23 1
37998 150021.00 2 6 1 6 26635.56 21.00 2 21.00 21.00 13328.28 3 1 3 42 0.00 2
37999 90.00 1 3 1 3 90.00 90.00 3 90.00 90.00 76.44 3 1 3 35 49.32 3
38000 3.00 1 3 1 3 3.00 3.00 3 3.00 3.00 2.34 3 1 3 23 1.05 3
38001 153000.00 1 3 1 3 7258.02 0.00 1 0.00 0.00 0.00 0 0 0 51 0.00 1
38002 1260.00 1 3 1 3 740.01 1260.00 3 1260.00 1260.00 673.77 3 1 3 59 21.30 99
38003 0.00 0 0 0 0 0.00 0.00 0 0.00 0.00 0.00 0 0 0 23 0.00 99
38004 3.00 1 3 1 3 3.00 3.00 3 3.00 3.00 2.79 3 1 3 26 2.40 3
38005 0.00 0 0 0 0 0.00 0.00 0 0.00 0.00 0.00 0 0 0 23 0.00 99
38006 3.00 1 3 1 3 3.00 3.00 3 3.00 3.00 2.61 3 1 3 28 1.83 99
38007 3.00 1 3 1 3 3.00 3.00 3 3.00 3.00 2.79 3 1 3 28 2.40 99
38008 3.00 1 3 1 3 3.00 3.00 3 3.00 3.00 2.79 3 1 3 23 2.40 3
38009 0.00 0 0 0 0 0.00 0.00 0 0.00 0.00 0.00 0 0 0 23 0.00 99

38010 rows × 17 columns

In [15]:

# Histogram of the TARGET column, showing how the class labels are distributed.
plt.figure()
plt.title('Class Label Distribution')
trainData['TARGET'].hist()
plt.show()

In [14]:

# Display the TARGET column (pandas truncates the middle of long Series).
trainData['TARGET']

Out[14]:

0 0
1 0
2 0
3 0
4 0
5 0
6 0
7 0
8 0
9 0
10 0
11 0
12 0
13 0
14 0
15 0
16 0
17 0
18 0
19 0
20 0
21 0
22 0
23 0
24 0
25 0
26 0
27 0
28 0
29 0
..
37980 0
37981 0
37982 0
37983 0
37984 0
37985 0
37986 0
37987 0
37988 0
37989 0
37990 0
37991 0
37992 0
37993 0
37994 0
37995 0
37996 0
37997 0
37998 0
37999 0
38000 0
38001 0
38002 0
38003 0
38004 0
38005 0
38006 0
38007 0
38008 0
38009 0
Name: TARGET, dtype: int64

In [19]:

# Sum of TARGET divided by the number of rows, i.e. the mean of the column.
# Series.mean() is vectorized and always returns a float, so the `* 1.0`
# guard against integer division is no longer needed.
trainData['TARGET'].mean()

Out[19]:

0.039384372533543807

In [18]:

# Re-check the training frame dimensions as (rows, columns).
trainData.shape

Out[18]:

(38010, 371)

In [24]:

# Box plot of the raw (unscaled) saldo_var30 values.
plt.figure()
plt.boxplot(trainData['saldo_var30'])
plt.show()

In [26]:

# Histogram of num_var4 using pandas' default binning.
plt.figure()
trainData['num_var4'].hist()
plt.show()

In [1]:

# For every selected feature draw two separate figures: a histogram and a
# box plot of the raw values. The loop body must be indented; `column` has to
# be defined (see the cell that builds the feature list) before this runs.
for name in column:
    # Figure 1: histogram of the raw feature values.
    plt.figure()
    trainData[name].hist()
    plt.title(name + ' histogram')
    plt.show()

    # Figure 2: box plot of the same feature.
    plt.figure()
    plt.boxplot(trainData[name])
    plt.title(name + ' box plot')
    plt.show()

---------------------------------------------------------------------------
NameError Traceback (most recent call last)
in ()
----> 1 for name in column:
2 plt.figure()
3 # plt.boxplot(trainData[‘num_var4’])
4 trainData[name].hist()
5 plt.title(name + ‘ histogram’)

NameError: name ‘column’ is not defined

In [5]:

# NOTE(review): this import sits mid-notebook, while the other imports are in the first cell.
from sklearn import preprocessing

# Scale the selected feature columns. The result is indexed as a 2-D array
# (scaled[:, i]) further down, and its column means are displayed as ~1e-16.
scaled = preprocessing.scale(trainData[column])

In [10]:

# Scale the selected features, then draw one box plot per scaled column.
# Column i of `scaled` corresponds to column[i], so enumerate() keeps the
# array index and the feature name in step.
scaled = preprocessing.scale(trainData[column])
for (i, name) in enumerate(column):
    plt.figure()
    plt.boxplot(scaled[:, i])
    plt.title(name + ' normalized box plot')
    plt.show()

In [12]:

# Mean along axis 0 (one value per feature column) of the scaled array;
# the output below shows values on the order of 1e-16, i.e. numerically zero.
scaled.mean(0)

Out[12]:

array([ -2.16154669e-16, -3.09109924e-16, -3.29059473e-16,
1.32136694e-15, -1.09992988e-15, -4.88981535e-16,
4.11886608e-17, -2.96953260e-16, -2.35842799e-16,
-1.94023595e-16, -4.60166413e-16, 2.67902278e-17,
-8.15331900e-16, 1.07896971e-16, -1.65175249e-16,
5.82607496e-16, 8.16815702e-16])

In [13]:

# Show 10 randomly chosen rows. No random_state is passed, so a re-run
# will generally display different rows than the saved output.
trainData.sample(10)

Out[13]:

ID var3 var15 imp_ent_var16_ult1 imp_op_var39_comer_ult1 imp_op_var39_comer_ult3 imp_op_var40_comer_ult1 imp_op_var40_comer_ult3 imp_op_var40_efect_ult1 imp_op_var40_efect_ult3 … saldo_medio_var33_hace2 saldo_medio_var33_hace3 saldo_medio_var33_ult1 saldo_medio_var33_ult3 saldo_medio_var44_hace2 saldo_medio_var44_hace3 saldo_medio_var44_ult1 saldo_medio_var44_ult3 var38 TARGET
9394 75621 2 32 0.0 0.0 0.0 0.0 0.0 0 0.0 … 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 101507.280000 0
30230 20210 2 23 0.0 0.0 0.0 0.0 0.0 0 0.0 … 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 117310.979016 0
16379 89056 2 23 0.0 0.0 0.0 0.0 0.0 0 0.0 … 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 62772.930000 0
32549 35863 2 36 0.0 0.0 0.0 0.0 0.0 0 0.0 … 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 52008.870000 0
1899 106942 2 55 0.0 0.0 0.0 0.0 0.0 0 0.0 … 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 88322.130000 0
31094 23107 2 23 0.0 0.0 0.0 0.0 0.0 0 0.0 … 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 30755.760000 0
12858 105518 2 23 0.0 0.0 0.0 0.0 0.0 0 0.0 … 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 117310.979016 0
25824 9537 2 23 0.0 0.0 0.0 0.0 0.0 0 0.0 … 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 67806.990000 0
20887 147372 2 38 0.0 0.0 0.0 0.0 0.0 0 0.0 … 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 170964.570000 0
37852 82883 2 33 0.0 0.0 0.0 0.0 0.0 0 0.0 … 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 145893.720000 0

10 rows × 371 columns

In [15]:

# Training frame dimensions as (rows, columns).
trainData.shape

Out[15]:

(38010, 371)

In [16]:

# Sum of the TARGET column. Series.sum() is vectorized, unlike the builtin
# sum(), which iterates the Series element by element.
trainData["TARGET"].sum()

Out[16]:

1497

In [18]:

# Number of rows minus the sum of TARGET. The operator must be the ASCII
# hyphen-minus; an en dash is not valid Python.
trainData.shape[0] - trainData["TARGET"].sum()

Out[18]:

36513

In [20]:

# 36513 (row count minus TARGET sum) divided by 1497 (TARGET sum); both
# literals are copied from earlier cell outputs. The float literal forces
# true division.
36513.0 / 1497

Out[20]:

24.390781563126254

In [22]:

# 36513 (row count minus TARGET sum) as a fraction of all 38010 rows;
# both literals are copied from earlier cell outputs.
36513.0 / 38010

Out[22]:

0.9606156274664562

In [23]:

# Twice the TARGET sum (1497, copied from an earlier output).
# NOTE(review): purpose is not shown here — presumably the size of a
# class-balanced subsample; confirm.
1497*2

Out[23]:

2994

In [25]:

# Total row count (38010) divided by 2994, the result of the previous cell.
38010.0 / 2994

Out[25]:

12.695390781563127

In [ ]: