Untitled1
In [14]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import interp
from sklearn import datasets, linear_model, svm
from sklearn.cross_validation import KFold, StratifiedKFold
from sklearn.metrics import auc, roc_curve
In [4]:
# Load the training set from a CSV in the notebook's working directory.
trainData = pd.read_csv("ds_training.csv")
In [8]:
# Feature frame: every column except the 'TARGET' label.
# `axis` is passed by keyword because newer pandas releases no longer accept
# it positionally.
X = trainData.drop('TARGET', axis=1)
In [9]:
# Label series used as the prediction target.
y = trainData['TARGET']
In [ ]:
In [ ]:
# Hand-picked subset of feature columns fed to the classifier.
column = [
    'saldo_var30', 'num_var4', 'num_var35', 'ind_var30', 'num_var30',
    'saldo_medio_var5_hace2', 'saldo_var42', 'num_meses_var5_ult3',
    'saldo_medio_var5_ult1', 'saldo_var5', 'saldo_medio_var5_ult3',
    'num_var42', 'ind_var5', 'num_var5', 'var15',
    'saldo_medio_var5_hace3', 'var36',
]

# Convert to NumPy arrays so the cross-validation index arrays can be used
# for positional row selection (X[train], y[test]).
# Both arrays are derived from `trainData` rather than from the previous
# values of X / y, so this cell can be re-run safely. 'TARGET' is not in
# `column`, so the selection equals picking the same columns from the
# feature frame. `.values` replaces the deprecated `as_matrix`.
X = trainData[column].values
y = trainData['TARGET'].values
In [ ]:
# Cross-validated ROC analysis: fit the classifier on each training fold,
# score the held-out fold, and plot the per-fold ROC curves together with
# their mean.
#
# NOTE(review): KFold(n, n_folds=...) and iterating / len() over the CV object
# are the sklearn.cross_validation API. On scikit-learn releases that only
# ship sklearn.model_selection this needs KFold(n_splits=...).split(X).
random_state = np.random.RandomState(0)

# Seed the shuffle so the fold assignment is reproducible between runs.
cv = KFold(y.shape[0], n_folds=3, shuffle=True, random_state=random_state)

# `linear_model` comes from `from sklearn import linear_model` in the
# imports cell.
classifier = linear_model.LogisticRegression()

# Common FPR grid on which every fold's TPR is resampled before averaging.
mean_fpr = np.linspace(0, 1, 100)
mean_tpr = np.zeros_like(mean_fpr)

fig, ax = plt.subplots()

for i, (train, test) in enumerate(cv):
    # Debug aid: shows the row indices assigned to this fold.
    print(train, test)

    probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])

    # Column 1 of predict_proba is used as the score of the positive class
    # (assumes a binary TARGET -- TODO confirm).
    fpr, tpr, _ = roc_curve(y[test], probas_[:, 1])

    # np.interp is the function scipy.interp aliased; resample this fold's
    # curve onto the shared grid and pin the curve's origin at (0, 0).
    mean_tpr += np.interp(mean_fpr, fpr, tpr)
    mean_tpr[0] = 0.0

    roc_auc = auc(fpr, tpr)
    ax.plot(fpr, tpr, lw=1,
            label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

# Diagonal reference line for a classifier that guesses at random.
ax.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

# Average the accumulated curves and force the end point to (1, 1).
mean_tpr /= len(cv)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
ax.plot(mean_fpr, mean_tpr, 'k--',
        label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)

ax.set_xlim([-0.05, 1.05])
ax.set_ylim([-0.05, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver operating characteristic example')
ax.legend(loc="lower right")
plt.show()
(array([ 2, 3, 4, ..., 38005, 38006, 38007]), array([ 0, 1, 8, ..., 38000, 38008, 38009]))
In [ ]: