python - Compare classifiers using Precision recall curve -
I'm using Python to run a precision-recall curve analysis (with 3-fold cross-validation) on 5 different classifiers. Here is the code:
"""Plot per-fold and pooled precision-recall curves for one classifier.

The script reads ``file.csv``, splits it with 3-fold stratified
cross-validation, fits the selected classifier on each training fold and
draws one precision-recall curve per fold plus one curve computed from
the concatenated out-of-fold predictions.
"""
import numpy
# numpy.interp is the function the old top-level ``scipy.interp`` re-exported;
# importing it from numpy keeps the name available on current SciPy releases.
from numpy import interp
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import svm
from sklearn.metrics import auc
# ``sklearn.cross_validation`` was replaced by ``sklearn.model_selection``.
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB

# Import some data to play with.
# The CSV is read with its header row; the target is the "class_label" column.
mydata = pd.read_csv("file.csv")
y = mydata["class_label"]

# Features: every column except the last one.
# NOTE(review): assumes "class_label" is the last column of the CSV - confirm.
x = mydata.iloc[:, :-1]

##################################
cv = StratifiedKFold(n_splits=3)
y_real = []
y_proba = []

# Alternative classifiers (uncomment exactly one assignment):
# classifier = svm.SVC(kernel='rbf', gamma=0.0001, C=1000, probability=True,
#                      class_weight='balanced')
# classifier = RandomForestClassifier(n_estimators=50,
#                                     class_weight="auto",
#                                     criterion='gini',
#                                     bootstrap=True,
#                                     max_features=0.5,
#                                     min_samples_split=1,
#                                     min_samples_leaf=5,
#                                     max_depth=10,
#                                     n_jobs=1)
# classifier = KNeighborsClassifier()
# classifier = GaussianNB()
classifier = DecisionTreeClassifier(max_depth=11)

# rcParams only affect axes created afterwards, so set the background
# colour before the first plot call creates the figure.
plt.rcParams['axes.facecolor'] = 'white'

for i, (train, test) in enumerate(cv.split(x, y)):
    # ``train`` / ``test`` are arrays of row positions. Index with them
    # directly: slicing from the first to the last index would pull rows of
    # the other fold into this one and drop the final row.
    x_train, x_test = x.iloc[train], x.iloc[test]
    y_train, y_test = y.iloc[train], y.iloc[test]

    probas_ = classifier.fit(x_train, y_train).predict_proba(x_test)
    # Column 1 is used as the positive-class score.
    # NOTE(review): this presumes a binary target - confirm.
    precision, recall, _ = precision_recall_curve(y_test, probas_[:, 1])
    lab = 'pre-recall fold %d (area = %0.2f)' % (i+1, auc(recall, precision))
    plt.plot(recall, precision, lw=1, label=lab)

    # Keep the out-of-fold labels and scores for the pooled curve below.
    y_real.append(y_test)
    y_proba.append(probas_[:, 1])

# Pooled curve over the out-of-fold predictions of all folds.
y_real = numpy.concatenate(y_real)
y_proba = numpy.concatenate(y_proba)
precision, recall, _ = precision_recall_curve(y_real, y_proba)
lab = 'mean pre-recall (area = %0.2f)' % (auc(recall, precision))
plt.plot(recall, precision, label=lab, lw=2, color='black')

plt.xlim([0.02, 0.99])
plt.ylim([0, 1.05])
plt.grid(True)
plt.xlabel('recall')
plt.ylabel('precision')
plt.title('precision recall curve, decision tree')
plt.legend(loc="lower left")
plt.show()
The resulting plot looks like this:
Now I want to compare the 5 different models in one graph, but I have no idea how to do that. I want the end result to look like this:
Comments
Post a Comment