# 10-fold cross-validation with K=5 for KNN (the n_neighbors parameter).
# cross_val_score fits/evaluates the model on each of the 10 folds and
# returns one accuracy score per fold.
knn = KNeighborsClassifier(n_neighbors=5)
scores = cross_val_score(knn, X, y, cv=10, scoring='accuracy')
print(scores)

[ 0.625 0.75 0.625 0.625 0.625 0.625
0.57142857 0.33333333 0.4 0.6 ]

# Use the average of the 10 fold accuracies as an estimate of
# out-of-sample accuracy (assumes `scores` from the previous cell).
print(scores.mean())

0.577976190476

# Search for an optimal value of K for KNN: run 10-fold CV for each
# K in 1..30 and record the mean cross-validated accuracy.
k_range = range(1, 31)
k_scores = []
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X, y, cv=10, scoring='accuracy')
    k_scores.append(scores.mean())
print(k_scores)

# Plot the value of K for KNN (x-axis) versus the cross-validated
# accuracy (y-axis) to visually pick the best K.
plt.plot(k_range, k_scores)
plt.xlabel('Value of K for KNN')
plt.ylabel('Cross-Validated Accuracy')

<matplotlib.text.Text at 0x155d3ac8>

# 10-fold cross-validation with the best KNN model found above
# (n_neighbors=2 per the K-search plot); print its mean accuracy.
knn = KNeighborsClassifier(n_neighbors=2)
print(cross_val_score(knn, X, y, cv=10, scoring='accuracy').mean())

0.603928571429

Revisit standardization so we can translate back and forth between standardized scores and actual (raw) feature values.

# Standardize the beer features (z-score): subtract each column's mean,
# then divide by its standard deviation. Works on a copy so `beers`
# itself is untouched; the printed mean/std let us translate back later.
print('\tabv\t\tibu')
# subtract the mean for each feature:
features_revisit = beers.copy()
features_mean = features_revisit.mean(axis=0)
print(features_mean)
features_revisit -= features_mean
# divide each feature by its standard deviation
features_std = features_revisit.std(axis=0)
print(features_std)
features_revisit /= features_std

# If NB has an IBU of 55 or greater, perhaps consider recategorizing to BIPA.
# Un-standardize two stout IBU z-scores back to raw IBU values
# (x = z * std + mean), then standardize the 55 IBU threshold for comparison.
# NOTE(review): assumes column index 1 of features_mean/features_std is IBU.
stout_ibus = [-1.65306122, -0.30612245]
for ibu in stout_ibus:
    print(ibu * features_std[1] + features_mean[1])
print("\n55 IBU standardized: ", (55. - features_mean[1]) / features_std[1])