import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('Solarize_Light2')
import seaborn as sns
import altair as al
import os
# Lift pandas' limit on the number of columns shown when a frame is displayed.
pd.set_option('display.max_columns', None)
# Raw string literal: the Windows path is full of backslashes, which a normal
# string literal would try to interpret as (invalid) escape sequences.
os.chdir(r'D:\Data\Projects\Klassifikation\mushrooms')
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn import metrics
# Machine-learning estimators
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
# Load the cleaned data set from the current working directory.
df = pd.read_csv('df_clean.csv')

# Separate the label column from the remaining feature columns.
y = df['target']
x = df.drop(columns='target')

# Hold out 30 % of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

# Notebook cell output: shapes of the four pieces of the split.
X_train.shape, X_test.shape, y_train.shape, y_test.shape
# Summary table: one row per model with the mean and spread of its CV scores.
model_results = pd.DataFrame(columns=['model', 'cv_mean', 'cv_std'])

# Raw per-fold scores and the matching model names, kept in case a box plot
# is wanted later.
results = []
names = []

# Candidate classifiers, each constructed with its default hyperparameters.
models = [
    RandomForestClassifier(),
    LogisticRegression(),
    KNeighborsClassifier(),
    SVC(),
    GradientBoostingClassifier(),
]

# Cross-validation settings.
# shuffle=True is needed for random_state to have any effect; recent
# scikit-learn releases raise a ValueError when random_state is set while
# shuffle is left at its default of False.
kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
# Micro-averaged F1 score; larger values are better.
scorer = make_scorer(f1_score, greater_is_better=True, average='micro')
# Cross-validate every candidate model on the training split.
summary_rows = []
for model in models:
    model_name = model.__class__.__name__
    cv_scores = model_selection.cross_val_score(
        model, X_train, y_train, cv=kfold, scoring=scorer, n_jobs=-1
    )

    # Keep the raw fold scores and names in case a box plot is wanted.
    results.append(cv_scores)
    names.append(model_name)

    # DataFrame.append was removed in pandas 2.0 (and copied the whole frame
    # on every call), so collect plain dicts and build the table once below.
    summary_rows.append({
        'model': model_name,
        'cv_mean': cv_scores.mean(),
        'cv_std': cv_scores.std(),
    })

# Build the summary table in one go and rank the models, best mean score first.
model_results = pd.DataFrame(summary_rows, columns=['model', 'cv_mean', 'cv_std'])
model_results = model_results.sort_values('cv_mean', ascending=False)
model_results
# Box plot of the collected per-fold CV scores, one box per entry in `results`,
# labelled with the corresponding entry of `names`.
fig = plt.figure(figsize=(15, 15))
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(1, 1, 1)
ax.boxplot(results)
ax.set_xticklabels(names)
plt.show()