import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# IPython/Jupyter line magic (not plain Python syntax): draw figures inline in the notebook.
%matplotlib inline
# Apply matplotlib's 'Solarize_Light2' style sheet to figures created from here on.
plt.style.use('Solarize_Light2')
import seaborn as sns
import altair as al
import os
# Lift pandas' column display limit so wide DataFrames are shown in full.
pd.set_option('display.max_columns', None)
# Raw string literal: the plain literal relied on '\D', '\P', '\K' and '\m'
# being unrecognised escapes, which Python reports as invalid escape sequences.
# The resulting path is unchanged.
# NOTE(review): machine-specific absolute path; the relative CSV reads in this
# script resolve against this working directory.
os.chdir(r'D:\Data\Projects\Klassifikation\mushrooms')
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn import metrics
from sklearn.ensemble import GradientBoostingClassifier
# Experiment on 'df_if.csv': split, fit a gradient-boosting classifier, score with F1.
df = pd.read_csv('df_if.csv')
x = df.drop('target', axis=1)  # every column except the label
y = df.target
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
# Printed explicitly: a bare expression is only echoed when it ends a notebook cell.
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
# Seed the booster too, so the reported score is reproducible from run to run
# (the split above is already pinned with the same seed).
model = GradientBoostingClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# NOTE(review): f1_score's default average='binary' expects a two-class target
# whose positive label is 1 -- confirm against the 'target' column.
score = f1_score(y_test, y_pred)
print(score)
# Experiment on 'df_clean.csv': split, fit a gradient-boosting classifier, score with F1.
df1 = pd.read_csv('df_clean.csv')
x1 = df1.drop('target', axis=1)  # every column except the label
y1 = df1.target
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train1, X_test1, y_train1, y_test1 = train_test_split(x1, y1, test_size=0.3, random_state=42)
# Printed explicitly: a bare expression is only echoed when it ends a notebook cell.
print(X_train1.shape, X_test1.shape, y_train1.shape, y_test1.shape)
# Seed the booster too, so the reported score is reproducible from run to run
# (the split above is already pinned with the same seed).
model1 = GradientBoostingClassifier(random_state=42)
model1.fit(X_train1, y_train1)
y_pred1 = model1.predict(X_test1)
# NOTE(review): f1_score's default average='binary' expects a two-class target
# whose positive label is 1 -- confirm against the 'target' column.
score1 = f1_score(y_test1, y_pred1)
print(score1)