import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Apply a matplotlib style sheet to every figure created after this point.
plt.style.use('Solarize_Light2')
import folium
import os
# Make the project folder the working directory so the relative file names
# used later in the script ('train_clean_features.csv', 'finalized_rf.sav')
# resolve against it.
# The path is a raw string: in a normal literal, "\D", "\P", "\R" and "\T" are
# invalid escape sequences, which emit a DeprecationWarning/SyntaxWarning and
# are scheduled to become a SyntaxError. The resulting path is unchanged.
# NOTE(review): this is a machine-specific absolute path; the script will
# raise FileNotFoundError on any machine where the folder does not exist.
os.chdir(r'D:\Data\Projects\Regression\Taxi Fare Prediction_Linear Regression')
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
import pickle
# Read the feature table from the current working directory.
df = pd.read_csv('train_clean_features.csv')
# Bare expression: shows the first rows only in an interactive session.
df.head()

# Target is the fare column; every other column except the raw pickup
# timestamp is used as a predictor.
# NOTE(review): presumably 'pickup_datetime' is dropped because it is not
# numeric -- confirm against the feature-engineering step.
y = df['fare_amount']
x = df.drop(columns=['fare_amount', 'pickup_datetime'])

# Hold out 25% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)
# Bare expression: shows the four shapes only in an interactive session.
X_train.shape, X_test.shape, y_train.shape, y_test.shape
# Load the previously saved model from the working directory. The context
# manager closes the file handle as soon as unpickling finishes (the original
# one-liner left it open until garbage collection).
# NOTE(security): pickle can execute arbitrary code while loading -- only
# unpickle files that were produced by a trusted source.
# NOTE(review): presumably a fitted RandomForestRegressor; the code only
# relies on it exposing `feature_importances_`.
with open('finalized_rf.sav', 'rb') as model_file:
    rfm = pickle.load(model_file)

# Pair each predictor column with the model's importance score and rank them
# from most to least important. This assumes the model was fitted on the same
# columns, in the same order, as `x`.
fi = pd.DataFrame({'Feature': x.columns, 'Importance': rfm.feature_importances_})
fi = fi.sort_values('Importance', ascending=False).reset_index(drop=True)

# Horizontal bar chart of the ranked importances; the trailing semicolon
# suppresses the returned Axes repr when run in a notebook cell.
plt.figure(figsize=(10, 6))
sns.barplot(y=fi.Feature, x=fi.Importance, palette="Reds_d", orient='h');