Supervised Classification & Regression
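The cells below assume a pandas DataFrame df with a target column and a pre-made train/test split (X_train, X_test, y_train, y_test). A minimal setup sketch, with placeholder names:

[ ]: import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# df = pd.read_csv(...)  # hypothetical data source; 'target' is a placeholder column name
X = df.drop(columns=['target'])
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)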
[ ]: # Numerical columns
df['col'] = df['col'].fillna(df['col'].mean()) # Replace with mean
df['col'] = df['col'].fillna(df['col'].median()) # Replace with median
[ ]: # Categorical columns
df['col'] = df['col'].fillna(df['col'].mode()[0]) # Replace with mode
[ ]: # Multivariate imputation
from sklearn.experimental import enable_iterative_imputer  # must precede the import below
from sklearn.impute import IterativeImputer

imputer = IterativeImputer()
df_imputed = imputer.fit_transform(df)  # returns a NumPy array, not a DataFrame
[ ]: # Feature scaling
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, MaxAbsScaler

scaler = MinMaxScaler()  # rescale to [0, 1]
df_scaled = scaler.fit_transform(df)

scaler = StandardScaler()  # zero mean, unit variance
df_scaled = scaler.fit_transform(df)

scaler = RobustScaler()  # median and IQR, robust to outliers
df_scaled = scaler.fit_transform(df)

scaler = MaxAbsScaler()  # divide by max absolute value, preserves sparsity
df_scaled = scaler.fit_transform(df)
[ ]: # Label encoding (best suited to targets or ordinal columns)
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
df['col'] = encoder.fit_transform(df['col'])
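LabelEncoder imposes an arbitrary integer ordering, so for nominal features one-hot encoding is the usual alternative; a minimal sketch:

[ ]: df_encoded = pd.get_dummies(df, columns=['col'])  # one indicator column per category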
[ ]: # Target (mean) encoding
mean_encoding = df.groupby('col')['target'].mean()
df['col'] = df['col'].map(mean_encoding)
[ ]: # Frequency encoding
freq_encoding = df['col'].value_counts()
df['col'] = df['col'].map(freq_encoding)
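Mean encoding computed on the full dataset leaks the target into the features; fitting the mapping on the training split only avoids this. A hedged sketch, assuming 'col' is still raw in the split data:

[ ]: mean_encoding = y_train.groupby(X_train['col']).mean()
X_train['col'] = X_train['col'].map(mean_encoding)
X_test['col'] = X_test['col'].map(mean_encoding).fillna(y_train.mean())  # unseen categories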
[ ]: # Distribution of a column
df['col'].hist(bins=20)
plt.title('Histogram')
plt.xlabel('Values')
plt.ylabel('Frequency')
plt.show()
[ ]: # Linear regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Metrics
print("R-squared:", r2_score(y_test, y_pred))
print("Mean Absolute Error:", mean_absolute_error(y_test, y_pred))
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error:", np.sqrt(mean_squared_error(y_test, y_pred)))
[ ]: # Classification metrics (assumes a fitted probabilistic classifier, e.g. LogisticRegression)
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, roc_auc_score)

y_pred = model.predict(X_test)
y_pred_prob = model.predict_proba(X_test)[:, 1]  # probability of the positive class
auc = roc_auc_score(y_test, y_pred_prob)
print(auc)

# Metrics
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1-Score:", f1_score(y_test, y_pred))
print("ROC-AUC Score:", roc_auc_score(y_test, y_pred_prob))
# classification_report and confusion_matrix work the same way
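As the comment notes, classification_report and confusion_matrix follow the same pattern:

[ ]: from sklearn.metrics import classification_report, confusion_matrix

print(confusion_matrix(y_test, y_pred))       # rows = true class, columns = predicted
print(classification_report(y_test, y_pred))  # per-class precision, recall, F1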
[ ]: # Tree-based classifier (feature_importances_ assumes a tree model, e.g. DecisionTreeClassifier)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(model.feature_importances_)

# Metrics
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1-Score:", f1_score(y_test, y_pred))
# classification_report and confusion_matrix work the same way
[ ]: from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

model = RandomForestClassifier(oob_score=True)  # oob_score=True is required for oob_score_
model.fit(X_train, y_train)
print(model.feature_importances_)
print(model.oob_score_)  # accuracy estimated on out-of-bag samples
y_pred = model.predict(X_test)
[ ]: # Gradient boosting (plot_importance suggests xgboost; model = XGBClassifier() is an assumption)
from xgboost import XGBClassifier, plot_importance
from sklearn.metrics import log_loss

model = XGBClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_pred_prob = model.predict_proba(X_test)[:, 1]

# Metrics
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Log Loss:", log_loss(y_test, y_pred_prob))
print("ROC-AUC Score:", roc_auc_score(y_test, y_pred_prob))
# classification_report and confusion_matrix work the same way

plot_importance(model)
plt.show()
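plot_importance defaults to 'weight' (number of splits per feature); 'gain' often ranks features more meaningfully:

[ ]: plot_importance(model, importance_type='gain')
plt.show()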
[ ]: # The same evaluation pattern applies to any other fitted probabilistic classifier
y_pred = model.predict(X_test)
y_pred_prob = model.predict_proba(X_test)[:, 1]

# Metrics
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Log Loss:", log_loss(y_test, y_pred_prob))