0% found this document useful (0 votes)
0 views

ml using python programs

The document provides Python code examples for implementing K-Nearest Neighbors (KNN), Decision Trees, and Random Forests for both classification and regression tasks using the scikit-learn library. It includes data loading, model training, evaluation metrics, and visualization techniques. Sample outputs demonstrate the performance of each model, including accuracy for classification and mean squared error for regression.

Uploaded by

lokeshsivarathri
Copyright
© All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
0 views

ml using python programs

The document provides Python code examples for implementing K-Nearest Neighbors (KNN), Decision Trees, and Random Forests for both classification and regression tasks using the scikit-learn library. It includes data loading, model training, evaluation metrics, and visualization techniques. Sample outputs demonstrate the performance of each model, including accuracy for classification and mean squared error for regression.

Uploaded by

lokeshsivarathri
Copyright
© All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 12

KNN for Classification and Regression

# Import the libraries used by the KNN classification and regression examples
import numpy as np
from sklearn.datasets import load_iris, make_regression
from sklearn.model_selection import train_test_split
# The name list is parenthesized so it may span several lines; a trailing
# comma followed by a name on the next line (without parentheses) is a
# SyntaxError.
from sklearn.neighbors import (
    KNeighborsClassifier,
    KNeighborsRegressor,
)
from sklearn.metrics import accuracy_score, mean_squared_error

# ---------------- KNN for Classification ---------------- #

# Fetch the Iris data and separate the feature matrix from the labels
iris = load_iris()
X_classification, y_classification = iris.data, iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the
# split reproducible between runs
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(
    X_classification,
    y_classification,
    test_size=0.3,
    random_state=42,
)

# Build a 3-nearest-neighbours classifier and fit it in a single
# expression (fit() hands back the fitted estimator)
knn_classifier = KNeighborsClassifier(n_neighbors=3).fit(X_train_c, y_train_c)

# Label the held-out samples
y_pred_c = knn_classifier.predict(X_test_c)

# Share of held-out samples labelled correctly, shown as a percentage
accuracy = accuracy_score(y_test_c, y_pred_c)
print("Classification Results:", f"Accuracy: {accuracy * 100:.2f}%", sep="\n")

# ---------------- KNN for Regression ---------------- #

# Generate a synthetic single-feature regression problem with noisy targets
X_regression, y_regression = make_regression(
    n_samples=200,
    n_features=1,
    noise=10,
    random_state=42,
)

# Hold out 30% of the samples for testing
X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(
    X_regression,
    y_regression,
    test_size=0.3,
    random_state=42,
)

# Build a 3-nearest-neighbours regressor and fit it in a single expression
knn_regressor = KNeighborsRegressor(n_neighbors=3).fit(X_train_r, y_train_r)

# Estimate targets for the held-out samples
y_pred_r = knn_regressor.predict(X_test_r)

# Average squared gap between true and predicted targets
mse = mean_squared_error(y_test_r, y_pred_r)
print("\nRegression Results:", f"Mean Squared Error: {mse:.2f}", sep="\n")

Output

Running the code above produces output similar to the following:

Classification Results:
Accuracy: 95.56%

Regression Results:
Mean Squared Error: 82.35
Program: Decision Tree with Parameter Tuning
# Import necessary libraries
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt

# ---------------- Decision Tree for Classification ---------------- #

# Fetch the Iris data and separate the feature matrix from the labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the
# split reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Create a decision tree with default hyperparameters and fit it in a
# single expression (fit() hands back the fitted estimator)
dt_classifier = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Label the held-out samples
y_pred = dt_classifier.predict(X_test)

# Report overall accuracy followed by the per-class metrics
accuracy = accuracy_score(y_test, y_pred)
print(
    "Decision Tree Classification Results (Default Parameters):",
    f"Accuracy: {accuracy * 100:.2f}%",
    sep="\n",
)
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

# Draw the fitted tree, labelling nodes with the Iris feature and class names
plt.figure(figsize=(15, 10))
plot_tree(
    dt_classifier,
    filled=True,
    feature_names=iris.feature_names,
    class_names=iris.target_names,
)
plt.title("Decision Tree Visualization")
plt.show()

# ---------------- Parameter Tuning using Grid Search ---------------- #

# Candidate hyperparameter values. The grid search evaluates every
# combination: 2 * 4 * 3 * 3 = 72 candidate settings.
param_grid = {
    "criterion": ["gini", "entropy"],
    "max_depth": [None, 3, 5, 10],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4],
}

# 5-fold cross-validated grid search scored on accuracy.
# The constructor call has to begin on the same line as the assignment:
# a bare `grid_search =` followed by a line break is a SyntaxError.
grid_search = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring="accuracy",
    verbose=1,
    n_jobs=-1,  # use all available CPU cores
)

# Search on the training split only, so the test split stays unseen
grid_search.fit(X_train, y_train)

# Pull the winning hyperparameters and the corresponding fitted estimator
# out of the finished search
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_

# Label the held-out samples with the tuned tree
y_pred_tuned = best_model.predict(X_test)

# Report accuracy, the chosen hyperparameters, and the per-class metrics
accuracy_tuned = accuracy_score(y_test, y_pred_tuned)
print(
    "\nDecision Tree Classification Results (Tuned Parameters):",
    f"Accuracy: {accuracy_tuned * 100:.2f}%",
    f"Best Parameters: {best_params}",
    sep="\n",
)
print(
    "\nClassification Report:",
    classification_report(y_test, y_pred_tuned),
    sep="\n",
)

# Draw the tuned tree, labelling nodes with the Iris feature and class names
plt.figure(figsize=(15, 10))
plot_tree(
    best_model,
    filled=True,
    feature_names=iris.feature_names,
    class_names=iris.target_names,
)
plt.title("Tuned Decision Tree Visualization")
plt.show()
Sample Output

Default Decision Tree Results:

Decision Tree Classification Results (Default Parameters):
Accuracy: 95.56%

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       0.89      0.94      0.91        16
           2       0.94      0.88      0.91        18

    accuracy                           0.96        50
   macro avg       0.95      0.94      0.94        50
weighted avg       0.96      0.96      0.96        50

Tuned Decision Tree Results:

Decision Tree Classification Results (Tuned Parameters):
Accuracy: 97.78%
Best Parameters: {'criterion': 'entropy', 'max_depth': 5, 'min_samples_leaf': 2, 'min_samples_split': 5}

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       0.94      0.94      0.94        16
           2       0.94      0.94      0.94        18

    accuracy                           0.98        50
   macro avg       0.96      0.96      0.96        50
weighted avg       0.98      0.98      0.98        50
Program: Decision Tree for Regression
# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor, plot_tree
from sklearn.metrics import mean_squared_error, r2_score

# ---------------- Decision Tree for Regression ---------------- #

# Generate a synthetic single-feature regression problem with noisy targets
X, y = make_regression(
    n_samples=200,
    n_features=1,
    noise=15,
    random_state=42,
)

# Hold out 30% of the samples for testing
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Create the regression tree and fit it in a single expression
# (fit() hands back the fitted estimator)
dt_regressor = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Estimate targets for the held-out samples
y_pred = dt_regressor.predict(X_test)

# Score the predictions: squared-error loss and coefficient of determination
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(
    "Decision Tree Regression Results:",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"R² Score: {r2:.2f}",
    sep="\n",
)

# ---------------- Visualization ---------------- #

# Figure 1: the structure of the fitted regression tree
plt.figure(figsize=(12, 8))
plot_tree(
    dt_regressor,
    filled=True,
    feature_names=["Feature"],
    rounded=True,
)
plt.title("Decision Tree Visualization")
plt.show()

# Figure 2: true targets (blue) and predicted targets (red) over the
# same test-set feature values
plt.figure(figsize=(8, 6))
for targets, colour, caption in (
    (y_test, "blue", "Actual Values"),
    (y_pred, "red", "Predicted Values"),
):
    plt.scatter(X_test, targets, color=colour, label=caption)
plt.title("Decision Tree Regression: Predictions vs Actual Values")
plt.xlabel("Feature")
plt.ylabel("Target")
plt.legend()
plt.show()

Sample Output

Regression Results:

Decision Tree Regression Results:
Mean Squared Error (MSE): 265.42
R² Score: 0.84

Random Forest for Classification and Regression:


# Import the libraries used by the Random Forest classification and
# regression examples
import numpy as np
from sklearn.datasets import load_iris, make_regression
from sklearn.model_selection import train_test_split
# Both name lists below are parenthesized so they may span several lines;
# a trailing comma followed by names on the next line (without parentheses)
# is a SyntaxError.
from sklearn.ensemble import (
    RandomForestClassifier,
    RandomForestRegressor,
)
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    mean_squared_error,
    r2_score,
)
import matplotlib.pyplot as plt

# ---------------- Random Forest for Classification ---------------- #

# Fetch the Iris data and separate the feature matrix from the labels
iris = load_iris()
X_classification, y_classification = iris.data, iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the
# split reproducible between runs
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(
    X_classification,
    y_classification,
    test_size=0.3,
    random_state=42,
)

# Build a 100-tree forest and fit it in a single expression
# (fit() hands back the fitted estimator)
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train_c, y_train_c)

# Label the held-out samples
y_pred_c = rf_classifier.predict(X_test_c)

# Report overall accuracy followed by the per-class metrics
accuracy_c = accuracy_score(y_test_c, y_pred_c)
print(
    "Random Forest Classification Results:",
    f"Accuracy: {accuracy_c * 100:.2f}%",
    sep="\n",
)
print(
    "\nClassification Report:",
    classification_report(y_test_c, y_pred_c),
    sep="\n",
)

# ---------------- Random Forest for Regression ---------------- #

# Generate a synthetic single-feature regression problem with noisy targets
X_regression, y_regression = make_regression(
    n_samples=200,
    n_features=1,
    noise=15,
    random_state=42,
)

# Hold out 30% of the samples for testing
X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(
    X_regression,
    y_regression,
    test_size=0.3,
    random_state=42,
)

# Build a 100-tree forest and fit it in a single expression
rf_regressor = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train_r, y_train_r)

# Estimate targets for the held-out samples
y_pred_r = rf_regressor.predict(X_test_r)

# Score the predictions: squared-error loss and coefficient of determination
mse_r = mean_squared_error(y_test_r, y_pred_r)
r2_r = r2_score(y_test_r, y_pred_r)

print(
    "\nRandom Forest Regression Results:",
    f"Mean Squared Error (MSE): {mse_r:.2f}",
    f"R² Score: {r2_r:.2f}",
    sep="\n",
)

# ---------------- Visualization for Regression ---------------- #

# True targets (blue) and predicted targets (red) plotted over the same
# test-set feature values
plt.figure(figsize=(8, 6))
for targets, colour, caption in (
    (y_test_r, "blue", "Actual Values"),
    (y_pred_r, "red", "Predicted Values"),
):
    plt.scatter(X_test_r, targets, color=colour, label=caption)
plt.title("Random Forest Regression: Predictions vs Actual Values")
plt.xlabel("Feature")
plt.ylabel("Target")
plt.legend()
plt.show()

You might also like