Linear Regression

This notebook walks through a basic linear regression workflow on the scikit-learn Diabetes dataset: loading and splitting the data, fitting the model, evaluating it with mean squared error and R-squared, and visualizing the feature distributions, predictions, and residuals.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the Diabetes dataset
def load_data():
    diabetes = load_diabetes(as_frame=True)
    data = diabetes.frame
    return data
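
For reference, load_diabetes returns 442 samples with ten baseline features (age, sex, bmi, bp, and six blood serum measurements s1–s6), each already mean-centered and scaled; the target is a quantitative measure of disease progression one year after baseline. A quick exploratory check, not part of the original notebook:

# Hypothetical sanity check on the loaded frame
data = load_data()
print(data.shape)                 # (442, 11): ten features plus 'target'
print(data.columns.tolist())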

# Prepare the data
def prepare_data(data):
    X = data.drop('target', axis=1).values  # Features
    y = data['target'].values  # Target variable

    # Split data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    return X_train, X_test, y_train, y_test
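
A single 80/20 split can be sensitive to the choice of random_state. For a more stable estimate of model quality, a cross-validated score is one option; the following is a minimal sketch, not part of the original notebook, that rebuilds X and y the same way prepare_data does:

from sklearn.model_selection import cross_val_score

# Assumed exploratory snippet: X and y mirror the arrays built in prepare_data
X = data.drop('target', axis=1).values
y = data['target'].values
scores = cross_val_score(LinearRegression(), X, y, cv=5, scoring='r2')
print("Per-fold R^2:", scores)
print("Mean R^2:", scores.mean())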

# Train the model
def train_linear_regression(X_train, y_train):
    model = LinearRegression()
    model.fit(X_train, y_train)
    return model
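
For context, LinearRegression fits ordinary least squares, i.e. it minimizes the squared error ||Xw + b - y||^2. A minimal sketch of the same fit through NumPy's least-squares solver, assuming X_train and y_train from prepare_data; coef and intercept should match model.coef_ and model.intercept_ up to floating-point precision:

# Append a column of ones so the intercept is estimated jointly with the weights
X_aug = np.hstack([X_train, np.ones((X_train.shape[0], 1))])

# Solve min ||X_aug @ beta - y_train||^2
beta, *_ = np.linalg.lstsq(X_aug, y_train, rcond=None)
coef, intercept = beta[:-1], beta[-1]
print("Coefficients:", coef)
print("Intercept:", intercept)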

# Predict with the model
def predict(model, X_test):
    return model.predict(X_test)

# Plot feature distributions
def plot_feature_distributions(data):
    plt.figure(figsize=(12, 8))
    for i, feature in enumerate(data.columns[:-1]):  # Exclude the target column
        plt.subplot(4, 3, i + 1)  # 4x3 grid fits all ten features
        sns.histplot(data[feature], kde=True)
        plt.title(feature)
    plt.tight_layout()
    plt.show()

# Plot actual vs predicted values
def plot_actual_vs_predicted(y_test, y_pred):
    plt.figure(figsize=(10, 6))
    plt.scatter(y_test, y_pred, alpha=0.7)
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title('Actual vs Predicted Values')
    plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], 'r--', lw=2)
    plt.show()

# Plot residuals
def plot_residuals(y_test, y_pred):
    residuals = y_test - y_pred
    plt.figure(figsize=(10, 6))
    sns.histplot(residuals, kde=True)
    plt.xlabel('Residuals')
    plt.ylabel('Frequency')
    plt.title('Residuals Distribution')
    plt.show()
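
The histogram shows whether residuals are roughly centered and symmetric; a scatter of residuals against predictions is a common complementary diagnostic for non-constant variance. A minimal sketch of such a helper, not in the original notebook:

def plot_residuals_vs_predicted(y_test, y_pred):
    residuals = y_test - y_pred
    plt.figure(figsize=(10, 6))
    plt.scatter(y_pred, residuals, alpha=0.7)
    plt.axhline(0, color='r', linestyle='--', lw=2)
    plt.xlabel('Predicted Values')
    plt.ylabel('Residuals')
    plt.title('Residuals vs Predicted Values')
    plt.show()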

# Main workflow
def main():
    # Load and prepare data
    data = load_data()
    X_train, X_test, y_train, y_test = prepare_data(data)

    # Train the model
    model = train_linear_regression(X_train, y_train)
    print("Trained coefficients:", model.coef_)
    print("Intercept:", model.intercept_)

    # Predict on the test set
    y_pred = predict(model, X_test)

    # Evaluate the model
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"Mean Squared Error: {mse}")
    print(f"R-squared: {r2}")

    # Display the original data features for reference
    print("Original features (first 5 rows):")
    print(data.head())

    # Plot visualizations
    plot_feature_distributions(data)
    plot_actual_vs_predicted(y_test, y_pred)
    plot_residuals(y_test, y_pred)

# Run the main function
if __name__ == "__main__":
    main()
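
MSE is expressed in squared target units, which is hard to read directly; its square root (RMSE) is in the same units as the target, and mean absolute error is another common companion metric. A minimal follow-up sketch, assuming y_test and y_pred from the run above (given the MSE reported below, RMSE comes out to about 53.9):

from sklearn.metrics import mean_absolute_error

# Assumed follow-up snippet, not part of the original notebook
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
print(f"Root Mean Squared Error: {rmse}")
print(f"Mean Absolute Error: {mae}")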


Output:

Trained coefficients: [  37.90402135 -241.96436231  542.42875852  347.70384391 -931.48884588
  518.06227698  163.41998299  275.31790158  736.1988589    48.67065743]
Intercept: 151.34560453985995
Mean Squared Error: 2900.193628493482
R-squared: 0.4526027629719195
Original features (first 5 rows):
age sex bmi bp s1 s2 s3 \
0 0.038076 0.050680 0.061696 0.021872 -0.044223 -0.034821 -0.043401
1 -0.001882 -0.044642 -0.051474 -0.026328 -0.008449 -0.019163 0.074412
2 0.085299 0.050680 0.044451 -0.005670 -0.045599 -0.034194 -0.032356
3 -0.089063 -0.044642 -0.011595 -0.036656 0.012191 0.024991 -0.036038
4 0.005383 -0.044642 -0.036385 0.021872 0.003935 0.015596 0.008142

s4 s5 s6 target
0 -0.002592 0.019907 -0.017646 151.0
1 -0.039493 -0.068332 -0.092204 75.0
2 -0.002592 0.002861 -0.025930 141.0
3 0.034309 0.022688 -0.009362 206.0
4 -0.002592 -0.031988 -0.046641 135.0
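
For this split, an R-squared of roughly 0.45 means the ten baseline features explain a bit under half of the variance in disease progression on the held-out test set, so a plain linear fit leaves a substantial share of the variation unexplained.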
