0% found this document useful (0 votes)
20 views

ABHAYMLFILE

Ml file

Uploaded by

ranabeena804
Copyright
© All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
20 views

ABHAYMLFILE

Ml file

Uploaded by

ranabeena804
Copyright
© All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 16

Name: Abhay Chand Ramola

Course: BCA(6) Sec: A


Roll No: 2121020(05)
Subject: Fundamental of Machine Learning

Problem Statement: Write a Python program to implement logistic regression on the California housing dataset.
Source code:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix

# Read the California housing training CSV and discard every row that has a
# missing value.
df = pd.read_csv('/content/sample_data/california_housing_train.csv')
df.dropna(inplace=True)

# Logistic regression needs class labels, so the continuous price is turned
# into a binary label: 1 when the value is above the dataset median, else 0.
median_value = df['median_house_value'].median()
df['value_category'] = df['median_house_value'].gt(median_value).astype(int)

# Features are every column except the raw target and the label derived from it.
y = df['value_category']
X = df.drop(columns=['median_house_value', 'value_category'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling
scaler = StandardScaler()
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning
# Learn the scaling statistics from the training features only, then apply
# that same transform to both the training and the held-out features.
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a logistic regression classifier (library defaults) on the scaled data.
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# Predict the held-out rows and score the predictions.
y_pred = model.predict(X_test_scaled)
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# The message text is reproduced exactly as in the original report.
print("Accuracy:, {}".format(accuracy))
print("Confusion matrix:\n{} ".format(conf_matrix))

Output:
Accuracy:, 0.8370588235294117
Confusion matrix:
[[1397 259]
[ 295 1449]]
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning

Problem Statement: Write a Python program to implement the ID3 algorithm, using entropy, for a decision tree.
Source Code:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,confusion_matrix

# Load the dataset
df = pd.read_csv('/content/sample_data/california_housing_train.csv')

# Data preprocessing: drop any rows with missing values
df.dropna(inplace=True)

# Split the dataset into features and target.
# 'median_house_value' is continuous; a classifier fitted on it directly would
# treat every distinct price as its own class. The target is therefore
# binarised: 1 when the value is above the dataset median, 0 otherwise.
X = df.drop('median_house_value', axis=1)
y = (df['median_house_value'] > df['median_house_value'].median()).astype(int)

# Split into training and testing sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling: fit on the training features only, reuse for the test features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning
# Decision tree that chooses splits by entropy (information gain), the
# criterion used by ID3. random_state fixes the tie-breaking between equally
# good splits so repeated runs give the same tree.
model = DecisionTreeClassifier(criterion='entropy', random_state=42)

# Training the model
model.fit(X_train_scaled, y_train)

# Predictions on the testing set
y_pred = model.predict(X_test_scaled)

# Model evaluation. The confusion matrix is computed here from this model's
# own predictions; previously it was printed without ever being assigned.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
print("Accuracy: ", accuracy)
print("Confusion matrix : \n", conf_matrix)

Output:
Accuracy: 0.025588235294117648
Confusion matrix :
[[1397 259]
[ 295 1449]]
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning

Problem Statement: Write a Python program to implement the CART algorithm for a decision tree.
Source Code:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,confusion_matrix

# Load the dataset
df = pd.read_csv('/content/sample_data/california_housing_train.csv')

# Data preprocessing: drop any rows with missing values
df.dropna(inplace=True)

# Split the dataset into features and target.
# 'median_house_value' is continuous; a classifier fitted on it directly would
# treat every distinct price as its own class. The target is therefore
# binarised: 1 when the value is above the dataset median, 0 otherwise.
X = df.drop('median_house_value', axis=1)
y = (df['median_house_value'] > df['median_house_value'].median()).astype(int)

# Split into training and testing sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling: fit on the training features only, reuse for the test features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning
# CART (decision tree) model: splits are chosen by Gini impurity.
# The classifier is constructed here; previously `model` was used without
# being created in this script. random_state makes repeated runs reproducible.
model = DecisionTreeClassifier(criterion='gini', random_state=42)

# Training the model
model.fit(X_train_scaled, y_train)

# Predictions on the testing set
y_pred = model.predict(X_test_scaled)

# Model evaluation. The confusion matrix is computed from this model's own
# predictions before it is printed.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
print("Accuracy: ", accuracy)
print("Confusion matrix : \n", conf_matrix)

Output:
Accuracy: 0.023823529411764705
Confusion matrix :
[[1397 259]
[ 295 1449]]
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning

Problem Statement: Write a python program to implement SVM using linear kernel on iris.csv.
Source Code:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report,accuracy_score

# Iris data hosted by the UCI repository. The file has no header row, so the
# column names are supplied explicitly.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
iris = pd.read_csv(url, header=None, names=column_names)

print(iris.head())

X = iris.iloc[:, :-1].values  # all columns except the last one
y = iris.iloc[:, -1].values   # the last column (species label)

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardise features: fit on the training rows only, reuse for the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Support vector classifier with a linear kernel.
svm = SVC(kernel='linear', random_state=42)
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning
# Overall accuracy (two decimals) followed by the per-class
# precision / recall / F1 table for the held-out rows.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:{:.2f}".format(accuracy))
report = classification_report(y_test, y_pred)
print(report)

Output:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa
Accuracy:0.98
precision recall f1-score support

Iris-setosa 1.00 1.00 1.00 19


Iris-versicolor 1.00 0.92 0.96 13
Iris-virginica 0.93 1.00 0.96 13

accuracy 0.98 45
macro avg 0.98 0.97 0.97 45
weighted avg 0.98 0.98 0.98 45
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning

Problem Statement: Write a Python program to carry out visualization for each feature separately.
Source Code:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Load the Iris dataset bundled with scikit-learn
iris = load_iris()
X = iris.data
y = iris.target
feature_names = iris.feature_names
target_names = iris.target_names

# One histogram (with a KDE curve) per feature, laid out on a 2x2 grid.
plt.figure(figsize=(12, 6))
for i in range(X.shape[1]):
    plt.subplot(2, 2, i + 1)
    sns.histplot(X[:, i], kde=True, color='skyblue')
    plt.title(feature_names[i])
# Adjust spacing once, after every subplot has been drawn, then display.
plt.tight_layout()
plt.show()

# seaborn's copy of Iris is a DataFrame with a 'species' column, which the
# pairplot uses to colour the points.
iris_df = sns.load_dataset('iris')
sns.pairplot(iris_df, hue='species')
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning
# Display the pairplot built just before this point.
plt.show()

# PCA Visualization
from sklearn.decomposition import PCA

# Project the feature matrix onto its first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
first_component = X_pca[:, 0]
second_component = X_pca[:, 1]

# Scatter the two components against each other, coloured by class label.
plt.figure(figsize=(8, 6))
sns.scatterplot(
    x=first_component,
    y=second_component,
    hue=y,
    palette='viridis',
    legend='full',
)
plt.title('PCA Visualization')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.show()
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning

Output:
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning

Problem Statement: Write a program to analyse data using supervised algorithms, building a predictive
model for customer churn in a subscription-based business.
Source Code:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# Step 1: Data Generation


def generate_customer_churn_data(num_customers=1000, start_date='2019-01-01',
                                 end_date='2022-01-01', seed=None):
    """Build a synthetic customer table for the churn exercise.

    Each customer gets a join date drawn uniformly from the calendar days in
    ``[start_date, end_date]`` and a churn date 30 to 364 days after joining.
    A customer is labelled ``'Churned'`` when the churn date is on or before
    ``end_date`` and ``'Active'`` otherwise.

    Args:
        num_customers: Number of rows to generate; values <= 0 give an empty frame.
        start_date: First possible join date (anything ``pd.to_datetime`` accepts).
        end_date: Last possible join date and the cut-off for the churn label.
        seed: Optional seed for a private random generator. When ``None`` the
            global ``np.random`` state is used.

    Returns:
        DataFrame with columns ``CustomerID``, ``JoinDate``, ``ChurnDate`` and
        ``ChurnStatus``.

    Raises:
        ValueError: If customers are requested but ``start_date`` is later
            than ``end_date`` (there is no day to draw from).
    """
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)
    n = max(num_customers, 0)

    # RandomState exposes the same randint API as the np.random module, so
    # either object can be used below.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Build the pool of candidate days once instead of once per customer.
    day_pool = pd.date_range(start_date, end_date)
    if n and day_pool.empty:
        raise ValueError("start_date must not be later than end_date")

    # max(..., 1) only keeps randint's bounds valid when nothing is drawn
    # (n == 0 with an empty pool); otherwise the pool length is used as-is.
    picks = rng.randint(0, max(len(day_pool), 1), size=n)
    tenure_days = rng.randint(30, 365, size=n)  # upper bound is exclusive

    join_dates = day_pool[picks]
    churn_dates = join_dates + pd.to_timedelta(tenure_days, unit='D')
    churn_status = np.where(churn_dates <= end_date, 'Churned', 'Active')

    return pd.DataFrame({
        'CustomerID': np.arange(1, n + 1),
        'JoinDate': join_dates,
        'ChurnDate': churn_dates,
        'ChurnStatus': churn_status,
    })

# Step 2: Data Preprocessing


def preprocess_data(df):
    """Turn the raw customer table into numeric model features.

    Adds ``JoinYear``, ``JoinMonth``, ``JoinDay`` and ``JoinDayOfWeek`` from
    ``JoinDate``, adds ``DaysToChurn`` (whole days between ``JoinDate`` and
    ``ChurnDate``), drops the two date columns, and encodes ``ChurnStatus``
    as ``'Active' -> 0`` / ``'Churned' -> 1``. Any other status label is
    mapped to NaN by ``Series.map``.

    Args:
        df: Frame with datetime columns ``JoinDate`` and ``ChurnDate`` and a
            ``ChurnStatus`` column.

    Returns:
        A new DataFrame; the frame passed in is left unmodified.
    """
    # Work on a copy so the caller's frame keeps its original columns.
    out = df.copy()

    join = out['JoinDate'].dt
    out['JoinYear'] = join.year
    out['JoinMonth'] = join.month
    out['JoinDay'] = join.day
    out['JoinDayOfWeek'] = join.dayofweek

    out['DaysToChurn'] = (out['ChurnDate'] - out['JoinDate']).dt.days

    # The raw dates are no longer needed once their parts are extracted.
    out = out.drop(['JoinDate', 'ChurnDate'], axis=1)

    out['ChurnStatus'] = out['ChurnStatus'].map({'Active': 0, 'Churned': 1})

    return out

# Step 3: Split Data


def split_data(df, test_size=0.2, random_state=42):
    """Split the preprocessed frame into train/test features and labels.

    Args:
        df: Frame containing a ``ChurnStatus`` column (the label); every
            other column is used as a feature.
        test_size: Passed to ``train_test_split`` as the held-out share.
        random_state: Seed passed to ``train_test_split``; the default keeps
            the split identical between runs.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    X = df.drop('ChurnStatus', axis=1)
    y = df['ChurnStatus']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test

# Step 4: Model Training


def train_model(X_train, y_train, n_estimators=100, random_state=42):
    """Fit a random forest classifier on the training data.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        n_estimators: Number of trees in the forest.
        random_state: Seed passed to the classifier so repeated runs build
            the same forest.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    model = RandomForestClassifier(
        n_estimators=n_estimators, random_state=random_state
    )
    model.fit(X_train, y_train)
    return model

# Step 5: Model Evaluation


def evaluate_model(model, X_test, y_test):
    """Print accuracy, classification report and confusion matrix.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Held-out feature matrix.
        y_test: True labels for ``X_test``.
    """
    # Predict once and reuse the result for all three metrics.
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
# Step 6: Model Deployment
def save_model(model, filepath='customer_churn_model.pkl'):
    """Serialise the model to disk with joblib and report success.

    Args:
        model: Object to persist.
        filepath: Destination path handed to ``joblib.dump``.
    """
    joblib.dump(model, filepath)
    print("Model saved successfully.")

def main():
    """Run the churn pipeline: generate, preprocess, split, train, evaluate, save."""
    # Step 1: Generate data
    df = generate_customer_churn_data()

    # Step 2: Preprocess data
    df = preprocess_data(df)

    # Step 3: Split data
    X_train, X_test, y_train, y_test = split_data(df)

    # Step 4: Train model
    model = train_model(X_train, y_train)

    # Step 5: Evaluate model
    evaluate_model(model, X_test, y_test)

    # Step 6: Save model
    save_model(model)


# Run the pipeline only when executed as a script or notebook cell, not when
# this file is imported.
if __name__ == "__main__":
    main()

Output:
Accuracy: 0.975

Classification Report:
precision recall f1-score support
0 1.00 0.85 0.92 33
1 0.97 1.00 0.99 167

accuracy 0.97 200


macro avg 0.99 0.92 0.95 200
weighted avg 0.98 0.97 0.97 200

Confusion Matrix:
[[ 28 5]
[ 0 167]]
Model saved successfully.
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning

You might also like