The document contains code implementations for various machine learning algorithms, including decision trees using ID3 and CART methods, simple linear regression, and logistic regression. It utilizes Python libraries such as pandas, numpy, and matplotlib for data manipulation and visualization. The code reads data from CSV files, calculates necessary metrics, and builds models to predict outcomes based on input features.
ASSESSMENT-2

NAME - VAIBHAV DUTT TRIVEDI

REG. NO. – 22MIC0118

COURSE TITLE – MACHINE LEARNING

COURSE CODE – CSI 3026

LAB SLOT – L9+L10


CODE - DECISION TREE (ID3)
import pandas as pd
import numpy as np

def calculate_entropy(column):
    # Entropy H = -sum(p * log2(p)) over the unique values of the column
    values, counts = np.unique(column, return_counts=True)
    probabilities = counts / len(column)
    return -np.sum(probabilities * np.log2(probabilities))

def calculate_info_gain(data, feature, target):
    # Information gain = entropy of the target minus the weighted
    # entropy of the target within each subset of the feature
    total_entropy = calculate_entropy(data[target])
    feature_values = data[feature].unique()
    weighted_entropy = 0
    for value in feature_values:
        subset = data[data[feature] == value]
        weight = len(subset) / len(data)
        weighted_entropy += weight * calculate_entropy(subset[target])
    return total_entropy - weighted_entropy

def build_tree(data, features, target):
    # Stop when every row shares the same class label
    if len(np.unique(data[target])) == 1:
        return data[target].iloc[0]
    # No features left to split on: return the majority class
    if not features:
        return data[target].mode()[0]
    # Split on the feature with the highest information gain
    best_feature = max(features, key=lambda f: calculate_info_gain(data, f, target))
    tree = {best_feature: {}}
    remaining_features = [f for f in features if f != best_feature]
    for value in np.unique(data[best_feature]):
        subset = data[data[best_feature] == value]
        tree[best_feature][value] = build_tree(subset, remaining_features, target)
    return tree

data = pd.read_csv("id3.csv")
target = data.columns[-1]            # last column is the class label
features = list(data.columns[:-1])   # all remaining columns are features

decision_tree = build_tree(data, features, target)
print("Decision Tree:", decision_tree)
CODE - SIMPLE LINEAR REGRESSION
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

data = pd.read_csv('L.csv')
x = data['X']
y = data['Y']

n = len(x)           # number of observations
mean_x = np.mean(x)
mean_y = np.mean(y)

# Least-squares estimates:
# slope m = sum((x - x_mean)(y - y_mean)) / sum((x - x_mean)^2)
# intercept b = y_mean - m * x_mean
numerator = np.sum((x - mean_x) * (y - mean_y))
denom = np.sum((x - mean_x) ** 2)
slope = numerator / denom
intercept = mean_y - slope * mean_x

print("Slope (m):", slope)
print("Intercept (b):", intercept)

y_pred = slope * x + intercept

plt.scatter(x, y, color='blue', label='Data Points')
plt.plot(x, y_pred, color='red', label='Regression Line')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Simple Linear Regression')
plt.legend()
plt.show()
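
To judge how well the fitted line explains the data, one can compute the coefficient of determination from the same quantities. This snippet is a small add-on to the script above, assuming x, y, slope, and intercept are already defined as shown.

# R^2 = 1 - SS_res / SS_tot, computed from the fitted line above
ss_res = np.sum((y - (slope * x + intercept)) ** 2)   # residual sum of squares
ss_tot = np.sum((y - np.mean(y)) ** 2)                # total sum of squares
r_squared = 1 - ss_res / ss_tot
print("R^2:", r_squared)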
CODE - LOGISTIC REGRESSION
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def sigmoid(z):
    # Logistic function: maps any real z to a probability in (0, 1)
    return 1 / (1 + np.exp(-z))

data = pd.read_csv('LOG.csv')
x = data['X']
y = data['Y']

n = len(x)           # number of observations
mean_x = np.mean(x)
mean_y = np.mean(y)

# Fit a straight line by least squares, then squash it through the
# sigmoid. Note: this is a simplified shortcut, not the usual
# maximum-likelihood fit used by standard logistic regression.
numerator = np.sum((x - mean_x) * (y - mean_y))
denom = np.sum((x - mean_x) ** 2)
slope = numerator / denom
intercept = mean_y - slope * mean_x

print("Slope (m):", slope)
print("Intercept (b):", intercept)

# Sort x so the curve is drawn left to right instead of zig-zagging
x_sorted = np.sort(x)
y_pred = sigmoid(slope * x_sorted + intercept)

plt.scatter(x, y, color='blue', label='Data Points')
plt.plot(x_sorted, y_pred, color='red', label='Logistic Regression Curve')
plt.xlabel('X')
plt.ylabel('Probability')
plt.title('Logistic Regression')
plt.legend()
plt.show()
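
Since the script above reuses the least-squares line, a sketch of logistic regression fitted the standard way, by gradient descent on the log-loss, is given below for comparison. It assumes the same x and y columns (with y holding 0/1 labels); fit_logistic is an illustrative helper, and the learning rate and epoch count are arbitrary choices.

def fit_logistic(x, y, lr=0.1, epochs=5000):
    # Gradient descent on the log-loss for a single-feature model:
    # p = sigmoid(w * x + b)
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    w, b = 0.0, 0.0
    n = len(x)
    for _ in range(epochs):
        p = 1 / (1 + np.exp(-(w * x + b)))   # predicted probabilities
        grad_w = np.sum((p - y) * x) / n     # d(log-loss)/dw
        grad_b = np.sum(p - y) / n           # d(log-loss)/db
        w -= lr * grad_w
        b -= lr * grad_b
    return w, b

# Hypothetical usage with the data loaded above:
# w, b = fit_logistic(x, y)
# print("Weight:", w, "Bias:", b)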
CODE - DECISION TREE (CART)

import pandas as pd
import numpy as np

def gini_impurity(column):
    # Gini impurity = 1 - sum(p^2) over the unique values of the column
    values, counts = np.unique(column, return_counts=True)
    probabilities = counts / len(column)
    return 1 - np.sum(probabilities ** 2)

def calculate_gini_index(data, feature, target):
    # Weighted Gini impurity of the target across each subset of the feature
    feature_values = data[feature].unique()
    weighted_gini = 0
    for value in feature_values:
        subset = data[data[feature] == value]
        weight = len(subset) / len(data)
        weighted_gini += weight * gini_impurity(subset[target])
    return weighted_gini

def build_cart_tree(data, features, target):
    # Note: this builds multiway splits scored by the Gini index; classical
    # CART restricts itself to binary splits, but the criterion is the same.
    if len(np.unique(data[target])) == 1:
        return data[target].iloc[0]       # pure node: return the class label
    if not features:
        return data[target].mode()[0]     # no features left: majority class
    # Split on the feature with the lowest weighted Gini impurity
    best_feature = min(features, key=lambda f: calculate_gini_index(data, f, target))
    tree = {best_feature: {}}
    remaining_features = [f for f in features if f != best_feature]
    for value in np.unique(data[best_feature]):
        subset = data[data[best_feature] == value]
        tree[best_feature][value] = build_cart_tree(subset, remaining_features, target)
    return tree

data = pd.read_csv("cart.csv")
target = data.columns[-1]            # last column is the class label
features = list(data.columns[:-1])

decision_tree = build_cart_tree(data, features, target)
print("Decision Tree:", decision_tree)
