ML Lab Programs

The document walks through Python implementations of common machine learning algorithms: linear regression, logistic regression, Naïve Bayes, SVM, K-nearest neighbours (KNN), K-means clustering, decision trees, and random forests. Each program is a worked code example showing how to apply the algorithm to a classification, regression, or clustering task using scikit-learn and pandas. For KNN and K-means, the programs also predict the class or cluster assignment of new data points.


1) Implementation of Linear Regression using Python
2) Implementation of Logistic Regression using Python
3) Implementation of Naïve Bayes, SVM
4) Implementation of K-Nearest Neighbours (KNN)
5) Implementation of K-Means
6) Implementation of Decision Trees
7) Implementation of Random Forest
Implementation of Linear Regression


## Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset
dataset = pd.read_csv('/content/Salary_Data (1).csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

## Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 1/3, random_state = 0)

## Training the Simple Linear Regression model on the Training set
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

## Predicting the Test set results
y_pred = regressor.predict(X_test)

## Visualising the Training set results
plt.scatter(X_train, y_train, color = 'red')
plt.plot(X_train, regressor.predict(X_train), color = 'blue')
plt.title('Salary vs Experience (Training set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

## Visualising the Test set results
plt.scatter(X_test, y_test, color = 'red')
plt.plot(X_train, regressor.predict(X_train), color = 'blue')
plt.title('Salary vs Experience (Test set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()
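
The lab stops at visual inspection; as an optional check, the learned line and a test-set R² can also be printed. A minimal sketch (not part of the original lab), reusing regressor, y_test and y_pred from above:

## Optional: numeric check of the fit
from sklearn.metrics import r2_score

print('Slope:', regressor.coef_[0])        # salary gain per extra year of experience
print('Intercept:', regressor.intercept_)  # predicted salary at zero experience
print('Test R^2:', r2_score(y_test, y_pred))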
Implementation of Logistic Regression
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv("/content/Social_Network_Ads (1).csv")
df.head()

x = df[['Age', 'EstimatedSalary']]
x.head()
y = df['Purchased']
y.head()

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2)

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)  # reuse the training-set scaling; do not refit on the test data

from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr.fit(x_train, y_train)
y_pred = lr.predict(x_test)

from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)
lr.score(x_test, y_test)

# check a single new observation, scaled the same way as the training data
lr.predict(sc.transform([[19, 19000]]))

import seaborn as sns
sns.regplot(x='Age', y='Purchased', data=df, logistic=True)
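
Accuracy alone hides how the errors split between buyers and non-buyers; a confusion matrix makes this visible. A minimal sketch (not part of the original lab), reusing y_test and y_pred from above:

# Optional: error breakdown per class
from sklearn.metrics import confusion_matrix, classification_report

print(confusion_matrix(y_test, y_pred))       # rows = actual class, columns = predicted class
print(classification_report(y_test, y_pred))  # precision, recall and F1 per class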


Implementation of Naïve Bayes, SVM
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

dataset = pd.read_csv('Social_Network_Ads.csv')
dataset.head()
dataset.Purchased.value_counts()

X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## SVM with an RBF kernel
from sklearn.svm import SVC
SV_classifier = SVC(kernel = 'rbf', random_state = 0)
SV_classifier.fit(X_train, y_train)
y_pred_SVC = SV_classifier.predict(X_test)

from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_SVC))

## Gaussian Naïve Bayes
from sklearn.naive_bayes import GaussianNB
NB_classifier = GaussianNB()
NB_classifier.fit(X_train, y_train)
y_pred_NB = NB_classifier.predict(X_test)

print(classification_report(y_test, y_pred_NB))
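
Since both classifiers are evaluated on the same test split, their accuracies can be compared directly. A minimal sketch (not part of the original lab), reusing the predictions above:

# Optional: head-to-head accuracy of the two models
from sklearn.metrics import accuracy_score

print('SVM (RBF) accuracy  :', accuracy_score(y_test, y_pred_SVC))
print('Gaussian NB accuracy:', accuracy_score(y_test, y_pred_NB))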
Implementation of K-Nearest Neighbours (KNN)
#importing libraries and dataset from sklearn
import sklearn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

#loading the dataset
iris = load_iris()
df = pd.concat([
    pd.DataFrame(data=iris['data'], columns=iris['feature_names']),
    pd.DataFrame(data=iris['target'], columns=['target'])],
    axis=1)
df.replace({'target': {0: 'setosa', 1: 'versicolor', 2: 'virginica'}}, inplace=True)
df

#labels
df.columns

Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)', 'target'],
      dtype='object')

#values
df.values

array([[5.1, 3.5, 1.4, 0.2, 'setosa'],
       [4.9, 3.0, 1.4, 0.2, 'setosa'],
       [4.7, 3.2, 1.3, 0.2, 'setosa'],
       [4.6, 3.1, 1.5, 0.2, 'setosa'], ……

df.describe()

sns.set(style="ticks")
sns.pairplot(df, hue="target", palette={"setosa": "red", "versicolor": "yellow", "virginica": "blue"})

plt.legend(loc='upper right')

plt.show()

sns.set(style="whitegrid")

plt.figure(figsize=(10, 6))

sns.scatterplot(x='sepal length (cm)', y='sepal width (cm)', hue='target', palette={'setosa': 'red',


'versicolor': 'yellow', 'virginica': 'blue'}, data=df)

plt.xlabel('Sepal Length (cm)')

plt.ylabel('Sepal Length vs Sepal Width by Species')


plt.legend(title='Species', loc='upper right')

plt.show()

# Split the dataset into features (X) and target (y)
X = df.drop('target', axis=1)
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2401)
len(X_train), len(X_test)

knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
y_pred

array(['setosa', 'setosa', 'versicolor', 'virginica', 'versicolor',
       'setosa', 'setosa', 'setosa', 'versicolor', 'versicolor',
       'virginica', 'versicolor', 'versicolor', 'versicolor',
       'versicolor', 'setosa', 'setosa', 'setosa', 'virginica',
       'setosa', 'virginica', 'setosa', 'versicolor', 'virginica',
       'setosa', 'versicolor', 'setosa', 'setosa', 'virginica',
       'virginica'], dtype=object)

y_test
39         setosa
46         setosa
60     versicolor …….
Name: target, dtype: object

accuracy_score(y_test,y_pred)

0.9666666666666667

print(classification_report(y_test, y_pred))

confusion_matrix(y_test, y_pred)

array([[13,  0,  0],
       [ 0,  9,  0],
       [ 0,  1,  7]])

df.sample()

# integer-to-name mapping (unused below: the targets were already replaced with names above)
predicted_classes = {0: 'setosa', 1: 'versicolor', 2: 'virginica'}

sepal_length = float(input("Enter sepal length (cm): "))
sepal_width = float(input("Enter sepal width (cm): "))
petal_length = float(input("Enter petal length (cm): "))
petal_width = float(input("Enter petal width (cm): "))

new_data = pd.DataFrame({
    'sepal length (cm)': [sepal_length],
    'sepal width (cm)': [sepal_width],
    'petal length (cm)': [petal_length],
    'petal width (cm)': [petal_width]
})
new_data

predicted_class = knn.predict(new_data)
predicted_class

array(['setosa'], dtype=object)

predicted_class_name = predicted_class[0]
predicted_class_name

setosa
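
The value n_neighbors=10 used above is a fixed choice; sweeping k and plotting test accuracy is a quick way to check it. A minimal sketch (not part of the original lab), reusing the train/test split from above:

# Optional: test accuracy for k = 1..20 to guide the choice of n_neighbors
scores = []
for k in range(1, 21):
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(X_train, y_train)
    scores.append(accuracy_score(y_test, model.predict(X_test)))

plt.plot(range(1, 21), scores, marker='o')
plt.xlabel('k (n_neighbors)')
plt.ylabel('Test accuracy')
plt.show()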

Implementation of K-Means
import plotly.express as px
from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer

# the elbow method: within-cluster sum of squares (WCSS) for k = 1..10
x = df.iloc[:, [0, 1, 2, 3]].values
wcss = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters = i, init = 'k-means++', max_iter = 300, n_init = 10, random_state = 2041)
    kmeans.fit(x)
    wcss.append(kmeans.inertia_)

fig = px.line(x=list(range(1, 11)), y=wcss)  # plot WCSS against k; the 'elbow' suggests the cluster count
fig.show()
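
KElbowVisualizer is imported above but never used; it can draw the same elbow plot and mark the suggested k automatically. A minimal sketch (not part of the original lab), assuming yellowbrick is installed:

# Optional: let yellowbrick locate the elbow
visualizer = KElbowVisualizer(KMeans(random_state=2041), k=(1, 11))
visualizer.fit(x)   # fits KMeans for each k and records the distortion
visualizer.show()   # the chosen k is stored in visualizer.elbow_value_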

kmeans = KMeans(n_clusters = 3, init = 'k-means++',
                max_iter = 300, n_init = 10, random_state = 2041)
y_kmeans = kmeans.fit_predict(x)

plt.scatter(x[y_kmeans == 0, 0], x[y_kmeans == 0, 1],
            s = 100, c = 'red', label = 'setosa')
plt.scatter(x[y_kmeans == 1, 0], x[y_kmeans == 1, 1],
            s = 100, c = 'yellow', label = 'versicolour')
plt.scatter(x[y_kmeans == 2, 0], x[y_kmeans == 2, 1],
            s = 100, c = 'blue', label = 'virginica')

# Plotting the centroids of the clusters
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            s = 100, c = 'orange', label = 'Centroids')
plt.legend()
kmeans = KMeans(n_clusters=3, init='k-means++', max_iter=300, n_init=10, random_state=2041)
kmeans.fit(x)

sepal_length = float(input("Enter Sepal Length (in cm): "))
sepal_width = float(input("Enter Sepal Width (in cm): "))
petal_length = float(input("Enter Petal Length (in cm): "))
petal_width = float(input("Enter Petal Width (in cm): "))

new_data = np.array([sepal_length, sepal_width, petal_length, petal_width]).reshape(1, -1)
new_cluster_assignment = kmeans.predict(new_data)
print("Predicted cluster assignment:", new_cluster_assignment[0])

Enter Sepal Length (in cm): 4.3
Enter Sepal Width (in cm): 1.2
Enter Petal Length (in cm): 3.4
Enter Petal Width (in cm): 2.1
Predicted cluster assignment: 1

import warnings
warnings.filterwarnings("ignore")

#formation of clusters with varied values of 'k'
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
from matplotlib.animation import FuncAnimation

iris = load_iris()
X = iris.data
y = iris.target

max_clusters = 10
fig, ax = plt.subplots()

def update(frame):
    ax.clear()
    kmeans = KMeans(n_clusters=frame + 1, random_state=2401)
    kmeans.fit(X)
    cluster_labels = kmeans.labels_
    for cluster in range(frame + 1):
        ax.scatter(X[cluster_labels == cluster, 0], X[cluster_labels == cluster, 1],
                   label=f'Cluster {cluster + 1}')
    ax.set_title(f'Cluster Formation (K = {frame + 1})')
    ax.set_xlabel('Sepal Length (cm)')
    ax.set_ylabel('Sepal Width (cm)')
    ax.legend(loc='upper right')

ani = FuncAnimation(fig, update, frames=max_clusters, repeat=False)

from IPython.display import HTML
HTML(ani.to_jshtml())

#ani.save('cluster_formation.mp4', writer='ffmpeg', fps=2)
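
The elbow in the WCSS curve is not always sharp; the silhouette score (higher is better, range -1 to 1) gives a second opinion on the cluster count. A minimal sketch (not part of the original lab), reusing X = iris.data from above:

# Optional: silhouette score for k = 2..10
from sklearn.metrics import silhouette_score

for k in range(2, 11):
    labels = KMeans(n_clusters=k, n_init=10, random_state=2041).fit_predict(X)
    print(f'k = {k}: silhouette = {silhouette_score(X, labels):.3f}')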


Implementation of Decision Trees
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn import metrics

data = load_iris()
df = pd.DataFrame(data["data"], columns=data["feature_names"])
df
df["target"] = data["target"]
df
df.replace({"target": {0: "setosa", 1: "versicolor", 2: "virginica"}}, inplace=True)
df

feature_cols = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
X1 = df[feature_cols]  # Features
y1 = df.target

# Split dataset into training set and test set: 70% training and 30% test
X_train, X_test, y_train, y_test = train_test_split(X1, y1, test_size=0.3, random_state=1)

from sklearn.tree import DecisionTreeClassifier

# Create a decision tree classifier object
clf = DecisionTreeClassifier(criterion="gini", max_depth=3, min_samples_split=10,
                             min_samples_leaf=5)

# Fit the classifier to the training data
clf = clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

from io import StringIO
from sklearn.tree import export_graphviz
from IPython.display import Image
import pydotplus

dot_data = StringIO()
export_graphviz(clf, out_file=dot_data,
                filled=True, rounded=True,
                special_characters=True, class_names=['0', '1', '2'])
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png('flower_data.png')
Image(graph.create_png())
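
If pydotplus/Graphviz is not installed, scikit-learn's own plot_tree renders the same fitted tree without external dependencies. A minimal sketch (not part of the original lab):

# Optional: Graphviz-free rendering of the same fitted tree
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

plt.figure(figsize=(12, 6))
plot_tree(clf, feature_names=feature_cols, class_names=['0', '1', '2'],
          filled=True, rounded=True)
plt.show()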
Implementation of Random Forest
# Data Processing
import pandas as pd
import numpy as np

dataset = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/BostonHousing.csv")
dataset

# Split the data into features (X) and target (y)
X = pd.DataFrame(dataset.iloc[:, :-1])
X
y = pd.DataFrame(dataset.iloc[:, -1])

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=20, random_state=0)
regressor.fit(X_train, y_train.values.ravel())  # ravel() flattens the single-column target

y_pred = regressor.predict(X_test)

from sklearn import metrics
print("Mean Absolute Error", metrics.mean_absolute_error(y_test, y_pred))
print("Mean Squared Error", metrics.mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error", np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

Mean Absolute Error 2.6388235294117646
Mean Squared Error 17.72390980392157
Root Mean Squared Error 4.2099774113315105
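
A useful follow-up is to see which Boston Housing features the forest relies on most. A minimal sketch (not part of the original lab), reusing the fitted regressor and X from above:

# Optional: per-feature importances of the fitted forest
importances = pd.Series(regressor.feature_importances_, index=X.columns)
print(importances.sort_values(ascending=False))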
