ML Lab Record

EX NO: 1
DATE:

For a given set of training data examples stored in a .CSV file, implement and demonstrate the Candidate-Elimination algorithm to output a description of the set of all hypotheses consistent with the training examples.

AIM:

PROCEDURE:

CANDIDATE-ELIMINATION Learning Algorithm


The CANDIDATE-ELIMINATION algorithm computes the version space containing all hypotheses from H that are consistent with an observed sequence of training examples.
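
Formally, the version space with respect to hypothesis space H and training data D is VS(H, D) = { h ∈ H | Consistent(h, D) }, where Consistent(h, D) means h(x) = c(x) for every training example ⟨x, c(x)⟩ in D.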

CANDIDATE-ELIMINATION algorithm using version spaces

Training Examples:
enjoysport.csv
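
The training examples appear in the record only as an image; by convention this exercise uses the four-example EnjoySport data from Mitchell, so a representative enjoysport.csv (an assumption about the exact file contents) would be:

sky,airtemp,humidity,wind,water,forecast,enjoysport
Sunny,Warm,Normal,Strong,Warm,Same,Yes
Sunny,Warm,High,Strong,Warm,Same,Yes
Rainy,Cold,High,Strong,Warm,Change,No
Sunny,Warm,High,Strong,Cool,Change,Yes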

PROGRAM:

import numpy as np
import pandas as pd

# Loading data from a CSV file
data = pd.DataFrame(data=pd.read_csv('E:/ML/enjoysport.csv'))
print(data)

# Separating concept features from the target
concepts = np.array(data.iloc[:, 0:-1])
print(concepts)

# Copying the last column into a separate target array
target = np.array(data.iloc[:, -1])
print(target)

def learn(concepts, target):
    '''
    learn() implements the learning method of the Candidate-Elimination algorithm.
    Arguments:
        concepts - an array with all the feature values
        target - an array with the corresponding output values
    '''
    # Initialise S0 with the first instance from concepts
    # .copy() makes sure a new array is created instead of just pointing to the same memory location
    specific_h = concepts[0].copy()
    print("\nInitialization of specific_h and general_h")
    print(specific_h)

    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print(general_h)

    # The learning iterations
    for i, h in enumerate(concepts):
        # If the example is positive, generalise S and relax G
        if target[i] == "Yes":
            for x in range(len(specific_h)):
                # Change values in S & G only if the values differ
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        # If the example is negative, specialise G only
        if target[i] == "No":
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

        print("\nSteps of Candidate Elimination Algorithm", i + 1)
        print(specific_h)
        print(general_h)

    # Find indices of the rows that are still all '?', meaning they are unchanged
    indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?', '?']]
    for i in indices:
        # Remove those rows from general_h
        general_h.remove(['?', '?', '?', '?', '?', '?'])

    # Return the final values
    return specific_h, general_h

s_final, g_final = learn(concepts, target)
print("\nFinal Specific_h:", s_final, sep="\n")
print("\nFinal General_h:", g_final, sep="\n")

OUTPUT:

RESULT:
EX NO: 2
DATE:

Write a program to demonstrate the working of the decision tree based ID3 algorithm. Use an appropriate data set for building the decision tree and apply this knowledge to classify a new sample.

AIM:

PROCEDURE:

ID3 Algorithm:

ID3(Examples, Target_attribute, Attributes)

o Create a Root node for the tree
o If all Examples are positive, Return the single-node tree Root, with label = +
o If all Examples are negative, Return the single-node tree Root, with label = -
o If Attributes is empty, Return the single-node tree Root, with label = most common value of Target_attribute in Examples
o Otherwise Begin
     A ← the attribute from Attributes that best* classifies Examples
     The decision attribute for Root ← A
     For each possible value, vi, of A,
         Add a new tree branch below Root, corresponding to the test A = vi
         Let Examples_vi be the subset of Examples that have value vi for A
         If Examples_vi is empty
            Then below this new branch add a leaf node with label = most common value of Target_attribute in Examples
            Else below this new branch add the subtree ID3(Examples_vi, Target_attribute, Attributes - {A})
  End
o Return Root

* The best attribute is the one that scores highest on the chosen selection measure.
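
The attribute-selection measures used by the program below are (for a collection S with class proportions p_i, and an attribute A with values v):

    Entropy(S) = - Σ_i p_i log2(p_i)
    Gain(S, A) = Entropy(S) - Σ_v (|S_v| / |S|) Entropy(S_v)
    SplitInformation(S, A) = - Σ_v (|S_v| / |S|) log2(|S_v| / |S|)
    GainRatio(S, A) = Gain(S, A) / SplitInformation(S, A)

Note that the program ranks attributes by gain ratio (the C4.5 refinement of ID3) rather than by plain information gain.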

PROGRAM:

import numpy as np
import math
import csv

def read_data(filename):
    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        headers = next(datareader)
        metadata = []
        traindata = []
        for name in headers:
            metadata.append(name)
        for row in datareader:
            traindata.append(row)

    return (metadata, traindata)

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

    def __str__(self):
        return self.attribute

def subtables(data, col, delete):
    dict = {}
    items = np.unique(data[:, col])
    count = np.zeros((items.shape[0], 1), dtype=np.int32)

    for x in range(items.shape[0]):
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                count[x] += 1

    for x in range(items.shape[0]):
        dict[items[x]] = np.empty((int(count[x]), data.shape[1]), dtype="|S32")
        pos = 0
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                dict[items[x]][pos] = data[y]
                pos += 1
        if delete:
            dict[items[x]] = np.delete(dict[items[x]], col, 1)

    return items, dict

def entropy(S):
    items = np.unique(S)
    if items.size == 1:
        return 0

    counts = np.zeros((items.shape[0], 1))
    sums = 0

    for x in range(items.shape[0]):
        counts[x] = sum(S == items[x]) / (S.size * 1.0)

    for count in counts:
        sums += -1 * count * math.log(count, 2)
    return sums

def gain_ratio(data, col):
    items, dict = subtables(data, col, delete=False)

    total_size = data.shape[0]
    entropies = np.zeros((items.shape[0], 1))
    intrinsic = np.zeros((items.shape[0], 1))

    for x in range(items.shape[0]):
        ratio = dict[items[x]].shape[0] / (total_size * 1.0)
        entropies[x] = ratio * entropy(dict[items[x]][:, -1])
        intrinsic[x] = ratio * math.log(ratio, 2)

    total_entropy = entropy(data[:, -1])
    iv = -1 * sum(intrinsic)

    for x in range(entropies.shape[0]):
        total_entropy -= entropies[x]

    return total_entropy / iv

def create_node(data, metadata):
    # If all examples share one label, return a leaf node
    if (np.unique(data[:, -1])).shape[0] == 1:
        node = Node("")
        node.answer = np.unique(data[:, -1])[0]
        return node

    gains = np.zeros((data.shape[1] - 1, 1))

    for col in range(data.shape[1] - 1):
        gains[col] = gain_ratio(data, col)

    split = np.argmax(gains)

    node = Node(metadata[split])
    metadata = np.delete(metadata, split, 0)

    items, dict = subtables(data, split, delete=True)

    for x in range(items.shape[0]):
        child = create_node(dict[items[x]], metadata)
        node.children.append((items[x], child))

    return node

def empty(size):
    s = ""
    for x in range(size):
        s += " "
    return s

def print_tree(node, level):
    if node.answer != "":
        print(empty(level), node.answer)
        return

    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)

metadata, traindata = read_data("tennisdata.csv")
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)

Data Set:

Tennisdata.csv
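
The data set appears in the record only as an image; by convention tennisdata.csv is the 14-example PlayTennis data from Quinlan/Mitchell, so a representative file (an assumption about the exact contents) would be:

Outlook,Temperature,Humidity,Windy,PlayTennis
Sunny,Hot,High,False,No
Sunny,Hot,High,True,No
Overcast,Hot,High,False,Yes
Rainy,Mild,High,False,Yes
Rainy,Cool,Normal,False,Yes
Rainy,Cool,Normal,True,No
Overcast,Cool,Normal,True,Yes
Sunny,Mild,High,False,No
Sunny,Cool,Normal,False,Yes
Rainy,Mild,Normal,False,Yes
Sunny,Mild,Normal,True,Yes
Overcast,Mild,High,True,Yes
Overcast,Hot,Normal,False,Yes
Rainy,Mild,High,True,No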

OUTPUT:

RESULT:
EX NO: 3
DATE:

Build an Artificial Neural Network by implementing the Backpropagation algorithm and test the same using appropriate data sets.

AIM:

PROCEDURE:
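
The program below implements standard backpropagation for a 2-3-1 network with sigmoid activations, trained by batch gradient descent. In terms of the code's variable names, each epoch applies:

    d_output = (y - output) * output * (1 - output)
    d_hidden = (d_output . wout^T) * hlayer_act * (1 - hlayer_act)
    wout += lr * hlayer_act^T . d_output       wh += lr * X^T . d_hidden
    bout += lr * Σ d_output                    bh += lr * Σ d_hidden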

PROGRAM:

import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
X = X / np.amax(X, axis=0)  # normalise each feature column by its maximum
y = y / 100

# Sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of the sigmoid (x is the sigmoid output)
def derivatives_sigmoid(x):
    return x * (1 - x)

# Variable initialization
epoch = 5000               # setting training iterations
lr = 0.1                   # setting learning rate
inputlayer_neurons = 2     # number of features in the data set
hiddenlayer_neurons = 3    # number of hidden layer neurons
output_neurons = 1         # number of neurons at the output layer

# Weight and bias initialization:
# draws numbers uniformly at random with the given dimensions
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # Forward propagation
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)

    # Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad

    EH = d_output.dot(wout.T)  # how much the hidden layer weights contributed to the error
    hiddengrad = derivatives_sigmoid(hlayer_act)
    d_hiddenlayer = EH * hiddengrad

    # Weight updates: dot product of the layer activations and the next-layer error
    wout += hlayer_act.T.dot(d_output) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    # Bias updates: sum the deltas over the batch
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)
OUTPUT:

RESULT:
EX NO: 4
DATE:

Write a program to implement the naïve Bayesian classifier for a sample training data set stored as a .CSV file and compute the accuracy with a few test data sets.

AIM:

PROCEDURE:

DATA SET:

Tennisdata.csv

PROGRAM:

# import necessary libraries
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB

# load data from CSV
data = pd.read_csv('D:/tennisdata.csv')
print("The first 5 values of data is :\n", data.head())

# obtain train data and train output
X = data.iloc[:, :-1]
print("\nThe first 5 values of train data is\n", X.head())

y = data.iloc[:, -1]
print("\nThe first 5 values of train output is\n", y.head())

# convert the categorical values into numbers
le_outlook = LabelEncoder()
X.Outlook = le_outlook.fit_transform(X.Outlook)

le_Temperature = LabelEncoder()
X.Temperature = le_Temperature.fit_transform(X.Temperature)

le_Humidity = LabelEncoder()
X.Humidity = le_Humidity.fit_transform(X.Humidity)

le_Windy = LabelEncoder()
X.Windy = le_Windy.fit_transform(X.Windy)

print("\nNow the train data is :\n", X.head())

le_PlayTennis = LabelEncoder()
y = le_PlayTennis.fit_transform(y)
print("\nNow the train output is\n", y)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

classifier = GaussianNB()
classifier.fit(X_train, y_train)

from sklearn.metrics import accuracy_score
print("Accuracy is:", accuracy_score(y_test, classifier.predict(X_test)))

OUTPUT:

RESULT:
EX NO: 5
DATE:

Implement naïve Bayesian Classifier model to classify a set of documents and measure the accuracy, precision, and recall.

AIM:

Data Set:

document.csv

I love this sandwich                        pos
This is an amazing place                    pos
I feel very good about these beers          pos
This is my best work                        pos
What an awesome view                        pos
I do not like this restaurant               neg
I am tired of this stuff                    neg
I can't deal with this                      neg
He is my sworn enemy                        neg
My boss is horrible                         neg
This is an awesome place                    pos
I do not like the taste of this juice       neg
I love to dance                             pos
I am sick and tired of this place           neg
What a great holiday                        pos
That is a bad locality to stay              neg
We will have good fun tomorrow              pos
I went to my enemy's house today            neg

PROGRAM:

import pandas as pd
msg = pd.read_csv('document.csv', names=['message', 'label'])
print("Total Instances of Dataset: ", msg.shape[0])
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})

OUTPUT:

Total Instances of Dataset: 18

X = msg.message
y = msg.labelnum
from sklearn.model_selection import train_test_split
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y)
from sklearn.feature_extraction.text import CountVectorizer

count_v = CountVectorizer()
Xtrain_dm = count_v.fit_transform(Xtrain)
Xtest_dm = count_v.transform(Xtest)

df = pd.DataFrame(Xtrain_dm.toarray(),columns=count_v.get_feature_names())
print(df[0:5])

OUTPUT:
about am an and awesome bad beers best boss can ... tired to \
0 0 1 0 1 0 0 0 0 0 0 ... 1 0
1 0 0 0 0 0 0 0 0 0 0 ... 0 0
2 0 0 0 0 0 0 0 0 0 0 ... 0 0
3 0 0 0 0 0 0 0 0 0 1 ... 0 0
4 0 0 0 0 0 0 0 0 0 0 ... 0 0

today tomorrow very we went will with work


0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 1 0
4 0 0 0 0 0 0 0 0

[5 rows x 49 columns]

from sklearn.naive_bayes import MultinomialNB


clf = MultinomialNB()
clf.fit(Xtrain_dm, ytrain)
pred = clf.predict(Xtest_dm)
for doc, p in zip(Xtest, pred):
    p = 'pos' if p == 1 else 'neg'
    print("%s -> %s" % (doc, p))

OUTPUT:
I am sick and tired of this place -> pos
I do not like the taste of this juice -> neg
I love this sandwich -> neg
I can't deal with this -> pos
I do not like this restaurant -> neg

from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score


print('Accuracy Metrics: \n')
print('Accuracy: ', accuracy_score(ytest, pred))
print('Recall: ', recall_score(ytest, pred))
print('Precision: ', precision_score(ytest, pred))
print('Confusion Matrix: \n', confusion_matrix(ytest, pred))

OUTPUT:
Accuracy Metrics:

Accuracy: 0.6
Recall: 0.5
Precision: 1.0
Confusion Matrix:
[[1 0]
[2 2]]

RESULT:

EX NO: 6
DATE:

Write a program to construct a Bayesian network to diagnose CORONA infection using standard WHO Data Set.

AIM:

PROCEDURE:

Let's see if a Bayesian Belief Network (BBN) is able to diagnose the COVID-19 virus with any reasonable success. The idea is that a patient presents some symptoms, and we must diagnostically reason from the symptoms back to the cause. The BBN is taken from BayesiaLab's Differential Diagnosis model.
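
A minimal illustration of such diagnostic reasoning (a sketch only, assuming the pgmpy library and made-up CPD values rather than parameters learned from the WHO data):

from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

# toy structure: covid -> fever, covid -> dry_cough (illustrative CPDs, not WHO-derived)
model = BayesianNetwork([('covid', 'fever'), ('covid', 'dry_cough')])
cpd_covid = TabularCPD('covid', 2, [[0.9], [0.1]])                    # P(covid)
cpd_fever = TabularCPD('fever', 2, [[0.8, 0.2], [0.2, 0.8]],
                       evidence=['covid'], evidence_card=[2])         # P(fever | covid)
cpd_cough = TabularCPD('dry_cough', 2, [[0.9, 0.3], [0.1, 0.7]],
                       evidence=['covid'], evidence_card=[2])         # P(dry_cough | covid)
model.add_cpds(cpd_covid, cpd_fever, cpd_cough)

# diagnostic query: reason from the symptoms back to the cause, P(covid | fever, dry_cough)
infer = VariableElimination(model)
print(infer.query(['covid'], evidence={'fever': 1, 'dry_cough': 1}))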

PROGRAM:

Data

import pandas as pd

inside = pd.read_csv('./covid/data/00/COVID19_2020_open_line_list - Hubei.csv', low_memory=False)
outside = pd.read_csv('./covid/data/00/COVID19_2020_open_line_list - outside_Hubei.csv', low_memory=False)

outside = outside.drop(['data_moderator_initials'], axis=1)

data = pd.concat([inside, outside])

Data Transformation

We will apply transformations to the data, primarily on the symptoms. There are only about 200 unique symptoms across all the COVID-19 patients. We map these 200 unique symptoms, in a many-to-many fashion, to the following 32 broad symptom categories.

 abdominal_pain
 anorexia
 anosmia
 chest
 chills
 coronary
 diarrhoea
 digestive
 discharge
 dizziness
 dry_cough
 dryness
 dyspnea
 eye
 fatigue
 fever
 headache
 lungs
 malaise
 mild
 muscle
 myelofibrosis
 nasal
 nausea
 respiratory
 running_nose
 sneezing
 sore_throat
 sputum
 sweating
 walking
 wheezing

import json
import itertools
from datetime import datetime

with open('./covid/data/00/symptom-mapping.json', 'r') as f:
    symptom_map = json.load(f)

def tokenize(s):
    if s is None or isinstance(s, float) or len(s) < 1 or pd.isna(s):
        return None
    try:
        delim = ';' if ';' in s else ','
        return [t.strip().lower() for t in s.split(delim) if len(t.strip()) > 0]
    except:
        return s

def map_to_symptoms(s):
    if s.startswith('fever') or s.startswith('low fever'):
        return ['fever']
    return [k for k, v in symptom_map.items() if s in v]

d = data[['symptoms']].dropna(how='all').copy(deep=True)
print(d.shape)

d.symptoms = d.symptoms.apply(lambda s: tokenize(s))
d.symptoms = d.symptoms.apply(lambda tokens: [map_to_symptoms(s) for s in tokens] if tokens is not None else None)
d.symptoms = d.symptoms.apply(lambda arrs: None if arrs is None else list(itertools.chain(*arrs)))

for s in symptom_map.keys():
    d[s] = d.symptoms.apply(lambda arr: 0 if arr is None else 1 if s in arr else 0)

d = d.drop(['symptoms'], axis=1)
print(d.shape)

OUTPUT:
(656, 1)
(656, 32)

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

plt.style.use('seaborn')

v = [d[d[c] == 1].shape[0] for c in d.columns]
s = pd.Series(v, d.columns)

fig, ax = plt.subplots(figsize=(15, 5))
_ = s.plot(kind='bar', ax=ax, title=f'Frequency of symptoms, n={d.shape[0]}')

plt.tight_layout()

OUTPUT:

RESULT:

EX NO: 7
DATE:

Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for clustering using the k-Means algorithm. Compare the results of these two algorithms.

AIM:

PROGRAM:

import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np

# import some data to play with
iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
y = pd.DataFrame(iris.target)
y.columns = ['Targets']

# Build the K-Means model
model = KMeans(n_clusters=3)
model.fit(X)  # model.labels_ gives the cluster number each sample belongs to

# Visualise the clustering results
plt.figure(figsize=(14, 7))
colormap = np.array(['red', 'lime', 'black'])

# Plot the original classifications using the petal features
plt.subplot(1, 3, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real Clusters')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

# Plot the model's classifications
plt.subplot(1, 3, 2)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[model.labels_], s=40)
plt.title('K-Means Clustering')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

# General EM for GMM
from sklearn import preprocessing

# transform the data so each feature has mean value 0 and standard deviation 1
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns=X.columns)

from sklearn.mixture import GaussianMixture
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
y_gmm = gmm.predict(xs)

plt.subplot(1, 3, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_gmm], s=40)
plt.title('GMM Clustering')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

print('Observation: The GMM using EM algorithm based clustering matched the true labels more closely than the K-Means.')
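
As an optional extension (not part of the recorded output below), the comparison the exercise asks for can be quantified with the adjusted Rand index, which scores each clustering against the true labels:

from sklearn.metrics import adjusted_rand_score

print('K-Means ARI:', adjusted_rand_score(y.Targets, model.labels_))
print('GMM ARI    :', adjusted_rand_score(y.Targets, gmm.predict(xs)))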

OUTPUT:

Observation: The GMM using EM algorithm based clustering matched the true labels more closely than the K-Means.

RESULT:
EX NO: 8
DATE:

Write a program to implement k-Nearest Neighbour algorithm to classify the iris data set. Print both correct and wrong predictions.

AIM:

PROCEDURE:

Training algorithm:

 For each training example (x, f(x)), add the example to the list of training examples.

Classification algorithm:

 Given a query instance xq to be classified,
 Let x1 ... xk denote the k instances from the training examples that are nearest to xq
 Return

       f̂(xq) ← argmax over v in V of Σ (i = 1..k) δ(v, f(xi)),   where δ(a, b) = 1 if a = b and 0 otherwise

 (For a continuous-valued target, return instead the mean value of f(xi) over the k nearest training examples.)
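
A minimal NumPy sketch of this vote (an illustration only, assuming Euclidean distance; the recorded program below uses scikit-learn's KNeighborsClassifier instead):

import numpy as np
from collections import Counter

def knn_predict(Xtrain, ytrain, xq, k=5):
    # Euclidean distance from the query instance to every training example
    dists = np.linalg.norm(Xtrain - xq, axis=1)
    # indices of the k nearest training examples
    nearest = np.argsort(dists)[:k]
    # return the most common label among the k neighbours
    return Counter(ytrain[nearest]).most_common(1)[0][0]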

Data Set:

Iris Plants Dataset: the data set contains 150 instances (50 in each of three classes). Number of attributes: 4 numeric, predictive attributes and the class.

Sample Data:

PROGRAM:

import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics

names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']

# Read dataset to pandas dataframe
dataset = pd.read_csv("9-dataset.csv", names=names)
X = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]
print(X.head())

Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.10)

classifier = KNeighborsClassifier(n_neighbors=5).fit(Xtrain, ytrain)
ypred = classifier.predict(Xtest)

i = 0
print("\n-------------------------------------------------------------------------")
print('%-25s %-25s %-25s' % ('Original Label', 'Predicted Label', 'Correct/Wrong'))
print("-------------------------------------------------------------------------")
for label in ytest:
    print('%-25s %-25s' % (label, ypred[i]), end="")
    if (label == ypred[i]):
        print(' %-25s' % ('Correct'))
    else:
        print(' %-25s' % ('Wrong'))
    i = i + 1
print("-------------------------------------------------------------------------")
print("\nConfusion Matrix:\n", metrics.confusion_matrix(ytest, ypred))
print("-------------------------------------------------------------------------")
print("\nClassification Report:\n", metrics.classification_report(ytest, ypred))
print("-------------------------------------------------------------------------")
print('Accuracy of the classifier is %0.2f' % metrics.accuracy_score(ytest, ypred))
print("-------------------------------------------------------------------------")

OUTPUT:

   sepal-length  sepal-width  petal-length  petal-width
0           5.1          3.5           1.4          0.2
1           4.9          3.0           1.4          0.2
2           4.7          3.2           1.3          0.2
3           4.6          3.1           1.5          0.2
4           5.0          3.6           1.4          0.2

-------------------------------------------------------------------------
Original Label Predicted Label Correct/Wrong
-------------------------------------------------------------------------
Iris-versicolor Iris-versicolor Correct
Iris-virginica Iris-versicolor Wrong
Iris-virginica Iris-virginica Correct
Iris-versicolor Iris-versicolor Correct
Iris-setosa Iris-setosa Correct
Iris-versicolor Iris-versicolor Correct
Iris-setosa Iris-setosa Correct
Iris-setosa Iris-setosa Correct
Iris-virginica Iris-virginica Correct
Iris-virginica Iris-versicolor Wrong
Iris-virginica Iris-virginica Correct
Iris-setosa Iris-setosa Correct
Iris-virginica Iris-virginica Correct
Iris-virginica Iris-virginica Correct
Iris-versicolor Iris-versicolor Correct
-------------------------------------------------------------------------

Confusion Matrix:
[[4 0 0]
[0 4 0]
[0 2 5]]
-------------------------------------------------------------------------

Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa        1.00      1.00      1.00         4
Iris-versicolor        0.67      1.00      0.80         4
 Iris-virginica        1.00      0.71      0.83         7

    avg / total        0.91      0.87      0.87        15

-------------------------------------------------------------------------
Accuracy of the classifier is 0.87
-------------------------------------------------------------------------

RESULT:

EX NO: 9
DATE:

Implement the non-parametric Locally Weighted Regression algorithm in order to fit data points. Select an appropriate data set for your experiment and draw graphs.

AIM:

PROCEDURE:

Regression:

 Regression is a technique from statistics that is used to predict values of a desired target quantity when the target quantity is continuous.
 In regression, we seek to identify (or estimate) a continuous variable y associated with a given input vector x.
 y is called the dependent variable.
 x is called the independent variable.

Loess/Lowess Regression:

Loess regression is a nonparametric technique that uses local weighted regression to fit a smooth curve through points in a scatter plot.

Lowess Algorithm:

 Locally weighted regression is a very powerful nonparametric model used in statistical learning.
 Given a data set X, y, we attempt to find a model parameter β(x) that minimizes the residual sum of weighted squared errors.
 The weights are given by a kernel function (k or w) which can be chosen arbitrarily.

ALGORITHM:

1. Read the given data sample to X and the curve (linear or nonlinear) to Y
2. Set the value for the smoothening parameter or free parameter, say τ
3. Set the bias / point-of-interest set x0, which is a subset of X
4. Determine the weight matrix using the Gaussian kernel, collected as the diagonal matrix W:

       w(i) = exp( -(x(i) - x0)^2 / (2 τ^2) )

5. Determine the value of the model parameter β using weighted least squares:

       β = (X^T W X)^(-1) X^T W y

6. Prediction = x0 * β
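
A compact sketch of steps 1-6 with the Gaussian kernel above (an illustration only; the recorded program that follows implements the related Lowess variant, which uses a tricube kernel and a neighbourhood fraction f instead of τ):

import numpy as np

def lwr_predict(x0, X, y, tau):
    # design matrix with a bias column (X is a 1-D array of inputs)
    Xb = np.c_[np.ones(len(X)), X]
    x0b = np.r_[1, x0]
    # step 4: diagonal weight matrix from the Gaussian kernel
    W = np.diag(np.exp(-(X - x0) ** 2 / (2 * tau ** 2)))
    # step 5: weighted least-squares solution for beta
    beta = np.linalg.pinv(Xb.T @ W @ Xb) @ Xb.T @ W @ y
    # step 6: prediction at the query point
    return x0b @ beta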

PROGRAM:

from math import ceil
import numpy as np
from scipy import linalg

def lowess(x, y, f, iterations):
    n = len(x)
    r = int(ceil(f * n))
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3  # tricube weights
    yest = np.zeros(n)
    delta = np.ones(n)
    for iteration in range(iterations):
        for i in range(n):
            weights = delta * w[:, i]
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1] * x[i]

        # robustifying reweighting based on the residuals
        residuals = y - yest
        s = np.median(np.abs(residuals))
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2

    return yest

import math
n = 100
x = np.linspace(0, 2 * math.pi, n)
y = np.sin(x) + 0.3 * np.random.randn(n)
f = 0.25
iterations = 3
yest = lowess(x, y, f, iterations)

import matplotlib.pyplot as plt

plt.plot(x, y, "r.")
plt.plot(x, yest, "b-")
plt.show()

OUTPUT:

RESULT: