Program 1
Implement and demonstrate the FIND-S algorithm for finding the most specific
hypothesis based on a given set of training data samples. Read the training data from
a .CSV file.
Code:
import csv

# Read the training examples from a CSV file.
# 'Data1.csv' is a placeholder name; use the actual training data file.
with open('Data1.csv', 'r') as f:
    your_list = list(csv.reader(f))

num_attributes = len(your_list[0]) - 1
# Start with the most specific hypothesis: every attribute set to '0'.
h = [['0'] * num_attributes]

for i in your_list:
    print(i)
    if i[-1] == "Y":                      # consider only positive examples
        j = 0
        for x in i[:-1]:                  # skip the class label column
            if x != h[0][j] and h[0][j] == '0':
                h[0][j] = x               # first positive example: copy the value
            elif x != h[0][j] and h[0][j] != '0':
                h[0][j] = '?'             # conflicting value: generalise
            j = j + 1

print("Most specific hypothesis is")
print(h)
-------------------------------------------------------------------------------------------------------------------------------
Program 2
For a given set of training data examples stored in a .CSV file, implement and
demonstrate the Candidate-Elimination algorithm to output a description of the set
of all hypotheses consistent with the training examples.
Code:
import numpy as np
import csv
def candidateElimination():
    # Read the training examples ('Data2.csv' is a placeholder file name).
    data = []
    with open('Data2.csv', 'r') as f:
        for row in csv.reader(f):
            data.append(np.array(row))
    data = np.array(data)
    X = data[:, :-1]
    Y = data[:, -1].reshape(X.shape[0], 1)
    print("\nShape of X :")
    print(X.shape)
    print("\nShape of Y :")
    print(Y.shape)
    # Initialise the specific boundary S from the first positive example
    specificH = None
    for i in range(Y.shape[0]):
        if Y[i] == "P":
            specificH = np.copy(X[i])
            break
    # Initialise the general boundary G as the most general grid of hypotheses
    generalH = [['?' for _ in range(X.shape[1])] for _ in range(X.shape[1])]
    for i in range(Y.shape[0]):
        # Positive example: generalise the specific boundary
        if Y[i] == "P":
            for j in range(X.shape[1]):
                if X[i][j] != specificH[j]:
                    specificH[j] = '?'
        # Negative example: specialise the general boundary
        else:
            for j in range(X.shape[1]):
                if X[i][j] != specificH[j]:
                    generalH[j][j] = specificH[j]
    print("\nFinal specific hypothesis:")
    print(specificH)
    print("\nFinal general hypotheses:")
    print(generalH)

candidateElimination()
-------------------------------------------------------------------------------------------------------------------------------
Program 3
Write a program to demonstrate the working of the decision tree based ID3
algorithm. Use an appropriate data set for building the decision tree and apply this
knowledge to classify a new sample.
Code:
import pandas as pd
import numpy as np
import math
class Node:
    def __init__(self, l):
        self.label = l
        self.branches = {}

def entropy(data):
    total_ex = len(data)
    positive_ex = len(data.loc[data["Play Tennis"] == 'Y'])
    negative_ex = len(data.loc[data["Play Tennis"] == 'N'])
    entropy = 0
    if(positive_ex > 0):
        entropy = (-1)*(positive_ex/float(total_ex))*(math.log(positive_ex, 2) - math.log(total_ex, 2))
    if(negative_ex > 0):
        entropy += (-1)*(negative_ex/float(total_ex))*(math.log(negative_ex, 2) - math.log(total_ex, 2))
    return entropy
def gain(s, data, attrib):
    values = set(data[attrib])
    print(values)
    gain = s
    for val in values:
        gain -= len(data.loc[data[attrib] == val])/float(len(data)) * entropy(data.loc[data[attrib] == val])
    return gain

def get_attrib(data):
    entropy_s = entropy(data)
    attribute = ""
    max_gain = 0
    for attr in data.columns[:len(data.columns)-1]:
        g = gain(entropy_s, data, attr)
        if g > max_gain:
            max_gain = g
            attribute = attr
    return attribute
def decision_tree(data):
    root = Node("NULL")
    if(entropy(data) == 0):
        if(len(data.loc[data[data.columns[-1]] == 'Y']) == len(data)):
            root.label = "Y"
            return root
        else:
            root.label = "N"
            return root
    if(len(data.columns) == 1):
        return
    else:
        attrib = get_attrib(data)
        root.label = attrib
        values = set(data[attrib])
        # Recurse on each value of the chosen attribute (this step was missing
        # from the listing; without it the tree has no branches).
        for val in values:
            root.branches[val] = decision_tree(data.loc[data[attrib] == val].drop(attrib, axis=1))
        return root
def get_rules(root, rule, rules):
    if not root.branches:
        rules.append(rule[:-2] + " => " + root.label)
        return rules
    for i in root.branches:
        get_rules(root.branches[i], rule + root.label + "=" + i + " ^ ", rules)
    return rules

def test(tree, test_str):
    if not tree.branches:
        return tree.label
    return test(tree.branches[test_str[tree.label]], test_str)
data = pd.read_csv('Data3.csv')
entropy_s = entropy(data)
attrib_count = 0
cols = len(data.columns) - 1
tree = decision_tree(data)
rules = get_rules(tree, "", [])
print(rules)
test_str = {}
print("Enter test case input")
for i in data.columns[:-1]:
    test_str[i] = input(i + ": ")
print(test_str)
print(test(tree, test_str))
-------------------------------------------------------------------------------------------------------------------------------
Program 4 (Mam)
Build an Artificial Neural Network by implementing the Backpropagation algorithm
and test the same using appropriate data sets.
Code:
#!/usr/bin/env python
# coding: utf-8
import random
from math import exp
from random import seed
# Network initialization (initialize_network, n_inputs and n_outputs are not
# defined in this excerpt; see the sketch below)
network = initialize_network(n_inputs, 2, n_outputs)
i = 1
for layer in network:
    j = 1
    for sub in layer:
        print("\n Layer[%d] Node[%d]:\n" % (i, j), sub)
        j = j + 1
    i = i + 1
# Prediction
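The fragment above calls initialize_network and ends at a "Prediction" marker, but neither the network constructor nor the prediction code is shown. A minimal sketch of the missing pieces is given below, assuming the usual one-hidden-layer, list-of-dicts representation that the printing loop implies; the function names and structure are assumptions, the backpropagation training routines would still have to be supplied, and in a runnable script these definitions would precede the call to initialize_network:

def initialize_network(n_inputs, n_hidden, n_outputs):
    # One hidden layer and one output layer; each neuron is a dict whose last
    # weight acts as the bias.
    network = []
    hidden_layer = [{'weights': [random.random() for _ in range(n_inputs + 1)]} for _ in range(n_hidden)]
    network.append(hidden_layer)
    output_layer = [{'weights': [random.random() for _ in range(n_hidden + 1)]} for _ in range(n_outputs)]
    network.append(output_layer)
    return network

def activate(weights, inputs):
    # Weighted sum of the inputs plus the bias term (last weight).
    activation = weights[-1]
    for i in range(len(weights) - 1):
        activation += weights[i] * inputs[i]
    return activation

def transfer(activation):
    # Sigmoid transfer function.
    return 1.0 / (1.0 + exp(-activation))

def forward_propagate(network, row):
    # Propagate one input row through every layer of the network.
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            neuron['output'] = transfer(activate(neuron['weights'], inputs))
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs

def predict(network, row):
    # Predicted class = index of the largest output activation.
    outputs = forward_propagate(network, row)
    return outputs.index(max(outputs))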
-------------------------------------------------------------------------------------------------------------------------------
Program 4 (Alt)
Build an Artificial Neural Network by implementing the Backpropagation algorithm
and test the same using appropriate data sets.
Code:
import numpy as np
import csv
filename='Data4.csv'
lines = csv.reader(open(filename,"r"))
lines2= csv.reader(open(filename,"r"))
data = list(lines)
data2 = list(lines2)
for i in range(len(data)):
    data[i] = [float(x) for x in data[i][:-1]]      # all columns except the target
for i in range(len(data2)):
    data2[i] = [float(x) for x in data2[i][-1:]]    # keep only the last column (the target)
X = np.array((data),dtype=float)
y = np.array((data2),dtype=float)
print(X)
print(y)
X = X/np.amax(X,axis=0) # maximum of X array longitudinally
print(X)
y = y/100
print(y)
# Sigmoid function
def sigmoid(x):
    return 1/(1 + np.exp(-x))

# Derivative of the sigmoid function
def derivatives_sigmoid(x):
    return x * (1 - x)
#Variable initialization
epoch=1500 #Setting training iterations
lr=0.1 #Setting learning rate
inputlayer_neurons = 2   # number of features in the data set
hiddenlayer_neurons = 3  # number of neurons in the hidden layer
output_neurons = 1       # number of neurons in the output layer
# Weight and bias initialization: np.random.uniform draws numbers uniformly
# from [0, 1) with the given shape
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))
for i in range(epoch):
    # Forward propagation
    print("epoch", i + 1)
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)
    # Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)
    hiddengrad = derivatives_sigmoid(hlayer_act)   # how much the hidden layer weights contributed to the error
    d_hiddenlayer = EH * hiddengrad
    wout += hlayer_act.T.dot(d_output) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    # Bias updates (assumed standard backpropagation step; not in the original listing)
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr
-------------------------------------------------------------------------------------------------------------------------------
Program 5
Write a program to implement the naïve Bayesian classifier for a sample training data
set stored as a .CSV file. Compute the accuracy of the classifier, considering a few test
data sets.
Code:
import csv
import math
import random

# Handle data
def loadCsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

# Split dataset with ratio
def splitDataset(dataset, splitRatio):
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    copy = list(dataset)
    while len(trainSet) < trainSize:
        index = random.randrange(len(copy))
        trainSet.append(copy.pop(index))
    return [trainSet, copy]

# Separate by class
def separateByClass(dataset):
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if (vector[-1] not in separated):
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

# Calculate mean
def mean(numbers):
    return sum(numbers) / float(len(numbers))

# Calculate standard deviation
def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

# Summarize dataset
def summarize(dataset):
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]
    return summaries

# Summarize attributes by class
def summarizeByClass(dataset):
    separated = separateByClass(dataset)
    summaries = {}
    for classValue, instances in separated.items():
        summaries[classValue] = summarize(instances)
    return summaries

# Calculate Gaussian probability density function
def calculateProbability(x, mean, stdev):
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

# Calculate class probabilities
def calculateClassProbabilities(summaries, inputVector):
    probabilities = {}
    for classValue, classSummaries in summaries.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, stdev = classSummaries[i]
            x = inputVector[i]
            probabilities[classValue] *= calculateProbability(x, mean, stdev)
    return probabilities

# Make a prediction
def predict(summaries, inputVector):
    probabilities = calculateClassProbabilities(summaries, inputVector)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

# Get predictions
def getPredictions(summaries, testSet):
    predictions = []
    for i in range(len(testSet)):
        result = predict(summaries, testSet[i])
        predictions.append(result)
    return predictions

# Get accuracy
def getAccuracy(testSet, predictions):
    correct = 0
    for x in range(len(testSet)):
        if testSet[x][-1] == predictions[x]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0

def main():
    filename = 'Data5.csv'
    splitRatio = 0.68
    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    print('Split {0} rows into train = {1} and test = {2} rows'.format(len(dataset), len(trainingSet), len(testSet)))
    # Prepare model
    summaries = summarizeByClass(trainingSet)
    # Test model
    predictions = getPredictions(summaries, testSet)
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: {0}%'.format(accuracy))

main()
-------------------------------------------------------------------------------------------------------------------------------
Program 6
Assuming a set of documents that need to be classified, use the naïve Bayesian
Classifier model to perform this task. Calculate the accuracy, precision, and recall for
your data set.
Code:
from sklearn.datasets import fetch_20newsgroups     # Load filenames and data from the 20 newsgroups dataset
from sklearn.metrics import confusion_matrix        # Used to compute the confusion matrix of the classification
from sklearn.metrics import classification_report   # Build a text report showing the main classification metrics
import numpy as np
import os
#categories=['alt.atheism','soc.religion.christian','comp.graphics','sci.med']
#twenty_train=fetch_20newsgroups(subset='train',categories=categories,shuffle=True)
#twenty_test=fetch_20newsgroups(subset='test',categories=categories,shuffle=True)
twenty_train=fetch_20newsgroups(data_home='./scikit_learn_data',subset='train',shuffle=True)
#print(twenty_train)
twenty_test=fetch_20newsgroups(data_home='./scikit_learn_data',subset='test',shuffle=True)
#print(twenty_test)
print("Number of Training Examples: ",len(twenty_train.data))
print("Number of Test Examples: ",len(twenty_test.data))
print(twenty_train.target_names)
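# The listing jumps from loading the data to printing metrics; the vectorisation,
# model-fitting and prediction steps are missing. A minimal sketch of that missing
# step follows, assuming the usual CountVectorizer + TfidfTransformer + MultinomialNB
# pipeline (the names below are chosen here, not taken from the original listing).
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
text_clf = Pipeline([('vect', CountVectorizer()),
                     ('tfidf', TfidfTransformer()),
                     ('clf', MultinomialNB())])
text_clf.fit(twenty_train.data, twenty_train.target)
predicted = text_clf.predict(twenty_test.data)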
print("Accuracy: ",accuracy_score(twenty_test.target,predicted))
print(classification_report(twenty_test.target,predicted,target_names=twenty_test.target_n
ames))
print("Confusion matrix \n",metrics.confusion_matrix(twenty_test.target,predicted))
-------------------------------------------------------------------------------------------------------------------------------
Program 7
Write a program to construct a Bayesian network considering medical data. Use this
model to demonstrate the diagnosis of heart patients using standard Heart Disease
Data Set.
Code:
import numpy as np
import pandas as pd
import csv
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination
#read attributes
lines = list(csv.reader(open('Data7_Names.csv','r')))
attributes = lines[0]
#attributes = ['age','sex','cp','trestbps','chol','fbs','restecg','thalach','exang','oldpeak','slope','ca','thal','heartdisease']
#read cleveland heart disease data
heartDisease = pd.read_csv('Data7.csv')
#for row in heartDisease:
# print(row)
heartDisease = heartDisease.replace("?",np.nan)
#display data
print("Few examples from dataset are given below")
print(heartDisease.head())
print("Attributes and data types")
print(heartDisease.dtypes)
# Model the Bayesian network
model = BayesianModel([('age','trestbps'),('age','fbs'),('sex','trestbps'),
                       ('exang','trestbps'),('trestbps','heartdisease'),
                       ('fbs','heartdisease'),('heartdisease','restecg'),
                       ('heartdisease','thalach'),('heartdisease','chol')])
#learning CPDs using maximum likelihood estimators
print("Learning CPDs using maximum likelihood estimators...")
model.fit(heartDisease,estimator=MaximumLikelihoodEstimator)
#inferencing with bayesian network
print("\nInferencing the bayesian network:")
HeartDisease_infer = VariableElimination(model)
# Computing the probability of heart disease given some evidence
print("\n1. Probability of heart disease given age=28")
q = HeartDisease_infer.query(variables=['heartdisease'],evidence={'age':28})
print(q['heartdisease'])
print("\n2. Probability of heart disease given chol (cholesterol)=100")
q = HeartDisease_infer.query(variables=['heartdisease'],evidence={'chol':100})
print(q['heartdisease'])
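Note: newer pgmpy releases rename BayesianModel to BayesianNetwork, and VariableElimination.query() returns a single DiscreteFactor that is printed directly rather than indexed by variable name. A minimal sketch of the equivalent call, assuming a recent pgmpy version, is:

from pgmpy.models import BayesianNetwork   # replaces BayesianModel in newer releases
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 28})
print(q)                                   # the returned factor prints as a probability table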
-------------------------------------------------------------------------------------------------------------------------------
Program 8
Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same data
set for clustering using k-Means algorithm. Compare the results of these two
algorithms and comment on the quality of clustering.
Code:
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
from sklearn import preprocessing
#from sklearn.mixture import GMM # Used for older versions of sklearn
from sklearn.mixture import GaussianMixture
iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
X_norm = preprocessing.normalize(X)
y = pd.DataFrame(iris.target)
y.columns = ['Targets']
# K-Means Model
model = KMeans(n_clusters = 3)
model.fit(X_norm)
# EM Model
#gmm = GMM(n_components = 3) # Used for older versions of sklearn
gmm = GaussianMixture(n_components = 3)
gmm.fit(X_norm)
gmm_y = gmm.predict(X_norm)
# Colour map for the three classes/clusters (this definition is missing from the
# listing; it is added here so the scatter calls below work)
colormap = np.array(['red', 'lime', 'black'])
# Real Clusters
plt.subplot(2, 2, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c = colormap[y.Targets], s = 40)
plt.title('Real Clusters')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
# K-Means Output
plt.subplot(2, 2, 2)
plt.scatter(X.Petal_Length, X.Petal_Width, c = colormap[model.labels_], s = 40)
plt.title('K-Means Clustering')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
# EM Output
plt.subplot(2, 2, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c = colormap[gmm_y], s = 40)
plt.title('GMM Clustering')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.show()
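The exercise asks for a comment on the quality of the two clusterings; one way to quantify the comparison (not part of the original listing) is to score each result against the true iris labels, for example with the adjusted Rand index:

from sklearn.metrics import adjusted_rand_score
print("K-Means ARI :", adjusted_rand_score(iris.target, model.labels_))
print("GMM ARI     :", adjusted_rand_score(iris.target, gmm_y))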
-------------------------------------------------------------------------------------------------------------------------------
Program 9
Write a program to implement k-Nearest Neighbour algorithm to classify the iris data
set. Print both correct and wrong predictions.
Code:
#import
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split #Use this for Sk Learn 0.20 version
#from sklearn.cross_validation import train_test_split # Used for older versions of sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
#Input Data
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = pd.read_csv("Data_8_9.csv", names = names)
print(dataset.head())
#Preprocessing
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 4].values
# Train/test split (this call is missing from the listing; an 80/20 split is assumed)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20)
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)
#Model Creation
classifier = KNeighborsClassifier(n_neighbors = 5)
classifier.fit(x_train, y_train)
#Prediction
y_pred = classifier.predict(x_test)
for i in range(len(y_pred)):
    print("Test Example : ")
    print(x_test[i])
    print("Actual Label : ")
    print(y_test[i])
    print("Predicted Label : ")
    print(y_pred[i])
    # Flag the prediction as correct or wrong, as the exercise asks
    print("Correct Prediction" if y_test[i] == y_pred[i] else "Wrong Prediction")
    print("--------------------------------------------")
-------------------------------------------------------------------------------------------------------------------------------
Program 10
Implement the non-parametric Locally Weighted Regression algorithm in order to fit
data points. Select appropriate data set for your experiment and draw graphs.
Code:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
def localWeight(point, xmat, ymat, k):
    wei = kernel(point, xmat, k)
    # Weighted least-squares solution at this query point
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W

def localWeightRegression(xmat, ymat, k):
    m, n = np.shape(xmat)
    ypred = np.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
    return ypred

def graphPlot(X, ypred):
    sortindex = X[:, 1].argsort(0)   # argsort - indices that sort the bill values
    xsort = X[sortindex][:, 0]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.scatter(bill, tip, color='green')
    ax.plot(xsort[:, 1], ypred[sortindex], color='red', linewidth=5)
    plt.xlabel('Total bill')
    plt.ylabel('Tip')
    plt.show()
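The kernel function, the data loading and the driver calls are missing from the listing above. A minimal sketch is given below, assuming a restaurant-tips data set in a file such as 'Data10.csv' with 'total_bill' and 'tip' columns; the file name, column names and bandwidth are placeholders, not taken from the original:

def kernel(point, xmat, k):
    m, n = np.shape(xmat)
    weights = np.mat(np.eye(m))                                  # diagonal weight matrix
    for j in range(m):
        diff = point - xmat[j]
        weights[j, j] = np.exp(diff * diff.T / (-2.0 * k ** 2))  # Gaussian kernel weight
    return weights

data = pd.read_csv('Data10.csv')
bill = np.array(data.total_bill)
tip = np.array(data.tip)
mbill = np.mat(bill)
mtip = np.mat(tip)
m = np.shape(mbill)[1]
one = np.mat(np.ones(m))
X = np.hstack((one.T, mbill.T))               # prepend a column of ones for the intercept
ypred = localWeightRegression(X, mtip, 0.5)   # bandwidth k = 0.5 (assumed)
graphPlot(X, ypred)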