# ID3 decision-tree classifier (exported from Jupyter notebook cell In [13]).
import csv
import math
from collections import Counter
def load_csv(filename):
    """Read a CSV file and split it into data rows and the header row.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A ``(dataset, headers)`` tuple: ``dataset`` is a list of rows (each a
        list of strings) and ``headers`` is the first row of the file.

    Raises:
        IndexError: If the file contains no rows at all.
    """
    # The context manager closes the file even if parsing fails;
    # newline="" is what the csv module expects so quoted newlines survive.
    with open(filename, "r", newline="") as handle:
        dataset = list(csv.reader(handle))
    headers = dataset.pop(0)
    return dataset, headers
class Node:
    """One node of the decision tree.

    An internal node carries the name of the attribute it tests and a list of
    ``(attribute_value, child_node)`` pairs; a leaf carries a non-empty
    ``answer`` instead.
    """

    def __init__(self, attribute):
        # Name of the attribute tested at this node ("" for a leaf).
        self.attribute = attribute
        # (value, Node) pairs, one per observed value of the attribute.
        self.children = []
        # Class label for a leaf; "" marks an internal node.
        self.answer = ""
def subtables(data, col, delete):
    """Partition the rows of *data* by the value found in column *col*.

    NOTE(review): the ``def`` line and the ``coldata`` initialiser were missing
    from the source; the signature is taken from the call sites
    ``subtables(data, col, delete=False)`` and
    ``subtables(data, split, delete=True)`` -- confirm.

    Args:
        data: List of rows (each a list of values).
        col: Index of the column to partition on.
        delete: When true, the rows stored in the result have column *col*
            removed.

    Returns:
        ``(attr, dic)`` where ``attr`` lists the distinct values of the column
        in order of first appearance and ``dic`` maps each value to the list
        of rows carrying it.
    """
    dic = {}
    for row in data:
        key = row[col]
        if delete:
            # Build a trimmed copy instead of `del row[col]` so the caller's
            # rows are left untouched.
            row = row[:col] + row[col + 1:]
        dic.setdefault(key, []).append(row)
    # dict preserves insertion order, so this is deterministic across runs
    # (unlike list(set(...)) on strings).
    attr = list(dic)
    return attr, dic
def entropy(S):
    """Return the Shannon entropy (base 2) of the labels in *S*.

    Works for any number of distinct labels, not only two.

    Args:
        S: Sequence of hashable class labels.

    Returns:
        ``0`` when *S* is empty or contains a single distinct label,
        otherwise the entropy in bits as a float.
    """
    counts = Counter(S)
    if len(counts) <= 1:
        # Pure (or empty) set: no uncertainty.
        return 0
    total = len(S)
    sums = 0
    for count in counts.values():
        # Every count is >= 1, so the probability is never 0 and log2 is safe.
        p = count / total
        sums += -1 * p * math.log2(p)
    return sums
def compute_gain(data, col):
    """Return the information gain obtained by splitting *data* on column *col*.

    NOTE(review): the lines that initialised ``total_entropy`` and computed the
    per-subset entropy were missing from the source. They are reconstructed
    here as the standard information gain (entropy of the whole set minus the
    size-weighted entropy of each partition), taking the last column of every
    row as the class label -- confirm.

    Args:
        data: List of rows; the last element of each row is assumed to be the
            class label.
        col: Index of the attribute column to evaluate.

    Returns:
        The information gain as a number.
    """
    attValues, dic = subtables(data, col, delete=False)
    total_size = len(data)
    total_entropy = entropy([row[-1] for row in data])
    for value in attValues:
        subset = dic[value]
        ratio = len(subset) / total_size
        entro = entropy([row[-1] for row in subset])
        total_entropy -= ratio * entro
    return total_entropy
def build_tree(data, features):
    """Recursively build an ID3 decision tree.

    NOTE(review): the definitions of ``lastcol`` and ``split`` were missing
    from the source and two page-footer lines from the notebook export were
    embedded in the loop body. ``lastcol`` is reconstructed as the last column
    of every row and ``split`` as the column with the highest
    ``compute_gain`` -- confirm.

    Args:
        data: List of rows; the last element of each row is taken as the
            class label.
        features: Column names aligned with the columns of *data*.

    Returns:
        The root ``Node`` of the (sub)tree.
    """
    lastcol = [row[-1] for row in data]
    if len(set(lastcol)) == 1:
        # All rows share one label: emit a leaf.
        node = Node("")
        node.answer = lastcol[0]
        return node

    n = len(data[0]) - 1
    if n == 0:
        # No attributes left but labels still disagree: fall back to the most
        # frequent label (first seen wins ties) instead of failing on max([]).
        node = Node("")
        node.answer = max(dict.fromkeys(lastcol), key=lastcol.count)
        return node

    gains = [compute_gain(data, col) for col in range(n)]
    split = gains.index(max(gains))

    node = Node(features[split])
    # Drop the chosen attribute's name so names stay aligned with the
    # column-reduced sub-tables returned below.
    fea = features[:split] + features[split + 1:]
    attr, dic = subtables(data, split, delete=True)
    for value in attr:
        child = build_tree(dic[value], fea)
        node.children.append((value, child))
    return node
def print_tree(node, level):
    """Print the tree rooted at *node*, indenting by depth.

    A leaf prints its answer. An internal node prints its attribute, then each
    branch value one level deeper, followed by that branch's subtree two
    levels deeper.

    Args:
        node: Object with ``answer``, ``attribute`` and ``children``
            (a list of ``(value, child)`` pairs).
        level: Current indentation depth.
    """
    if node.answer != "":
        print(" " * level, node.answer)
        return
    print(" " * level, node.attribute)
    # The loop header was missing from the source; `value` and `n` are the
    # (value, child) pairs stored in node.children.
    for value, n in node.children:
        print(" " * (level + 1), value)
        print_tree(n, level + 2)
def classify(node, x_test, features):
    """Walk the tree for one test instance and print the predicted label.

    Nothing is printed when the instance carries an attribute value for which
    the tree has no branch.

    Args:
        node: Tree node with ``answer``, ``attribute`` and ``children``
            (a list of ``(value, child)`` pairs).
        x_test: Attribute values of the instance, indexed like *features*.
        features: Attribute names used to locate ``node.attribute`` in
            *x_test*.

    Raises:
        ValueError: If ``node.attribute`` is not present in *features*.
    """
    if node.answer != "":
        print(node.answer)
        return
    pos = features.index(node.attribute)
    # The loop header was missing from the source; `value` and `n` are the
    # (value, child) pairs stored in node.children.
    for value, n in node.children:
        if x_test[pos] == value:
            classify(n, x_test, features)
            # Stop at the first matching branch.
            return
# Train on the weather data set and print the resulting tree.
datasets, features = load_csv("train_weather.csv")
node = build_tree(datasets, features)
print("The decision tree for the dataset using ID3 algorithm is")
print_tree(node, 0)

# Classify every row of the test file with the trained tree.
# NOTE(review): the loop header binding `xtest` was missing from the source;
# reconstructed as an iteration over the test rows -- confirm.
testdata, features = load_csv("test_weather.csv")
for xtest in testdata:
    classify(node, xtest, features)
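# Sample output of print_tree for the training data (notebook cell output;
# indentation reconstructed from the nesting that print_tree produces):
#
#   Outlook
#     sunny
#       Humidity
#         high
#           no
#         normal
#           yes
#     rainy
#       Wind
#         weak
#           yes
#         strong
#           no
#     overcast
#       yes
#
# Source: ID3.ipynb, exported via nbconvert (page 2/2).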