DMT Cia2

K Means

import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Load the Iris dataset
data = pd.read_csv('iris.csv')

# Separate features and target variable
X = data.drop('species', axis=1)
y = data['species']

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Create a K-means clustering model with 3 clusters
kmeans = KMeans(n_clusters=3, random_state=42)

# Fit the model to the scaled data
kmeans.fit(X_scaled)

# Get the cluster labels
labels = kmeans.labels_

# Visualize the clusters on the first two standardized features
plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=labels)
plt.title('K-means Clustering of Iris Dataset')
plt.xlabel('Sepal Length (standardized)')
plt.ylabel('Sepal Width (standardized)')
plt.show()
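
The example above fixes the number of clusters at 3 because the Iris dataset has three species. When the right k is not known in advance, the elbow method is a common way to choose it; below is a minimal sketch, reusing X_scaled from the code above:

# Plot within-cluster sum of squares (inertia) for a range of k;
# the "elbow" where the curve flattens suggests a reasonable k
inertias = []
k_values = range(1, 11)
for k in k_values:
    km = KMeans(n_clusters=k, n_init=10, random_state=42)
    km.fit(X_scaled)
    inertias.append(km.inertia_)

plt.plot(k_values, inertias, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of clusters (k)')
plt.ylabel('Inertia')
plt.show()
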
Decision Tree

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load the Zoo dataset
data = pd.read_csv('zoo.csv')

# Separate features and target variable (also drop the non-numeric
# 'animal_name' column, which the classifier cannot use directly)
X = data.drop(['animal_name', 'type'], axis=1)
y = data['type']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a decision tree classifier
clf = DecisionTreeClassifier(random_state=42)

# Fit the classifier to the training data
clf.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = clf.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
The same classifier also works on a small hand-built dataset, as long as the categorical features are encoded numerically first:

import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Create a DataFrame with your CPT data (replace with your actual data)
data = pd.DataFrame({
    'age': ['<=30', '<=30', '<=30', '>30', '>30', '>30', '<=30', '>30', '>30', '<=30'],
    'income': ['low', 'low', 'medium', 'medium', 'medium', 'high', 'high', 'low', 'medium', 'high'],
    'student': ['yes', 'yes', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no', 'yes'],
    'credit_rating': ['excellent', 'good', 'excellent', 'excellent', 'fair', 'fair', 'good', 'excellent', 'fair', 'good'],
    'buys_computer': ['yes', 'yes', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'yes', 'yes']
})

# Separate features and target variable, one-hot encoding the
# categorical features (the classifier requires numeric input)
X = pd.get_dummies(data.drop('buys_computer', axis=1))
y = data['buys_computer']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a decision tree classifier
clf = DecisionTreeClassifier(random_state=42)

# Fit the classifier to the training data
clf.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = clf.predict(X_test)

# Evaluate the model (e.g., accuracy)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
K Nearest Neighbour

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Assuming your data is in a CSV file named 'zoo.csv'
data = pd.read_csv('zoo.csv')

# Separate features (X) and target variable (y): drop the non-numeric
# 'animal_name' column and predict the animal 'type'
X = data.drop(['animal_name', 'type'], axis=1)
y = data['type']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create a KNN classifier with k=3 (you can adjust the value of k)
knn = KNeighborsClassifier(n_neighbors=3)

# Fit the classifier to the training data
knn.fit(X_train_scaled, y_train)

# Make predictions on the testing data
y_pred = knn.predict(X_test_scaled)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
Eclat Algorithm

import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Create a sample dataset (replace with your own data)
data = [
    ['milk', 'bread', 'eggs'],
    ['bread', 'butter'],
    ['milk', 'bread', 'butter'],
    ['milk', 'eggs'],
    ['bread', 'eggs', 'butter']
]

# Encode the data into a one-hot encoded format
te = TransactionEncoder()
te_data = te.fit(data).transform(data)
df = pd.DataFrame(te_data, columns=te.columns_)

# Find frequent itemsets. mlxtend does not ship an ECLAT
# implementation; apriori produces the same frequent itemsets
# (ECLAT differs only in how it searches, via vertical tid-lists)
frequent_itemsets = apriori(df, min_support=0.5, use_colnames=True)

# Generate association rules from the frequent itemsets
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# Print the results
print("Frequent itemsets:")
print(frequent_itemsets)
print("\nAssociation rules:")
print(rules)
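
The rules table can be filtered on any of its metric columns; for example, keeping only high-confidence rules (column names are those produced by mlxtend's association_rules):

# Keep rules with confidence >= 0.8 and show the key columns
strong_rules = rules[rules['confidence'] >= 0.8]
print(strong_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])
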
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import TransactionEncoder

# Sample dataset
data = [
    ['milk', 'bread', 'eggs'],
    ['bread', 'butter'],
    ['milk', 'bread', 'butter'],
    ['milk', 'eggs'],
    ['bread', 'eggs', 'butter']
]

# Encode the data into a one-hot encoded format
te = TransactionEncoder()
te_data = te.fit(data).transform(data)
df = pd.DataFrame(te_data, columns=te.columns_)

# Find frequent itemsets (apriori again, as mlxtend has no ECLAT)
frequent_itemsets = apriori(df, min_support=0.5, use_colnames=True)

# The 'itemsets' column holds frozensets, so '<' tests proper subset
itemsets = list(frequent_itemsets['itemsets'])
support = dict(zip(frequent_itemsets['itemsets'], frequent_itemsets['support']))

# Closed frequent itemsets: no frequent proper superset has the same support
closed_itemsets = frequent_itemsets[frequent_itemsets['itemsets'].apply(
    lambda s: not any(s < t and support[t] == support[s] for t in itemsets))]

# Maximal frequent itemsets: no proper superset is frequent
maximal_itemsets = frequent_itemsets[frequent_itemsets['itemsets'].apply(
    lambda s: not any(s < t for t in itemsets))]

# Minimal frequent itemsets: no proper subset is frequent
# (for frequent itemsets these are exactly the single items)
minimal_itemsets = frequent_itemsets[frequent_itemsets['itemsets'].apply(
    lambda s: not any(t < s for t in itemsets))]

# Print the results
print("Frequent itemsets:")
print(frequent_itemsets)
print("\nClosed itemsets:")
print(closed_itemsets)
print("\nMaximal itemsets:")
print(maximal_itemsets)
print("\nMinimal itemsets:")
print(minimal_itemsets)
D-Eclat

import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import TransactionEncoder

# Create a sample dataset (replace with your own data)
data = [
    ['milk', 'bread', 'eggs'],
    ['bread', 'butter'],
    ['milk', 'bread', 'butter'],
    ['milk', 'eggs'],
    ['bread', 'eggs', 'butter'],
    ['milk', 'bread', 'cheese'],
    ['bread', 'butter', 'yogurt'],
    ['milk', 'bread', 'butter', 'jam'],
    ['milk', 'eggs', 'cheese'],
    ['bread', 'eggs', 'butter', 'honey']
]

# Encode the data into a one-hot encoded format
te = TransactionEncoder()
te_data = te.fit(data).transform(data)
df = pd.DataFrame(te_data, columns=te.columns_)

# mlxtend does not implement dEclat. dEclat returns the same
# frequent itemsets as ECLAT/apriori; it only stores "diffsets"
# (differences of transaction-id lists) instead of tid-lists to
# save memory on dense data. So apriori gives the same result:
frequent_itemsets = apriori(df, min_support=0.5, use_colnames=True)

# Print the results
print("Frequent itemsets:")
print(frequent_itemsets)
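
For reference, the core dEclat idea can be shown directly: every item has a tidset (the ids of the transactions containing it), a candidate's diffset is the tids that drop out of the prefix's tidset when the new item is added, and support(PX) = support(P) - |diffset(PX)|. A minimal hand-rolled sketch for 2-itemsets, reusing the data list above:

from itertools import combinations

# Build a tidset (set of transaction ids) for every item
tidsets = {}
for tid, transaction in enumerate(data):
    for item in transaction:
        tidsets.setdefault(item, set()).add(tid)

min_support_count = 5  # 0.5 support over the 10 transactions

# For a pair (a, b): diffset = tidset(a) - tidset(b), and
# support(a, b) = support(a) - |diffset|
for a, b in combinations(sorted(tidsets), 2):
    diffset = tidsets[a] - tidsets[b]
    support_ab = len(tidsets[a]) - len(diffset)
    if support_ab >= min_support_count:
        print(f"{{{a}, {b}}}: support count = {support_ab}")
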
Apriori Algorithm

import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import TransactionEncoder

# Sample transaction data as a dictionary
transaction_data = {
    1: ['milk', 'bread', 'eggs'],
    2: ['bread', 'butter'],
    3: ['milk', 'bread', 'butter'],
    4: ['milk', 'eggs'],
    5: ['bread', 'eggs', 'butter']
}

# Convert the dictionary to a list of lists
data = list(transaction_data.values())

# Encode the data into a one-hot encoded format
te = TransactionEncoder()
te_data = te.fit(data).transform(data)
df = pd.DataFrame(te_data, columns=te.columns_)

# Apply the Apriori algorithm to find frequent itemsets
frequent_itemsets = apriori(df, min_support=0.5, use_colnames=True)

# Print the results
print("Frequent itemsets:")
print(frequent_itemsets)
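
With five transactions, min_support=0.5 keeps only itemsets that appear in at least three of them. If only itemsets up to a certain size are needed, apriori can prune the search with its max_len parameter; a small usage sketch:

# Restrict the search to itemsets with at most 2 items
small_itemsets = apriori(df, min_support=0.5, use_colnames=True, max_len=2)
print(small_itemsets)
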
FP Growth

import pandas as pd
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.preprocessing import TransactionEncoder

# Sample dataset
data = [
    ['milk', 'bread', 'eggs'],
    ['bread', 'butter'],
    ['milk', 'bread', 'butter'],
    ['milk', 'eggs'],
    ['bread', 'eggs', 'butter']
]

# Encode the data into a one-hot encoded format
te = TransactionEncoder()
te_data = te.fit(data).transform(data)
df = pd.DataFrame(te_data, columns=te.columns_)

# Apply the FP-growth algorithm to find frequent itemsets.
# mlxtend builds the FP-tree internally and returns only the
# itemsets DataFrame; there is no return_tree option, so the
# tree itself cannot be visualized through this API.
frequent_itemsets = fpgrowth(df, min_support=0.5, use_colnames=True)

# Print the results
print("Frequent itemsets:")
print(frequent_itemsets)
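
FP-growth mines exactly the same frequent itemsets as Apriori; it is just usually faster on large data because it compresses the transactions into the FP-tree instead of generating candidates. That equivalence is easy to sanity-check on the df above:

from mlxtend.frequent_patterns import apriori

# Both algorithms should return the same set of frequent itemsets
ap = apriori(df, min_support=0.5, use_colnames=True)
fp = fpgrowth(df, min_support=0.5, use_colnames=True)
print(set(ap['itemsets']) == set(fp['itemsets']))  # expected: True
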
Naive Bayes
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load your dataset (replace 'your_dataset.csv' with your actual file)
data = pd.read_csv('your_dataset.csv')

# Separate features and target variable (replace 'target_column'
# with your actual target column name)
X = data.drop('target_column', axis=1)
y = data['target_column']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Naive Bayes classifier
nb = GaussianNB()

# Fit the classifier to the training data
nb.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = nb.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Assign predicted labels back to the test rows of the original data
# (y_pred has one prediction per test row, so index by X_test)
data.loc[X_test.index, 'predicted_label'] = y_pred

# Print the data with its predicted labels
print(data)
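
Accuracy from a single train/test split can be noisy on small datasets; k-fold cross-validation gives a steadier estimate. A minimal sketch, reusing X and y from above:

from sklearn.model_selection import cross_val_score

# 5-fold cross-validation: fit and score on 5 different splits
scores = cross_val_score(GaussianNB(), X, y, cv=5)
print("CV accuracy: %.3f +/- %.3f" % (scores.mean(), scores.std()))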
