# MLT Practical 3 and 4
# Implement the naïve Bayesian classifier for a sample training dataset
# stored in a .CSV file. Compute the accuracy of the classifier using a few test datasets
# (PlayTennis.csv).
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
# Load the dataset; the last column is used as the class label below.
file_path = "PlayTennis.csv"
df = pd.read_csv(file_path)
print("Dataset:")
print(df.head())

# Replace every column (features and target alike) with integer category
# codes so the estimator receives purely numeric input.
for column in df.columns:
    df[column] = df[column].astype('category').cat.codes

X = df.iloc[:, :-1]  # All columns except the last one
y = df.iloc[:, -1]  # Last column as the target variable

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# NOTE(review): GaussianNB is fitted on integer-coded categorical features
# here; CategoricalNB may be a better match for this kind of data — confirm
# against the assignment requirements before changing the estimator.
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# Score the classifier on the held-out rows only.
y_pred = nb_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
# Assume a set of documents that need to be classified. Use the naïve Bayesian classifier
# model to perform this task. Calculate accuracy, precision, and recall for your dataset
# (Spam.csv).
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score
# Load the dataset; the code below reads its 'label' and 'message' columns.
file_path = "Spam.csv"
df = pd.read_csv(file_path)

# Encode the target numerically: 'spam' -> 1, 'ham' -> 0. Any other label
# value becomes NaN under Series.map.
df['label'] = df['label'].map({'spam': 1, 'ham': 0})
X = df['message']
y = df['label']

# Split the raw text BEFORE vectorizing so the vocabulary is learned from the
# training messages only. Fitting the vectorizer on the full corpus would let
# information from the test set leak into the features.
text_train, text_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Bag-of-words counts: fit on the training text, then reuse the same
# vocabulary to transform the held-out text.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

nb_classifier = MultinomialNB()
nb_classifier.fit(X_train, y_train)

# Evaluate on the held-out messages. precision_score and recall_score are
# called with their defaults, so label 1 (spam) is the positive class.
y_pred = nb_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

print("Dataset Sample:")
print(df.head())
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")