0% found this document useful (0 votes)
18 views

SVM

Uploaded by

Anand Kumar
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
18 views

SVM

Uploaded by

Anand Kumar
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 2

# Loading necessary libraries for SVM and data manipulation

library(e1071) # For Support Vector Machine (SVM) algorithms

# Install ISLR only if it is missing: an unconditional install.packages()
# call re-downloads the package on every run and fails on offline machines.
if (!requireNamespace("ISLR", quietly = TRUE)) {
  install.packages("ISLR") # Install the ISLR package for datasets
}
library(ISLR) # Load ISLR package for access to datasets

# Load the iris dataset, a multi-class classification problem
# (150 rows, 4 numeric predictors, 3 species)
data(iris)
View(iris) # Browse the iris dataset (interactive sessions only)
str(iris) # Check the structure of the iris dataset

# Train/test split on the full iris dataset (150 rows, 3 classes).
# NOTE: the original comment claimed "binary classification with the first
# 100 samples", but the code samples from all 150 rows and keeps all three
# species — this is a multi-class problem.
set.seed(123) # Seed the RNG so the 80% split is reproducible (matches the Auto split below)
ran <- sample(nrow(iris), 0.8 * nrow(iris)) # Randomly select 80% of row indices
train_data <- iris[ran, ] # Training subset (120 rows)
str(train_data) # Check the structure of the train data

# Fit a radial-basis (RBF) kernel SVM predicting Species from every
# other column of the training data.
svm_model <- svm(
  Species ~ .,
  data = train_data,
  kernel = "radial",
  cost = 5,    # Soft-margin penalty
  gamma = 0.4  # RBF kernel width
)

# Print the fitted model (kernel, parameters, support-vector count)
svm_model

# Number of support vectors, broken down per class
summary(svm_model)$nSV

# Load the Auto dataset shipped with the ISLR package
data(Auto)
summary(Auto) # Numeric summaries of each column
str(Auto)     # Column types and dimensions
View(Auto)    # Browse the data (interactive sessions only)

# Derive a binary target: "High" when mpg exceeds the dataset median,
# "Low" otherwise, stored as a factor for classification.
mpg_cutoff <- median(Auto$mpg)
Auto$mpg_high <- factor(ifelse(Auto$mpg > mpg_cutoff, "High", "Low"))

# Confirm the new mpg_high column was added
str(Auto)

# Split Auto 70/30 into training and testing sets
set.seed(123) # Fix the RNG so the split is reproducible
n_rows <- nrow(Auto)
train_index <- sample(n_rows, size = 0.7 * n_rows) # 70% of row indices
train_data <- Auto[train_index, ]  # Training set (70%)
test_data <- Auto[-train_index, ]  # Testing set (remaining 30%)

# Train three SVM classifiers with different cost/gamma settings.
# NOTE(fix): mpg is excluded from the predictors — mpg_high was derived
# directly from mpg's median, so including it leaks the target and makes
# every model trivially near-perfect. The high-cardinality 'name' factor
# (~300 levels, one per car model) is also dropped.
# NOTE(fix): the wrapped comment continuations in the original
# ("support vectors for Model N" without a leading #) were syntax errors.

# Model 1: SVM with radial kernel, low cost (0.1), and low gamma (0.01)
svm_model1 <- svm(mpg_high ~ . - mpg - name, data = train_data,
                  kernel = "radial", cost = 0.1, gamma = 0.01)
cat("Model 1 support vectors:", summary(svm_model1)$nSV, "\n") # Per-class support-vector counts

# Model 2: SVM with radial kernel, higher cost (10), and higher gamma (0.1)
svm_model2 <- svm(mpg_high ~ . - mpg - name, data = train_data,
                  kernel = "radial", cost = 10, gamma = 0.1)
cat("Model 2 support vectors:", summary(svm_model2)$nSV, "\n") # Per-class support-vector counts

# Model 3: SVM with radial kernel, cost 10, and e1071's default gamma
# (1 / number of predictors — not "1/n" rows as the original comment said)
svm_model3 <- svm(mpg_high ~ . - mpg - name, data = train_data,
                  kernel = "radial", cost = 10)
cat("Model 3 support vectors:", summary(svm_model3)$nSV, "\n") # Per-class support-vector counts

# Score each fitted model on the held-out test set
pred1 <- predict(svm_model1, newdata = test_data) # Model 1 predictions
pred2 <- predict(svm_model2, newdata = test_data) # Model 2 predictions
pred3 <- predict(svm_model3, newdata = test_data) # Model 3 predictions

# Accuracy = proportion of test rows whose predicted label matches the truth
actual <- test_data$mpg_high
accuracy1 <- mean(pred1 == actual) # Model 1 accuracy
accuracy2 <- mean(pred2 == actual) # Model 2 accuracy
accuracy3 <- mean(pred3 == actual) # Model 3 accuracy

# Report the test-set accuracy of each model
cat("Model 1 accuracy:", accuracy1, "\n")
cat("Model 2 accuracy:", accuracy2, "\n")
cat("Model 3 accuracy:", accuracy3, "\n")

# Using caret and gmodels for detailed performance metrics (confusion matrix).
# Both are third-party packages and must already be installed.
library(gmodels) # For CrossTable (cell counts plus row/column proportions)
library(caret) # For confusionMatrix (accuracy, sensitivity, specificity, kappa)

# Confusion matrix and performance metrics for Model 1 only.
# NOTE(review): confusionMatrix() treats the first factor level ("High",
# since levels sort alphabetically) as the positive class by default —
# confirm that is the intended positive class.

confusionMatrix(pred1, test_data$mpg_high) # Full metrics table for Model 1
CrossTable(pred1, test_data$mpg_high) # Predicted-vs-actual contingency table for Model 1

# Sensitivity, specificity, and accuracy calculations:


# Sensitivity = TP / (TP + FN) -> True Positive Rate
# Specificity = TN / (TN + FP) -> True Negative Rate
# Accuracy = (TP + TN) / (TP + TN + FP + FN)
# Error Rate = (FP + FN) / (TP + TN + FP + FN)
# Precision (Positive Predictive Value) = TP / (TP + FP)
# Recall = TP / (TP + FN) -> identical to Sensitivity, just a different name

# These metrics are available directly in the confusion matrix and CrossTable output.

You might also like