0% found this document useful (0 votes)
2 views17 pages

File 2

The document contains a series of R programming tasks including data analysis using built-in datasets, implementing various algorithms such as Linear Regression, Support Vector Machine, Decision Trees, Naive Bayes, K-Nearest Neighbors, and K-Means Clustering. It also includes tasks for creating patterns with asterisks and calculating factorials. Each task is presented with code snippets demonstrating the implementation of the respective functionality.
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
2 views17 pages

File 2

The document contains a series of R programming tasks including data analysis using built-in datasets, implementing various algorithms such as Linear Regression, Support Vector Machine, Decision Trees, Naive Bayes, K-Nearest Neighbors, and K-Means Clustering. It also includes tasks for creating patterns with asterisks and calculating factorials. Each task is presented with code snippets demonstrating the implementation of the respective functionality.
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 17

17) Write an R program to print, get information, print variable values, sort variable values and analyse data for the inbuilt dataset.

# Task 17: explore the built-in PlantGrowth dataset (columns `weight` and
# `group`): print it, inspect its structure, list and sort its variables,
# and compute simple descriptive statistics.

# Load the built-in dataset
data("PlantGrowth")

# 1. Print the dataset
print("Dataset - PlantGrowth:")
print(PlantGrowth)

# 2. Get basic information about the dataset
print("Structure of the dataset:")
str(PlantGrowth)

print("Summary of the dataset:")
# Printed explicitly, like every other result in this script: a bare
# top-level summary() is not shown when the file is run through source()
# with its default settings.
print(summary(PlantGrowth))

# 3. Print variable values
print("Values of 'weight' variable:")
print(PlantGrowth$weight)

print("Values of 'group' variable:")
print(PlantGrowth$group)

# 4. Sort the 'weight' variable in ascending order
print("Sorted weights (ascending):")
print(sort(PlantGrowth$weight))

# order() returns the row permutation, so whole rows move together and each
# weight keeps its group label.
sorted_df <- PlantGrowth[order(PlantGrowth$weight), ]
print("Data frame sorted by weight:")
print(sorted_df)

# 5. Analyze the data
mean_weight <- mean(PlantGrowth$weight)
cat("Mean of weights:", mean_weight, "\n")

sd_weight <- sd(PlantGrowth$weight)
cat("Standard deviation of weights:", sd_weight, "\n")

# One box per treatment group; one fill colour per box.
boxplot(
  weight ~ group,
  data = PlantGrowth,
  main = "Weight by Treatment Group",
  xlab = "Group",
  ylab = "Weight",
  col = c("lightblue", "lightgreen", "lightcoral")
)

# Number of observations in each treatment group.
group_counts <- table(PlantGrowth$group)
print("Count of samples in each group:")
print(group_counts)
21) Write an R Program to find Factorial of a number.

# Task 21: read a number from the console and print its factorial.
# The arithmetic lives in compute_factorial() so it can be reused and
# checked without console input.

# Return n! for a non-negative whole number `n`; 0! is 1.
compute_factorial <- function(n) {
  # seq_len(0) is empty, so prod() yields 1 for n = 0. A `1:n` loop would
  # instead iterate over c(1, 0) and wrongly produce 0.
  prod(seq_len(n))
}

num <- as.numeric(readline("Enter a number: "))

# Stays 1 unless a valid non-negative whole number was entered.
factorial <- 1
if (!is.finite(num) || num != floor(num)) {
  # Covers non-numeric input (NA), infinities and fractional values, none of
  # which have a factorial in the sense computed here.
  cat("Please enter a whole number.\n")
} else if (num >= 0) {
  factorial <- compute_factorial(num)
  cat("Factorial of", num, "is:", factorial, "\n")
} else {
  cat("Factorial is not defined for negative numbers.\n")
}

22) Write an R Program to print the following pattern


* *
** **
*** ***
*******

# Task 22: print two star triangles side by side, one growing from the left
# edge and one from the right, with the gap between them closing on the
# last row.
# NOTE(review): with rows = 4 the last row printed here has 8 stars, while
# the target pattern in the task statement shows 7 -- confirm the intended
# width before changing the spacing formula.
rows <- 4
for (i in seq_len(rows)) {
  # Left triangle: i stars.
  cat(rep("*", i), sep = "")

  # Gap between the triangles; shrinks by two columns per row.
  spaces <- 2 * (rows - i)
  cat(rep(" ", spaces), sep = "")

  # Right triangle: i stars, then end the row. Each cat() is its own
  # statement and the newline escape is kept on a single line.
  cat(rep("*", i), sep = "")
  cat("\n")
}

23) Write an R Program to print the following pattern


   *
  **
 ***
****
# Task 23: right-aligned star triangle. Row i is padded with (rows - i)
# spaces and followed by i stars.
rows <- 4
for (i in seq_len(rows)) {
  padding <- strrep(" ", rows - i)
  stars <- strrep("*", i)
  cat(padding, stars, "\n", sep = "")
}
24) Write an R Program to print the following pattern
*
**
***
****
# Task 24: star pyramid. Row i is indented by (rows - i) spaces and holds
# i stars, each followed by one space.
rows <- 4
for (i in seq_len(rows)) {
  # Leading indent shrinks by one column per row.
  cat(rep(" ", rows - i), sep = "")

  # i copies of "* " (star plus trailing space).
  cat(rep("* ", i), sep = "")

  # End the row; a separate statement with the newline escape on one line.
  cat("\n")
}

25) Write an R Program to print the following pattern


*
***
*****
***
*
# Task 25: diamond made of an ascending pyramid (rows 1..rows) followed by
# a descending one (rows - 1 .. 1). Row i is indented by (rows - i) spaces
# and holds i stars, each followed by one space.
# NOTE(review): the target pattern in the task statement shows 1, 3, 5, 3, 1
# stars per row, whereas this prints 1, 2, 3, 2, 1 space-separated stars --
# confirm which rendering is wanted.
rows <- 3

# Upper half, including the widest row.
for (i in seq_len(rows)) {
  cat(rep(" ", rows - i), sep = "")
  cat(rep("* ", i), sep = "")
  cat("\n")
}

# Lower half. rev(seq_len(rows - 1)) is empty when rows is 1, whereas
# (rows - 1):1 would count 0, 1 and print two spurious rows.
for (i in rev(seq_len(rows - 1))) {
  cat(rep(" ", rows - i), sep = "")
  cat(rep("* ", i), sep = "")
  cat("\n")
}

26) Write an R Program to process the dataset by using its functions.


library(dplyr)
# Task 26: process the built-in iris dataset with dplyr -- preview it, count
# missing values, summarise Sepal.Length overall and per species, and plot
# sepal length against sepal width.
data <- iris

# Printed explicitly so the values also appear when the script is run
# through source(), which does not auto-print top-level results by default.
print(head(data))
print(sum(is.na(data)))

# Overall mean / max / min of Sepal.Length (one-row result).
summary_data <- data %>%
  summarise(
    mean_Sepal.Length = mean(Sepal.Length, na.rm = TRUE),
    max_Sepal.Length = max(Sepal.Length, na.rm = TRUE),
    min_Sepal.Length = min(Sepal.Length, na.rm = TRUE)
  )

# Mean Sepal.Length for each species.
species_summary <- data %>%
  group_by(Species) %>%
  summarise(mean_Sepal.Length = mean(Sepal.Length, na.rm = TRUE))

print(summary_data)
print(species_summary)

# Scatter plot coloured by species. The title is a single-line literal; a
# string split across source lines would embed a line break in the title.
plot(
  data$Sepal.Length,
  data$Sepal.Width,
  main = "Sepal Length vs Sepal Width",
  xlab = "Sepal Length",
  ylab = "Sepal Width",
  col = data$Species,
  pch = 19
)
27) Write an R Program to implement Linear Regression Algorithm.

library(ggplot2)
library(readr)
library(caret)

# Task 27: simple linear regression of Glucose on BMI, with a plot of the
# observed points and the fitted line.
# NOTE(review): assumes the CSV provides numeric `BMI` and `Glucose`
# columns -- confirm against the actual file.
dataset <- read_csv("preprocessed_diabetes_dataset.csv")

print(head(dataset))

# Fit Glucose as a linear function of BMI.
model <- lm(Glucose ~ BMI, data = dataset)

print(summary(model))

# Fitted values for every row, used to draw the regression line.
dataset$Predicted_Glucose <- predict(model, newdata = dataset)

# Inline comments are kept on the same line as the layer they describe; a
# comment wrapped onto the next line would be parsed as code and break the
# `+` chain.
plot <- ggplot(dataset, aes(x = BMI, y = Glucose)) +
  geom_point(color = "blue", alpha = 0.5) + # Scatter plot of actual values
  geom_line(aes(y = Predicted_Glucose), color = "red", size = 1) + # Regression line
  labs(
    title = "Linear Regression: Glucose vs BMI",
    x = "BMI",
    y = "Glucose"
  ) +
  theme_minimal()

print(plot)
28) Write an R program to implement a Support Vector Machine Algorithm.

# Task 28: linear-kernel Support Vector Machine classifying iris species,
# evaluated on a 30% hold-out set.

# Install e1071 only if it is missing, then attach it. requireNamespace()
# tests availability without attaching; library() then fails loudly if the
# package still cannot be loaded.
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071")
}
library(e1071)

data(iris)
set.seed(123)

# Random 70% of the row indices for training; the rest form the test set.
sample_index <- sample(seq_len(nrow(iris)), 0.7 * nrow(iris))
train_data <- iris[sample_index, ]
test_data <- iris[-sample_index, ]

# Predict Species from all other columns.
svm_model <- svm(Species ~ ., data = train_data, kernel = "linear")
predictions <- predict(svm_model, test_data)

# Rows are predicted classes, columns are true classes; the diagonal holds
# the correctly classified test rows.
conf_matrix <- table(Predicted = predictions, Actual = test_data$Species)
print(conf_matrix)

accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
cat("Accuracy:", round(accuracy * 100, 2), "%\n")
29) Write an R program to implement a Decision Tree Algorithm.

# Task 29: classification tree predicting `Outcome` from the remaining
# columns of a diabetes dataset, evaluated on a 30% hold-out set.
# NOTE(review): assumes diabetes_data.csv has an `Outcome` column holding
# the class label -- confirm against the actual file.

# Load necessary libraries, installing any that are missing.
# requireNamespace() tests availability without attaching the package.
if (!requireNamespace("rpart", quietly = TRUE)) {
  install.packages("rpart")
}
if (!requireNamespace("rpart.plot", quietly = TRUE)) {
  install.packages("rpart.plot")
}
library(rpart)
library(rpart.plot)

# Load dataset
diabetes <- read.csv("diabetes_data.csv")

# Set seed so the train/test split is reproducible
set.seed(123)

# Split data into training (70%) and testing (30%)
sample_index <- sample(seq_len(nrow(diabetes)), 0.7 * nrow(diabetes))
train_data <- diabetes[sample_index, ]
test_data <- diabetes[-sample_index, ]

# Build decision tree model; method = "class" requests a classification
# tree rather than a regression tree.
tree_model <- rpart(Outcome ~ ., data = train_data, method = "class")

# Plot the tree
rpart.plot(tree_model, main = "Decision Tree for Diabetes Prediction")

# Make predictions (class labels rather than class probabilities)
predictions <- predict(tree_model, test_data, type = "class")

# Confusion matrix: rows are predicted classes, columns are true classes
conf_matrix <- table(Predicted = predictions, Actual = test_data$Outcome)
print(conf_matrix)

# Accuracy: share of test rows on the diagonal of the confusion matrix
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
cat("Accuracy:", round(accuracy * 100, 2), "%\n")
30) Write an R program to implement a Naive Bayes Algorithm.
# Task 30: Naive Bayes classifier predicting the `am` column of mtcars from
# all other columns, evaluated on a 30% hold-out set.

# Install e1071 only if it is missing, then attach it. requireNamespace()
# tests availability without attaching the package.
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071")
}
library(e1071)

data(mtcars)
# The class label must be a factor for classification.
mtcars$am <- as.factor(mtcars$am)
set.seed(123)

# Random 70% of the row indices for training; the rest form the test set.
sample_index <- sample(seq_len(nrow(mtcars)), 0.7 * nrow(mtcars))
train_data <- mtcars[sample_index, ]
test_data <- mtcars[-sample_index, ]

nb_model <- naiveBayes(am ~ ., data = train_data)
predictions <- predict(nb_model, test_data)

# Rows are predicted classes, columns are true classes.
conf_matrix <- table(Predicted = predictions, Actual = test_data$am)
print(conf_matrix)

# Share of test rows on the diagonal of the confusion matrix.
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
cat("Accuracy:", round(accuracy * 100, 2), "%\n")
31) Write an R program to implement a K-Nearest Neighbour Algorithm.

# Task 31: k-nearest-neighbour classification of wine `quality`, evaluated
# on a 30% hold-out set.
# NOTE(review): assumes every column of redwinequality.csv other than
# `quality` is numeric, as knn() works on numeric features -- confirm
# against the actual file.

# Install and load required libraries. requireNamespace() tests
# availability without attaching the package.
if (!requireNamespace("class", quietly = TRUE)) {
  install.packages("class")
}
library(class)

# Load the dataset
wine_data <- read.csv("redwinequality.csv")

# View the first few rows of the dataset
print(head(wine_data))

# Convert 'quality' to a factor (since it's the target variable)
wine_data$quality <- as.factor(wine_data$quality)

# Set seed for reproducibility
set.seed(123)

# Split the dataset into training (70%) and testing (30%) sets
sample_index <- sample(seq_len(nrow(wine_data)), 0.7 * nrow(wine_data))
train_data <- wine_data[sample_index, ]
test_data <- wine_data[-sample_index, ]

# Separate features and target variable for both train and test sets.
# Features are selected by name, so the target is excluded wherever it sits
# in the file; drop = FALSE keeps a data frame even with a single feature.
feature_cols <- setdiff(names(wine_data), "quality")
train_x <- train_data[, feature_cols, drop = FALSE] # All columns except 'quality'
train_y <- train_data$quality # Target variable (quality)
test_x <- test_data[, feature_cols, drop = FALSE] # All columns except 'quality'
test_y <- test_data$quality # Target variable (quality)

# Train the KNN model (k = 3)
k_value <- 3
knn_predictions <- knn(train_x, test_x, train_y, k = k_value)

# Confusion matrix: rows are predicted classes, columns are true classes
conf_matrix <- table(Predicted = knn_predictions, Actual = test_y)
print(conf_matrix)

# Calculate accuracy: share of test rows on the diagonal
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
cat("Accuracy:", round(accuracy * 100, 2), "%\n")
32) Write an R program to implement a K-Means Clustering Algorithm.

# Task 32: k-means clustering (3 clusters) of the built-in USArrests data,
# visualised on the first two principal components.
data("USArrests")
print(head(USArrests))

# Drop rows with missing values before scaling.
USArrests <- na.omit(USArrests)

# Standardise every column so no variable dominates the distance measure
# purely because of its units.
scaled_data <- scale(USArrests)

set.seed(123)

# nstart = 25 runs 25 random initialisations and keeps the best solution.
kmeans_result <- kmeans(scaled_data, centers = 3, nstart = 25)

print(kmeans_result)

# Attach the cluster label to the data. This is done after scaled_data was
# computed, so the label is not part of the clustering or the PCA input.
USArrests$Cluster <- as.factor(kmeans_result$cluster)

# Install ggplot2 only if it is missing, then attach it. requireNamespace()
# tests availability without attaching the package.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Project the scaled data onto principal components for a 2-D view.
pca_result <- prcomp(scaled_data)

pca_data <- data.frame(pca_result$x[, 1:2], Cluster = USArrests$Cluster)

# Axis labels are single-line literals; a string split across source lines
# would embed a line break in the label.
ggplot(pca_data, aes(x = PC1, y = PC2, color = Cluster)) +
  geom_point(size = 3) +
  labs(
    title = "K-means Clustering on USArrests Dataset",
    x = "Principal Component 1",
    y = "Principal Component 2"
  ) +
  theme_minimal()

You might also like