Assignment-1 80501
Assignment-1 80501
R
manpreetkaur
2024-01-23
# Load required libraries
library(dplyr)
##
## Attaching package: 'dplyr'
library(ggplot2)
# Display results
# Print each mean annual change under a label naming its period, so the three
# numbers can be told apart in the console output.
# NOTE(review): the periods in the last two labels are inferred from the
# variable suffixes (_89_07, _07_12), following the pattern of the first
# label -- confirm against the code that computes these means.
print("Mean annual change from 1979 to 1989:")
## [1] "Mean annual change from 1979 to 1989:"
print(mean_change_79_89)
print("Mean annual change from 1989 to 2007:")
print(mean_change_89_07)
print("Mean annual change from 2007 to 2012:")
print(mean_change_07_12)
library(tidyr)
# Load required libraries
library(dplyr)
library(ggplot2)
library(tidyr) # Add this line to load tidyr
# Plot
# Box plot of Population against Year, with boxes filled by Country.
# NOTE(review): elephant_data_long is built outside this block; it is assumed
# to have Year, Population and Country columns -- confirm.
ggplot(
  data = elephant_data_long,
  mapping = aes(x = Year, y = Population, fill = Country)
) +
  geom_boxplot() +
  ggtitle("Elephant Populations Over Time") +
  xlab("Year") +
  ylab("Population") +
  theme_minimal()
#Q2
# Web traffic is modelled as a normal distribution with the mean and standard
# deviation below; every quantity in this section is derived from those two.
mean_traffic <- 4.5e6
std_dev_traffic <- 820000

# Probability calculations
# P(X < 5 million): lower tail at 5e6.
prob_less_than_5m <- pnorm(5e6, mean = mean_traffic, sd = std_dev_traffic)
# P(X >= 3 million): ask for the upper tail directly instead of computing
# 1 - pnorm(), which loses precision when the lower tail is tiny.
prob_3m_or_more <- pnorm(
  3e6,
  mean = mean_traffic, sd = std_dev_traffic,
  lower.tail = FALSE
)
# P(3 million <= X <= 4 million): difference of the two lower-tail values.
prob_between_3m_4m <- diff(
  pnorm(c(3e6, 4e6), mean = mean_traffic, sd = std_dev_traffic)
)
# additional_capacity was printed without being assigned anywhere in the
# visible script. The 85th percentile of the traffic distribution reproduces
# the recorded output (5349875).
# NOTE(review): the 0.85 level is reconstructed from that output -- confirm
# it against the assignment question.
additional_capacity <- qnorm(
  0.85,
  mean = mean_traffic, sd = std_dev_traffic
)

# Display results
print("Probability of fewer than 5 million visitors:")
print(prob_less_than_5m)
## [1] 0.7289883
print("Probability of 3 million or more visitors:")
print(prob_3m_or_more)
## [1] 0.9663203
print("Probability of between 3 and 4 million visitors:")
print(prob_between_3m_4m)
## [1] 0.237332
print("Traffic level at the 85th percentile (additional_capacity):")
print(additional_capacity)
## [1] 5349875
#Q3
# Cluster the BigBlue records on three standardized variables with k-means.

# Load dataset
# read_excel is called through its namespace because no library(readxl) call
# is visible in this script.
# NOTE(review): the path is specific to one machine; adjust it before running
# elsewhere.
bigblue <- readxl::read_excel("C:/Users/jagta/Downloads/BigBlue.xlsx")

# Normalize variables
# Each clustering variable is centred on its mean and divided by its standard
# deviation so that all three are on the same scale.
z_score <- function(x) {
  (x - mean(x)) / sd(x)
}
normalized_data <- bigblue %>%
  mutate(
    UsageRate = z_score(UsageRate),
    Recognition = z_score(Recognition),
    Leader = z_score(Leader)
  )
cluster_features <- normalized_data[, c("UsageRate", "Recognition", "Leader")]

# Determine number of clusters using the elbow method
# kmeans() starts from randomly chosen centers, so fix the seed to make the
# run repeatable and use several starts per k to avoid poor local optima.
set.seed(123)
max_clusters <- 10
wss <- vapply(
  seq_len(max_clusters),
  function(k) {
    kmeans(cluster_features, centers = k, nstart = 25)$tot.withinss
  },
  numeric(1)
)
# Draw the elbow curve that the choice of cluster count below refers to.
plot(
  seq_len(max_clusters), wss,
  type = "b",
  xlab = "Number of clusters",
  ylab = "Total within-cluster sum of squares",
  main = "Elbow method"
)

# Based on the elbow plot, let's assume the optimal number of clusters is 3
optimal_clusters <- 3
# Fit the final model with the chosen k. Without this, kmeans_model would
# still hold the last fit from the elbow search (10 clusters).
kmeans_model <- kmeans(
  cluster_features,
  centers = optimal_clusters, nstart = 25
)
# NOTE(review): the recorded summary that follows this block contains Cluster
# rows, so the original run presumably stored the assignment like this --
# confirm.
normalized_data$Cluster <- kmeans_model$cluster

# Display results
print("Number of Clusters:")
print(optimal_clusters)
## [1] 3
print("Cluster Summary:")
# Mean, minimum and maximum of each standardized variable within each
# cluster, transposed so that clusters are columns. The recorded output after
# this block came from an earlier run and may differ in layout and values.
cluster_summary <- aggregate(
  as.data.frame(cluster_features),
  by = list(Cluster = kmeans_model$cluster),
  FUN = function(x) c(mean = mean(x), min = min(x), max = max(x))
)
print(t(cluster_summary))
## V1 V2 V3
## kmeans_model$cluster 1.0000000 2.0000000 3.0000000
## EmployeeID.1 66.2962963 91.5000000 47.6578947
## EmployeeID.2 2.0000000 78.0000000 1.0000000
## EmployeeID.3 105.0000000 104.0000000 107.0000000
## UsageRate.1 1.3726029 1.5793215 -0.5707574
## UsageRate.2 0.5215694 0.7150272 -0.9831708
## UsageRate.3 1.9846656 2.1550943 0.6844744
## Recognition.1 0.8108851 2.7516181 -0.4328996
## Recognition.2 -0.4328996 1.8831133 -0.4328996
## Recognition.3 4.1991262 3.0411198 -0.4328996
## Leader.1 -0.1857172 4.7822168 -0.1857172
## Leader.2 -0.1857172 3.1262388 -0.1857172
## Leader.3 -0.1857172 6.4381947 -0.1857172
## Cluster.1 1.0000000 2.0000000 3.0000000
## Cluster.2 1.0000000 2.0000000 3.0000000
## Cluster.3 1.0000000 2.0000000 3.0000000