R Commands
R Commands
Basic Math:
> 2+3 # Addition of two numbers
[1] 5
> 5-3 # Subtraction of two numbers
[1] 2
> 2*3 # Multiplication of two numbers
[1] 6
> 12/3 # Division of two numbers
[1] 4
> 12/8 # Division of two numbers
[1] 1.5
> 3*(12+8) # DMAS Rule
[1] 60
> 2^3 # Raise to power
[1] 8
> sqrt(8) # Square root of a number
[1] 2.828427
> round(sqrt(8), digits=2) # Rounding off the answer of square root to 2 decimal places
[1] 2.83
> abs(-8) # Absolute value of a number
[1] 8
> log10(8) # Base-10 logarithm of a number
[1] 0.90309
Objects:
elements <- c("fire", "earth", "air", "water")
> mymatrix2 <- matrix(c(1001,1002,1003,1004,12,35,26,4 ),nrow=4,ncol=2)
# ‘c’ is for “combine” (concatenate); it joins its arguments into one vector (here the eight values that fill the 4 x 2 matrix column by column).
> mymatrix3 <- matrix(c(45,25,75,84,98,78,21,32,20),3,3,byrow=T)
> mymatrix1 <- matrix(1:6, nrow=3, ncol=2)
> mylist <- list(22,"ab", TRUE, 1 + 2i)
> class(mymatrix1)
[1] "matrix" "array"
> dim(mymatrix1)
[1] 3 2
> age <- c(23, 44, 15, 12, 31, 16) # read in a vector of ages
> dim(age) <- c(2,3) # reshape the vector as a 2 x 3 matrix (2 rows, 3 columns)
> age
[,1] [,2] [,3]
[1,] 23 15 31
[2,] 44 12 16
> class(age)
[1] "matrix" "array"
> x <- c(1, 2, 3, 4, 5, 6)
> y <- c(20, 30, 40, 50, 60,70)
> cbind(x, y)
x y
[1,] 1 20
[2,] 2 30
[3,] 3 40
[4,] 4 50
[5,] 5 60
[6,] 6 70
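> df <- data.frame(name = c("ash", "jane", "paul", "mark"), score = c(67, 56, 87, 91)) # create a data frame with 4 rows and 2 columns
> class(df)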
[1] "data.frame"
> dim(df)
[1] 4 2
> str(df)
'data.frame': 4 obs. of 2 variables:
$ name : chr "ash" "jane" "paul" "mark"
$ score: num 67 56 87 91
> nrow(df)
[1] 4
> ncol(df)
[1] 2
Work space & working directory:
The working directory is the directory on your computer that R currently ‘works’ in. The work
space is the set of objects currently defined.
> ls() # shows the workspace
> rm(x) # deletes the object x
> getwd() # shows the current working directory
[1] "C:/Users/m.a.javed/Documents"
> setwd(dir="~/Desktop") # change the working directory (here to Desktop; the path given
has to be an existing folder).
> setwd(dir='F:/UCP/FoMS/DataScience/MBA F23') # straight single quotes work ...
> setwd(dir="F:/UCP/FoMS/DataScience/MBA F23") # ... and so do straight double quotes
> save(mydata, file='F:/UCP/FoMS/DataScience/MBA F23/myworkspace.RData') # saves
the data to a workspace file (myworkspace.RData) (!Note: if this is an existing file, it will
overwrite it!)
PART II – Data frames, data types
Data frames
> read.csv(file="path/mydata.csv")
# reads the file at the given path (this must be an existing file on your computer);
> read.csv(file="path/mydata.csv", sep="\t", header=FALSE)
# reads the file as tab-separated, and with no header row (i.e. no column headings); for
data separated by ";" use sep=";" or the function read.csv2();
> mydata <- read.csv(file="path/mydata.csv")
# reads the .csv file as above, and assigns a variable to it. Now
> mydata # shows the data set. This is a ‘data frame’:
> is.data.frame(mydata)
[1] TRUE
> nrow(mydata) # shows number of rows;
> ncol(mydata) # shows number of columns;
> colnames(mydata) # shows the names of the columns;
> colnames(mydata)[3] # shows the name of the third column;
> colnames(mydata)[3] <- "variant" # changes the name of the third column to "variant";
> head(mydata) # shows the first 6 rows of the data frame;
> str(mydata) # shows the structure of the data frame;
> mydata[23,] # shows the 23rd row; the format for selection is: dataframe[row(s) , columns]
> mydata[,3] # shows the 3rd column;
> mydata[23,3] # shows the value in the 23rd row of the 3rd column;
> mydata[c(1:10),3] # rows 1-10, 3rd column;
> mydata$variant # shows the column with the heading “variant”;
> mydata[mydata$variant == "short" ,] # all rows in which the value for ‘variant’ is "short".
> mydata[mydata$duration >= 10 ,] # all rows in which the value for ‘duration’ is greater than or
equal to 10.
> mydata[mydata$duration <= 10 ,] # all rows in which the value for ‘duration’ is less than or
equal to 10.
> mydata$newcolumn <- "x" # adds a column ‘newcolumn’ to the data frame, with the value "x"
in every row;
> mydata[mydata$variant=="short",]$newcolumn <- "y" # sets the value for ‘newcolumn’ to
"y" in rows in which the value for ‘variant’ is "short".
> write.csv(mydata, file="~/Desktop/mydata.csv")
# writes the data frame ‘mydata’ to a .csv file in the specified file path. (!Note: if this is an existing file, it
will overwrite it!)
> save(mydata, file="~/Desktop/myworkspace.RData")
# saves the data to a workspace file (.RData) (!Note: if this is an existing file, it will overwrite
it!)
> save("mydata", "elements", file="~/Desktop/myworkspace.RData")
# saves the listed items to the specified file;
> save.image(file="~/Desktop/myworkspace.RData")
# saves the entire work space to the specified file.
> load("~/Desktop/myworkspace.RData")
# loads the data in the specified file. (You can also just double-click the file.)
Data types
> is.numeric(57) # numbers are numeric
[1] TRUE
> is.character("blablabla") # ‘words’ are character strings
[1] TRUE
> class(mydata$duration) # gives out the data type of this vector
> mydata$variant <- as.factor(mydata$variant)
# turns the vector (column) into a ‘factor’; likewise: as.numeric(), as.character().
> levels(mydata$variant) # only factors have levels.
[1] "long" "medium" "short"
> mean(mydata$duration)
> sd(mydata$duration)
> sum(mydata$duration) # only numeric vectors have a mean, standard deviation, sum, etc.
> summary(mydata$duration) # shows the central tendency measures all at once. (Or token
counts if it is a factor.)
PART III – Data inspection and analysis
> boxplot(mydata$duration) # creates a boxplot of ‘duration’ (numeric);
> boxplot(duration ~ gender, mydata)
# creates boxplots of ‘duration’ (numeric) for each level of ‘gender’ (categorical). Many
additional parameters can be set, see the help function:
> ?boxplot
> t.test(duration ~ gender, mydata) # An unpaired (two-sample) t-test of ‘duration’ by ‘gender’. For paired data,
pass the two measurement vectors instead: t.test(x, y, paired=TRUE).
Prog. 3: # Write R code to calculate the factorial of a given positive integer n using a recursive
function.
# Recursively compute n! for a single non-negative whole number `n`.
# Returns a numeric value; signals an error for any other input.
# Note: this definition masks base R's own `factorial()` while it is defined.
factorial <- function(n) {
  # Negative, fractional, infinite or missing values would never reach the
  # base case, so reject them (and non-scalar input) before recursing.
  if (!is.numeric(n) || length(n) != 1L || !is.finite(n) ||
      n < 0 || n != floor(n)) {
    stop("`n` must be a single non-negative whole number", call. = FALSE)
  }
  # Base case: 0! and 1! are both 1
  if (n <= 1) {
    return(1)
  }
  n * factorial(n - 1)
}
# Example usage
n <- 5
result <- factorial(n)
print(result)
Prog. 7: # Given a data frame df with columns "Age" and "Income", write R code to calculate
the average income for individuals above the age of 30.
# Sample data: one row per individual, with income and age
df <- data.frame(
  Income = c(75000, 65425, 83000, 103000, 90570),
  Age = c(20, 32, 41, 39, 23)
)
# Total and mean income of the individuals older than 30 (missing values ignored)
sum_income <- sum(with(df, Income[Age > 30]), na.rm = TRUE)
average_income <- mean(with(df, Income[Age > 30]), na.rm = TRUE)
# Print the total first, then the average
print(sum_income)
print(average_income)
Prog. 8: # Given a data frame df with columns "Category" and "Value", write R code to calculate
the sum of values for each unique category.
# Sample data (replace with your own data frame that has "Category" and
# "Value" columns)
df <- data.frame(
  Category = c("A", "B", "A", "C", "B"),
  Value = c(10, 20, 30, 40, 50)
)
# Calculate the sum of values for each unique category.
# The formula reads "Value grouped by Category"; the result has one row per
# category.
sum_by_category <- aggregate(Value ~ Category, data = df, FUN = sum)
# Print the result
print(sum_by_category)
Prog. 11: # Given a data frame "students" with columns "Name" and "Score", write R code to
find the student with the highest score.
# Sample data: student names with their scores
students <- data.frame(
  Name = c("John", "Emily", "Michael", "Sarah", "David"),
  Score = c(85, 92, 78, 80, 95)
)
# Look up the top score and the matching name; which.max() picks the first
# row that holds the maximum
highest_score <- with(students, Score[which.max(Score)])
student_name <- with(students, Name[which.max(Score)])
# Report the name, then the score
cat("Student with the highest score:", student_name, "\n")
cat("Highest score:", highest_score, "\n")
Prog. 12: # Write R code to calculate the average score of a student given a vector of scores.
# Sample scores of one student
scores <- c(85, 92, 78, 80, 95)
# Arithmetic mean over all scores
average_score <- mean(x = scores)
# Show the result
print(average_score)
Prog. 15: # Create a matrix named myMatrix with dimensions 3x3, filled with random values.
Write R code to calculate the sum of the values in the 3rd col of the matrix.
# Create a 3x3 matrix (matrix() fills the values column by column)
a <- c(2, 4, 6, 3, 5, 7, 8, 5, 9)
myMatrix <- matrix(a, nrow = 3, ncol = 3)
# Print the matrix
print(myMatrix)
# Calculate the sum of the 3rd col elements.
# sum() over the column slice is vectorized and, unlike a `1:nrow()` loop,
# is also correct for a matrix with zero rows.
elem_sum <- sum(myMatrix[, 3])
# Print the sum of the 3rd col elements
print(elem_sum)
Prog. 16: # Create a 3x3 matrix
# Build a 3x3 matrix holding 1..9, filled column by column
myMatrix <- matrix(seq_len(9), nrow = 3, ncol = 3)
# Show the matrix
print(myMatrix)
# Add up the entries on the main diagonal
diagonal_sum <- sum(diag(myMatrix))
# Show the total
print(diagonal_sum)
Prog. 17: # Write a function in R called "passing_rate" that takes a vector of scores and returns
the percentage of students who passed (scores above or equal to 60)
# Define the passing_rate function
# Returns the percentage (0-100) of scores that are at least 60.
# Missing scores are ignored; an empty vector yields NaN.
passing_rate <- function(scores) {
  stopifnot(is.numeric(scores))
  # The mean of a logical vector is the share of TRUE values
  mean(scores >= 60, na.rm = TRUE) * 100
}
# Example usage
scores <- c(85, 92, 58, 80, 45)
result <- passing_rate(scores)
print(result)
Prog. 18: # Write a function in R called missing_values that takes a data frame df as input and
returns the count of missing values in each column.
# missing_values(): number of NA entries in every column of the data frame `df`.
# Returns a named numeric vector with one element per column.
missing_values <- function(df) {
  # is.na() flags each cell; colSums() totals the flags per column
  colSums(is.na(df))
}
# Demonstration with one NA in each column
df <- data.frame(
  A = c(1, NA, 3),
  B = c("Apple", "Banana", NA),
  C = c(NA, "Orange", "Mango")
)
result <- missing_values(df)
print(result)
Prog. 19: # Write a function in R called remove_duplicates that takes a vector v as input and
returns a new vector with duplicate values removed.
# remove_duplicates(): copy of `v` in which every value appears only once,
# keeping the order of first appearance.
remove_duplicates <- function(v) {
  unique(v)
}
# Demonstration
v <- c(1, 2, 3, 2, 4, 1, 5)
result <- remove_duplicates(v)
print(result)
Prog. 20: # Given two numeric vectors x and y, write R code to calculate their dot product.
# Sample vectors (replace with your own numeric vectors of equal length)
x <- c(1, 2, 3)
y <- c(4, 5, 6)
# Guard against silent recycling: `x * y` would quietly repeat the shorter
# vector if the lengths differed
stopifnot(is.numeric(x), is.numeric(y), length(x) == length(y))
# Calculate the dot product of two vectors
dot_product <- sum(x * y)
# Print the result
print(dot_product)
Prog. 21: # Write R code to sort a vector named numbers in ascending order.
# Sample vector (replace with your own data)
numbers <- c(42, 7, 19, 3, 25)
# Sort the vector in ascending order (sort() drops NA values by default)
sorted_numbers <- sort(numbers, decreasing = FALSE)
# Print the sorted vector
print(sorted_numbers)
Prog. 22: # Write a function in R called compute_median that takes a numeric vector v as input
and returns the median value.
# compute_median(): median of the numeric vector `v`, as computed by median().
compute_median <- function(v) {
  median(v)
}
# Demonstration
vector <- c(1, 2, 3, 4, 5)
result <- compute_median(vector)
print(result)