0% found this document useful (0 votes)
21 views3 pages

r-cheatsheet-ABC

Uploaded by

ikon82870
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
21 views3 pages

r-cheatsheet-ABC

Uploaded by

ikon82870
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 3

Statistics and Data Science I (ABC) CHEAT SHEET

Word Equations Summary Tables Simple Statistics


outcome = explanatory + other stuff # compute five-number summary mean(data_set$Y)
favstats(~ Y, data = data_set) var(data_set$Y)
Y = X + other stuff sd(data_set$Y)
# create frequency table
tally(data_set$Y) cohensD(Y ~ X, data = data_set)
Basics tally(~ Y, data = data_set) cor(Y ~ X, data = data_set)
print("Hello world!") # tally by condition b1(Y ~ X, data = data_set)
tally(~ Y < 1900, data = data_set) b1(my_model)
# assign value to object
my_number <- 5 # two-way frequency table pre(Y ~ X, data = data_set)
tally(Y ~ X, data = data_set, margin = TRUE, f(Y ~ X, data = data_set)
# combine values into vector format = "proportion")
my_vector <- c(1, 2, 3)

# first element in vector


Data Frame
my_vector[1] # structure of data frame # arrange rows by variable
str(data_set) arrange(data_set, Y)
# orders values or cases
sort(my_vector)
# view first/last six rows # creates data frame from csv file
head(data_set) data_set <- read.csv("file_name", header = TRUE)
# arithmetic operations
sum(1, 2, 100), +, -, *, / tail(data_set)
# convert quantitative variable
sqrt(157)
# select multiple variables # to categorical
abs(data_set$Y)
select(data_set, Y1, Y2) factor(data_set$Y)
# logical operations factor(data_set$Y,
>, <, >=, <=, ==, !=, |, & # first six rows of selected variables levels = c(1,2),
head(select(data_set, Y1, Y2)) labels = c("A", "B"))
# results in a new variable with values
# of TRUE or FALSE # transform values
data_set$C <- data_set$A > data_set$B # select variable (a column) recode(data_set$Y, "0" = 0, "1" = 50, "2" = 100)
data_set$Y
Probability Distribution # creates two equal sized groups
# find rows that meet some condition ntile(data_set$Y, 2)
# calculate the probability area data_set[data_set$Y > 40, ]
xpnorm(65.1, data_set$mean, data_set$sd) filter(data_set, Y > 300) # convert categorical variable
# to quantitative
zscore(data_set$Y) as.numeric(data_set$Y)
# find rows that do not have NA
filter(data_set, is.na(Y) == FALSE)
# returns t at this probability filter(data_set, !is.na(Y))
qt(.975, df = 999)
# returns F at this probability
qf(.95, df1 = 1, df2 = 100)

# CI using t distribution
confint(empty_model)

# calculate p-value using F-distribution


xpf(sample_f, df1 = 2 , df2 = 10)
Page: 1 ▷ Updated: 2024-10 ▷ Learn more about CourseKata @ https://coursekata.org
Statistics and Data Science I (ABC) CHEAT SHEET
Simulation Fitting and Evaluating Models
# sample without replacement # randomize sampling distribution of b1s, # empty model
sample(data_set, 6) # centered on 0 empty_model <- lm(Y ~ NULL, data = data_set)
sdob1 <- do(1000) *
# sample with replacement b1(shuffle(Y) ~ X, data = data_set) # use one explanatory variable
resample(data_set, 10) my_model <- lm(Y ~ X, data = data_set)
# bootstrap sampling distribution of b1s,
# centered on sample b1 # model predictions and residuals
do(3) * resample (data_set, 10) data_set$empty_predict <- predict(empty_model)
sdob1_boot <- do(1000) *
b1(Y ~ X, data = resample(data_set)) data_set$empty_resid <- resid(empty_model)
# mixes up values in a variable
shuffle(data_set$Y) # produce ANOVA table
# count the number of b1s at the upper
anova(empty_model)
# simulate sampling 10000 Ys # and lower extreme
tally(sdob1$b1 > sample_b1 | supernova(my_model)
# from normal distribution
sim_Y <- rnorm(10000, Y_stats$mean, sdob1$b1 < -sample_b1)
# t-test, using pooled variance
Y_stats$sd) t.test(Tip ~ Condition, data = data_set,
# return TRUE for middle 95% of distribution var.equal=TRUE)
# put simulated Ys into dataframe
data_set<- data.frame(sim_Y) middle(sdob1$b1, .95)
# pairwise comparison
# randomize sampling distribution of PREs # corrections: "Tukey","Bonferroni","none"
# simulate
sdopre <- do(1000) * pre(shuffle(Y) ~ X, pairwise(my_model, correction = "Tukey")
# sampling distribution of means data = data_set)
sdom_sim <- do(10000) * mean(rnorm(157,
Y_stats$mean, Y_stats$sd)) # randomize sampling distribution of Fs
sdof <- do(1000) *
# bootstrap f(shuffle(Y) ~ X, data = data_set)
# sampling distribution of means
sdom_boot <- do(10000) * # counts extreme Fs
mean(resample(data_set$Y, 157)) sample_f <- f(Y ~ X, data = data_set)
tally(~f > sample_f, data = sdof)

Page: 2 ▷ Updated: 2024-10 ▷ Learn more about CourseKata @ https://coursekata.org


Statistics and Data Science I (ABC) CHEAT SHEET
Visualizations
gf_histogram(~ Y, data = data_set) %>% # sampling distribution of b1
gf_point(Y ~ X, data = data_set) # change labels gf_histogram(~b1, data = sdob1,
gf_labs(title = "Graph Title", fill = ~middle(b1, .95)) %>%
x = "Y_Name", y = "Frequency") # modify the limits on x- and y-axes
gf_lims(x = c(-12, 12), y = c(0, 70))

gf_jitter(Y ~ X, data = data_set)


# faceted grid of histograms
gf_histogram(~ Y, data = data_set) %>%
gf_facet_grid(X ~ .)

gf_point(Y ~ X, data = data_set) %>%


# add model predictions as red points
gf_point(Y ~ X , shape = 1, size = 3,
color = "firebrick") %>%
# add best fitting model as a red line
gf_boxplot(Y ~ X, data = data_set) gf_model(my_model, color = "red")

gf_dhistogram(~ Y, data = data_set,


fill = "orange") %>%
gf_density()

gf_boxplot(Y ~ X, data = data_set, fill =


"orange") %>% pairwise(my_model, plot = TRUE)
gf_jitter(height = 0, alpha = .2, size = 3)
gf_bar( ~ Y, data = data_set)

Page: 3 ▷ Updated: 2024-10 ▷ Learn more about CourseKata @ https://coursekata.org

You might also like