0% found this document useful (0 votes)
10 views

Code R For Student

The document shows how to set up a working folder and load libraries in R to analyze salary data. It covers importing data, calculating descriptive statistics, drawing histograms and density plots, estimating confidence intervals, and performing hypothesis tests on the mean, proportion, and variance, as well as testing the data for normality.
Copyright
© All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
10 views

Code R For Student

The document shows how to set up a working folder and load libraries in R to analyze salary data. It covers importing data, calculating descriptive statistics, drawing histograms and density plots, estimating confidence intervals, and performing hypothesis tests on the mean, proportion, and variance, as well as testing the data for normality.
Copyright
© All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 6

# NOTE(review): rm(list = ls()) deletes every object in the global
# environment. It is kept because the lesson starts from a clean workspace,
# but it should not be copied into reusable scripts.
rm(list = ls())

# ==== Set up the working folder ====
# The folder below is a machine-specific path. Switch to it only when it
# exists; otherwise stay in the current working directory so the rest of the
# script can still run (CSV files are then written to the current directory).
work_dir <- "C:\\Users\\gtech\\Desktop\\R_BM Toan"
if (dir.exists(work_dir)) {
  setwd(work_dir)
} else {
  message("Working folder not found, staying in: ", getwd())
}

library(carData)  # professors' salary data
library(BSDA)     # estimation and hypothesis-test functions
library(EnvStats) # tests for the variance
library(table1)   # descriptive statistics for the whole data set

# ========== Get the data ==========

# === 1. Take the data from an existing source ===
d <- Salaries
# d <- read.csv("salary_pro.csv")
names(d)  # names of the variables in the data
head(d)   # first few rows of the data
View(d)   # open the data in the viewer
str(d)    # structure / types of the data
d$salary  # extract one column of d

# === 2. Create data ===
# Sample of 100 observations built from (value, count) pairs.
x <- rep(
  c(100, 120, 140, 180, 200, 220),
  times = c(17, 20, 25, 23, 10, 5)
)

# Frequency table of the data.
freq_table <- table(x)
freq_table

# Relative-frequency table. prop.table() must be given the frequency table:
# applied to the raw vector it would only return x / sum(x).
rel_freq_table <- prop.table(freq_table)
rel_freq_table

# Save the frequency table of the data.
write.csv(freq_table, "vi_du_1.csv")

# === Functions that compute summary measures of the data ===
mean(x)  # sample mean (x bar)
sd(x)    # sample standard deviation s
var(x)   # sample variance s^2

mean(d$salary)    # mean
median(d$salary)  # median
var(d$salary)     # sample variance s^2
sd(d$salary)      # sample standard deviation s

# Entries of the frequency table of salary whose count equals the maximum
# count, i.e. the most frequent salary value(s).
which(table(d$salary) == max(table(d$salary)))

# min, max, median, mean and the 25% / 75% quantiles
summary(d$salary)

range(d$salary)  # smallest and largest value
range(x)

# ===== DESCRIPTIVE STATISTICS =====
# === Build a descriptive-statistics table for the whole data set ===
str(d)
names(d)  # names of the variables in the data

# One summary table over all listed variables of d.
bang_tkmt <- table1(
  ~ rank + discipline + yrs.since.phd + yrs.service + sex + salary,
  data = d
)
bang_tkmt
write.csv(bang_tkmt, "thongkemota_vidu.csv")

# ======= Look at the shape of the data distribution =======

# Histogram of salary drawn with frequencies (the default).
hist(d$salary)

# Same histogram on the density scale (freq = TRUE would draw frequencies),
# so that a density curve can be drawn on top of it.
hist(d$salary, main = "Bieu do phan bo cua du lieu", freq = FALSE)

# Kernel density estimate of salary, overlaid on the histogram.
lines(density(d$salary), col = "blue")

# ==================== CONTENT OF TKT 2: STATISTICS PART ====================
x <- rep(
  c(100, 120, 140, 180, 200, 220),
  times = c(17, 20, 25, 23, 10, 5)
)
n <- length(x)

# ======== Point estimation ========
xbar <- mean(d$salary)  # point estimate of the mean
s <- sd(d$salary)       # point estimate of the standard deviation

# ============ ESTIMATION WITH CONFIDENCE INTERVALS ============

# 1. ESTIMATING THE MEAN WITH KNOWN VARIANCE

# ======== Computing the formula by hand ========
xbar <- mean(d$salary)
xbar

# Sample size.
n <- length(d$salary)
n

alpha <- 0.05

# Critical value u_(alpha/2) of the standard normal distribution.
z.score <- qnorm(1 - alpha / 2)
z.score

# Margin of error; 0.1 is the given (known) standard deviation.
E <- z.score * 0.1 / sqrt(n)

lower <- xbar - E
upper <- xbar + E
print(c(lower, upper))

#=== Tao 1 ham tong quat de su dung===#

#' Two-sided confidence interval for a mean when the standard deviation is known
#'
#' @param x Numeric vector of observations.
#' @param alpha Significance level; the interval has confidence 1 - alpha.
#' @param sigma_da_co The known standard deviation.
#' @return Numeric vector of length 2: lower bound, then upper bound.
ktc_muy_1 <- function(x, alpha, sigma_da_co) {
  # Margin of error: normal critical value times sigma / sqrt(sample size).
  margin <- qnorm(1 - alpha / 2) * sigma_da_co / sqrt(length(x))
  mean(x) + c(-1, 1) * margin
}

# Example call of ktc_muy_1 on a small sample given as (value, count) pairs.
x <- rep(c(100, 120, 150), times = c(17, 30, 10))

alpha <- 0.1        # significance level
sigma_da_co <- 0.3  # known standard deviation
ktc_muy_1(x, alpha, sigma_da_co)

# ====== USING THE z.test FUNCTION ======

# 90% interval for the mean salary with a given standard deviation of 0.5.
ktc <- z.test(d$salary, sigma.x = 0.5, conf.level = 0.9)
ktc
ktc[["conf.int"]]  # the confidence interval itself
# 2. ESTIMATING THE MEAN WITH UNKNOWN VARIANCE

# ======== Computing the formula by hand ========
alpha <- 0.05
df <- n - 1  # degrees of freedom; n is the sample size set earlier in the script

xbar <- mean(d$salary)
s <- sd(d$salary)
SE <- s / sqrt(n)  # S.E. = standard error

# Critical value t_(alpha/2) with n - 1 degrees of freedom.
t.score <- qt(1 - alpha / 2, df)
E <- t.score * SE

lower <- xbar - E
upper <- xbar + E

print(c(lower, upper))

#=== Tao ham tinh tong quat=====#


#' Two-sided confidence interval for a mean when the variance is unknown
#'
#' Uses only the data passed in `x`, so the result does not depend on any
#' global variable.
#'
#' @param x Numeric vector of observations (at least two values).
#' @param alpha Significance level; the interval has confidence 1 - alpha.
#' @return Numeric vector of length 2: lower bound, then upper bound.
ktc_muy_2 <- function(x, alpha) {
  n <- length(x)
  xbar <- mean(x)
  SE <- sd(x) / sqrt(n)  # standard error of the mean
  # Student critical value with n - 1 degrees of freedom.
  t.score <- qt(1 - alpha / 2, df = n - 1)
  E <- t.score * SE
  c(xbar - E, xbar + E)
}

# ============= Using the t.test function =============

# t.test(data, confidence level)

# Two-sided (symmetric) confidence interval.
ktc_1 <- t.test(d$salary, conf.level = 0.95)
ktc_1[["conf.int"]]  # the confidence interval itself

# One-sided intervals, selected through `alternative`.
ktc_2 <- t.test(d$salary, alternative = "less", conf.level = 0.95)
ktc_3 <- t.test(d$salary, alternative = "greater", conf.level = 0.95)

# 3. ESTIMATING A PROPORTION

# Small illustration of counting: number of values above 100 in an example
# vector. It comes first so that it does not overwrite the salary count below.
x <- c(rep(100, 17), rep(120, 30), rep(150, 10))
x_1 <- sum(x > 100)
x_1

# Estimate the proportion of professors whose salary is above 100000.
View(d$salary)
x <- sum(d$salary > 100000)  # number of people with salary above 100000
x
n <- length(d$salary)        # sample size
n

# ====== Manual computation ======
alpha <- 0.05
f <- x / n                            # sample proportion
z.score <- qnorm(1 - alpha / 2)
E <- sqrt(f * (1 - f) / n) * z.score  # margin of error, computed before use
lower <- f - E
upper <- f + E
print(c(lower, upper))

# ==== Using the prop.test function ====

# prop.test(number of observations meeting the condition, sample size,
#           correct = FALSE, conf.level = 0.95)
# correct = FALSE (no continuity correction) is used when the conditions
# n * f >= 5 and n * (1 - f) >= 5 hold.
ktc_4 <- prop.test(256, 397, correct = FALSE, conf.level = 0.95)
ktc_5 <- prop.test(
  256, 397,
  correct = FALSE, alternative = "less", conf.level = 0.95
)
ktc_6 <- prop.test(
  256, 397,
  correct = FALSE, alternative = "greater", conf.level = 0.95
)
ktc_5$conf.int

# 4. ESTIMATING THE VARIANCE

# Two-sided interval for the variance of salary.
ktc_7 <- varTest(d$salary, conf.level = 0.95)
ktc_7[["conf.int"]]

# One-sided intervals; "g" and "l" are abbreviations passed to `alternative`.
ktc_8 <- varTest(d$salary, alternative = "g", conf.level = 0.95)
ktc_8[["conf.int"]]

ktc_9 <- varTest(d$salary, alternative = "l", conf.level = 0.95)
ktc_9[["conf.int"]]

# ===================== HYPOTHESIS TESTS FOR PARAMETERS =====================
# In every test below `alternative` can be "two.sided", "greater" or "less".

# 1. TEST FOR THE MEAN WITH KNOWN VARIANCE
# One-sample test, H0: mu = 10, H1: mu != 10
z.test(d$salary, mu = 10, sigma.x = 10, alternative = "two.sided")

# 2. TEST FOR THE MEAN WITH UNKNOWN VARIANCE
# One-sample test, H0: mu = 120000, H1: mu < 120000
t.test(d$salary, mu = 120000, alternative = "less")

# 3. TEST FOR THE VARIANCE
# One-sample test, H0: sigma^2 = 25, H1: sigma^2 > 25
varTest(d$salary, sigma.squared = 25, alternative = "greater")

# ============= TEST FOR NORMALITY OF THE DATA =============
shapiro.test(d$salary)

# The folder below is a machine-specific path. Switch to it only when it
# exists; otherwise stay in the current working directory so the rest of the
# script can still run.
work_dir <- "C:\\Users\\gtech\\Desktop\\R_BM Toan"
if (dir.exists(work_dir)) {
  setwd(work_dir)
} else {
  message("Working folder not found, staying in: ", getwd())
}

library(carData)  # professors' salary data
library(corrplot)
library(BSDA)
# ============ Simulating data from some distributions ============
# 1. Binomial distribution

# Draw a sample of 100 values from Binomial(size = 20, prob = 0.5).
x <- rbinom(n = 100, size = 20, prob = 0.5)
hist(x, xlim = c(0, 20))

dbinom(3, size = 20, prob = 0.3)  # P(X = 3)
pbinom(3, size = 20, prob = 0.3)  # P(X <= 3)

# Kernel density estimate of the simulated sample.
plot(density(x))

# 2. Poisson distribution

dpois(10, lambda = 0.3)  # P(X = 10)
ppois(10, lambda = 0.3)  # P(X <= 10)

# Draw a sample of 100 values from Poisson(lambda = 15).
x <- rpois(n = 100, lambda = 15)

hist(x)

# Kernel density estimate in a new plot, then the same curve drawn on it again.
plot(density(x))
# hist(x,main="Frequency distribution", xlab="x", ylab=" ",probability = T)
lines(density(x))

# 3. Exponential distribution

dexp(5, rate = 0.3)    # density f(5)
pexp(5, rate = 0.5)    # pexp(x, rate) = P(X <= x)
qexp(0.3, rate = 0.5)  # the value q such that P(X <= q) = 0.3

# Simulation: rexp(n, rate) draws n random values; here 150 values at rate 0.1.
x <- rexp(150, rate = 0.1)

# Histogram on the density scale so the theoretical density can be overlaid.
hist(x, freq = FALSE)

# Density curve of the same distribution the sample was drawn from
# (rate = 0.1), added on top of the histogram instead of opening a new plot.
curve(dexp(x, rate = 0.1), add = TRUE)

# 5. Uniform distribution

dunif(10, min = 1, max = 9)    # dunif(x, min, max) = density f(x)
punif(3, min = 1, max = 9)     # P(X <= 3)
qunif(0.25, min = 1, max = 9)  # qunif(p, min, max): the x with P(X <= x) = p

# Draw 1000 values from the uniform distribution on [12, 111] and show them
# as a histogram on the density scale.
x <- runif(n = 1000, min = 12, max = 111)
hist(x, probability = TRUE)

# 4. Normal distribution

dnorm(5, mean = 5, sd = 1)  # dnorm(x, mean, sd) = density f(x)

# Critical value
# qnorm(p, mean, sd) returns the value q such that P(X < q) = p
alpha <- 0.025
qnorm(1 - alpha)

# Simulation: rnorm(n, mean, sd) draws n random values; here 100 values.
x <- rnorm(n = 100, mean = 12, sd = 1)
x

hist(x)

# Resample 50 values from the simulated data, with replacement.
y <- sample(x, size = 50, replace = TRUE)

# 5. Student (t) distribution

dt(3, df = 15)  # dt(x, df) = density f(x)
pt(6, df = 15)  # pt(x, df) = P(X <= x)

# Critical value
# qt(p, df) returns the value q such that P(X < q) = p
alpha <- 0.05
qt(1 - alpha / 2, df = 300)

# 6. Chi-squared distribution

pchisq(10, df = 50)  # pchisq(q, df) = P(X <= q)

# Critical value
# qchisq(p, df) returns the value q such that P(X < q) = p
alpha <- 0.5
qchisq(1 - alpha, df = 50)

# Fisher (F) distribution

df(5, df1 = 10, df2 = 13)  # df(x, df1, df2) = density f(x)
pf(3, df1 = 10, df2 = 12)  # pf(x, df1, df2) = P(X <= x)

# Critical value
# qf(p, df1, df2) returns the value q such that P(X < q) = p
alpha <- 0.05
qf(1 - alpha, df1 = 10, df2 = 12)

Nhóm số 2: Minh, Mai, Ngọc Anh, Lê Na, Thuỷ, Quỳnh Phương, N Tuấn Minh

You might also like