# PPNC Code
# Source: Scribd document, 7 pages. Copyright: all rights reserved.
# (Scribd page boilerplate -- ratings, view counts, download formats and the
# page counter -- was not part of the script and has been removed.)

# Window 1: where code and notes are written.
# Window 2 (bottom left): shows the result of each executed command.
# Window 3 (top right): holds the objects that have been created.
# `<-` is the assignment operator (plays the role of `=`).
TH <- "21"
UONG <- "12"
THUONG <- "SINH VIEN"
# Assigning to an existing name replaces its previous value.
THUONG <- "TH.UONG"

# CHAPTER 2, C2, P.64
# Date: 9/9/2024
# Read CEOSAL2.
# Help for a function: ?function_name
?read.csv
# file:   path to the file, including the file name and the .csv extension,
#         written with "/" separators and wrapped in quotes
# header: use the first row as the column names (TRUE/FALSE)
dCEO2 <- read.csv(
  file = "E:/PhamThiHoaiThuong_B2200099/ceosal2.csv",
  header = FALSE
)
# Refer to a column as data_name$column_name, e.g. mean(dCEO2$V1)

# (i) Find the average salary and the average tenure in the sample.
# salary = V1, ceoten = V6
mean(dCEO2$V1)
mean(dCEO2$V6)

# (ii) How many CEOs are in their first year as CEO (that is, ceoten = 0)?
# Counted from the data instead of being typed in by hand; the answer
# recorded in the original notes was 5.
n <- sum(dCEO2$V6 == 0, na.rm = TRUE)
# What is the longest tenure as a CEO?
max(dCEO2$V6)

# Date: 16/9/2024. C2, P.64 (cont.)

# (iii) Estimate the simple regression model
#   log(salary) = b0 + b1*ceoten + u
#   y = log(salary) = V10, x1 = ceoten = V6
# lm: linear model
RegC2.C2.3 <- lm(V10 ~ V6, data = dCEO2)
summary(RegC2.C2.3)
# b0_hat = 6.505498
# b1_hat = 0.009724

# ... and report your results in the usual form.
# log(salary)_hat = 6.505498 + 0.009724*ceoten
# R2 = 0.01316; sample size (n) = 177

# What is the (approximate) predicted percentage increase in salary given
# one more year as a CEO?
# delta(ceoten) = +1
# delta(log(salary)_hat) = b1_hat*delta(ceoten) = 0.009724*(+1) = 0.009724
#   => salary is predicted to rise by about 0.9724%
# The same effect expressed in salary units at the sample mean salary; the
# slope is read from the fitted model rather than retyped as 0.009724.
coef(RegC2.C2.3)[["V6"]] * mean(dCEO2$V1)

# CHAPTER 2, C3, P.64
dsleep75 <- read.csv(
  file = "E:/PhamThiHoaiThuong_B2200099/sleep75.csv",
  header = FALSE
)

# sleep = b0 + b1*totwrk + u
# y = sleep = V21, x1 = totwrk = V26
RegC2.C3.1 <- lm(V21 ~ V26, data = dsleep75)
summary(RegC2.C3.1)
# b0_hat = 3586
# b1_hat = -0.15075
# sleep_hat = 3586 - 0.15075*totwrk

# (i) Report your results in equation form along with the number of
# observations and R2.
# n = 706 obs
# R2 = 0.1033
# What does the intercept in this equation mean?
# Intercept = 3586.37695
# A person who does not work at all is predicted to sleep 3586 minutes,
# i.e. about 59.8 hours.

# (ii) If totwrk increases by 2 hours, by how much is sleep estimated to fall?
# delta(totwrk) = +2 hours = 120 mins
# delta(sleep_hat) = b1_hat*delta(totwrk) = -0.15075*(+120) = -18.09 mins
# Working 2 more hours lowers predicted sleep by 18.09 minutes.
# Do you find this to be a large effect?
# When work time rises by 2 hours, the time left for leisure falls by
# 120 - 18.09 = 101.91 minutes, a much larger drop than the drop in sleep.

# CHAPTER 2, C4, P.64
# header = FALSE gives the default column names V1, V2, ... that the rest of
# this script relies on; without it the first data row would be consumed as
# column names and the V-columns referenced below would not exist.
# NOTE(review): assumes wage2.csv has no header row, like the other files
# read in this script -- confirm.
dwage2 <- read.csv(
  file = "E:/PhamThiHoaiThuong_B2200099/wage2.csv",
  header = FALSE
)
# wage = V1, IQ = V3

# (i) Find the average salary and average IQ in the sample.
mean(dwage2$V1)
mean(dwage2$V3)
# What is the sample standard deviation of IQ?
sd(dwage2$V3)

# (ii) Estimate a simple regression model where a one-point increase in IQ
# changes wage by a constant dollar amount.
# wage = b0 + b1*IQ + u
RegC2.C4.1 <- lm(V1 ~ V3, data = dwage2)
summary(RegC2.C4.1)

# (iii) Estimate a model where each one-point increase in IQ has the same
# percentage effect on wage.
# log(wage) = b0 + b1*IQ + u
RegC2.C4.2 <- lm(log(V1) ~ V3, data = dwage2)
summary(RegC2.C4.2)
# If IQ increases by 15 points, what is the approximate percentage increase
# in predicted wage?
# %delta(wage_hat) ~ 100*b1_hat*delta(IQ), with delta(IQ) = +15
100 * 15 * coef(RegC2.C4.2)[["V3"]]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Date: 23/09/2024
# CHAPTER 3, C1, P.110

dbwght <- read.csv(
  file = "E:/PhamThiHoaiThuong_B2200099/bwght.csv",
  header = FALSE
)

# bwght = b0 + b1*cigs + b2*faminc + u
# bwght = V4, cigs = V10, faminc = V1

# (i) What is the most likely sign for b2?
# b2 > 0: with a higher faminc (family income) the family can take better
# care of the pregnant woman's health, nutrition, ...

# (ii) Do you think cigs and faminc are likely to be correlated?
# Explain why the correlation might be positive or negative.
cor.test(dbwght$V1, dbwght$V10)
# corr(faminc, cigs) = -0.1730449 < 0
# Negative correlation: higher income tends to go together with a higher
# level of education, and more educated people smoke less.

# (iii) Estimate the equation with and without faminc, using the data in
# BWGHT.RAW. Report the results in equation form, including the sample size
# and R-squared.
# bwght = b0 + b1*cigs + u
RegC3.C1.3.1 <- lm(V4 ~ V10, data = dbwght)
summary(RegC3.C1.3.1)
# bwght_hat = 119.77190 - 0.51377*cigs
# sample size (n) = 1388
# R2 = 0.02273

# bwght = b0 + b1*cigs + b2*faminc + u
RegC3.C1.3.2 <- lm(V4 ~ V10 + V1, data = dbwght)
summary(RegC3.C1.3.2)
# bwght_hat = 116.97413 - 0.46341*cigs + 0.09276*faminc
# sample size (n) = 1388
# R2 = 0.0298

# Discuss your results, focusing on whether adding faminc substantially
# changes the estimated effect of cigs on bwght.
# Adding income to the regression makes the estimated effect of smoking
# smaller, but the difference is not large.
# This shows that smoking and income are not strongly correlated, and the
# coefficient on income is small.

# CHAPTER 3, C2, P.110

dhprice1 <- read.csv(
  file = "E:/PhamThiHoaiThuong_B2200099/hprice1.csv",
  header = FALSE
)

# (i) Write out the results in equation form.
# price = b0 + b1*sqrft + b2*bdrms + u
# price = V1, sqrft = V5, bdrms = V3
RegC3.C2 <- lm(V1 ~ V5 + V3, data = dhprice1)
summary(RegC3.C2)
# price_hat = -19.31500 + 0.12844*sqrft + 15.19819*bdrms

# (ii) What is the estimated increase in price for a house with one more
# bedroom, holding square footage constant?
# delta(bdrms) = +1
# delta(sqrft) = 0
# delta(price_hat) = b1_hat*delta(sqrft) + b2_hat*delta(bdrms)
#                  = 0.12844*(0) + 15.19819*(+1) = +15.19819
# One more bedroom with the floor area held fixed raises the predicted
# price by 15.19819.

# (iii) What is the estimated increase in price for a house with an
# additional bedroom that is 140 square feet in size?
# price_hat = -19.31500 + 0.12844*sqrft + 15.19819*bdrms
# delta(price_hat) = b1_hat*(+140) + b2_hat*(+1)
#                  = 0.12844*140 + 15.19819 = 33.17979 (rounded coefficients)
140 * coef(RegC3.C2)[["V5"]] + coef(RegC3.C2)[["V3"]]

# (iv) What percentage of the variation in price is explained by square
# footage and number of bedrooms?
# This is the R-squared of the regression in part (i).
summary(RegC3.C2)$r.squared

# (v) The first house in the sample has sqrft = 2,438 and bdrms = 4
# price_hat = b0_hat + b1_hat*2438 + b2_hat*4
price_hat1 <- coef(RegC3.C2)[1] +
  coef(RegC3.C2)[2] * 2438 +
  coef(RegC3.C2)[3] * 4

# (vi) The actual selling price of the first house in the sample was $300,000
# Residual = actual - predicted.
# NOTE(review): 300 assumes V1 is recorded in $1000s -- confirm.
300 - price_hat1

#------------------------------------------------------------------------------
# Date: 03/9/2024
# NOTE(review): the neighbouring sessions are dated 23/09 and 28/10, so this
# is presumably 30/9/2024 -- confirm.
# CHAPTER 3, C3, P.111
# (i) Estimate a model relating annual salary to firm sales and market value
# log(salary) = b0 + b1*log(sales) + b2*log(mktval) + u
#    (V10)            (V11)            (V12)

dCEO2 <- read.csv(
  file = "E:/PhamThiHoaiThuong_B2200099/ceosal2.csv",
  header = FALSE
)

RegC3.C3.1 <- lm(V10 ~ V11 + V12, data = dCEO2)
summary(RegC3.C3.1)

# Write the results out in equation form.
# log(salary)_hat = b0_hat + b1_hat*log(sales) + b2_hat*log(mktval)
# log(salary)_hat = 4.62092 + 0.16213*log(sales) + 0.10671*log(mktval)
# n = 177, R2 = 0.2991

# (ii) Add profits to the model from part (i). Why can this variable not be
# included in logarithmic form?
# profits is negative for some firms, so its logarithm cannot be taken.
# Would you say that these firm performance variables explain most of the
# variation in CEO salaries?
# profits = V8
RegC3.C3.2 <- lm(V10 ~ V11 + V12 + V8, data = dCEO2)
summary(RegC3.C3.2)
# log(salary) = b0 + b1*log(sales) + b2*log(mktval) + b3*profits + u
# log(salary)_hat = b0_hat + b1_hat*log(sales) + b2_hat*log(mktval)
#                   + b3_hat*profits
# log(salary)_hat = 4.687*10^0 + 1.614*10^(-1)*log(sales)
#                   + 9.753*10^(-2)*log(mktval) + 3.566*10^(-5)*profits
# n = 177, R2 = 0.2993
# With R2 = 0.2993 these variables explain only about 30% of the variation.

# (iii) Add the variable ceoten to the model in part (ii).
# log(salary) = b0 + b1*log(sales) + b2*log(mktval) + b3*profits
#               + b4*ceoten + u
# ceoten = V6
regC3.C3.3 <- lm(V10 ~ V11 + V12 + V8 + V6, data = dCEO2)
summary(regC3.C3.3)
# log(salary)_hat = b0_hat + b1_hat*log(sales) + b2_hat*log(mktval)
#                   + b3_hat*profits + b4_hat*ceoten
# log(salary)_hat = 4.558*10^0 + 1.622*10^(-1)*log(sales)
#                   + 1.018*10^(-1)*log(mktval) + 2.905*10^(-5)*profits
#                   + 1.168*10^(-2)*ceoten
# n = 177, R2 = 0.3183

# What is the estimated percentage return for another year of CEO tenure,
# holding other factors fixed?
# delta(ceoten) = +1, holding other factors fixed
# delta(log(salary)_hat) = b4_hat*delta(ceoten) = 1.168*10^(-2)*(+1)
#                        = 0.01168 => about 1.17%

# Find the sample correlation coefficient between the variables log(mktval)
# and profits.
# The result is recorded in a comment: writing `cor.test(...) = 0.776` would
# be an assignment to a function call, which R rejects.
cor.test(dCEO2$V12, dCEO2$V8)
# corr(log(mktval), profits) = 0.776
# Are these variables highly correlated? What does this say about the OLS
# estimators?
# The variables are fairly highly correlated with each other, but this does
# not bias the OLS estimators.

# CHAPTER 3, C4, P.111

# (i) Obtain the minimum, maximum, and average values for the variables
# atndrte, priGPA, and ACT
dattend <- read.csv(
  file = 'E:/PhamThiHoaiThuong_B2200099/attend.csv',
  header = FALSE
)
# Keep columns 6, 3 and 4, in that order; summary() reports the minimum,
# maximum and mean of each.
dattend1 <- dattend[, c("V6", "V3", "V4")]
summary(dattend1)

# (ii) Estimate the model
# atndrte = b0 + b1*priGPA + b2*ACT + u
# Response is V6; regressors are V3 and V4.
regC3.C4.2 <- lm(V6 ~ V3 + V4, data = dattend)
summary(regC3.C4.2)
# atndrte_hat = b0_hat + b1_hat*priGPA + b2_hat*ACT
# atndrte_hat = 75.7 + 17.261*priGPA - 1.717*ACT

#------------------------------------------------------------------------------

# Date: 28/10/2024
# CHAPTER 4, C1, P.164

dvote1 <- read.csv(
  file = "E:/PhamThiHoaiThuong_B2200099/vote1.csv",
  header = FALSE
)

# voteA = b0 + b1*log(expendA) + b2*log(expendB) + b3*prtystrA + u

# (i) What is the interpretation of b1?
# Holding the other factors fixed:
# delta(voteA) = b1*delta(log(expendA)) = (b1/100)*(%delta(expendA))
# When expendA rises by 1%, voteA changes by b1/100 percentage points.

# (ii) In terms of the parameters, state the null hypothesis that a 1%
# increase in A's expenditures is offset by a 1% increase in B's expenditures
# H0: b1 + b2 = 0
# H1: b1 + b2 != 0
# Under H0, raising A's spending by 1% and B's spending by 1% leaves the
# voting outcome unchanged.

# (iii) Estimate the given model using the data in VOTE1.RAW and report the
# results in usual form.
# voteA = b0 + b1*log(expendA) + b2*log(expendB) + b3*prtystrA + u
# (V4)            (V8)              (V9)            (V7)
RegC4.C1.3 <- lm(V4 ~ V8 + V9 + V7, data = dvote1)
summary(RegC4.C1.3)
# voteA_hat = b0_hat + b1_hat*log(expendA) + b2_hat*log(expendB)
#             + b3_hat*prtystrA
# voteA_hat = 45.07893 + 6.08332*log(expendA) - 6.61542*log(expendB)
#             + 0.15196*prtystrA

# Do A's expenditures affect the outcome?
# This is a test on b1 alone, not on b1 + b2:
# H0: b1 = 0
# H1: b1 != 0
# t_b1_hat = b1_hat/se(b1_hat)
# t_b1_hat = 6.08332/0.38215 = 15.92 > c_0.01 = 2.576 (df = 169 > 120)
# Reject H0 in favour of H1.
# A's expenditures affect the outcome.

# What about B's expenditures?
# H0: b2 = 0
# H1: b2 != 0
# t_b2_hat = b2_hat/se(b2_hat)
# t_b2_hat = -6.61542/0.37882 = -17.46; |t| = 17.46 > c_0.01 = 2.576
#            (df = 169 > 120)
# Reject H0 in favour of H1.
# B's expenditures affect the outcome.

# Can you use these results to test the hypothesis in part (ii)?
# Not from the two t statistics alone: se(b1_hat + b2_hat) also needs the
# covariance between b1_hat and b2_hat, taken here from vcov().
# H0: b1 + b2 = 0
# H1: b1 + b2 != 0
# t_b1b2 = (b1_hat + b2_hat)/se(b1_hat + b2_hat)
# var(b1_hat + b2_hat) = var(b1_hat) + var(b2_hat) + 2*cov(b1_hat, b2_hat)
# Rows/columns 2 and 3 of the covariance matrix belong to V8 and V9.
var <- vcov(RegC4.C1.3)
Varb12 <- var[2, 2] + var[3, 3] + 2 * var[2, 3]
coe <- coef(RegC4.C1.3)
t_b1b2 <- (coe[2] + coe[3]) / sqrt(Varb12)
# |t_b1b2| = 0.998 < c_0.10 = 1.65 => fail to reject H0: b1 + b2 = 0
# The two effects offset each other.

# (iv) Estimate a model that directly gives the t statistic for testing the
# hypothesis in part (ii).
# theta1 = b1 + b2 => b1 = theta1 - b2; substitute into:
# voteA = b0 + b1*log(expendA) + b2*log(expendB) + b3*prtystrA + u
# voteA = b0 + (theta1 - b2)*log(expendA) + b2*log(expendB) + b3*prtystrA + u
# voteA = b0 + theta1*log(expendA) + b2*[log(expendB) - log(expendA)]
#         + b3*prtystrA + u
# Regress voteA on three variables: log(expendA),
# log(expendB) - log(expendA), and prtystrA.
# Create the new variable:
dvote1$V89 <- dvote1$V9 - dvote1$V8
RegC4.C1.4 <- lm(V4 ~ V8 + V89 + V7, data = dvote1)
summary(RegC4.C1.4)
# voteA_hat = 45.07893 - 0.53210*log(expendA)
#             - 6.61542*[log(expendB) - log(expendA)] + 0.15196*prtystrA
# What do you conclude?
# |t| = 0.998 on the V8 coefficient (theta1) => fail to reject H0.

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Date: 04/11/2024

# Chapter 4, C5, P.165

dmlb1 <- read.csv(
  file = "E:/PhamThiHoaiThuong_B2200099/mlb1.csv",
  header = FALSE
)
# log(salary) = b0 + b1*year + b2*gamesyr + b3*bavg + b4*hrunsyr
#               + b5*rbisyr + u
# log(salary) = V47, year = V4, gamesyr = V31, bavg = V13, hrunsyr = V32,
# rbisyr = V36
regc4.c5.1 <- lm(V47 ~ V4 + V31 + V13 + V32 + V36, data = dmlb1)
summary(regc4.c5.1)

# Same model with rbisyr (V36) dropped. V13 used to be listed twice in this
# formula; the duplicate term is removed.
regc4.c5.1_drop <- lm(V47 ~ V4 + V31 + V13 + V32, data = dmlb1)
summary(regc4.c5.1_drop)
# What happens to the statistical significance of hrunsyr?
# p-value(hrunsyr) << 0.001 => hrunsyr is highly statistically significant.

# What about the size of the coefficient on hrunsyr?
# The coefficient on hrunsyr may increase or decrease depending on the
# correlation between rbisyr and hrunsyr.

# (ii) Add the variables runsyr (runs per year, V38), fldperc (fielding
# percentage, V17), and sbasesyr (stolen bases per year, V37) to the model
# from part (i).
regC4.C5.2 <- lm(V47 ~ V4 + V31 + V13 + V32 + V38 + V17 + V37, data = dmlb1)
summary(regC4.C5.2)

# Which of these factors are individually significant?
# => None of these are significant.
# NOTE(review): confirm against the p-values printed by summary(regC4.C5.2).

# (iii) In the model from part (ii), test the joint significance of bavg,
# fldperc, and sbasesyr.
# H0: b_bavg = 0 and b_fldperc = 0 and b_sbasesyr = 0
# H1: H0 is not true
# Unrestricted model: all seven regressors.
regC4.C5.3_ur <- lm(
  V47 ~ V4 + V31 + V13 + V32 + V38 + V17 + V37,
  data = dmlb1
)
SSR_ur <- sum(regC4.C5.3_ur$residuals^2)

# Restricted model: bavg (V13), fldperc (V17) and sbasesyr (V37) removed.
regC4.C5.3_r <- lm(V47 ~ V4 + V31 + V32 + V38, data = dmlb1)
SSR_r <- sum(regC4.C5.3_r$residuals^2)

# F = ((SSR_r - SSR_ur)/q) / (SSR_ur/(n - k - 1)) with q = 3 restrictions.
# The unrestricted model has k = 7 regressors, so the denominator degrees of
# freedom are n - 7 - 1 (not 353 - 8 - 1); df.residual() returns them
# directly from the fitted model.
F_test <- ((SSR_r - SSR_ur) / 3) / (SSR_ur / df.residual(regC4.C5.3_ur))
F_test
# F_test = 0.68 < 2.08 (10% critical value) => fail to reject H0
# Chapter 4, C3, P.165
# log(price) = b0 + b1*sqrft + b2*bdrms + u
# (i) You are interested in estimating and obtaining a confidence interval
# for the percentage change in price when a 150-square-foot bedroom is added
# to a house.
# In decimal form, this is theta1 = 150*b1 + b2. Use the data in HPRICE1.RAW
# to estimate theta1.

dhprice1 <- read.csv(
  file = "E:/PhamThiHoaiThuong_B2200099/hprice1.csv",
  header = FALSE
)
# log(price) = b0 + b1*sqrft + b2*bdrms + u
# Columns used:
#   V1: price   house price, $1000s
#   V3: bdrms   number of bedrooms
#   V5: sqrft   size of house in square feet
#   V7: lprice  log(price)

regC4.C3.1 <- lm(V7 ~ V5 + V3, data = dhprice1)
summary(regC4.C3.1)

# theta1_hat = 150*b1_hat + b2_hat = 150*3.794e-04 + 2.888e-02 = 0.08579
# 0.08579*100 = 8.58%

# (ii) Write b2 in terms of theta1 and b1 and plug this into the log(price)
# equation.
# theta1 = 150*b1 + b2 => b2 = theta1 - 150*b1; plug this into
# log(price) = b0 + b1*sqrft + b2*bdrms + u
#            = b0 + b1*sqrft + (theta1 - 150*b1)*bdrms + u
#            = b0 + b1*(sqrft - 150*bdrms) + theta1*bdrms + u

# (iii) Use part (ii) to obtain a standard error for theta1 and use this
# standard error to construct a 95% confidence interval.
# Regress log(price) on 2 variables: "(sqrft - 150*bdrms)" and "bdrms".
# Create "sqrft.bd" in dhprice1:
dhprice1$sqrft.bd <- dhprice1$V5 - 150 * dhprice1$V3

# The coefficient on V3 in this regression is theta1_hat, and its standard
# error is se(theta1_hat).
regC4.C3.2 <- lm(V7 ~ sqrft.bd + V3, data = dhprice1)
summary(regC4.C3.2)

# 95% confidence interval: estimate -/+ 1.96 * standard error.
# 8.580e-02 and 2.677e-02 are presumably the V3 estimate (theta1_hat) and its
# standard error as printed by summary(regC4.C3.2) -- confirm.
inter.up <- 8.580e-02 + 1.96 * 2.677e-02

# The lower bound subtracts the margin; it previously added it, which made
# both bounds identical.
inter.down <- 8.580e-02 - 1.96 * 2.677e-02

# (Scribd page footer "You might also like" -- not part of the script.)