# PPNC Code
# Window 2 (bottom left): shows the results of executed commands
# Window 3 (top right): holds all objects
# `<-` is the assignment operator (same role as `=`)
THUONG <- "SINH VIEN"
TH <- "21"
UONG <- "12"
# Assigning again replaces the value previously stored in THUONG
THUONG <- "TH.UONG"
# CHAPTER 2, C2, P.64
# Date: 9/9/2024
# Read CEOSAL2
# Help for a function: ?function_name
?read.csv
# file: path to the file including the file name and the .csv extension;
#       use "/" as the separator and wrap the path in quotes
# header: use the first row as column names (TRUE/FALSE)
dCEO2 <- read.csv(
  file = "E:/PhamThiHoaiThuong_B2200099/ceosal2.csv",
  header = FALSE
)
# Access a column with data_name$column_name, e.g. mean(dCEO2$V1).
# With header = FALSE, read.csv names the columns V1, V2, ...
# (i) Find the average salary and the average tenure in the sample.
mean(dCEO2$V1) # salary
mean(dCEO2$V6) # ceoten
# (ii) How many CEOs are in their first year as CEO (that is, ceoten = 0)?
# Counted from the data instead of being typed in by hand (the noted answer is 5).
n <- sum(dCEO2$V6 == 0)
# What is the longest tenure as a CEO?
max(dCEO2$V6)
# What is the (approximate) predicted percentage increase in salary given one
# more year as a CEO?
# delta(ceoten) = +1
# delta(log(salary)_hat) = b1_hat*delta(ceoten) = 0.009724*(+1) = 0.009724
#                        = 0.9724%
# b1_hat = 0.009724 comes from a regression that is not shown in this section;
# the line below multiplies it by the mean of V1.
0.009724 * mean(dCEO2$V1)
# sleep = b0 + b1*totwrk + u
# y ~ sleep = V21, X1 ~ totwrk = V26
# NOTE(review): dsleep75 is not created in this section; it must already be
# loaded before this line runs.
RegC2.C3.1 <- lm(V21 ~ V26, data = dsleep75)
summary(RegC2.C3.1)
# b0_hat = 3586
# b1_hat = -0.15075
# sleep_hat = 3586 - 0.15075*totwrk
# (i) Report your results in equation form along with the number of
# observations and R2
# n = 706 obs
# R2 = 0.1033
# What does the intercept in this equation mean?
# Intercept = 3586.37695
# A person who does not work at all is predicted to sleep 3586 minutes,
# i.e. about 59.8 hours.
# (ii) If totwrk increases by 2 hours, by how much is sleep estimated to fall?
# delta(totwrk) = +2 hours = 120 mins
# delta(sleep_hat) = b1_hat*delta(totwrk) = -0.15075*(+120) = -18.09 mins
# Working 2 more hours reduces predicted sleep by 18.09 minutes.
# Do you find this to be a large effect?
# NOTE(review): the original note computed "102 - 18.09 = 83.91", which
# subtracts the sleep reduction twice. Of the 120 extra minutes of work, the
# remaining 120 - 18.09 = 101.91 minutes come out of other (leisure) time, a
# much larger drop than the drop in sleep.
# CHAPTER 2, C4, P.64
# NOTE(review): this import uses read.csv's default header = TRUE, and the
# columns are then referenced as lowercase v1 / v3. That only works if the
# header row of wage2.csv really uses those names (R is case-sensitive) -
# confirm against the file.
dwage2 <- read.csv(file = "E:/PhamThiHoaiThuong_B2200099/wage2.csv")
# (i) Find the average salary and average IQ in the sample.
mean(dwage2$v1) # wage
mean(dwage2$v3) # IQ
# What is the sample standard deviation of IQ?
sd(dwage2$v3)
# Estimate a simple regression model where a one-point increase in IQ changes
# wage by a constant dollar amount
# wage = b0 + b1*IQ + u
RegC2.C4.1 <- lm(v1 ~ v3, data = dwage2)
summary(RegC2.C4.1)
# Estimate a model where each one-point increase in IQ has the same percentage
# effect on wage.
# If IQ increases by 15 points, what is the approximate percentage increase in
# predicted wage?
# TODO: the log-level regression for this part is not present in this section.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Date: 23/09/2024
# CHAPTER 3, C1, P.110
# (ii) What is the estimated increase in price for a house with one more
# bedroom, holding square footage constant?
# delta(bdrms) = +1
# delta(sqrft) = 0
# delta(price_hat) = b1_hat*delta(sqrft) + b2_hat*delta(bdrms)
#                  = 0.12844*(0) + 15.19819*(+1) = +15.19819
# Adding one bedroom while keeping the area fixed raises the predicted price
# by 15.19819.
# (iii) What is the estimated increase in price for a house with an additional
# bedroom that is 140 square feet in size?
# price_hat = -19.31500 + 0.12844*sqrft + 15.19819*bdrms
# delta(price_hat) = 0.12844*(140) + 15.19819*(+1) = 33.17979
# (iv) What percentage of the variation in price is explained by square footage
# and number of bedrooms?
# log(price) = b0 + b1*sqrft + b2*bdrms + u
# (v) The first house in the sample has sqrft = 2,438 and bdrms = 4
# price = b0 + b1*sqrft + b2*bdrms + u
# Predicted price = intercept + slope_sqrft*2438 + slope_bdrms*4.
# NOTE(review): RegC3.C2 is not fitted in this section; it must already exist.
price_hat1 <- coef(RegC3.C2)[1] +
  coef(RegC3.C2)[2] * 2438 +
  coef(RegC3.C2)[3] * 4
# (vi) The actual selling price of the first house in the sample was $300,000
#------------------------------------------------------------------------------
# Date: 03/9/2024
# CHAPTER 3, C3, P.111
# (i) Estimate a model relating annual salary to firm sales and market value
# log(salary) = b0 + b1*log(sales) + b2*log(mktval) + u
#    (V10)              (V11)            (V12)
# (ii) Add profits to the model from part (i). Why can this variable not be
# included in logarithmic form?
# Profits are negative for some firms, so the logarithm cannot be taken.
# Would you say that these firm performance variables explain most of the
# variation in CEO salaries?
# What is the estimated percentage return for another year of CEO tenure,
# holding other factors fixed?
# delta(ceoten) = +1, holding other factors fixed
# delta(log(salary)_hat) = b4_hat*delta(ceoten) = 1.168*10^(-2)*(+1) = 0.01168
#                        = 1.168%
# Find the sample correlation coefficient between the variables log(mktval)
# and profits.
# The result (0.776) is recorded as a comment: the original line assigned
# 0.776 to the call itself, which is not valid R.
cor.test(dCEO2$V12, dCEO2$V8) # sample correlation = 0.776
# Are these variables highly correlated? What does this say about the OLS
# estimators?
# The variables are fairly highly correlated with each other, but this does
# not bias the OLS estimators.
#------------------------------------------------------------------------------
# Date: 28/10/2024
# CHAPTER 4, C1, P.164
# (ii) In terms of the parameters, state the null hypothesis that a 1% increase
# in A's expenditures is offset by a 1% increase in B's expenditures
# H0: b1 + b2 = 0
# H1: b1 + b2 != 0
# A 1% rise in A's spending together with a 1% rise in B's spending leaves the
# vote outcome unchanged.
# (iii) Estimate the given model using the data in VOTE1.RAW and report the
# results in usual form.
# voteA = b0 + b1*log(expendA) + b2*log(expendB) + b3*prtystrA + u
#  (V4)           (V8)              (V9)             (V7)
# NOTE(review): dvote1 is not created in this section; it must already be
# loaded.
RegC4.C1.3 <- lm(V4 ~ V8 + V9 + V7, data = dvote1)
summary(RegC4.C1.3)
# voteA_hat = b0_hat + b1_hat*log(expendA) + b2_hat*log(expendB)
#             + b3_hat*prtystrA
# voteA_hat = 45.07893 + 6.08332*log(expendA) + -6.61542*log(expendB)
#             + 0.15196*prtystrA
# Can you use these results to test the hypothesis in part (ii)?
# H0: b1 + b2 = 0
# H1: b1 + b2 != 0
# t_b1b2 <- (b1_hat + b2_hat)/se(b1_hat + b2_hat)
# var(b1_hat + b2_hat) = var(b1_hat) + var(b2_hat) + 2*cov(b1_hat, b2_hat)
# Rows/columns 2 and 3 of the coefficient covariance matrix belong to V8 and
# V9 (position 1 is the intercept).
var <- vcov(RegC4.C1.3)
Varb12 <- var[2, 2] + var[3, 3] + 2 * var[2, 3]
coe <- coef(RegC4.C1.3)
t_b1b2 <- (coe[2] + coe[3]) / sqrt(Varb12)
# |t_b1b2| = 0.998 < c_0.10 = 1.65 => fail to reject H0: b1 + b2 = 0
# i.e. the two effects offset each other
# (iv) Estimate a model that directly gives the t statistic for testing the
# hypothesis in part (ii).
# theta1 = b1 + b2 => b1 = theta1 - b2, substitute into:
# voteA = b0 + b1*log(expendA) + b2*log(expendB) + b3*prtystrA + u
# voteA = b0 + (theta1 - b2)*log(expendA) + b2*log(expendB) + b3*prtystrA + u
# voteA = b0 + theta1*log(expendA) + b2*[log(expendB) - log(expendA)]
#         + b3*prtystrA + u
# Regress voteA on three vars: log(expendA), log(expendB) - log(expendA),
# prtystrA
# Create the new variable log(expendB) - log(expendA):
dvote1$V89 <- dvote1$V9 - dvote1$V8
# The coefficient on V8 in this regression is theta1, so its reported t value
# is the test statistic for H0: b1 + b2 = 0.
RegC4.C1.4 <- lm(V4 ~ V8 + V89 + V7, data = dvote1)
summary(RegC4.C1.4)
# voteA = 45.07893 + -0.53210*log(expendA)
#         + -6.61542*[log(expendB) - log(expendA)] + 0.15196*prtystrA + u
# What do you conclude?
# |t| = 0.998 => fail to reject H0.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Date: 04/11/2024
# (ii) Add the variables runsyr (runs per year; V38), fldperc (fielding
# percentage; V17), and sbasesyr (stolen bases per year; V37) to the model
# from part (i).
# NOTE(review): dmlb1 is not created in this section; it must already be
# loaded.
regC4.C5.2 <- lm(
  V47 ~ V4 + V31 + V13 + V32 + V38 + V17 + V37,
  data = dmlb1
)
summary(regC4.C5.2)
# Which of these factors are individually significant? => None of these are
# significant
# (iii) In the model from part (ii), test the joint significance of bavg,
# fldperc, and sbasesyr.
# H0: b_bavg = 0 and b_fldperc = 0 and b_sbasesyr = 0
# H1: H0 is not true
# Unrestricted model: same specification as part (ii).
regC4.C5.3_ur <- lm(
  V47 ~ V4 + V31 + V13 + V32 + V38 + V17 + V37,
  data = dmlb1
)
# Sum of squared residuals of the unrestricted model
SSR_ur <- sum(regC4.C5.3_ur$residuals^2)
# (iii) Use part (ii) to obtain a standard error for theta1 and use this
# standard error to construct a 95% confidence interval.
# Regress log(price) on 2 variables: "(sqrft - 150*bdrms)", "bdrms"
# Create "sqrft.bd" on dhprice1