Lab 5 EA
Lab 5 EA
1226118
Lab 5
NORMALIDAD
>
> #Distribucion Normal
> set.seed(100)
> N <- rnorm(100)
>
> # Distribucion Chi-cuadrado
> set.seed(100)
> C <- rchisq(100, df=5)
>
> # Visualizacion
> par(mfrow=c(1,2))
> hist(N, col="navyblue")
> hist(C, col="navyblue")
>
> # Visualizacion ggplot2
> hoja1 <- data.frame(Normal=N,Chi2=C)
>
> dev.new()
> gN <- ggplot(hoja1, aes(x=Normal))
> gN + geom_histogram(color="white", fill="navyblue", bins = 10)
>
> dev.new()
> gC <- ggplot(hoja1, aes(x=Chi2))
> gC + geom_histogram(color="black", fill="red", bins = 10)
> datos.n <- read.csv(file.choose(),head=T);str(datos.n)
Warning messages:
1: In if (!header) rlabp <- FALSE :
the condition has length > 1 and only the first element will be used
2: In if (header) { :
the condition has length > 1 and only the first element will be used
'data.frame': 474 obs. of 9 variables:
$ fechnac : Factor w/ 462 levels " ","01/02/1951",..: 58 207 286 153 68 329 165
181 45 78 ...
$ genero : int 1 1 2 2 1 1 1 2 2 2 ...
$ educ : int 15 16 12 8 15 15 15 12 15 12 ...
$ catlab : int 3 1 1 1 1 1 1 1 1 1 ...
$ salario : int 57000 40200 21450 21900 45000 32100 36000 21900 27900 24000 ...
$ salini : int 27000 18750 12000 13200 21000 13500 18750 9750 12750 13500 ...
$ tiempemp: int 98 98 98 98 98 98 98 98 98 98 ...
$ expprev : int 144 36 381 190 138 67 114 0 115 244 ...
$ minoria : int 1 1 1 1 1 1 1 1 1 1 ...
> # Visualizacion
> par(mfrow=c(2,2))
> hist(N, col="navyblue")
> hist(datos.n$expprev, col="navyblue")
> qqnorm(N, pch=16)
> qqline(N, col="red")
> qqnorm(datos.n$expprev, pch=16)
> qqline(datos.n$expprev, col="red")
> # Visualizacion ggplot2
> dev.new()
> gN <- ggplot(hoja1, aes(x=Normal))
> gN + geom_histogram(color="white", fill="navyblue", bins = 10)
>
> dev.new()
> gN <- ggplot(datos.n, aes(x=expprev))
> gN + geom_histogram(color="white", fill="navyblue", bins = 10)
>
> dev.new()
> gQQ <- ggplot(hoja1, aes(sample=Normal))
> gQQ + stat_qq() + stat_qq_line(color = "red")
>
> dev.new()
> gQQ <- ggplot(datos.n, aes(sample=expprev))
> gQQ + stat_qq() + stat_qq_line(color = "red")
> dev.new()
Warning messages:
1: In match(x, table, nomatch = 0L) : display list redraw incomplete
2: In unit %in% c("strwidth", "strheight", "strascent", "strdescent") :
display list redraw incomplete
> gQQ <- ggplot(datos.n, aes(sample=expprev))
> gQQ + stat_qq() + stat_qq_line(color = "red")
> dev.new()
> gQQ <- ggplot(datos.n, aes(sample=expprev))
> gQQ + stat_qq()
> install.packages(nortest)
Error in install.packages(nortest) : object 'nortest' not found
> install.packages(nortest)
Error in install.packages(nortest) : object 'nortest' not found
> install.packages("nortest")
--- Please select a CRAN mirror for use in this session ---
Warning: unable to access index for repository
https://cran.revolutionanalytics.com/src/contrib:
cannot open URL 'https://cran.revolutionanalytics.com/src/contrib/PACKAGES'
Warning: unable to access index for repository
https://cran.revolutionanalytics.com/bin/windows/contrib/3.6:
cannot open URL
'https://cran.revolutionanalytics.com/bin/windows/contrib/3.6/PACKAGES'
Warning message:
package ‘nortest’ is not available (for R version 3.6.1)
> library(nortest)
Error in library(nortest) : there is no package called ‘nortest’
>
> lillie.test(N)
Error in lillie.test(N) : could not find function "lillie.test"
> library(nortest)
Error in library(nortest) : there is no package called ‘nortest’
> library(nortest)
Error in library(nortest) : there is no package called ‘nortest’
> # Ho: La distribucion observada se ajusta a la distribucion teorica
>
> library(nortest)
Error in library(nortest) : there is no package called ‘nortest’
>
> lillie.test(N)
Error in lillie.test(N) : could not find function "lillie.test"
>
> # p-value = 0.1588
>
> # p-value < 0.05 es FALSO (0.1588 > 0.05): no se rechaza Ho, no hay evidencia de que la distribucion no sea normal
>
> lillie.test(datos.n$expprev)
Error in lillie.test(datos.n$expprev) :
could not find function "lillie.test"
>
> # p-value = 2.2e-16
>
> # p-value < 0.05 es VERDADERO (2.2e-16 < 0.05): se rechaza Ho, la distribucion no es normal
> install.packages("nortest")
-----------------------------------------------------------
MULTICOLINEALIDAD
$ tiempemp: int 98 98 98 98 98 98 98 98 98 98 ...
$ expprev : int 144 36 381 190 138 67 114 0 115 244 ...
$ minoria : int 1 1 1 1 1 1 1 1 1 1 ...
> cor(datos[,-1])
genero educ catlab salario salini
genero 1.00000000 -0.35598562 -0.377660072 -0.44992300 -0.45667563
educ -0.35598562 1.00000000 0.513853677 0.66055891 0.63319565
catlab -0.37766007 0.51385368 1.000000000 0.78011486 0.75466244
salario -0.44992300 0.66055891 0.780114863 1.00000000 0.88011747
salini -0.45667563 0.63319565 0.754662438 0.88011747 1.00000000
tiempemp -0.06646673 0.04737878 0.005328829 0.08409227 -0.01975347
expprev -0.16485670 -0.25235252 0.062644949 -0.09746693 0.04513563
minoria -0.07566758 -0.13288857 -0.143781245 -0.17733731 -0.15759773
tiempemp expprev minoria
genero -0.066466734 -0.164856699 -0.07566758
educ 0.047378777 -0.252352521 -0.13288857
catlab 0.005328829 0.062644949 -0.14378124
salario 0.084092267 -0.097466926 -0.17733731
salini -0.019753475 0.045135627 -0.15759773
tiempemp 1.000000000 0.002978134 0.04950064
expprev 0.002978134 1.000000000 0.14474651
minoria 0.049500639 0.144746512 1.00000000
> # Redondeamos el resultado
> datos.round <- round(cor(datos[,-1]),2);datos.round
genero educ catlab salario salini tiempemp expprev minoria
genero 1.00 -0.36 -0.38 -0.45 -0.46 -0.07 -0.16 -0.08
educ -0.36 1.00 0.51 0.66 0.63 0.05 -0.25 -0.13
catlab -0.38 0.51 1.00 0.78 0.75 0.01 0.06 -0.14
salario -0.45 0.66 0.78 1.00 0.88 0.08 -0.10 -0.18
salini -0.46 0.63 0.75 0.88 1.00 -0.02 0.05 -0.16
tiempemp -0.07 0.05 0.01 0.08 -0.02 1.00 0.00 0.05
expprev -0.16 -0.25 0.06 -0.10 0.05 0.00 1.00 0.14
minoria -0.08 -0.13 -0.14 -0.18 -0.16 0.05 0.14 1.00
> ------------------------------------------
LINEALIDAD
> datos <- read.csv(file.choose(),head=T);str(datos)
Warning messages:
1: In if (!header) rlabp <- FALSE :
the condition has length > 1 and only the first element will be used
2: In if (header) { :
the condition has length > 1 and only the first element will be used
'data.frame': 474 obs. of 9 variables:
$ fechnac : Factor w/ 462 levels " ","01/02/1951",..: 58 207 286 153 68 329 165
181 45 78 ...
$ genero : int 1 1 2 2 1 1 1 2 2 2 ...
$ educ : int 15 16 12 8 15 15 15 12 15 12 ...
$ catlab : int 3 1 1 1 1 1 1 1 1 1 ...
$ salario : int 57000 40200 21450 21900 45000 32100 36000 21900 27900 24000 ...
$ salini : int 27000 18750 12000 13200 21000 13500 18750 9750 12750 13500 ...
$ tiempemp: int 98 98 98 98 98 98 98 98 98 98 ...
$ expprev : int 144 36 381 190 138 67 114 0 115 244 ...
$ minoria : int 1 1 1 1 1 1 1 1 1 1 ...
>
> # Gráficos de dispersión
> plot(datos, pch=16)
> ---------------------------------------------------------------------------
AUTOCORRELACION
> # Wallis
> # h-Durbin
> # Breusch-Godfrey
> # Cochrane-Orcutt
>
> # salario = b0 + b1(educ) + b2(tiempemp)
>
> summary(datos.lm)
Call:
lm(formula = salario ~ educ + tiempemp, data = datos.n)
Residuals:
Min 1Q Median 3Q Max
-22432 -7880 -2785 6036 77787
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -25415.26 5415.86 -4.693 3.54e-06 ***
educ 3895.07 204.49 19.048 < 2e-16 ***
tiempemp 89.81 58.63 1.532 0.126
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
>
> # Residuos estudentizados
> datos.lm.rs <- rstudent(datos.lm)
>
> plot(datos.lm.rs,pch=16,type="b")