R Lab Ex 1 to 5
R Lab Ex 1 to 5
> 2587+2149
[1] 4736
> 2587-2149
[1] 438
> 2587*2149
[1] 5559463
> 2587/2149
[1] 1.203816
> a=100
> class(a)
[1] "numeric"
> b=300
> class(b)
[1] "numeric"
> c=a-b
>c
[1] -200
> class(c)
[1] "numeric"
> c<a-b
[1] FALSE
>c
[1] -200
> x<-56
> y<-45
> x+y
[1] 101
> x-y
[1] 11
> x*y
[1] 2520
> x/y
[1] 1.244444
> y%/%x
[1] 0
> y%%x
[1] 45
> x^y
[1] 4.660808e+78
> z<-c(x+y)
> z<-c(x,y)
>z
[1] 56 45
> z<-c(x+y,x-y,x*y,x/y,y%/%x,y%%x,x^y)
>z
[1] 1.010000e+02 1.100000e+01 2.520000e+03 1.244444e+00 0.000000e+00
[6] 4.500000e+01 4.660808e+78
> getwd()
[1] "C:/Users/bca348/Documents"
> write.csv(z,'Ex1.csv')
> write.csv(z,'C:\\users\\bca348\\Documents\\Ex1.csv')
Output:
> summary(mtcars)
      mpg             cyl             disp             hp             drat             wt             qsec             vs               am              gear
 Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0   Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000   Min.   :0.0000   Min.   :3.000
 1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5   1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:3.000
 Median :19.20   Median :6.000   Median :196.3   Median :123.0   Median :3.695   Median :3.325   Median :17.71   Median :0.0000   Median :0.0000   Median :4.000
 Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7   Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375   Mean   :0.4062   Mean   :3.688
 3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0   3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:4.000
 Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0   Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000   Max.   :1.0000   Max.   :5.000
      carb
 Min.   :1.000
 1st Qu.:2.000
 Median :2.000
 Mean   :2.812
 3rd Qu.:4.000
 Max.   :8.000
> str(mtcars)
$ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
$ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
$ am : num 1 1 1 0 0 0 0 0 0 0 ...
> quantile(mtcars$mpg)
> cars
speed dist
1 4 2
2 4 10
3 7 4
4 7 22
5 8 16
6 9 10
7 10 18
8 10 26
9 10 34
10 11 17
11 11 28
12 12 14
13 12 20
14 12 24
15 12 28
16 13 26
17 13 34
18 13 34
19 13 46
20 14 26
21 14 36
22 14 60
23 14 80
24 15 20
25 15 26
26 15 54
27 16 32
28 16 40
29 17 32
30 17 40
31 17 50
32 18 42
33 18 56
34 18 76
35 18 84
36 19 36
37 19 46
38 19 68
39 20 32
40 20 48
41 20 52
42 20 56
43 20 64
44 22 66
45 23 54
46 24 70
47 24 92
48 24 93
49 24 120
50 25 85
> summary(cars)
speed dist
> class(cars)
[1] "data.frame"
> dim(cars)
[1] 50 2
> str(cars)
> quantile(cars$speed)
4 12 15 19 25
Reading different types of data sets (.txt, .csv) from the web and from disk,
and writing them to a file in a specific disk location.
> library(utils)
> data<-read.csv("Z:\\R\\input.csv")
> data
id name Salary start_date dept
> data<-read.csv("Z:\\R\\input.csv")
> print(is.data.frame(data))
[1] TRUE
> print(ncol(data))
[1] 5
> print(nrow(data))
[1] 8
> data<-read.csv("Z:\\R\\input.csv")
> sal<-max(data$Salary)
> sal
[1] 855
> data<-read.csv("Z:\\R\\input.csv")
> sal<-max(data$Salary)
> retval<-subset(data,Salary==max(Salary))
> retval
> data<-read.csv("Z:\\R\\input.csv")
> retval<-subset(data,dept=="IT")
> retval
> retval<-subset(data,as.Date(start_date)>as.Date("2012-01-01"))
> retval
> data<-read.csv("Z:\\R\\input.csv")
> write.csv(retval,"Output.csv")
> newdata<-read.csv("Output.csv")
> newdata
Output:
Ex: 04 VISUALIZATION
a. Find the data distributions using box plots and scatter plots on the
airquality dataset.
> airquality
Ozone Solar.R Wind Temp Month Day
1 41 190 7.4 67 5 1
2 36 118 8.0 72 5 2
3 12 149 12.6 74 5 3
4 18 313 11.5 62 5 4
5 NA NA 14.3 56 5 5
6 28 NA 14.9 66 5 6
7 23 299 8.6 65 5 7
8 19 99 13.8 59 5 8
9 8 19 20.1 61 5 9
10 NA 194 8.6 69 5 10
11 7 NA 6.9 74 5 11
12 16 256 9.7 69 5 12
13 11 290 9.2 66 5 13
14 14 274 10.9 68 5 14
15 18 65 13.2 58 5 15
16 14 334 11.5 64 5 16
17 34 307 12.0 66 5 17
18 6 78 18.4 57 5 18
19 30 322 11.5 68 5 19
20 11 44 9.7 62 5 20
21 1 8 9.7 59 5 21
22 11 320 16.6 73 5 22
23 4 25 9.7 61 5 23
24 32 92 12.0 61 5 24
25 NA 66 16.6 57 5 25
26 NA 266 14.9 58 5 26
27 NA NA 8.0 57 5 27
28 23 13 12.0 67 5 28
29 45 252 14.9 81 5 29
31 37 279 7.4 76 5 31
32 NA 286 8.6 78 6 1
33 NA 287 9.7 74 6 2
34 NA 242 16.1 67 6 3
35 NA 186 9.2 84 6 4
36 NA 220 8.6 85 6 5
37 NA 264 14.3 79 6 6
38 29 127 9.7 82 6 7
39 NA 273 6.9 87 6 8
40 71 291 13.8 90 6 9
41 39 323 11.5 87 6 10
42 NA 259 10.9 93 6 11
43 NA 250 9.2 92 6 12
44 23 148 8.0 82 6 13
45 NA 332 13.8 80 6 14
46 NA 322 11.5 79 6 15
47 21 191 14.9 77 6 16
48 37 284 20.7 72 6 17
49 20 37 9.2 65 6 18
50 12 120 11.5 73 6 19
51 13 137 10.3 76 6 20
52 NA 150 6.3 77 6 21
53 NA 59 1.7 76 6 22
54 NA 91 4.6 76 6 23
55 NA 250 6.3 76 6 24
56 NA 135 8.0 75 6 25
57 NA 127 8.0 78 6 26
58 NA 47 10.3 73 6 27
59 NA 98 11.5 80 6 28
60 NA 31 14.9 77 6 29
61 NA 138 8.0 83 6 30
63 49 248 9.2 85 7 2
64 32 236 9.2 81 7 3
65 NA 101 10.9 84 7 4
66 64 175 4.6 83 7 5
67 40 314 10.9 83 7 6
68 77 276 5.1 88 7 7
69 97 267 6.3 92 7 8
70 97 272 5.7 92 7 9
71 85 175 7.4 89 7 10
72 NA 139 8.6 82 7 11
73 10 264 14.3 73 7 12
74 27 175 14.9 81 7 13
75 NA 291 14.9 91 7 14
76 7 48 14.3 80 7 15
77 48 260 6.9 81 7 16
78 35 274 10.3 82 7 17
79 61 285 6.3 84 7 18
80 79 187 5.1 87 7 19
81 63 220 11.5 85 7 20
82 16 7 6.9 74 7 21
83 NA 258 9.7 81 7 22
84 NA 295 11.5 82 7 23
85 80 294 8.6 86 7 24
88 52 82 12.0 86 7 27
89 82 213 7.4 88 7 28
90 50 275 7.4 86 7 29
91 64 253 7.4 83 7 30
92 59 254 9.2 81 7 31
93 39 83 6.9 81 8 1
94 9 24 13.8 81 8 2
95 16 77 7.4 82 8 3
96 78 NA 6.9 86 8 4
97 35 NA 7.4 85 8 5
98 66 NA 4.6 87 8 6
107 NA 64 11.5 79 8 15
108 22 71 10.3 77 8 16
109 59 51 6.3 79 8 17
128 47 95 7.4 87 9 5
129 32 92 15.5 84 9 6
137 9 24 10.9 71 9 14
145 23 14 9.2 71 9 22
147 7 49 10.3 69 9 24
148 14 20 16.6 63 9 25
> data(airquality)
+ at La Guardia Airport",
+ ylab="Wind",
+ col="orange",
+ border="brown",
+ horizontal=TRUE, notch=TRUE)
> boxplot(airquality[,0:4],
+ main="Scatterplot Example",
+ ylab="Month of observation",pch=19)
b. Plot the histogram, bar chart and pie chart on sample data
> barplot(airquality$Ozone,
+ main='Ozone Concentration in air',
+ xlab='Ozone levels',horiz=TRUE)
> barplot(airquality$Ozone, main='Ozone concentration in air',
+ xlab='ozone levels', col='blue', horiz=FALSE)
> hist(airquality$Temp, main="La Guardia Airport's\
+ Maximum Temperature(Daily)",
+ xlab="Temperature(Fahrenheit)",
+ xlim=c(50,125),col="yellow",
+ freq=TRUE)
> library(graphics)
> x<-c(135,350,20.4,45)
> labels<-c("Ozone", "Solar.R", "Wind", "Temp")
> pie(x,labels)
Ex: 05 PROBLEM DEFINITION
a. Correlations
#Data vectors
> x<-c(1,3,5,10)
> y<-c(2,4,6,20)
> print(cor(x,y))
[1] 0.9724702
> print(cor(x,y,method="pearson"))
[1] 0.9724702
> print(cor(x,y,method="kendall"))
[1] 1
> print(cor(x,y,method="spearman"))
[1] 1
b. Covariance
#Data Vectors
> x<-c(1,3,5,10)
> y<-c(2,4,6,20)
> print(cov(x,y))
[1] 30.66667
> print(cov(x,y,method="pearson"))
[1] 30.66667
> print(cov(x,y,method="kendall"))
[1] 12
> print(cov(x,y,method="spearman"))
[1] 1.666667
c. Conversion of Covariance to Correlation in R
#Data Vectors
> x<-rnorm(2)
> y<-rnorm(2)
> mat<-cbind(x,y)
> x<-cov(mat)
> print(x)
x y
x 0.6974460 -0.3465384
y -0.3465384 0.1721837
> print(cor(mat))
x y
x 1 -1
y -1 1
> print(cov2cor(x))
x y
x 1 -1
y -1 1