0% found this document useful (0 votes)
14 views

Sunil Test

The document contains code examples demonstrating basic calculations, vector and matrix operations, and data frame manipulation in R. It includes operations such as addition, multiplication, creating vectors and matrices, and using functions like `log`, `sqrt`, and `factorial`. Additionally, it showcases how to create and manipulate data frames, including subsetting and applying functions to data frames.

Uploaded by

amishagoyal12008
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
14 views

Sunil Test

The document contains code examples demonstrating basic calculations, vector and matrix operations, and data frame manipulation in R. It includes operations such as addition, multiplication, creating vectors and matrices, and using functions like `log`, `sqrt`, and `factorial`. Additionally, it showcases how to create and manipulate data frames, including subsetting and applying functions to data frames.

Uploaded by

amishagoyal12008
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 15

Class 2 IPCW

SUNIL KUMAR

2025-01-21

Hello IPCW

Calculation, Vector, Matrix

The code below shows how to perform basic calculations.

#Basic Calculation
2+2

## [1] 4

2*6

## [1] 12

5/6

## [1] 0.8333333

sqrt(5)

## [1] 2.236068

log(10)

## [1] 2.302585

log(10,2)

## [1] 3.321928

abs(5) #absolute value

## [1] 5

1
exp(5) #exponential function e^5

## [1] 148.4132

round(5.514,2)

## [1] 5.51

factorial(5)

## [1] 120

choose(10,2) #Binomial coefficients

## [1] 45

 
$\binom{n}{k}$
#Assigning name

a<-2+2
a

## [1] 4

# Store 5 squared under the name b, then print it.
# The exponent operator is the ASCII caret `^`.
b <- 5^2
b

## [1] 25

Create a Vector

d<-c(7:12)
d

## [1] 7 8 9 10 11 12

e<-c(5,56,6,4,6,4)
e

## [1] 5 56 6 4 6 4

f<-d+e
f

## [1] 12 64 15 14 17 16

2
sqrt(f)

## [1] 3.464102 8.000000 3.872983 3.741657 4.123106 4.000000

#Find the fourth element of d


d[4]

## [1] 10

#Change the second element of d to 5


d[2] <- 5
d

## [1] 7 5 9 10 11 12

#Look at some elements of d


d[1:2]

## [1] 7 5

d[c(1,2)]

## [1] 7 5

d[c(TRUE,TRUE,FALSE)]

## [1] 7 5 10 11

d[d<10]

## [1] 7 5 9

d[c(2,2,2)]

## [1] 5 5 5

#Second vector
e

## [1] 5 56 6 4 6 4

#Create a matrix
x <- cbind(d,e)
x

## d e
## [1,] 7 5
## [2,] 5 56
## [3,] 9 6
## [4,] 10 4
## [5,] 11 6
## [6,] 12 4

3
#Look at parts of x
x[3,2]

## e
## 6

#Test column d against a threshold, then get just the rows where d is less than 4 (none here, so the result is empty)


x[,1] < 10

## [1] TRUE TRUE TRUE FALSE FALSE FALSE

x[x[,1] < 4,]

## d e

length(d)

## [1] 6

max(d)

## [1] 12

min(f)

## [1] 12

sort(d)

## [1] 5 7 9 10 11 12

## [1] 7 5 9 10 11 12

sum(d)

## [1] 54

rep(5,5)

## [1] 5 5 5 5 5

seq(5)

## [1] 1 2 3 4 5

4
seq(4,10)

## [1] 4 5 6 7 8 9 10

seq(3,10,2)

## [1] 3 5 7 9

#logical vectors

0==1

## [1] FALSE

0<1

## [1] TRUE

g<-c(5,698,5,5,456,4)
h<-g<10
h

## [1] TRUE FALSE TRUE TRUE FALSE TRUE

Sys.time()

## [1] "2025-02-04 10:45:44 IST"

Matrix

m <- matrix(c(1, 2, 3, 4), nrow = 2)


m

## [,1] [,2]
## [1,] 1 3
## [2,] 2 4

n<-matrix(1:12,nrow = 3)
n

## [,1] [,2] [,3] [,4]


## [1,] 1 4 7 10
## [2,] 2 5 8 11
## [3,] 3 6 9 12

5
n[3,]

## [1] 3 6 9 12

n[3,1]

## [1] 3

class(m)

## [1] "matrix" "array"

#data frame
patientID <- c(1, 2, 3, 4)
age <- c(25, 34, 28, 52)
diabetes <- c("Type1", "Type2", "Type1", "Type1")
status <- c("Poor", "Improved", "Excellent", "Poor")
patientdata <- data.frame(patientID, age, diabetes, status)
patientdata

## patientID age diabetes status


## 1 1 25 Type1 Poor
## 2 2 34 Type2 Improved
## 3 3 28 Type1 Excellent
## 4 4 52 Type1 Poor

str(patientdata)

## ’data.frame’: 4 obs. of 4 variables:


## $ patientID: num 1 2 3 4
## $ age : num 25 34 28 52
## $ diabetes : chr "Type1" "Type2" "Type1" "Type1"
## $ status : chr "Poor" "Improved" "Excellent" "Poor"

Class 3

Naming and Indexing Vectors

#Diagonal and identity matrices


diag(c(4,3,4,21))

## [,1] [,2] [,3] [,4]


## [1,] 4 0 0 0
## [2,] 0 3 0 0
## [3,] 0 0 4 0
## [4,] 0 0 0 21

6
diag(4)

## [,1] [,2] [,3] [,4]


## [1,] 1 0 0 0
## [2,] 0 1 0 0
## [3,] 0 0 1 0
## [4,] 0 0 0 1

# Transpose and Inverse


t(m)

## [,1] [,2]
## [1,] 1 2
## [2,] 3 4

solve(m)

## [,1] [,2]
## [1,] -2 1.5
## [2,] 1 -0.5

solve(diag(4))

## [,1] [,2] [,3] [,4]


## [1,] 1 0 0 0
## [2,] 0 1 0 0
## [3,] 0 0 1 0
## [4,] 0 0 0 1

det(diag(4))

## [1] 1

det(m)

## [1] -2

# Create a vector "avgs"


avgs<- c(.366,.354,.452,.523,.545)
#Create a string vector of names:
players<- c("mohan","rohan","sohan","riya","meera")
#Assign the player names to the avgs vector
names(avgs)<-players
avgs

## mohan rohan sohan riya meera


## 0.366 0.354 0.452 0.523 0.545

7
#Indices by number
avgs[2]

## rohan
## 0.354

#Indices by name
avgs["riya"]

## riya
## 0.523

#Logical Indices
avgs[avgs>=.45]

## sohan riya meera


## 0.452 0.523 0.545

Subsets of Data

# Example data frame: one row per year (2008-2013) and three product columns.
# The product_2 and product_3 values match the table printed below.
Sales <- data.frame(
  year = c(2008, 2009, 2010, 2011, 2012, 2013),
  product_1 = c(5, 6, 2, 56, 2, 5),
  product_2 = c(5, 5, 5, 6, 57, 7),
  product_3 = c(5, 5, 5, 8, 72, 2)
)
Sales

## year product_1 product_2 product_3


## 1 2008 5 5 5
## 2 2009 6 5 5
## 3 2010 2 5 5
## 4 2011 56 6 8
## 5 2012 2 57 72
## 6 2013 5 7 2

subset(Sales,product_3>7)

## year product_1 product_2 product_3


## 4 2011 56 6 8
## 5 2012 2 57 72

Others
Factor? (A factor is a special case of vector that is solely used to represent categorical or ordinal variables)

save(Sales,file="sales.RData")
load("sales.RData")
str(Sales)

## ’data.frame’: 6 obs. of 4 variables:


## $ year : num 2008 2009 2010 2011 2012 ...
## $ product_1: num 5 6 2 56 2 5
## $ product_2: num 5 5 5 6 57 7
## $ product_3: num 5 5 5 8 72 2

8
head(Sales)

## year product_1 product_2 product_3


## 1 2008 5 5 5
## 2 2009 6 5 5
## 3 2010 2 5 5
## 4 2011 56 6 8
## 5 2012 2 57 72
## 6 2013 5 7 2

colMeans(Sales)

## year product_1 product_2 product_3


## 2010.50000 12.66667 14.16667 16.16667

# Parse a day/month/year string into a date-time.
# %Y reads a four-digit year; %y would read only the first two digits ("20")
# and silently return the year 2020 instead of 2022.
strptime("02/05/2022", "%d/%m/%Y")

## [1] "2022-05-02 IST"

#factor and chr

gender<-c("male","female")
str(gender)

## chr [1:2] "male" "female"

gender

## [1] "male" "female"

gender2<-factor(c("male","female","male"))
str(gender2)

## Factor w/ 2 levels "female","male": 2 1 2

gender2

## [1] male female male


## Levels: female male

blood<-factor(c("o","AB","A"),levels = c("A","B","AB","o"))
blood[1:2]

## [1] o AB
## Levels: A B AB o

9
symptoms<- factor(c("severe","mild","moderate"),levels = c("mild","moderate","severe"),ordered = TRUE)
symptoms

## [1] severe mild moderate


## Levels: mild < moderate < severe

symptoms>"moderate"

## [1] TRUE FALSE FALSE

sex<-c(1,1,1,2,1,2,1,2,1,2,2,1,2,1,1,2,1,2,1,2,1,2,1,1,2,2,1,2,2,2,1,1,2,1,1,2,1,2,1,2,1,2,1,1)
sex

## [1] 1 1 1 2 1 2 1 2 1 2 2 1 2 1 1 2 1 2 1 2 1 2 1 1 2 2 1 2 2 2 1 1 2 1 1 2 1 2
## [39] 1 2 1 2 1 1

sex<-factor(sex)
sex

## [1] 1 1 1 2 1 2 1 2 1 2 2 1 2 1 1 2 1 2 1 2 1 2 1 1 2 2 1 2 2 2 1 1 2 1 1 2 1 2
## [39] 1 2 1 2 1 1
## Levels: 1 2

levels(sex)

## [1] "1" "2"

levels(sex)[1:2]<-c("male","female")
sex

## [1] male male male female male female male female male female
## [11] female male female male male female male female male female
## [21] male female male male female female male female female female
## [31] male male female male male female male female male female
## [41] male female male male
## Levels: male female

levels(sex)

## [1] "male" "female"

# Vectors with random patterns


sample(1:6,8,replace = TRUE) #an imaginary die is tossed 8 times

## [1] 2 5 3 1 2 1 4 5

#others

10
state<-c("MH", "UK","UP","OR","MP","RAJ","HP","BH","CG","Goa","TN","UP","UK","MH","RAJ","OR")

statef<-factor(state)
levels(statef)

## [1] "BH" "CG" "Goa" "HP" "MH" "MP" "OR" "RAJ" "TN" "UK" "UP"

#The function tapply() and ragged arrays


Income<-c(60,151,484,51,45,51,78,94,545,52,52,855,2651,41,562,75)
# To calculate the sample mean income for each state we can now use the special function tapply()
incmean<-tapply(Income,statef,mean)
incmean

## BH CG Goa HP MH MP OR RAJ TN UK UP
## 94.0 545.0 52.0 78.0 50.5 45.0 63.0 306.5 52.0 1401.0 669.5

# The combination of a vector and a labeling factor is an example of what is sometimes called a ragged array
#frequency tables
statfr<-tapply(Income, statef, length)
statfr

## BH CG Goa HP MH MP OR RAJ TN UK UP
## 1 1 1 1 2 1 2 2 1 2 2

# Bin Income into width-100 intervals starting at 35: (35,135], (135,235], ...
# Wrapping the result in factor() drops the intervals that contain no income.
incomef <- factor(cut(Income, breaks = 35 + 100 * (0:10000)))


table(incomef,statef)

## statef
## incomef BH CG Goa HP MH MP OR RAJ TN UK UP
## (35,135] 1 0 1 1 2 1 2 1 1 0 0
## (135,235] 0 0 0 0 0 0 0 0 0 1 0
## (435,535] 0 0 0 0 0 0 0 0 0 0 1
## (535,635] 0 1 0 0 0 0 0 1 0 0 0
## (835,935] 0 0 0 0 0 0 0 0 0 0 1
## (2635,2735] 0 0 0 0 0 0 0 0 0 1 0

View(table(incomef,statef))

Import and Export of Text files

write.csv() and write.table() export a data frame to a text file; read.csv() and read.table() read one back in.

getwd()

## [1] "C:/sunil files/Data Analysis Tools/R_IPCW/R Sunil 2025"

dir()

11
## [1] "~$samp.xlsx" "census.dta" "ipconsum.txt"
## [4] "math_typesetting.tex" "R-intro-1.pdf" "R Code.zip"
## [7] "sales.RData" "sales.txt" "saless"
## [10] "samp.csv" "samp.txt" "samp.xls"
## [13] "samp.xlsx" "sunil-test.html" "sunil-test.pdf"
## [16] "sunil-test.Rmd" "sunil test.Rmd"

read.table("sales.txt",header = TRUE)

## Warning in read.table("sales.txt", header = TRUE): incomplete final line found


## by readTableHeader on ’sales.txt’

## year product1
## 1 2002 2
## 2 2003 3
## 3 2004 4
## 4 2005 5

write.table(data.frame (name=c("riya","siya","priya"),consumption=c(455,611,546
)),file = "ipconsum.txt")

dir()

## [1] "~$samp.xlsx" "census.dta" "ipconsum.txt"


## [4] "math_typesetting.tex" "R-intro-1.pdf" "R Code.zip"
## [7] "sales.RData" "sales.txt" "saless"
## [10] "samp.csv" "samp.txt" "samp.xls"
## [13] "samp.xlsx" "sunil-test.html" "sunil-test.pdf"
## [16] "sunil-test.Rmd" "sunil test.Rmd"

read.table("ipconsum.txt")

## name consumption
## 1 riya 455
## 2 siya 611
## 3 priya 546

library(foreign)

stata<-read.dta("census.dta")
print(stata)

## state region pop poplt5 pop5_17 pop18p pop65p popurban


## 1 Alabama South 3893888 296412 865836 2731640 440015 2337713
## 2 Alaska West 401851 38949 91796 271106 11547 258567
## 3 Arizona West 2718215 213883 577604 1926728 307362 2278728
## 4 Arkansas South 2286435 175592 495782 1615061 312477 1179556
## 5 California West 23667902 1708400 4680558 17278944 2414250 21607606
## 6 Colorado West 2889964 216495 592318 2081151 247325 2329869
## 7 Connecticut NE 3107576 185188 637731 2284657 364864 2449774

12
## 8 Delaware South 594338 41151 125444 427743 59179 419819
## 9 Florida South 9746324 570224 1789412 7386688 1687573 8212385
## 10 Georgia South 5463105 414935 1231195 3816975 516731 3409081
## 11 Hawaii West 964691 77848 197735 689108 76150 834592
## 12 Idaho West 943935 93531 213134 637270 93680 509702
## 13 Illinois N Cntrl 11426518 842241 2400796 8183481 1261885 9518039
## 14 Indiana N Cntrl 5490224 418764 1199554 3871906 585384 3525298
## 15 Iowa N Cntrl 2913808 221628 604245 2087935 387584 1708232
## 16 Kansas N Cntrl 2363679 180877 468158 1714644 306263 1575899
## 17 Kentucky South 3660777 282731 799999 2578047 409828 1862183
## 18 Louisiana South 4205900 361533 968935 2875432 404279 2887309
## 19 Maine NE 1124660 78514 242873 803273 140918 534072
## 20 Maryland South 4216975 272274 895256 3049445 395609 3386555
## 21 Massachusetts NE 5737037 337215 1153174 4246648 726531 4808339
## 22 Michigan N Cntrl 9262078 685113 2066873 6510092 912258 6551551
## 23 Minnesota N Cntrl 4075970 307249 864559 2904162 479564 2725202
## 24 Mississippi South 2520638 215279 598918 1706441 289357 1192805
## 25 Missouri N Cntrl 4916686 354144 1008339 3554203 648126 3349588
## 26 Montana West 786690 64455 167440 554795 84559 416402
## 27 Nebraska N Cntrl 1569825 122946 324224 1122655 205684 987859
## 28 Nevada West 800493 56132 159667 584694 65756 682947
## 29 New Hampshire NE 920610 62512 195570 662528 102967 480325
## 30 New Jersey NE 7364823 463289 1527572 5373962 859771 6557377
## 31 New Mexico West 1302894 114731 303176 884987 115906 939963
## 32 New York NE 17558072 1135925 3551938 12870209 2160767 14858068
## 33 N. Carolina South 5881766 404076 1253659 4224031 603181 2822852
## 34 N. Dakota N Cntrl 652717 54752 136239 461726 80445 318310
## 35 Ohio N Cntrl 10797630 787150 2307170 7703310 1169460 7918259
## 36 Oklahoma South 3025290 233307 621577 2170406 376126 2035082
## 37 Oregon West 2633105 198046 525011 1910048 303336 1788354
## 38 Pennsylvania NE 11863895 747458 2375838 8740599 1530933 8220851
## 39 Rhode Island NE 947154 56692 186159 704303 126922 824004
## 40 S. Carolina South 3121820 238516 703450 2179854 287328 1689253
## 41 S. Dakota N Cntrl 690768 58446 147160 485162 91019 320777
## 42 Tennessee South 4591120 326088 972472 3292560 517588 2773573
## 43 Texas South 14229191 1169061 3137045 9923085 1371161 11333017
## 44 Utah West 1461037 189962 350143 920932 109220 1233060
## 45 Vermont NE 511456 35998 109320 366138 58166 172735
## 46 Virginia South 5346818 360686 1113648 3872484 505304 3529423
## 47 Washington West 4132156 306123 833237 2992796 431562 3037014
## 48 W. Virginia South 1949644 145583 414053 1390008 237868 705319
## 49 Wisconsin N Cntrl 4705767 346940 1010880 3347947 564197 3020732
## 50 Wyoming West 469557 44845 100708 324004 37175 294639
## medage death marriage divorce
## 1 29.3 35305 49018 26745
## 2 26.1 1604 5361 3517
## 3 29.2 21226 30223 19908
## 4 30.6 22676 26513 15882
## 5 29.9 186428 210864 133541
## 6 28.6 18925 34917 18571
## 7 32.0 26005 26048 13488
## 8 29.8 5123 4437 2313
## 9 34.7 104190 108344 71579
## 10 28.7 44230 70638 34743

13
## 11 28.4 4849 11856 4438
## 12 27.6 6753 13428 6596
## 13 29.9 102230 109823 50997
## 14 29.2 47300 57853 40006
## 15 30.0 26348 27474 11854
## 16 30.1 21910 24847 13410
## 17 29.1 33765 32727 16731
## 18 27.4 35518 43460 18108
## 19 30.4 10768 12040 6205
## 20 30.3 34025 46278 17494
## 21 31.2 54919 46273 17873
## 22 28.8 75102 86898 45047
## 23 29.2 33412 37641 15371
## 24 27.7 23570 27908 13846
## 25 30.9 49329 54625 27595
## 26 29.0 6664 8336 4940
## 27 29.7 14465 14239 6442
## 28 30.2 5852 114333 13842
## 29 30.1 7594 9251 5254
## 30 32.2 68762 55794 27796
## 31 27.4 9016 16641 10426
## 32 31.9 171769 144518 61972
## 33 29.6 48426 46718 28050
## 34 28.3 5596 6094 2142
## 35 29.9 98268 99832 58809
## 36 30.1 28227 46509 24226
## 37 30.2 21756 23004 17762
## 38 32.1 123261 93673 34922
## 39 31.8 9300 7490 3606
## 40 28.1 25138 53915 13595
## 41 28.9 6523 8800 2811
## 42 30.1 40713 59175 30206
## 43 28.2 108019 181762 96809
## 44 24.2 8103 16958 7802
## 45 29.4 4587 5226 2623
## 46 29.8 42496 60210 23615
## 47 29.8 32012 47728 28642
## 48 30.4 19186 17391 10273
## 49 29.4 39255 41111 17546
## 50 27.1 3215 6868 4003

View(stata)
options(scipen = 9) #turn off scientific notation except for big numbers

# Fix the random seed so the simulated numbers are the same every time the
# code is run. The seed value itself has no special meaning; any integer works.
set.seed(1523)
N <- 1000 # Population size
X0 <- runif(N) # Intercept term, drawn from the uniform distribution
X1 <- runif(N) # Explanatory variable, drawn from the uniform distribution
Xerr <- runif(N) # Error term, drawn from the uniform distribution
Y <- X0 + 4 * X1 + Xerr # Outcome Y = intercept + 4 * X1 + error
samp <- data.frame(X0, X1, Y) # Data we might "observe" (Xerr is left out)
View(samp)

14
library(writexl)
#write_xlsx(samp,"samp.xlsx")
write.csv(samp,"samp.csv",row.names = FALSE)
b<-read.csv("samp.csv")
View(b)

# Read data from a website

library(readstata13)

# Address of the Stata example file TBL5-1.DTA on the UCLA IDRE statistics site
url <- "https://stats.idre.ucla.edu/stat/stata/examples/greene/TBL5-1.DTA"
#caschool <- read.dta13(url)
#View(caschool)
#rename x1 age #rename x2 income #rename x3 exp #rename x4 ownrent #rename x5 selfemp

data("infert")

library(readxl)
c<-read_excel("C:/sunil files/Data Analysis Tools/R_IPCW/R Sunil 2025/samp.xlsx",sheet = "sunil")
View(c)

# create a new variable using "dplyr and pipe" mathematical expression

library(dplyr)

##
## Attaching package: ’dplyr’

## The following objects are masked from ’package:stats’:


##
## filter, lag

## The following objects are masked from ’package:base’:


##
## intersect, setdiff, setequal, union

install.packages("rmarkdown")
library(rmarkdown)
library("rmarkdown")
tinytex::install_tinytex()
tinytex::latexmk()

15

You might also like