Sunil Test
Sunil Test
SUNIL KUMAR
2025-01-21
Hello IPCW
#Basic Calculation
2+2
## [1] 4
2*6
## [1] 12
5/6
## [1] 0.8333333
sqrt(5)
## [1] 2.236068
log(10)
## [1] 2.302585
log(10,2)
## [1] 3.321928
## [1] 5
1
exp(5) # exponential function: e^5
## [1] 148.4132
round(5.514,2)
## [1] 5.51
factorial(5)
## [1] 120
## [1] 45
n
k
#Assigning name
a<-2+2
a
## [1] 4
# Store 5 squared in b. The exponent operator must be the ASCII caret;
# the look-alike circumflex character is not an R operator and does not parse.
b <- 5^2
b
## [1] 25
Create a Vector
d<-c(7:12)
d
## [1] 7 8 9 10 11 12
e<-c(5,56,6,4,6,4)
e
## [1] 5 56 6 4 6 4
f<-d+e
f
## [1] 12 64 15 14 17 16
2
sqrt(f)
## [1] 10
## [1] 7 5 9 10 11 12
## [1] 7 5
d[c(1,2)]
## [1] 7 5
d[c(TRUE,TRUE,FALSE)]
## [1] 7 5 10 11
d[d<10]
## [1] 7 5 9
d[c(2,2,2)]
## [1] 5 5 5
#Second vector
e
## [1] 5 56 6 4 6 4
#Create a matrix
x <- cbind(d,e)
x
## d e
## [1,] 7 5
## [2,] 5 56
## [3,] 9 6
## [4,] 10 4
## [5,] 11 6
## [6,] 12 4
3
#Look at parts of x
x[3,2]
## e
## 6
## d e
length(d)
## [1] 6
max(d)
## [1] 12
min(f)
## [1] 12
sort(d)
## [1] 5 7 9 10 11 12
## [1] 7 5 9 10 11 12
sum(d)
## [1] 54
rep(5,5)
## [1] 5 5 5 5 5
seq(5)
## [1] 1 2 3 4 5
4
seq(4,10)
## [1] 4 5 6 7 8 9 10
seq(3,10,2)
## [1] 3 5 7 9
#logical vectors
0==1
## [1] FALSE
0<1
## [1] TRUE
g<-c(5,698,5,5,456,4)
h<-g<10
h
Sys.time()
Matrix
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
n<-matrix(1:12,nrow = 3)
n
5
n[3,]
## [1] 3 6 9 12
n[3,1]
## [1] 3
class(m)
#data frame
# Four parallel columns, one entry per patient.
patientID <- c(1, 2, 3, 4)
age <- c(25, 34, 28, 52)
diabetes <- c("Type1", "Type2", "Type1", "Type1")
status <- c("Poor", "Improved", "Excellent", "Poor")

# Combine the columns into a single data frame, naming each column explicitly.
patientdata <- data.frame(
  patientID = patientID,
  age = age,
  diabetes = diabetes,
  status = status
)

# Show the table, then a compact summary of its column types.
patientdata
str(patientdata)
Class 3
6
diag(4)
## [,1] [,2]
## [1,] 1 2
## [2,] 3 4
solve(m)
## [,1] [,2]
## [1,] -2 1.5
## [2,] 1 -0.5
solve(diag(4))
det(diag(4))
## [1] 1
det(m)
## [1] -2
7
#Indices by number
avgs[2]
## rohan
## 0.354
#Indices by name
avgs["riya"]
## riya
## 0.523
#Logical Indices
avgs[avgs>=.45]
Subsets of Data
Sales<-data.frame(year=c(2008,2009,2010,2011,2012,2013),product_1=c(5,6,2,56,2,5),product_2=c(5,5,5,6,57
Sales
subset(Sales,product_3>7)
Others
Factor: a factor is a special case of a vector that is used solely to represent categorical or ordinal variables.
save(Sales,file="sales.RData")
load("sales.RData")
str(Sales)
8
head(Sales)
colMeans(Sales)
# Parse a day/month/year string. The year here has four digits, so the
# format must use %Y; %y would read only the first two digits ("20") and
# yield the year 2020 instead of 2022.
strptime("02/05/2022", "%d/%m/%Y")
# A plain character vector: str() reports it as "chr".
gender <- c("male", "female")
str(gender)
gender

# The same kind of labels stored as a factor; levels are derived from the data.
gender2 <- factor(c("male", "female", "male"))
str(gender2)
gender2

# A factor with an explicit level set, including a level ("B") that does not
# occur in the data.
blood <- factor(
  c("o", "AB", "A"),
  levels = c("A", "B", "AB", "o")
)
# First two elements; the full level set is kept.
blood[seq_len(2)]
## [1] o AB
## Levels: A B AB o
9
# An ordered factor: the levels argument fixes the ranking mild < moderate < severe.
symptoms <- factor(
  c("severe", "mild", "moderate"),
  levels = c("mild", "moderate", "severe"),
  ordered = TRUE
)
symptoms
# Because the factor is ordered, comparison against a level name is meaningful.
symptoms > "moderate"
sex<-c(1,1,1,2,1,2,1,2,1,2,2,1,2,1,1,2,1,2,1,2,1,2,1,1,2,2,1,2,2,2,1,1,2,1,1,2,1,2,1,2,1,2,1,1)
sex
## [1] 1 1 1 2 1 2 1 2 1 2 2 1 2 1 1 2 1 2 1 2 1 2 1 1 2 2 1 2 2 2 1 1 2 1 1 2 1 2
## [39] 1 2 1 2 1 1
sex<-factor(sex)
sex
## [1] 1 1 1 2 1 2 1 2 1 2 2 1 2 1 1 2 1 2 1 2 1 2 1 1 2 2 1 2 2 2 1 1 2 1 1 2 1 2
## [39] 1 2 1 2 1 1
## Levels: 1 2
# Show the current level labels of the factor.
levels(sex)
# Relabel the first two levels in place: the first level becomes "male" and
# the second becomes "female". The underlying codes are not changed.
levels(sex)[1:2]<-c("male","female")
# Print the factor again to see the new labels.
sex
## [1] male male male female male female male female male female
## [11] female male female male male female male female male female
## [21] male female male male female female male female female female
## [31] male male female male male female male female male female
## [41] male female male male
## Levels: male female
levels(sex)
## [1] 2 5 3 1 2 1 4 5
#others
10
state<-c("MH", "UK","UP","OR","MP","RAJ","HP","BH","CG","Goa","TN","UP","UK","MH","RAJ","OR")
statef<-factor(state)
levels(statef)
## [1] "BH" "CG" "Goa" "HP" "MH" "MP" "OR" "RAJ" "TN" "UK" "UP"
## BH CG Goa HP MH MP OR RAJ TN UK UP
## 94.0 545.0 52.0 78.0 50.5 45.0 63.0 306.5 52.0 1401.0 669.5
# The combination of a vector and a labeling factor is an example of what is sometimes called a ragged array.
#frequency tables
statfr<-tapply(Income, statef, length)
statfr
## BH CG Goa HP MH MP OR RAJ TN UK UP
## 1 1 1 1 2 1 2 2 1 2 2
## statef
## incomef BH CG Goa HP MH MP OR RAJ TN UK UP
## (35,135] 1 0 1 1 2 1 2 1 1 0 0
## (135,235] 0 0 0 0 0 0 0 0 0 1 0
## (435,535] 0 0 0 0 0 0 0 0 0 0 1
## (535,635] 0 1 0 0 0 0 0 1 0 0 0
## (835,935] 0 0 0 0 0 0 0 0 0 0 1
## (2635,2735] 0 0 0 0 0 0 0 0 0 1 0
View(table(incomef,statef))
getwd()
dir()
11
## [1] "~$samp.xlsx" "census.dta" "ipconsum.txt"
## [4] "math_typesetting.tex" "R-intro-1.pdf" "R Code.zip"
## [7] "sales.RData" "sales.txt" "saless"
## [10] "samp.csv" "samp.txt" "samp.xls"
## [13] "samp.xlsx" "sunil-test.html" "sunil-test.pdf"
## [16] "sunil-test.Rmd" "sunil test.Rmd"
read.table("sales.txt",header = TRUE)
## year product1
## 1 2002 2
## 2 2003 3
## 3 2004 4
## 4 2005 5
# Write a small name/consumption table to "ipconsum.txt" in the working directory.
write.table(
  data.frame(
    name = c("riya", "siya", "priya"),
    consumption = c(455, 611, 546)
  ),
  file = "ipconsum.txt"
)
# List the working directory, then read the file back.
dir()
read.table("ipconsum.txt")
## name consumption
## 1 riya 455
## 2 siya 611
## 3 priya 546
library(foreign)
stata<-read.dta("census.dta")
print(stata)
12
## 8 Delaware South 594338 41151 125444 427743 59179 419819
## 9 Florida South 9746324 570224 1789412 7386688 1687573 8212385
## 10 Georgia South 5463105 414935 1231195 3816975 516731 3409081
## 11 Hawaii West 964691 77848 197735 689108 76150 834592
## 12 Idaho West 943935 93531 213134 637270 93680 509702
## 13 Illinois N Cntrl 11426518 842241 2400796 8183481 1261885 9518039
## 14 Indiana N Cntrl 5490224 418764 1199554 3871906 585384 3525298
## 15 Iowa N Cntrl 2913808 221628 604245 2087935 387584 1708232
## 16 Kansas N Cntrl 2363679 180877 468158 1714644 306263 1575899
## 17 Kentucky South 3660777 282731 799999 2578047 409828 1862183
## 18 Louisiana South 4205900 361533 968935 2875432 404279 2887309
## 19 Maine NE 1124660 78514 242873 803273 140918 534072
## 20 Maryland South 4216975 272274 895256 3049445 395609 3386555
## 21 Massachusetts NE 5737037 337215 1153174 4246648 726531 4808339
## 22 Michigan N Cntrl 9262078 685113 2066873 6510092 912258 6551551
## 23 Minnesota N Cntrl 4075970 307249 864559 2904162 479564 2725202
## 24 Mississippi South 2520638 215279 598918 1706441 289357 1192805
## 25 Missouri N Cntrl 4916686 354144 1008339 3554203 648126 3349588
## 26 Montana West 786690 64455 167440 554795 84559 416402
## 27 Nebraska N Cntrl 1569825 122946 324224 1122655 205684 987859
## 28 Nevada West 800493 56132 159667 584694 65756 682947
## 29 New Hampshire NE 920610 62512 195570 662528 102967 480325
## 30 New Jersey NE 7364823 463289 1527572 5373962 859771 6557377
## 31 New Mexico West 1302894 114731 303176 884987 115906 939963
## 32 New York NE 17558072 1135925 3551938 12870209 2160767 14858068
## 33 N. Carolina South 5881766 404076 1253659 4224031 603181 2822852
## 34 N. Dakota N Cntrl 652717 54752 136239 461726 80445 318310
## 35 Ohio N Cntrl 10797630 787150 2307170 7703310 1169460 7918259
## 36 Oklahoma South 3025290 233307 621577 2170406 376126 2035082
## 37 Oregon West 2633105 198046 525011 1910048 303336 1788354
## 38 Pennsylvania NE 11863895 747458 2375838 8740599 1530933 8220851
## 39 Rhode Island NE 947154 56692 186159 704303 126922 824004
## 40 S. Carolina South 3121820 238516 703450 2179854 287328 1689253
## 41 S. Dakota N Cntrl 690768 58446 147160 485162 91019 320777
## 42 Tennessee South 4591120 326088 972472 3292560 517588 2773573
## 43 Texas South 14229191 1169061 3137045 9923085 1371161 11333017
## 44 Utah West 1461037 189962 350143 920932 109220 1233060
## 45 Vermont NE 511456 35998 109320 366138 58166 172735
## 46 Virginia South 5346818 360686 1113648 3872484 505304 3529423
## 47 Washington West 4132156 306123 833237 2992796 431562 3037014
## 48 W. Virginia South 1949644 145583 414053 1390008 237868 705319
## 49 Wisconsin N Cntrl 4705767 346940 1010880 3347947 564197 3020732
## 50 Wyoming West 469557 44845 100708 324004 37175 294639
## medage death marriage divorce
## 1 29.3 35305 49018 26745
## 2 26.1 1604 5361 3517
## 3 29.2 21226 30223 19908
## 4 30.6 22676 26513 15882
## 5 29.9 186428 210864 133541
## 6 28.6 18925 34917 18571
## 7 32.0 26005 26048 13488
## 8 29.8 5123 4437 2313
## 9 34.7 104190 108344 71579
## 10 28.7 44230 70638 34743
13
## 11 28.4 4849 11856 4438
## 12 27.6 6753 13428 6596
## 13 29.9 102230 109823 50997
## 14 29.2 47300 57853 40006
## 15 30.0 26348 27474 11854
## 16 30.1 21910 24847 13410
## 17 29.1 33765 32727 16731
## 18 27.4 35518 43460 18108
## 19 30.4 10768 12040 6205
## 20 30.3 34025 46278 17494
## 21 31.2 54919 46273 17873
## 22 28.8 75102 86898 45047
## 23 29.2 33412 37641 15371
## 24 27.7 23570 27908 13846
## 25 30.9 49329 54625 27595
## 26 29.0 6664 8336 4940
## 27 29.7 14465 14239 6442
## 28 30.2 5852 114333 13842
## 29 30.1 7594 9251 5254
## 30 32.2 68762 55794 27796
## 31 27.4 9016 16641 10426
## 32 31.9 171769 144518 61972
## 33 29.6 48426 46718 28050
## 34 28.3 5596 6094 2142
## 35 29.9 98268 99832 58809
## 36 30.1 28227 46509 24226
## 37 30.2 21756 23004 17762
## 38 32.1 123261 93673 34922
## 39 31.8 9300 7490 3606
## 40 28.1 25138 53915 13595
## 41 28.9 6523 8800 2811
## 42 30.1 40713 59175 30206
## 43 28.2 108019 181762 96809
## 44 24.2 8103 16958 7802
## 45 29.4 4587 5226 2623
## 46 29.8 42496 60210 23615
## 47 29.8 32012 47728 28642
## 48 30.4 19186 17391 10273
## 49 29.4 39255 41111 17546
## 50 27.1 3215 6868 4003
View(stata)
# Penalise scientific notation so that ordinary-sized numbers print in fixed form.
options(scipen = 9)

# Fix the random seed so the draws below are the same on every run.
# The seed value itself carries no meaning; any integer would do.
set.seed(1523)

# Number of simulated observations.
N <- 1000

# Three independent sets of N uniform draws, taken in this order.
X0 <- runif(N)   # intercept component
X1 <- runif(N)   # explanatory variable
Xerr <- runif(N) # error term

# Outcome: intercept component plus 4 times the explanatory variable plus error.
Y <- X0 + 4 * X1 + Xerr

# The data one might "observe": the error term is deliberately left out.
samp <- data.frame(X0 = X0, X1 = X1, Y = Y)
View(samp)
14
library(writexl)
#write_xlsx(samp,"samp.xlsx")
# Write samp to "samp.csv" in the working directory, without a row-name column.
write.csv(samp,"samp.csv",row.names = FALSE)
# Read the file straight back in. Note this reuses the name b, which is
# also assigned earlier in this file, so the earlier value is overwritten.
b<-read.csv("samp.csv")
View(b)
library(readstata13)
# Direct address of the TBL5-1.DTA example file. A mirror/proxy host had been
# spliced into the address; it is removed here so the URL points at the
# original server. NOTE(review): confirm the file is still hosted at this path.
url <- "https://stats.idre.ucla.edu/stat/stata/examples/greene/TBL5-1.DTA"
#caschool <- read.dta13(url)
#View(caschool)
#rename x1 age #rename x2 income #rename x3 exp #rename x4 ownrent #rename x5 selfemp
data("infert")
library(readxl)
# Read the sheet named "sunil" from an Excel workbook. The path is an absolute
# Windows path, so this line only works on a machine that has this exact file.
# NOTE(review): the result is stored under the name c, the same name as the
# base function c(); consider a more descriptive name.
c<-read_excel("C:/sunil files/Data Analysis Tools/R_IPCW/R Sunil 2025/samp.xlsx",sheet = "sunil")
View(c)
library(dplyr)
##
## Attaching package: ’dplyr’
15