R: http://www.ats.ucla.edu/stat/R/notes/managing.htm

> hs1 <- read.table("http://www.ats.ucla.edu/stat/R/notes/hs1.csv", header=T, sep=",")
> # Keeping only the observations where the reading score is 60 or higher.
> hs1.read.well <- hs1[hs1$read >= 60, ]
> # Comparing means of read in the original hs1 data frame and the new smaller hs1.read.well data frame.
> mean(hs1.read.well$read)
[1] 65.48214
> mean(hs1$read)
[1] 52.23
> # Keeping only the variables read and write from the hs1 data frame.
> hs2<-hs1[, c("read", "write")]
> # Another way of doing the same thing: selecting the columns by position instead of by name (read and write are columns 7 and 8 of hs1).
> hs3<-hs1[, c(7, 8)]
> names(hs3)
[1] "read" "write"
> # Dropping the variables read and write from the hs1 data frame by using the column indices corresponding to these two variables with a negative sign.
> hs2.drop<-hs1[, -c(7, 8)]
> names(hs2.drop)
[1] "female" "id" "race" "ses" "schtyp" "prgtype" "math" "science" "socst" "prog"
> # We will split hs1 into two data sets, one for females (female==1) and one for males (female==0), and then stack them back together with rbind().
> attach(hs1)
> hsfemale<-hs1[female==1, ]
> hsmale<-hs1[female==0, ]
> dim(hsfemale)
[1] 109 12
> dim(hsmale)
[1] 91 12
> hs.all<-rbind(hsfemale, hsmale)
> dim(hs.all)
[1] 200 12
> dim(hs1)
[1] 200 12
> # We will create two data sets from hs1: one containing the demographic variables and the other containing the test scores. We then merge the two data sets by the id variable.
> hs.demo<-hs1[, c("id", "ses", "female", "race")]
> hs.scores<-hs1[, c("id", "read", "write", "math", "science")]
> dim(hs.demo)
[1] 200 4
> dim(hs.scores)
[1] 200 5
> hs.merge <- merge(hs.demo, hs.scores, by="id", all=T)
> head(hs.merge)
  id ses female race read write math science
1 1 1 1 1 34 44 40 39
2 2 2 1 1 39 41 33 42
3 3 1 0 1 63 65 48 63
4 4 1 1 1 44 50 41 39
5 5 1 0 1 47 40 43 NA
6 6 1 1 1 47 41 46 40
> dim(hs.merge)
[1] 200 8
> hs.merge1 <- merge(hs.demo, hs.scores, by.x="id", by.y="id", all=T)
> dim(hs.merge1)
[1] 200 8
> head(hs.merge1)
  id ses female race read write math science
1 1 1 1 1 34 44 40 39
2 2 2 1 1 39 41 33 42
3 3 1 0 1 63 65 48 63
4 4 1 1 1 44 50 41 39
5 5 1 0 1 47 40 43 NA
6 6 1 1 1 47 41 46 40
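> # Test: what happens when the key columns do not correspond? Here hs.scores1 has no id column, so id from hs.demo1 is matched against read from hs.scores1. Because all=T, ids with no equal read value are kept, with NA in the score columns.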
> hs.demo1 <-hs1[, c("id", "ses", "female", "race")]
> hs.scores1 <-hs1[, c("read", "write", "math", "science")]
> dim(hs.demo1)
[1] 200 4
> dim(hs.scores1)
[1] 200 4
> hs.merge2 <- merge(hs.demo1, hs.scores1, by.x="id", by.y="read", all=T)
> dim(hs.merge2)
[1] 370 7
> head(hs.merge2)
  id ses female race write math science
1 1 1 1 1 NA NA NA
2 2 2 1 1 NA NA NA
3 3 1 0 1 NA NA NA
4 4 1 1 1 NA NA NA
5 5 1 0 1 NA NA NA
6 6 1 1 1 NA NA NA

R

Wednesday, March 23, 2011

http://www.ats.ucla.edu/stat/R/notes/managing.htm

No comments:

Post a Comment