Wednesday, March 23, 2011

http://www.ats.ucla.edu/stat/R/notes/managing.htm

> hs1 <- read.table("http://www.ats.ucla.edu/stat/R/notes/hs1.csv", header=T, sep=",")
> # Keeping only the observations where the reading score is 60 or higher.
> hs1.read.well <- hs1[hs1$read >= 60, ]
> # Comparing means of read in the original hs1 data frame and the new smaller hs1.read.well data frame.
> mean(hs1.read.well$read)
[1] 65.48214
> mean(hs1$read)
[1] 52.23
> # Keeping only the variables read and write from the hs1 data frame.
> hs2<-hs1[, c("read", "write")]
> # Another way of doing the same thing, selecting the columns by index instead of by name.
> hs3<-hs1[, c(7, 8)]
> names(hs3)
[1] "read"  "write"
> # Dropping the variables read and write from the hs1 data frame by using the column indices corresponding to these two variables with a negative sign.
> hs2.drop<-hs1[, -c(7, 8)]
> names(hs2.drop)
 [1] "female"  "id"      "race"    "ses"     "schtyp"  "prgtype" "math"    "science" "socst"   "prog"   
> # We will split hs1 into two data sets, one for females and one for males, and then put them back together.
> attach(hs1)
> hsfemale<-hs1[female==1, ]
> hsmale<-hs1[female==0, ]
> dim(hsfemale)
[1] 109  12
> dim(hsmale)
[1] 91 12
> hs.all<-rbind(hsfemale, hsmale)
> dim(hs.all)
[1] 200  12
> dim(hs1)
[1] 200  12
> # We will create two data sets from hs1: one containing demographic variables and the other containing test scores. We then merge the two data sets by the id variable.
> hs.demo<-hs1[, c("id", "ses", "female", "race")]
> hs.scores<-hs1[, c("id", "read", "write", "math", "science")]
> dim(hs.demo)
[1] 200   4
> dim(hs.scores)
[1] 200   5
> hs.merge <- merge(hs.demo, hs.scores, by="id", all=T)
> head(hs.merge)
  id ses female race read write math science
1  1   1      1    1   34    44   40      39
2  2   2      1    1   39    41   33      42
3  3   1      0    1   63    65   48      63
4  4   1      1    1   44    50   41      39
5  5   1      0    1   47    40   43      NA
6  6   1      1    1   47    41   46      40
> dim(hs.merge)
[1] 200   8
> hs.merge1 <- merge(hs.demo, hs.scores, by.x="id", by.y="id", all=T)
> dim(hs.merge1)
[1] 200   8
> head(hs.merge1)
  id ses female race read write math science
1  1   1      1    1   34    44   40      39
2  2   2      1    1   39    41   33      42
3  3   1      0    1   63    65   48      63
4  4   1      1    1   44    50   41      39
5  5   1      0    1   47    40   43      NA
6  6   1      1    1   47    41   46      40
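> # Test: merge on mismatched key columns (id from the demographics against read from the scores, which has no id column) to see what merge() returns.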
> hs.demo1 <-hs1[, c("id", "ses", "female", "race")]
> hs.scores1 <-hs1[, c("read", "write", "math", "science")]
> dim(hs.demo1)
[1] 200   4
> dim(hs.scores1)
[1] 200   4
> hs.merge2 <- merge(hs.demo1, hs.scores1, by.x="id", by.y="read", all=T)
> dim(hs.merge2)
[1] 370   7
> head(hs.merge2)
  id ses female race write math science
1  1   1      1    1    NA   NA      NA
2  2   2      1    1    NA   NA      NA
3  3   1      0    1    NA   NA      NA
4  4   1      1    1    NA   NA      NA
5  5   1      0    1    NA   NA      NA
6  6   1      1    1    NA   NA      NA



No comments:

Post a Comment