> hs0[1:5, ]
gender id race ses schtyp prgtype read write math science socst
1 0 70 4 1 1 general 57 52 41 47 57
2 1 121 4 2 1 vocati 68 59 53 63 61
3 0 86 4 3 1 general 44 33 54 58 31
4 0 141 4 3 1 vocati 63 44 47 53 56
5 0 172 4 2 1 academic 47 52 57 53 61
> names(hs0)
[1] "gender" "id" "race" "ses" "schtyp" "prgtype" "read" "write" "math" "science" "socst"
> # shorthand way of referring to read, write, math, science
> read.sci <- hs0[ , 7:10]
> # checking the type of object
> class(read.sci)
[1] "data.frame"
> # listing the first 10 observations
> head(read.sci, n=10)
read write math science
1 57 52 41 47
2 68 59 53 63
3 44 33 54 58
4 63 44 47 53
5 47 52 57 53
6 44 52 51 63
7 50 59 42 53
8 34 46 45 39
9 63 57 54 NA
10 57 55 52 50
> # displaying the dimensions
> dim(read.sci)
[1] 200 4
> length(read.sci)
[1] 4
> length(read.sci$read)
[1] 200
> summary(read.sci)
read write math science
Min. :28.00 Min. :31.00 Min. :33.00 Min. :26.00
1st Qu.:44.00 1st Qu.:45.75 1st Qu.:45.00 1st Qu.:44.00
Median :50.00 Median :54.00 Median :52.00 Median :53.00
Mean :52.23 Mean :52.77 Mean :52.65 Mean :51.66
3rd Qu.:60.00 3rd Qu.:60.00 3rd Qu.:59.00 3rd Qu.:58.00
Max. :76.00 Max. :67.00 Max. :75.00 Max. :74.00
NA's : 5.00
> range(read.sci$write)
[1] 31 67
> range(read.sci$science)
[1] NA NA
> range(read.sci$science, na.rm=T)
[1] 26 74
> # the minimum and the maximum among all the variables
> range(read.sci, na.rm=T)
[1] 26 76
> mean(read.sci)
read write math science
52.230 52.775 52.645 NA
> mean(read.sci, na.rm=T)
read write math science
52.23000 52.77500 52.64500 51.66154
> sd(read.sci, na.rm=T)
read write math science
10.252937 9.478586 9.368448 9.866026
> table(hs0$prgtype)
academic general vocati
105 45 50
> by(hs0, hs0$prgtype, mean)
hs0$prgtype: academic
gender id race ses schtyp prgtype read write math science socst
0.552381 107.628571 3.495238 2.219048 1.228571 NA 56.161905 56.257143 56.733333 NA 56.695238
-----------------------------------------------------------------
hs0$prgtype: general
gender id race ses schtyp prgtype read write math science socst
0.5333333 97.3111111 3.4666667 1.8444444 1.1333333 NA 49.7555556 51.3333333 50.0222222 NA 50.6000000
-----------------------------------------------------------------
hs0$prgtype: vocati
gender id race ses schtyp prgtype read write math science socst
0.54 88.40 3.30 1.90 1.04 NA 46.20 46.76 46.42 47.22 45.02
Warning messages:
1: In mean.default(X[[6L]], ...) :
argument is not numeric or logical: returning NA
2: In mean.default(X[[6L]], ...) :
argument is not numeric or logical: returning NA
3: In mean.default(X[[6L]], ...) :
argument is not numeric or logical: returning NA
> by(hs0, hs0$prgtype, sd)
hs0$prgtype: academic
gender id race ses schtyp prgtype read write math science socst
0.4996336 61.0042964 1.0107481 0.7335498 0.4219265 NA 9.5887793 7.9433433 8.7302157 NA 9.1736703
----------------------------------------------------------------------------------------------------------------------------------------------
hs0$prgtype: general
gender id race ses schtyp prgtype read write math science socst
0.5045250 52.5793004 1.0135446 0.7371800 0.3437758 NA 9.2347062 9.3977754 7.4421676 NA 9.3088423
----------------------------------------------------------------------------------------------------------------------------------------------
hs0$prgtype: vocati
gender id race ses schtyp prgtype read write math science socst
0.5034574 54.3713610 1.1649647 0.6144518 0.1979487 NA 8.9076899 9.3187544 7.9541800 10.3337963 10.6569705
Warning messages:
1: In var(as.vector(x), na.rm = na.rm) : NAs introduced by coercion
2: In var(as.vector(x), na.rm = na.rm) : NAs introduced by coercion
3: In var(as.vector(x), na.rm = na.rm) : NAs introduced by coercion
> # attaching hs0, so its variables will be searchable by R
> attach(hs0)
> getOption("digits")
[1] 7
> options(digits=2)
> by(hs0, prgtype, mean, na.rm=T)
prgtype: academic
gender id race ses schtyp prgtype read write math science socst
0.55 107.63 3.50 2.22 1.23 NA 56.16 56.26 56.73 53.62 56.70
-----------------------------------------------------------------
prgtype: general
gender id race ses schtyp prgtype read write math science socst
0.53 97.31 3.47 1.84 1.13 NA 49.76 51.33 50.02 52.19 50.60
-----------------------------------------------------------------
prgtype: vocati
gender id race ses schtyp prgtype read write math science socst
0.54 88.40 3.30 1.90 1.04 NA 46.20 46.76 46.42 47.22 45.02
Warning messages:
1: In mean.default(X[[6L]], ...) :
argument is not numeric or logical: returning NA
2: In mean.default(X[[6L]], ...) :
argument is not numeric or logical: returning NA
3: In mean.default(X[[6L]], ...) :
argument is not numeric or logical: returning NA
> by(hs0, prgtype, sd, na.rm=T)
prgtype: academic
gender id race ses schtyp prgtype read write math science socst
0.50 61.00 1.01 0.73 0.42 NA 9.59 7.94 8.73 9.01 9.17
-----------------------------------------------------------------
prgtype: general
gender id race ses schtyp prgtype read write math science socst
0.50 52.58 1.01 0.74 0.34 NA 9.23 9.40 7.44 9.83 9.31
-----------------------------------------------------------------
prgtype: vocati
gender id race ses schtyp prgtype read write math science socst
0.50 54.37 1.16 0.61 0.20 NA 8.91 9.32 7.95 10.33 10.66
Warning messages:
1: In var(as.vector(x), na.rm = na.rm) : NAs introduced by coercion
2: In var(as.vector(x), na.rm = na.rm) : NAs introduced by coercion
3: In var(as.vector(x), na.rm = na.rm) : NAs introduced by coercion
> m <- tapply(write, prgtype, mean)
> v <- tapply(write, prgtype, var)
> med <- tapply(write, prgtype, median)
> n <- tapply(write, prgtype, length)
> sd <- tapply(write, prgtype, sd)
> cbind(mean=m, var=v, std.dev=sd, median=med, n=n)
mean var std.dev median n
academic 56 63 7.9 59 105
general 51 88 9.4 54 45
vocati 47 87 9.3 46 50
> # set the number of digits to 7
> options(digits=7)
> hist(write)
> # load trellis graphics
> library(lattice)
> # trellis graphs
> histogram(~write, hs0, type="count")
> # histogram of write by gender
> histogram(~write | gender, hs0, type="count")
> # change the number of bins to 15
> hist(write, breaks=15)
> # boxplot function in the graphics package
> boxplot(write)
> # trellis graphs
> bwplot(ses~ write, hs0)
> # boxplot by gender
> bwplot(ses~ write| gender, hs0)
> barplot(table(ses, gender), legend=c("low", "medium", "high"))
> barplot(table(ses, gender), beside=T, legend=c("low", "medium", "high"), ylim=c(0, 50))
> # changing the location of legend and adding a title, etc
> barplot(table(ses, gender), beside=T, legend.text=c("low", "medium", "high"), ylim=c(0, 50), space=c(.1, 1),
+ col=c("lightblue", "blue", "dark blue"), names.arg=c("male", "female"),
+ main="Distribution of SES by gender", args.legend=list(x =9, y=45, cex=.6))
> table(ses)
ses
1 2 3
47 95 58
> tab1<-table(gender, ses)
> # row proportions
> prop.table(tab1,1)
ses
gender 1 2 3
0 0.1648352 0.5164835 0.3186813
1 0.2935780 0.4403670 0.2660550
> # row frequencies
> rowSums(tab1)
0 1
91 109
> # column frequencies
> colSums(tab1)
1 2 3
47 95 58
> # correlation of a pair of variables
> cor(write, math)
[1] 0.6174493
> cor(write, science)
[1] NA
> cor(write, science, use="complete.obs")
[1] 0.5671298
> # correlation matrix
> cor(read.sci, use="complete.obs")
read write math science
read 1.0000000 0.5959677 0.6492202 0.6170562
write 0.5959677 1.0000000 0.6203022 0.5671298
math 0.6492202 0.6203022 1.0000000 0.6166288
science 0.6170562 0.5671298 0.6166288 1.0000000
> cor(read.sci, use="pairwise.complete.obs")
read write math science
read 1.0000000 0.5967765 0.6622801 0.6170562
write 0.5967765 1.0000000 0.6174493 0.5671298
math 0.6622801 0.6174493 1.0000000 0.6166288
science 0.6170562 0.5671298 0.6166288 1.0000000
> plot(math, write)
> # scatter plot matrix
> plot(read.sci)
> # Unless you are going to continue working with the hs0 data frame, it is generally a good idea to detach all attached data frames.
> detach()











No comments:
Post a Comment