|
|
@ -43,99 +43,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
** Sampling Distributions :slide:
|
|
|
|
** Sampling Distributions :slide:
|
|
|
|
|
|
|
|
|
|
|
|
#+RESULTS: sampling
|
|
|
|
#+RESULTS: sampling2
|
|
|
|
[[file:bsample.png]]
|
|
|
|
[[file:bsample2.png]]
|
|
|
|
|
|
|
|
|
|
|
|
#+NAME: sampling
|
|
|
|
|
|
|
|
#+BEGIN_SRC R :results output graphics :exports results :file bsample.png :width 4500 :height 3000 :res 600
|
|
|
|
|
|
|
|
# Load the worker data and draw the wage density of the full (filtered)
# data set. The resulting plot object `p` is the base layer that the
# sampling-distribution curves are overlaid on further down.
library(data.table)
library(ggplot2)

worker <- readRDS("plfsdata/plfsacjdata.rds")
worker$wage <- worker$standardwage
#read.table("~/ssercloud/acj2018/worker.csv",sep=",",header=T)->worker

# Record each row's position before any rows are filtered out.
# seq_len() yields an empty index for a zero-row table, whereas 1:nrow()
# would yield c(1, 0).
worker$SamplingFrameOrder <- seq_len(nrow(worker))

# Keep rows whose `sex` code is not 3. The bare column name in `i` relies on
# data.table semantics -- assumes the RDS file holds a data.table (TODO confirm).
worker <- worker[sex != 3, ]

# Mean wage of the filtered data; used for both the x-axis break and the label.
pop_mean <- mean(worker$wage)

p <- ggplot(worker, aes(wage)) +
  geom_density(colour = "black", size = 1) +
  scale_y_continuous(limits = c(0, 0.05)) +
  scale_x_continuous(limits = c(0, 1000), breaks = c(0, pop_mean, 1000))
# p+facet_wrap(~sex)->p
p <- p +
  annotate("text", x = 520, y = 0.045,
           label = paste0("Population mean = ", round(pop_mean)))
p <- p + theme_bw()
p
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Sampling distribution of the mean for samples of size 5.
# Draw 10000 samples without replacement from the rows of `worker` and record
# the mean wage of each. vapply() fills a preallocated numeric vector, instead
# of appending with c() inside a loop (which copies the whole vector on every
# pass). sample.int(n, k) draws the same indices as sample(1:n, k).
t1 <- local({
  wages <- worker$wage
  vapply(
    seq_len(10000),
    function(i) mean(wages[sample.int(length(wages), 5, replace = FALSE)]),
    numeric(1)
  )
})
t1 <- data.frame(sno = seq_len(10000), meancol = t1)

# Overlay the density of the sample means on the existing plot `p`.
p <- p + geom_density(data = t1, aes(meancol), colour = "blue", size = 1)

# Label with the mean and standard deviation of the simulated sample means.
lab <- paste0(
  "Distribution of sample means (5): mean = ",
  round(mean(t1$meancol)),
  "; stdev = ",
  round(sd(t1$meancol))
)
p <- p + annotate("text", x = 700, y = 0.033, label = lab, colour = "blue")
p
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Sampling distribution of the mean for samples of size 20.
# Draw 10000 samples without replacement from the rows of `worker` and record
# the mean wage of each. vapply() fills a preallocated numeric vector, instead
# of appending with c() inside a loop (which copies the whole vector on every
# pass). sample.int(n, k) draws the same indices as sample(1:n, k).
t0 <- local({
  wages <- worker$wage
  vapply(
    seq_len(10000),
    function(i) mean(wages[sample.int(length(wages), 20, replace = FALSE)]),
    numeric(1)
  )
})
t0 <- data.frame(sno = seq_len(10000), meancol = t0)

# Overlay the density of the sample means on the existing plot `p`.
p <- p +
  geom_density(data = t0, aes(meancol), colour = "darkolivegreen", size = 1)

# Label with the mean and standard deviation of the simulated sample means.
lab <- paste0(
  "Distribution of sample means (20): mean = ",
  round(mean(t0$meancol)),
  "; stdev = ",
  round(sd(t0$meancol))
)
p <- p +
  annotate("text", x = 700, y = 0.036, label = lab, colour = "darkolivegreen")
p
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Sampling distribution of the mean for samples of size 50.
# Draw 10000 samples without replacement from the rows of `worker` and record
# the mean wage of each. vapply() fills a preallocated numeric vector, instead
# of appending with c() inside a loop (which copies the whole vector on every
# pass). sample.int(n, k) draws the same indices as sample(1:n, k).
# NOTE(review): the result name `t` shadows base::t(); it is kept unchanged
# here in case other code refers to it -- consider renaming.
t <- local({
  wages <- worker$wage
  vapply(
    seq_len(10000),
    function(i) mean(wages[sample.int(length(wages), 50, replace = FALSE)]),
    numeric(1)
  )
})
t <- data.frame(sno = seq_len(10000), meancol = t)

# Overlay the density of the sample means on the existing plot `p`.
p <- p + geom_density(data = t, aes(meancol), colour = "red", size = 1)

# Label with the mean and standard deviation of the simulated sample means.
lab <- paste0(
  "Distribution of sample means (50): mean = ",
  round(mean(t$meancol)),
  "; stdev = ",
  round(sd(t$meancol))
)
p <- p + annotate("text", x = 700, y = 0.039, label = lab, colour = "red")
p
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Sampling distribution of the mean for samples of size 200.
# Draw 10000 samples without replacement from the rows of `worker` and record
# the mean wage of each. vapply() fills a preallocated numeric vector, instead
# of appending with c() inside a loop (which copies the whole vector on every
# pass). sample.int(n, k) draws the same indices as sample(1:n, k).
t4 <- local({
  wages <- worker$wage
  vapply(
    seq_len(10000),
    function(i) mean(wages[sample.int(length(wages), 200, replace = FALSE)]),
    numeric(1)
  )
})
t4 <- data.frame(sno = seq_len(10000), meancol = t4)

# Overlay the density of the sample means on the existing plot `p`.
p <- p + geom_density(data = t4, aes(meancol), colour = "pink", size = 1)

# Label with the mean and standard deviation of the simulated sample means.
lab <- paste0(
  "Distribution of sample means (200): mean = ",
  round(mean(t4$meancol)),
  "; stdev = ",
  round(sd(t4$meancol))
)
p <- p + annotate("text", x = 700, y = 0.042, label = lab, colour = "pink")
p
|
|
|
|
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#+NAME: sampling2
|
|
|
|
#+NAME: sampling2
|
|
|
|
#+BEGIN_SRC R :results output graphics :exports results :file bsample2.png :width 4500 :height 3000 :res 600
|
|
|
|
#+BEGIN_SRC R :results output graphics :exports results :file bsample2.png :width 4500 :height 3000 :res 600
|
|
|
@ -230,7 +139,4 @@
|
|
|
|
p
|
|
|
|
p
|
|
|
|
#+end_src
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
|
|
|
#+RESULTS: sampling2
|
|
|
|
|
|
|
|
[[file:bsample2.png]]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|