|
|
|
@ -0,0 +1,233 @@
|
|
|
|
|
#+TITLE: Correlation and Regressions
|
|
|
|
|
#+PROPERTY: header-args:R :session acj :eval never-export
|
|
|
|
|
#+STARTUP: hideall inlineimages hideblocks
|
|
|
|
|
#+HTML_HEAD: <style>#content{max-width:1200px;} </style>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#+NAME: sassociation1
|
|
|
|
|
#+BEGIN_SRC R :results output list org :exports results
|
|
|
|
|
library(data.table)
|
|
|
|
|
readRDS("plfsdata/plfsacjdata.rds")->worker
|
|
|
|
|
worker$standardwage->worker$wage
|
|
|
|
|
factor(worker$social_group)->worker$social_group
|
|
|
|
|
factor(worker$religion)->worker$religion
|
|
|
|
|
factor(as.numeric(worker$state))->worker$state
|
|
|
|
|
factor(worker$sector)->worker$sector
|
|
|
|
|
|
|
|
|
|
cor.test(worker$wage,worker$years_edu)
|
|
|
|
|
cor.test(worker$wage,worker$age)
|
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
|
#+RESULTS: sassociation1
|
|
|
|
|
#+begin_src org
|
|
|
|
|
- Pearson's product-moment correlation
|
|
|
|
|
- data: worker$wage and worker$years_edu
|
|
|
|
|
- t = 35.998, df = 37633, p-value < 0.00000000000000022
|
|
|
|
|
- alternative hypothesis: true correlation is not equal to 0
|
|
|
|
|
- 95 percent confidence interval:
|
|
|
|
|
- 0.1726625 0.1921962
|
|
|
|
|
- sample estimates:
|
|
|
|
|
- cor
|
|
|
|
|
- 0.1824473
|
|
|
|
|
- Pearson's product-moment correlation
|
|
|
|
|
- data: worker$wage and worker$age
|
|
|
|
|
- t = 9.3777, df = 37633, p-value < 0.00000000000000022
|
|
|
|
|
- alternative hypothesis: true correlation is not equal to 0
|
|
|
|
|
- 95 percent confidence interval:
|
|
|
|
|
- 0.03819950 0.05835859
|
|
|
|
|
- sample estimates:
|
|
|
|
|
- cor
|
|
|
|
|
- 0.04828396
|
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#+NAME: sassociation2
|
|
|
|
|
#+BEGIN_SRC R :results output list org :exports results
|
|
|
|
|
lm(wage~sex+age+years_edu,
|
|
|
|
|
data=worker)->t
|
|
|
|
|
summary(t)
|
|
|
|
|
|
|
|
|
|
lm(wage~sex+age+years_edu+sector+social_group+religion+quarter,
|
|
|
|
|
data=worker)->t
|
|
|
|
|
summary(t)
|
|
|
|
|
|
|
|
|
|
lm(wage~sex+age+years_edu+
|
|
|
|
|
sector+social_group+religion+quarter+state,
|
|
|
|
|
data=worker)->t
|
|
|
|
|
summary(t)
|
|
|
|
|
|
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
|
#+RESULTS: sassociation2
|
|
|
|
|
#+begin_src org
|
|
|
|
|
- Call:
|
|
|
|
|
- lm(formula = wage ~ sex
|
|
|
|
|
- age
|
|
|
|
|
- years_edu, data = worker)
|
|
|
|
|
- Residuals:
|
|
|
|
|
- Min 1Q Median 3Q Max
|
|
|
|
|
- -1638.7 -489.5 -72.1 437.6 12305.1
|
|
|
|
|
- Coefficients:
|
|
|
|
|
- Estimate Std. Error t value Pr(>|t|)
|
|
|
|
|
- (Intercept) 2185.0021 19.6473 111.211 < 0.0000000000000002 ***
|
|
|
|
|
- sex -667.9011 9.5525 -69.919 < 0.0000000000000002 ***
|
|
|
|
|
- age 1.9781 0.3152 6.276 0.000000000352 ***
|
|
|
|
|
- years_edu 10.7387 0.8999 11.933 < 0.0000000000000002 ***
|
|
|
|
|
- ---
|
|
|
|
|
- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
|
|
|
|
|
- Residual standard error: 695.3 on 37631 degrees of freedom
|
|
|
|
|
- Multiple R-squared: 0.1343, Adjusted R-squared: 0.1342
|
|
|
|
|
- F-statistic: 1946 on 3 and 37631 DF, p-value: < 0.00000000000000022
|
|
|
|
|
- Call:
|
|
|
|
|
- lm(formula = wage ~ sex
|
|
|
|
|
- age
|
|
|
|
|
- years_edu
|
|
|
|
|
- sector
|
|
|
|
|
- social_group
|
|
|
|
|
- religion
|
|
|
|
|
- quarter, data = worker)
|
|
|
|
|
- Residuals:
|
|
|
|
|
- Min 1Q Median 3Q Max
|
|
|
|
|
- -1789.3 -484.2 -60.3 432.2 12337.4
|
|
|
|
|
- Coefficients:
|
|
|
|
|
- Estimate Std. Error t value Pr(>|t|)
|
|
|
|
|
- (Intercept) 1716.2498 23.9940 71.528 < 0.0000000000000002 ***
|
|
|
|
|
- sex -621.1712 9.4569 -65.684 < 0.0000000000000002 ***
|
|
|
|
|
- age 1.5207 0.3096 4.912 0.00000090742386603 ***
|
|
|
|
|
- years_edu 8.0624 0.8860 9.100 < 0.0000000000000002 ***
|
|
|
|
|
- sector 211.8919 8.0576 26.297 < 0.0000000000000002 ***
|
|
|
|
|
- social_group 15.1192 1.3090 11.550 < 0.0000000000000002 ***
|
|
|
|
|
- religion 24.2759 3.0235 8.029 0.00000000000000101 ***
|
|
|
|
|
- quarterQ2 33.0336 11.4750 2.879 0.00399 **
|
|
|
|
|
- quarterQ3 42.1100 10.9479 3.846 0.00012 ***
|
|
|
|
|
- quarterQ4 60.3340 10.7247 5.626 0.00000001860438745 ***
|
|
|
|
|
- quarterQ5 96.8388 13.8317 7.001 0.00000000000257946 ***
|
|
|
|
|
- ---
|
|
|
|
|
- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
|
|
|
|
|
- Residual standard error: 682.3 on 37624 degrees of freedom
|
|
|
|
|
- Multiple R-squared: 0.1665, Adjusted R-squared: 0.1662
|
|
|
|
|
- F-statistic: 751.4 on 10 and 37624 DF, p-value: < 0.00000000000000022
|
|
|
|
|
- Call:
|
|
|
|
|
- lm(formula = wage ~ sex
|
|
|
|
|
- age
|
|
|
|
|
- years_edu
|
|
|
|
|
- sector
|
|
|
|
|
- social_group
|
|
|
|
|
- religion
|
|
|
|
|
- quarter
|
|
|
|
|
- state, data = worker)
|
|
|
|
|
- Residuals:
|
|
|
|
|
- Min 1Q Median 3Q Max
|
|
|
|
|
- -2166.6 -420.9 -26.9 393.6 11885.4
|
|
|
|
|
- Coefficients:
|
|
|
|
|
- Estimate Std. Error t value Pr(>|t|)
|
|
|
|
|
- (Intercept) 2334.9216 31.6980 73.661 < 0.0000000000000002 ***
|
|
|
|
|
- sex -600.1791 8.9602 -66.983 < 0.0000000000000002 ***
|
|
|
|
|
- age -0.7958 0.2925 -2.720 0.006522 **
|
|
|
|
|
- years_edu 3.5156 0.8463 4.154 0.0000327426267356 ***
|
|
|
|
|
- sector 170.5218 7.7194 22.090 < 0.0000000000000002 ***
|
|
|
|
|
- social_group 11.0980 1.2958 8.565 < 0.0000000000000002 ***
|
|
|
|
|
- religion 6.8266 3.0199 2.261 0.023793 *
|
|
|
|
|
- quarterQ2 27.5969 10.5232 2.622 0.008733 **
|
|
|
|
|
- quarterQ3 47.4106 10.0603 4.713 0.0000024541348868 ***
|
|
|
|
|
- quarterQ4 65.0550 9.8696 6.591 0.0000000000441218 ***
|
|
|
|
|
- quarterQ5 111.5467 12.9159 8.636 < 0.0000000000000002 ***
|
|
|
|
|
- state02 -182.7089 36.7495 -4.972 0.0000006664834133 ***
|
|
|
|
|
- state03 -302.4308 31.4091 -9.629 < 0.0000000000000002 ***
|
|
|
|
|
- state04 54.6670 157.9356 0.346 0.729243
|
|
|
|
|
- state05 -364.2626 49.7310 -7.325 0.0000000000002443 ***
|
|
|
|
|
- state06 -158.2256 34.1364 -4.635 0.0000035796630662 ***
|
|
|
|
|
- state07 103.8315 85.9533 1.208 0.227056
|
|
|
|
|
- state08 -232.7281 31.6792 -7.346 0.0000000000002077 ***
|
|
|
|
|
- state09 -625.4561 26.6275 -23.489 < 0.0000000000000002 ***
|
|
|
|
|
- state1 -329.4467 43.8025 -7.521 0.0000000000000555 ***
|
|
|
|
|
- state10 -228.9421 27.2168 -8.412 < 0.0000000000000002 ***
|
|
|
|
|
- state11 229.4834 90.4784 2.536 0.011206 *
|
|
|
|
|
- state12 8.6292 53.5113 0.161 0.871889
|
|
|
|
|
- state13 -414.2133 108.2687 -3.826 0.000131 ***
|
|
|
|
|
- state14 24.9990 33.2053 0.753 0.451537
|
|
|
|
|
- state15 -386.4679 38.6430 -10.001 < 0.0000000000000002 ***
|
|
|
|
|
- state16 -408.9313 35.7746 -11.431 < 0.0000000000000002 ***
|
|
|
|
|
- state17 -169.2716 39.6051 -4.274 0.0000192485172076 ***
|
|
|
|
|
- state18 -191.4328 31.9025 -6.001 0.0000000019842884 ***
|
|
|
|
|
- state19 -849.3718 25.4194 -33.414 < 0.0000000000000002 ***
|
|
|
|
|
- state2 -127.5145 72.9047 -1.749 0.080289 .
|
|
|
|
|
- state20 -627.1074 29.7862 -21.054 < 0.0000000000000002 ***
|
|
|
|
|
- state21 -739.1066 28.7611 -25.698 < 0.0000000000000002 ***
|
|
|
|
|
- state22 -873.9305 30.4180 -28.731 < 0.0000000000000002 ***
|
|
|
|
|
- state23 -803.9339 26.7202 -30.087 < 0.0000000000000002 ***
|
|
|
|
|
- state24 -541.8985 29.4421 -18.406 < 0.0000000000000002 ***
|
|
|
|
|
- state25 57.8958 313.5190 0.185 0.853493
|
|
|
|
|
- state26 -211.9977 199.1455 -1.065 0.287093
|
|
|
|
|
- state27 -594.3999 24.9000 -23.872 < 0.0000000000000002 ***
|
|
|
|
|
- state28 -381.2620 26.4571 -14.411 < 0.0000000000000002 ***
|
|
|
|
|
- state29 -367.1243 26.4633 -13.873 < 0.0000000000000002 ***
|
|
|
|
|
- state3 -479.1579 40.3655 -11.870 < 0.0000000000000002 ***
|
|
|
|
|
- state30 63.4353 74.2422 0.854 0.392869
|
|
|
|
|
- state31 157.6998 89.1517 1.769 0.076920 .
|
|
|
|
|
- state32 38.5378 26.3576 1.462 0.143717
|
|
|
|
|
- state33 -423.9537 25.3135 -16.748 < 0.0000000000000002 ***
|
|
|
|
|
- state34 -419.1475 40.5708 -10.331 < 0.0000000000000002 ***
|
|
|
|
|
- state35 416.7570 57.3598 7.266 0.0000000000003784 ***
|
|
|
|
|
- state36 -31.7199 29.3147 -1.082 0.279238
|
|
|
|
|
- state4 371.0493 182.0114 2.039 0.041496 *
|
|
|
|
|
- state5 -599.6303 58.8906 -10.182 < 0.0000000000000002 ***
|
|
|
|
|
- state6 -197.8508 55.0537 -3.594 0.000326 ***
|
|
|
|
|
- state7 253.3238 116.6312 2.172 0.029861 *
|
|
|
|
|
- state8 -291.8271 42.4682 -6.872 0.0000000000064448 ***
|
|
|
|
|
- state9 -734.4680 34.6586 -21.192 < 0.0000000000000002 ***
|
|
|
|
|
- ---
|
|
|
|
|
- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
|
|
|
|
|
- Residual standard error: 625.2 on 37580 degrees of freedom
|
|
|
|
|
- Multiple R-squared: 0.301, Adjusted R-squared: 0.3
|
|
|
|
|
- F-statistic: 299.7 on 54 and 37580 DF, p-value: < 0.00000000000000022
|
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
|
#+NAME: sassociation3
|
|
|
|
|
#+BEGIN_SRC R :results output graphics :exports results :file bsample2.png :width 2500 :height 1500 :res 300
|
|
|
|
|
library(data.table)
|
|
|
|
|
readRDS("plfsdata/plfsacjdata.rds")->worker
|
|
|
|
|
worker$standardwage->worker$wage
|
|
|
|
|
factor(worker$social_group)->worker$social_group
|
|
|
|
|
factor(worker$religion)->worker$religion
|
|
|
|
|
factor(worker$state)->worker$state
|
|
|
|
|
factor(worker$sector)->worker$sector
|
|
|
|
|
worker->t9
|
|
|
|
|
lm(wage~sex+age+years_edu+sector+social_group+religion+quarter+state,data=t9)->t
|
|
|
|
|
lm(log(wage)~sex+age+years_edu+sector+social_group+religion+quarter+state,data=t9)->t2
|
|
|
|
|
data.frame(yvar=t9$wage,residuals=residuals(t),variable="model1")->a
|
|
|
|
|
rbind(a,data.frame(yvar=log(t9$wage),residuals=residuals(t2),variable="model2"))->a
|
|
|
|
|
ggplot(a,aes(x=residuals,y=yvar,group=variable))->p
|
|
|
|
|
p+geom_point()+facet_wrap(.~variable,scales="free")
|
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
|
#+RESULTS: sassociation3
|
|
|
|
|
[[file:bsample2.png]]
|
|
|
|
|
|
|
|
|
|
#+NAME: roughwork
|
|
|
|
|
#+BEGIN_SRC R :results output list org :exports results
|
|
|
|
|
worker->t
|
|
|
|
|
t[,years_edu:=as.numeric(years_edu)]
|
|
|
|
|
t[years_edu==0,category:=3]
|
|
|
|
|
t[years_edu>0&years_edu<12,category:=2]
|
|
|
|
|
t[is.na(category),category:=1]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ifelse(t$years_edu==0,1,
|
|
|
|
|
ifelse(t$years_edu<12,2,3))->t$category
|
|
|
|
|
|
|
|
|
|
t[sex!=3,.(length(person_no)),.(category,sex)]->t
|
|
|
|
|
t[,prop:=V1/sum(V1),sex]
|
|
|
|
|
t
|
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
|
#+RESULTS: roughwork
|
|
|
|
|
#+begin_src org
|
|
|
|
|
- category sex V1 prop
|
|
|
|
|
- 1: 1 2 3697 0.52995986
|
|
|
|
|
- 2: 1 1 7066 0.23051577
|
|
|
|
|
- 3: 2 2 3047 0.43678326
|
|
|
|
|
- 4: 2 1 20363 0.66430692
|
|
|
|
|
- 5: 3 1 3224 0.10517731
|
|
|
|
|
- 6: 3 2 232 0.03325688
|
|
|
|
|
#+end_src
|