#+TITLE: Correlation and Regressions
#+PROPERTY: header-args:R :session acj :eval never-export
#+STARTUP: hideall inlineimages hideblocks
#+HTML_HEAD:

#+NAME: sassociation1
#+BEGIN_SRC R :results output list org :exports results
# Read the saved worker data, prepare the regression variables and test how
# wage correlates with years of education and with age.
library(data.table)

worker <- readRDS("plfsdata/plfsacjdata.rds")
worker$wage <- worker$standardwage

# Categorical codes have to enter the models as factors, not as numbers.
# `sex` is a code as well (the roughwork block filters sex != 3), so it is
# converted together with the other categorical columns.
worker$sex <- factor(worker$sex)
worker$social_group <- factor(worker$social_group)
worker$religion <- factor(worker$religion)
worker$sector <- factor(worker$sector)

# The stored regression output lists both "state02" and "state2", i.e. the
# state codes are not padded consistently; going through numeric merges them.
# as.character() first keeps the conversion correct even when the column is
# already a factor (as.numeric() on a factor returns the level index).
worker$state <- factor(as.numeric(as.character(worker$state)))

cor.test(worker$wage, worker$years_edu)
cor.test(worker$wage, worker$age)
#+end_src

#+RESULTS: sassociation1
#+begin_src org
- Pearson's product-moment correlation
- data: worker$wage and worker$years_edu
- t = 35.998, df = 37633, p-value < 0.00000000000000022
- alternative hypothesis: true correlation is not equal to 0
- 95 percent confidence interval:
- 0.1726625 0.1921962
- sample estimates:
- cor
- 0.1824473
- Pearson's product-moment correlation
- data: worker$wage and worker$age
- t = 9.3777, df = 37633, p-value < 0.00000000000000022
- alternative hypothesis: true correlation is not equal to 0
- 95 percent confidence interval:
- 0.03819950 0.05835859
- sample estimates:
- cor
- 0.04828396
#+end_src

#+NAME: sassociation2
#+BEGIN_SRC R :results output list org :exports results
# Three nested OLS wage models fitted on `worker` (prepared in sassociation1):
# demographics only, then household/job controls, then state effects.
# NOTE(review): the stored #+RESULTS show single slopes for sex, sector,
# social_group and religion, so they appear to predate the factor
# conversions; re-run this block to refresh them.
model_basic <- lm(wage ~ sex + age + years_edu, data = worker)
summary(model_basic)

model_controls <- lm(wage ~ sex + age + years_edu + sector + social_group +
                       religion + quarter,
                     data = worker)
summary(model_controls)

model_state <- lm(wage ~ sex + age + years_edu + sector + social_group +
                    religion + quarter + state,
                  data = worker)
summary(model_state)
#+end_src

#+RESULTS: sassociation2
#+begin_src org
- Call:
- lm(formula = wage ~ sex + age + years_edu, data = worker)
- Residuals:
- Min 1Q Median 3Q Max
- -1638.7 -489.5 -72.1 437.6 12305.1
- Coefficients:
- Estimate Std.
Error t value Pr(>|t|) - (Intercept) 2185.0021 19.6473 111.211 < 0.0000000000000002 *** - sex -667.9011 9.5525 -69.919 < 0.0000000000000002 *** - age 1.9781 0.3152 6.276 0.000000000352 *** - years_edu 10.7387 0.8999 11.933 < 0.0000000000000002 *** - --- - Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - Residual standard error: 695.3 on 37631 degrees of freedom - Multiple R-squared: 0.1343, Adjusted R-squared: 0.1342 - F-statistic: 1946 on 3 and 37631 DF, p-value: < 0.00000000000000022 - Call: - lm(formula = wage ~ sex + age + years_edu + sector + social_group + religion + quarter, data = worker) - Residuals: - Min 1Q Median 3Q Max - -1789.3 -484.2 -60.3 432.2 12337.4 - Coefficients: - Estimate Std. Error t value Pr(>|t|) - (Intercept) 1716.2498 23.9940 71.528 < 0.0000000000000002 *** - sex -621.1712 9.4569 -65.684 < 0.0000000000000002 *** - age 1.5207 0.3096 4.912 0.00000090742386603 *** - years_edu 8.0624 0.8860 9.100 < 0.0000000000000002 *** - sector 211.8919 8.0576 26.297 < 0.0000000000000002 *** - social_group 15.1192 1.3090 11.550 < 0.0000000000000002 *** - religion 24.2759 3.0235 8.029 0.00000000000000101 *** - quarterQ2 33.0336 11.4750 2.879 0.00399 ** - quarterQ3 42.1100 10.9479 3.846 0.00012 *** - quarterQ4 60.3340 10.7247 5.626 0.00000001860438745 *** - quarterQ5 96.8388 13.8317 7.001 0.00000000000257946 *** - --- - Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - Residual standard error: 682.3 on 37624 degrees of freedom - Multiple R-squared: 0.1665, Adjusted R-squared: 0.1662 - F-statistic: 751.4 on 10 and 37624 DF, p-value: < 0.00000000000000022 - Call: - lm(formula = wage ~ sex + age + years_edu + sector + social_group + religion + quarter + state, data = worker) - Residuals: - Min 1Q Median 3Q Max - -2166.6 -420.9 -26.9 393.6 11885.4 - Coefficients: - Estimate Std. 
Error t value Pr(>|t|) - (Intercept) 2334.9216 31.6980 73.661 < 0.0000000000000002 *** - sex -600.1791 8.9602 -66.983 < 0.0000000000000002 *** - age -0.7958 0.2925 -2.720 0.006522 ** - years_edu 3.5156 0.8463 4.154 0.0000327426267356 *** - sector 170.5218 7.7194 22.090 < 0.0000000000000002 *** - social_group 11.0980 1.2958 8.565 < 0.0000000000000002 *** - religion 6.8266 3.0199 2.261 0.023793 * - quarterQ2 27.5969 10.5232 2.622 0.008733 ** - quarterQ3 47.4106 10.0603 4.713 0.0000024541348868 *** - quarterQ4 65.0550 9.8696 6.591 0.0000000000441218 *** - quarterQ5 111.5467 12.9159 8.636 < 0.0000000000000002 *** - state02 -182.7089 36.7495 -4.972 0.0000006664834133 *** - state03 -302.4308 31.4091 -9.629 < 0.0000000000000002 *** - state04 54.6670 157.9356 0.346 0.729243 - state05 -364.2626 49.7310 -7.325 0.0000000000002443 *** - state06 -158.2256 34.1364 -4.635 0.0000035796630662 *** - state07 103.8315 85.9533 1.208 0.227056 - state08 -232.7281 31.6792 -7.346 0.0000000000002077 *** - state09 -625.4561 26.6275 -23.489 < 0.0000000000000002 *** - state1 -329.4467 43.8025 -7.521 0.0000000000000555 *** - state10 -228.9421 27.2168 -8.412 < 0.0000000000000002 *** - state11 229.4834 90.4784 2.536 0.011206 * - state12 8.6292 53.5113 0.161 0.871889 - state13 -414.2133 108.2687 -3.826 0.000131 *** - state14 24.9990 33.2053 0.753 0.451537 - state15 -386.4679 38.6430 -10.001 < 0.0000000000000002 *** - state16 -408.9313 35.7746 -11.431 < 0.0000000000000002 *** - state17 -169.2716 39.6051 -4.274 0.0000192485172076 *** - state18 -191.4328 31.9025 -6.001 0.0000000019842884 *** - state19 -849.3718 25.4194 -33.414 < 0.0000000000000002 *** - state2 -127.5145 72.9047 -1.749 0.080289 . 
- state20 -627.1074 29.7862 -21.054 < 0.0000000000000002 *** - state21 -739.1066 28.7611 -25.698 < 0.0000000000000002 *** - state22 -873.9305 30.4180 -28.731 < 0.0000000000000002 *** - state23 -803.9339 26.7202 -30.087 < 0.0000000000000002 *** - state24 -541.8985 29.4421 -18.406 < 0.0000000000000002 *** - state25 57.8958 313.5190 0.185 0.853493 - state26 -211.9977 199.1455 -1.065 0.287093 - state27 -594.3999 24.9000 -23.872 < 0.0000000000000002 *** - state28 -381.2620 26.4571 -14.411 < 0.0000000000000002 *** - state29 -367.1243 26.4633 -13.873 < 0.0000000000000002 *** - state3 -479.1579 40.3655 -11.870 < 0.0000000000000002 *** - state30 63.4353 74.2422 0.854 0.392869 - state31 157.6998 89.1517 1.769 0.076920 . - state32 38.5378 26.3576 1.462 0.143717 - state33 -423.9537 25.3135 -16.748 < 0.0000000000000002 *** - state34 -419.1475 40.5708 -10.331 < 0.0000000000000002 *** - state35 416.7570 57.3598 7.266 0.0000000000003784 *** - state36 -31.7199 29.3147 -1.082 0.279238 - state4 371.0493 182.0114 2.039 0.041496 * - state5 -599.6303 58.8906 -10.182 < 0.0000000000000002 *** - state6 -197.8508 55.0537 -3.594 0.000326 *** - state7 253.3238 116.6312 2.172 0.029861 * - state8 -291.8271 42.4682 -6.872 0.0000000000064448 *** - state9 -734.4680 34.6586 -21.192 < 0.0000000000000002 *** - --- - Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
- Residual standard error: 625.2 on 37580 degrees of freedom
- Multiple R-squared: 0.301, Adjusted R-squared: 0.3
- F-statistic: 299.7 on 54 and 37580 DF, p-value: < 0.00000000000000022
#+end_src

#+NAME: sassociation3
#+BEGIN_SRC R :results output graphics :exports results :file bsample2.png :width 2500 :height 1500 :res 300
# Scatter of response against residuals for the full wage model, once with
# wage in levels ("model1") and once with log(wage) ("model2").
library(data.table)
library(ggplot2)  # ggplot() is called below; attach it here so the block runs on its own

worker <- readRDS("plfsdata/plfsacjdata.rds")
worker$wage <- worker$standardwage

# Categorical codes enter the models as factors. `sex` is a code too (the
# roughwork block filters sex != 3).
worker$sex <- factor(worker$sex)
worker$social_group <- factor(worker$social_group)
worker$religion <- factor(worker$religion)
worker$sector <- factor(worker$sector)
# Same normalisation as in sassociation1: the stored regression output shows
# both "state02" and "state2", so the codes are merged through numeric.
worker$state <- factor(as.numeric(as.character(worker$state)))

fit_level <- lm(wage ~ sex + age + years_edu + sector + social_group +
                  religion + quarter + state,
                data = worker)
# log() is not finite for non-positive wages and lm() stops on such a
# response, so those rows are left out of the log model.
fit_log <- lm(log(wage) ~ sex + age + years_edu + sector + social_group +
                religion + quarter + state,
              data = worker, subset = wage > 0)

# Take the response from the model frame rather than from `worker`: the model
# frame holds exactly the rows lm() used, so response and residuals always
# have the same length and order, even when rows were dropped.
residual_frame <- function(fit, label) {
  data.frame(yvar = as.numeric(model.response(model.frame(fit))),
             residuals = as.numeric(residuals(fit)),
             variable = label)
}

plot_data <- rbind(residual_frame(fit_level, "model1"),
                   residual_frame(fit_log, "model2"))

p <- ggplot(plot_data, aes(x = residuals, y = yvar, group = variable))
# Explicit print() so the figure is drawn regardless of how the block is evaluated.
print(p + geom_point() + facet_wrap(. ~ variable, scales = "free"))
#+end_src

#+RESULTS: sassociation3
[[file:bsample2.png]]

#+NAME: roughwork
#+BEGIN_SRC R :results output list org :exports results
# Share of each education category within each sex, computed from the
# session's `worker` table.
# copy() is required: `:=` updates a data.table by reference, so working on a
# plain alias would silently add/alter columns in `worker` itself.
edu <- copy(worker)
edu[, years_edu := as.numeric(years_edu)]

# 1 = zero years of education, 2 = fewer than 12 years, 3 = 12 years or more.
# (Earlier `category :=` assignments with the opposite coding were overwritten
# by this expression straight away and have been dropped.)
edu[, category := ifelse(years_edu == 0, 1,
                  ifelse(years_edu < 12, 2, 3))]

# Row counts per category and sex (rows with sex code 3 excluded); the count
# column keeps the name V1 used in the stored results.
counts <- edu[sex != 3, .(V1 = .N), by = .(category, sex)]
counts[, prop := V1 / sum(V1), by = sex]
counts
#+end_src

#+RESULTS: roughwork
#+begin_src org
- category sex V1 prop
- 1: 1 2 3697 0.52995986
- 2: 1 1 7066 0.23051577
- 3: 2 2 3047 0.43678326
- 4: 2 1 20363 0.66430692
- 5: 3 1 3224 0.10517731
- 6: 3 2 232 0.03325688
#+end_src