#+TITLE: Correlation and Regressions
#+PROPERTY: header-args:R :session acj :eval never-export
#+STARTUP: hideall inlineimages hideblocks
#+HTML_HEAD:

#+NAME: sassociation1
#+BEGIN_SRC R :results output raw list
# Load the worker data and prepare the variables used by the models below.
# Everything lives in the shared "acj" session, so later blocks see `worker`.
library(data.table)
worker <- readRDS("plfsdata/plfsacjdata.rds")
worker$wage <- worker$standardwage
worker$social_group <- factor(worker$social_group)
worker$religion <- factor(worker$religion)
# Collapse state codes to their numeric value before making them a factor, so
# that "02" and "2" become one level. Going through as.character() keeps this
# correct even if `state` is already a factor (as.numeric() on a factor would
# return the internal level codes instead of the state codes).
worker$state <- factor(as.numeric(as.character(worker$state)))
worker$sector <- factor(worker$sector)
# Pairwise Pearson correlations of the wage with education and with age.
cor.test(worker$wage, worker$years_edu)
cor.test(worker$wage, worker$age)
#+end_src

#+RESULTS: sassociation1
- Pearson's product-moment correlation
- data: worker$wage and worker$years_edu
- t = 35.998, df = 37633, p-value < 0.00000000000000022
- alternative hypothesis: true correlation is not equal to 0
- 95 percent confidence interval:
- 0.1726625 0.1921962
- sample estimates:
- cor
- 0.1824473
- Pearson's product-moment correlation
- data: worker$wage and worker$age
- t = 9.3777, df = 37633, p-value < 0.00000000000000022
- alternative hypothesis: true correlation is not equal to 0
- 95 percent confidence interval:
- 0.03819950 0.05835859
- sample estimates:
- cor
- 0.04828396

#+NAME: sassociation2
#+BEGIN_SRC R :results output raw
# Three nested OLS wage regressions on the `worker` table prepared by
# sassociation1; each fit is stored in `t` and its summary is printed.
#
# NOTE(review): the stored output for the third model lists state02 and state2
# (and similar pairs) as separate levels. The numeric state normalisation in
# sassociation1 cannot produce such levels, so the stored output was not
# generated from that session state. Re-evaluate sassociation1 and then this
# block to refresh it.

# Model 1: individual characteristics only.
t <- lm(wage ~ sex + age + years_edu, data = worker)
summary(t)

# Model 2: adds sector, social group, religion and survey quarter.
t <- lm(wage ~ sex + age + years_edu + sector + social_group + religion + quarter,
        data = worker)
summary(t)

# Model 3: adds state fixed effects.
t <- lm(wage ~ sex + age + years_edu + sector + social_group + religion + quarter + state,
        data = worker)
summary(t)
#+end_src

#+RESULTS: sassociation2

Call:
lm(formula = wage ~ sex + age + years_edu, data = worker)

Residuals:
    Min      1Q  Median      3Q     Max
-292.30  -74.80  -14.61   57.25 2170.72

Coefficients:
             Estimate Std. Error t value            Pr(>|t|)
(Intercept) 327.62380    3.44462   95.11 <0.0000000000000002 ***
sex         -97.04291    1.67477  -57.94 <0.0000000000000002 ***
age           1.38151    0.05526   25.00 <0.0000000000000002 ***
years_edu     4.48268    0.15777   28.41 <0.0000000000000002 ***
---
Signif.
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 121.9 on 37631 degrees of freedom
Multiple R-squared: 0.1261, Adjusted R-squared: 0.126
F-statistic: 1809 on 3 and 37631 DF, p-value: < 0.00000000000000022


Call:
lm(formula = wage ~ sex + age + years_edu + sector + social_group + religion + quarter, data = worker)

Residuals:
    Min      1Q  Median      3Q     Max
-315.65  -72.11  -14.15   52.51 2140.20

Coefficients:
               Estimate Std. Error t value             Pr(>|t|)
(Intercept)   257.94757    3.99129  64.628 < 0.0000000000000002 ***
sex           -86.56036    1.65149 -52.414 < 0.0000000000000002 ***
age             1.23305    0.05423  22.735 < 0.0000000000000002 ***
years_edu       3.76046    0.15523  24.225 < 0.0000000000000002 ***
sector2        30.32294    1.41191  21.477 < 0.0000000000000002 ***
social_group2  29.29921    2.21158  13.248 < 0.0000000000000002 ***
social_group3  38.85215    2.10791  18.432 < 0.0000000000000002 ***
social_group9  36.39078    2.37599  15.316 < 0.0000000000000002 ***
religion2      15.28020    1.88933   8.088 0.000000000000000626 ***
religion3      70.06425    3.05735  22.917 < 0.0000000000000002 ***
religion4       0.90753    4.34322   0.209             0.834486
religion5       9.92952   16.27422   0.610             0.541774
religion6     -27.61440    5.46808  -5.050 0.000000443610331942 ***
religion7     -28.12453   44.72117  -0.629             0.529427
religion9      22.53928    5.90700   3.816             0.000136 ***
quarterQ2      11.19254    1.98958   5.626 0.000000018618898647 ***
quarterQ3      15.15791    1.89871   7.983 0.000000000000001465 ***
quarterQ4      25.67351    1.85931  13.808 < 0.0000000000000002 ***
quarterQ5      32.23325    2.39793  13.442 < 0.0000000000000002 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 118.3 on 37616 degrees of freedom
Multiple R-squared: 0.1778, Adjusted R-squared: 0.1774
F-statistic: 451.8 on 18 and 37616 DF, p-value: < 0.00000000000000022


Call:
lm(formula = wage ~ sex + age + years_edu + sector + social_group + religion + quarter + state, data = worker)

Residuals:
    Min      1Q  Median      3Q     Max
-421.28  -53.87   -3.68   47.22 1919.02

Coefficients:
Estimate Std.
Error t value Pr(>|t|) (Intercept) 444.01115 4.77338 93.018 < 0.0000000000000002 *** sex -90.39577 1.35581 -66.673 < 0.0000000000000002 *** age 0.09864 0.04447 2.218 0.026552 * years_edu 0.78564 0.12845 6.116 0.00000000096630021 *** sector2 20.38448 1.17589 17.335 < 0.0000000000000002 *** social_group2 8.16327 1.89780 4.301 0.00001701239040995 *** social_group3 7.00001 1.82389 3.838 0.000124 *** social_group9 14.48824 2.02081 7.170 0.00000000000076649 *** religion2 3.06706 1.61097 1.904 0.056937 . religion3 3.99327 3.09503 1.290 0.196982 religion4 1.21196 4.75423 0.255 0.798784 religion5 18.31698 13.00836 1.408 0.159112 religion6 11.40087 4.59791 2.480 0.013158 * religion7 1.18490 35.69842 0.033 0.973522 religion9 22.95917 4.86976 4.715 0.00000243003135992 *** quarterQ2 10.21411 1.58877 6.429 0.00000000013002120 *** quarterQ3 16.82258 1.51923 11.073 < 0.0000000000000002 *** quarterQ4 25.14926 1.49016 16.877 < 0.0000000000000002 *** quarterQ5 33.08983 1.95044 16.965 < 0.0000000000000002 *** state02 -68.16250 5.62831 -12.111 < 0.0000000000000002 *** state03 -96.24984 5.63379 -17.084 < 0.0000000000000002 *** state04 -71.16512 23.87015 -2.981 0.002872 ** state05 -115.06098 7.55209 -15.236 < 0.0000000000000002 *** state06 -69.65578 5.27142 -13.214 < 0.0000000000000002 *** state07 -46.63160 13.00126 -3.587 0.000335 *** state08 -93.14096 4.86947 -19.128 < 0.0000000000000002 *** state09 -130.45617 4.11177 -31.728 < 0.0000000000000002 *** state1 -14.79943 6.61379 -2.238 0.025249 * state10 -111.30326 4.22934 -26.317 < 0.0000000000000002 *** state11 -30.89194 13.71763 -2.252 0.024329 * state12 -69.11195 8.17680 -8.452 < 0.0000000000000002 *** state13 -23.54944 16.54163 -1.424 0.154557 state14 -58.14358 5.19732 -11.187 < 0.0000000000000002 *** state15 -67.54526 6.62181 -10.200 < 0.0000000000000002 *** state16 -45.92342 5.48738 -8.369 < 0.0000000000000002 *** state17 -53.92123 6.67236 -8.081 0.00000000000000066 *** state18 -86.09270 4.86428 -17.699 < 0.0000000000000002 *** 
state19 -150.31537 3.89883 -38.554 < 0.0000000000000002 *** state2 -56.38856 11.04817 -5.104 0.00000033437294375 *** state20 -137.77298 4.59079 -30.011 < 0.0000000000000002 *** state21 -147.27217 4.45380 -33.067 < 0.0000000000000002 *** state22 -172.27146 4.70857 -36.587 < 0.0000000000000002 *** state23 -161.52791 4.12939 -39.117 < 0.0000000000000002 *** state24 -145.44044 4.54569 -31.995 < 0.0000000000000002 *** state25 -63.12764 47.32958 -1.334 0.182281 state26 -102.84861 30.08812 -3.418 0.000631 *** state27 -142.91329 3.89854 -36.658 < 0.0000000000000002 *** state28 -84.41173 4.12350 -20.471 < 0.0000000000000002 *** state29 -100.69647 4.08499 -24.650 < 0.0000000000000002 *** state3 -121.25472 6.44927 -18.801 < 0.0000000000000002 *** state30 25.50491 11.26961 2.263 0.023632 * state31 112.13284 13.52131 8.293 < 0.0000000000000002 *** state32 146.37176 4.10836 35.628 < 0.0000000000000002 *** state33 -40.33733 3.97872 -10.138 < 0.0000000000000002 *** state34 -61.09002 6.25575 -9.765 < 0.0000000000000002 *** state35 17.72228 8.72443 2.031 0.042227 * state36 -85.02720 4.53862 -18.734 < 0.0000000000000002 *** state4 -68.28980 27.50122 -2.483 0.013027 * state5 -144.20165 8.92408 -16.159 < 0.0000000000000002 *** state6 -71.93804 8.39931 -8.565 < 0.0000000000000002 *** state7 -41.36610 17.62915 -2.346 0.018958 * state8 -103.71783 6.46870 -16.034 < 0.0000000000000002 *** state9 -152.19543 5.27778 -28.837 < 0.0000000000000002 *** --- Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 94.36 on 37572 degrees of freedom
Multiple R-squared: 0.4771, Adjusted R-squared: 0.4763
F-statistic: 553 on 62 and 37572 DF, p-value: < 0.00000000000000022

#+NAME: sassociation3
#+BEGIN_SRC R :results output graphics :file bsample2.png :width 2500 :height 1500 :res 300
# Plot the response against the residuals for the full wage model, once with
# the wage in levels ("model1") and once with log(wage) ("model2").
# The block reloads the data itself, so it also loads every package it uses.
library(data.table)
library(ggplot2)
worker <- readRDS("plfsdata/plfsacjdata.rds")
worker$wage <- worker$standardwage
worker$social_group <- factor(worker$social_group)
worker$religion <- factor(worker$religion)
# Normalise state codes to their numeric value, as sassociation1 does.
# `worker` is shared through the session, so a plain factor(worker$state) here
# would leave duplicate levels such as "02" and "2" for every later block.
worker$state <- factor(as.numeric(as.character(worker$state)))
worker$sector <- factor(worker$sector)
t9 <- worker

t <- lm(wage ~ sex + age + years_edu + sector + social_group + religion + quarter + state,
        data = t9)
t2 <- lm(log(wage) ~ sex + age + years_edu + sector + social_group + religion + quarter + state,
         data = t9)

# Take the response from each model frame rather than from t9: the model frame
# holds exactly the rows lm() used, so it always lines up with residuals(),
# even if rows with missing values were dropped.
a <- rbind(
  data.frame(yvar = model.response(model.frame(t)),
             residuals = residuals(t), variable = "model1"),
  data.frame(yvar = model.response(model.frame(t2)),
             residuals = residuals(t2), variable = "model2")
)

p <- ggplot(a, aes(x = residuals, y = yvar, group = variable))
# Free scales: the two panels are in different units (wage vs. log wage).
p + geom_point() + facet_wrap(. ~ variable, scales = "free")
#+end_src

#+RESULTS: sassociation3
[[file:bsample2.png]]

#+NAME: roughwork
#+BEGIN_SRC R :results value :colnames yes :hlines
# Share of workers in each education category, computed separately by sex.
# category: 1 = years_edu equal to 0, 2 = below 12 (and not 0), 3 = otherwise.
# Rows with sex == 3 are excluded.

# copy() so the := assignments below cannot modify the session-wide `worker`
# table by reference.
t <- copy(worker)
t[, years_edu := as.numeric(years_edu)]
# Single definition of `category`. The earlier `category := ...` assignments
# used the reverse coding and were fully overwritten by this ifelse(), so they
# have been removed.
t[, category := ifelse(years_edu == 0, 1, ifelse(years_edu < 12, 2, 3))]
# V1 = number of persons in each (category, sex) cell.
t <- t[sex != 3, .(length(person_no)), .(category, sex)]
# prop = the cell's share within its sex.
t[, prop := V1 / sum(V1), sex]
t
#+end_src

#+RESULTS: roughwork
| category | sex |    V1 |              prop |
|----------+-----+-------+-------------------|
|        1 |   2 |  3697 | 0.529959862385321 |
|        1 |   1 |  7066 | 0.230515773333768 |
|        2 |   2 |  3047 | 0.436783256880734 |
|        2 |   1 | 20363 | 0.664306919387988 |
|        3 |   1 |  3224 | 0.105177307278244 |
|        3 |   2 |   232 | 0.033256880733945 |