You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

234 lines
9.6 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#+TITLE: Correlation and Regressions
#+PROPERTY: header-args:R :session acj :eval never-export
#+STARTUP: hideall inlineimages hideblocks
#+HTML_HEAD: <style>#content{max-width:1200px;} </style>
#+NAME: sassociation1
#+BEGIN_SRC R :results output list org :exports results
# Load the worker extract. data.table is attached here because a later block
# in this session uses data.table syntax on `worker`.
library(data.table)
worker <- readRDS("plfsdata/plfsacjdata.rds")

# Expose the standardised wage under the shorter name `wage`.
worker$wage <- worker$standardwage

# Treat the categorical codes as factors. State codes pass through
# as.numeric() first -- presumably so that differently padded codes
# (e.g. "02" and "2") end up as one level; confirm against the raw data.
worker$sector <- factor(worker$sector)
worker$state <- factor(as.numeric(worker$state))
worker$religion <- factor(worker$religion)
worker$social_group <- factor(worker$social_group)

# Pearson correlation of wage with years of education, then with age.
cor.test(worker$wage, worker$years_edu)
cor.test(worker$wage, worker$age)
#+end_src
#+RESULTS: sassociation1
#+begin_src org
- Pearson's product-moment correlation
- data: worker$wage and worker$years_edu
- t = 35.998, df = 37633, p-value < 0.00000000000000022
- alternative hypothesis: true correlation is not equal to 0
- 95 percent confidence interval:
- 0.1726625 0.1921962
- sample estimates:
- cor
- 0.1824473
- Pearson's product-moment correlation
- data: worker$wage and worker$age
- t = 9.3777, df = 37633, p-value < 0.00000000000000022
- alternative hypothesis: true correlation is not equal to 0
- 95 percent confidence interval:
- 0.03819950 0.05835859
- sample estimates:
- cor
- 0.04828396
#+end_src
#+NAME: sassociation2
#+BEGIN_SRC R :results output list org :exports results
# Three nested OLS wage regressions on `worker`; each summary is printed.
# The fitted model is held in `t` and overwritten at every step.

# (1) sex, age and years of education only.
t <- lm(wage ~ sex + age + years_edu, data = worker)
summary(t)

# (2) adds sector, social group, religion and quarter.
t <- lm(
  wage ~ sex + age + years_edu + sector + social_group + religion + quarter,
  data = worker
)
summary(t)

# (3) adds state on top of specification (2).
t <- lm(
  wage ~ sex + age + years_edu + sector + social_group + religion + quarter +
    state,
  data = worker
)
summary(t)
#+end_src
#+RESULTS: sassociation2
#+begin_src org
- Call:
- lm(formula = wage ~ sex
- age
- years_edu, data = worker)
- Residuals:
- Min 1Q Median 3Q Max
- -1638.7 -489.5 -72.1 437.6 12305.1
- Coefficients:
- Estimate Std. Error t value Pr(>|t|)
- (Intercept) 2185.0021 19.6473 111.211 < 0.0000000000000002 ***
- sex -667.9011 9.5525 -69.919 < 0.0000000000000002 ***
- age 1.9781 0.3152 6.276 0.000000000352 ***
- years_edu 10.7387 0.8999 11.933 < 0.0000000000000002 ***
- ---
- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
- Residual standard error: 695.3 on 37631 degrees of freedom
- Multiple R-squared: 0.1343, Adjusted R-squared: 0.1342
- F-statistic: 1946 on 3 and 37631 DF, p-value: < 0.00000000000000022
- Call:
- lm(formula = wage ~ sex
- age
- years_edu
- sector
- social_group
- religion
- quarter, data = worker)
- Residuals:
- Min 1Q Median 3Q Max
- -1789.3 -484.2 -60.3 432.2 12337.4
- Coefficients:
- Estimate Std. Error t value Pr(>|t|)
- (Intercept) 1716.2498 23.9940 71.528 < 0.0000000000000002 ***
- sex -621.1712 9.4569 -65.684 < 0.0000000000000002 ***
- age 1.5207 0.3096 4.912 0.00000090742386603 ***
- years_edu 8.0624 0.8860 9.100 < 0.0000000000000002 ***
- sector 211.8919 8.0576 26.297 < 0.0000000000000002 ***
- social_group 15.1192 1.3090 11.550 < 0.0000000000000002 ***
- religion 24.2759 3.0235 8.029 0.00000000000000101 ***
- quarterQ2 33.0336 11.4750 2.879 0.00399 **
- quarterQ3 42.1100 10.9479 3.846 0.00012 ***
- quarterQ4 60.3340 10.7247 5.626 0.00000001860438745 ***
- quarterQ5 96.8388 13.8317 7.001 0.00000000000257946 ***
- ---
- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
- Residual standard error: 682.3 on 37624 degrees of freedom
- Multiple R-squared: 0.1665, Adjusted R-squared: 0.1662
- F-statistic: 751.4 on 10 and 37624 DF, p-value: < 0.00000000000000022
- Call:
- lm(formula = wage ~ sex
- age
- years_edu
- sector
- social_group
- religion
- quarter
- state, data = worker)
- Residuals:
- Min 1Q Median 3Q Max
- -2166.6 -420.9 -26.9 393.6 11885.4
- Coefficients:
- Estimate Std. Error t value Pr(>|t|)
- (Intercept) 2334.9216 31.6980 73.661 < 0.0000000000000002 ***
- sex -600.1791 8.9602 -66.983 < 0.0000000000000002 ***
- age -0.7958 0.2925 -2.720 0.006522 **
- years_edu 3.5156 0.8463 4.154 0.0000327426267356 ***
- sector 170.5218 7.7194 22.090 < 0.0000000000000002 ***
- social_group 11.0980 1.2958 8.565 < 0.0000000000000002 ***
- religion 6.8266 3.0199 2.261 0.023793 *
- quarterQ2 27.5969 10.5232 2.622 0.008733 **
- quarterQ3 47.4106 10.0603 4.713 0.0000024541348868 ***
- quarterQ4 65.0550 9.8696 6.591 0.0000000000441218 ***
- quarterQ5 111.5467 12.9159 8.636 < 0.0000000000000002 ***
- state02 -182.7089 36.7495 -4.972 0.0000006664834133 ***
- state03 -302.4308 31.4091 -9.629 < 0.0000000000000002 ***
- state04 54.6670 157.9356 0.346 0.729243
- state05 -364.2626 49.7310 -7.325 0.0000000000002443 ***
- state06 -158.2256 34.1364 -4.635 0.0000035796630662 ***
- state07 103.8315 85.9533 1.208 0.227056
- state08 -232.7281 31.6792 -7.346 0.0000000000002077 ***
- state09 -625.4561 26.6275 -23.489 < 0.0000000000000002 ***
- state1 -329.4467 43.8025 -7.521 0.0000000000000555 ***
- state10 -228.9421 27.2168 -8.412 < 0.0000000000000002 ***
- state11 229.4834 90.4784 2.536 0.011206 *
- state12 8.6292 53.5113 0.161 0.871889
- state13 -414.2133 108.2687 -3.826 0.000131 ***
- state14 24.9990 33.2053 0.753 0.451537
- state15 -386.4679 38.6430 -10.001 < 0.0000000000000002 ***
- state16 -408.9313 35.7746 -11.431 < 0.0000000000000002 ***
- state17 -169.2716 39.6051 -4.274 0.0000192485172076 ***
- state18 -191.4328 31.9025 -6.001 0.0000000019842884 ***
- state19 -849.3718 25.4194 -33.414 < 0.0000000000000002 ***
- state2 -127.5145 72.9047 -1.749 0.080289 .
- state20 -627.1074 29.7862 -21.054 < 0.0000000000000002 ***
- state21 -739.1066 28.7611 -25.698 < 0.0000000000000002 ***
- state22 -873.9305 30.4180 -28.731 < 0.0000000000000002 ***
- state23 -803.9339 26.7202 -30.087 < 0.0000000000000002 ***
- state24 -541.8985 29.4421 -18.406 < 0.0000000000000002 ***
- state25 57.8958 313.5190 0.185 0.853493
- state26 -211.9977 199.1455 -1.065 0.287093
- state27 -594.3999 24.9000 -23.872 < 0.0000000000000002 ***
- state28 -381.2620 26.4571 -14.411 < 0.0000000000000002 ***
- state29 -367.1243 26.4633 -13.873 < 0.0000000000000002 ***
- state3 -479.1579 40.3655 -11.870 < 0.0000000000000002 ***
- state30 63.4353 74.2422 0.854 0.392869
- state31 157.6998 89.1517 1.769 0.076920 .
- state32 38.5378 26.3576 1.462 0.143717
- state33 -423.9537 25.3135 -16.748 < 0.0000000000000002 ***
- state34 -419.1475 40.5708 -10.331 < 0.0000000000000002 ***
- state35 416.7570 57.3598 7.266 0.0000000000003784 ***
- state36 -31.7199 29.3147 -1.082 0.279238
- state4 371.0493 182.0114 2.039 0.041496 *
- state5 -599.6303 58.8906 -10.182 < 0.0000000000000002 ***
- state6 -197.8508 55.0537 -3.594 0.000326 ***
- state7 253.3238 116.6312 2.172 0.029861 *
- state8 -291.8271 42.4682 -6.872 0.0000000000064448 ***
- state9 -734.4680 34.6586 -21.192 < 0.0000000000000002 ***
- ---
- Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
- Residual standard error: 625.2 on 37580 degrees of freedom
- Multiple R-squared: 0.301, Adjusted R-squared: 0.3
- F-statistic: 299.7 on 54 and 37580 DF, p-value: < 0.00000000000000022
#+end_src
#+NAME: sassociation3
#+BEGIN_SRC R :results output graphics :exports results :file bsample2.png :width 2500 :height 1500 :res 300
# Residual diagnostics for the full wage regression, fitted once with wage in
# levels ("model1") and once with log(wage) ("model2"). The data are re-read
# from disk so this block can be evaluated on its own.
library(data.table)
library(ggplot2)  # provides ggplot(), aes(), geom_point(), facet_wrap()

worker <- readRDS("plfsdata/plfsacjdata.rds")
worker$wage <- worker$standardwage
worker$social_group <- factor(worker$social_group)
worker$religion <- factor(worker$religion)
# Convert state codes to numbers before building the factor, as the
# sassociation1 block does; otherwise codes such as "02" and "2" become
# separate levels. as.character() guards against the column already being
# stored as a factor, where as.numeric() alone would return level indices.
worker$state <- factor(as.numeric(as.character(worker$state)))
worker$sector <- factor(worker$sector)

fit_level <- lm(
  wage ~ sex + age + years_edu + sector + social_group + religion + quarter +
    state,
  data = worker
)
fit_log <- lm(
  log(wage) ~ sex + age + years_edu + sector + social_group + religion +
    quarter + state,
  data = worker
)

# Take the response from each model frame so it always has the same rows as
# the residuals, even if lm() dropped observations with missing values.
resid_df <- rbind(
  data.frame(
    yvar = model.response(model.frame(fit_level)),
    residuals = residuals(fit_level),
    variable = "model1"
  ),
  data.frame(
    yvar = model.response(model.frame(fit_log)),
    residuals = residuals(fit_log),
    variable = "model2"
  )
)

# One panel per model; free scales because levels and logs differ in range.
p <- ggplot(resid_df, aes(x = residuals, y = yvar, group = variable)) +
  geom_point() +
  facet_wrap(. ~ variable, scales = "free")
# Print explicitly so the figure is drawn on the open graphics device.
print(p)
#+end_src
#+RESULTS: sassociation3
[[file:bsample2.png]]
#+NAME: roughwork
#+BEGIN_SRC R :results output list org :exports results
# Share of workers in each schooling category, computed separately by sex.
# copy() matters here: a plain assignment only aliases the data.table, so the
# `:=` calls below would otherwise modify `worker` itself by reference.
edu <- copy(worker)
edu[, years_edu := as.numeric(years_edu)]
# category: 1 = zero years of education, 2 = fewer than 12 years, 3 = the rest.
# (An earlier, reversed coding that was immediately overwritten is dropped.)
edu[, category := ifelse(years_edu == 0, 1, ifelse(years_edu < 12, 2, 3))]
# Count rows per (category, sex), leaving out rows where sex is coded 3.
edu_share <- edu[sex != 3, .(V1 = .N), by = .(category, sex)]
# prop = each category's share of the rows within a sex.
edu_share[, prop := V1 / sum(V1), by = sex]
edu_share
#+end_src
#+RESULTS: roughwork
#+begin_src org
- category sex V1 prop
- 1: 1 2 3697 0.52995986
- 2: 1 1 7066 0.23051577
- 3: 2 2 3047 0.43678326
- 4: 2 1 20363 0.66430692
- 5: 3 1 3224 0.10517731
- 6: 3 2 232 0.03325688
#+end_src