11 KiB
Correlation and Regressions
# Setup: load the data set and prepare the variables used by the models below.
library(data.table)
worker <- readRDS("plfsdata/plfsacjdata.rds")

# Make `standardwage` available under the shorter name `wage`.
worker$wage <- worker$standardwage

# Store the categorical regressors as factors so lm() expands them to dummies.
worker$social_group <- factor(worker$social_group)
worker$religion <- factor(worker$religion)
# State codes are passed through as.numeric() before becoming factor levels.
worker$state <- factor(as.numeric(worker$state))
worker$sector <- factor(worker$sector)

# Correlation tests (cor.test default method) of wage against years of
# education and against age; results print at top level.
cor.test(worker$wage, worker$years_edu)
cor.test(worker$wage, worker$age)
- Pearson's product-moment correlation
- data: worker$wage and worker$years_edu
- t = 35.998, df = 37633, p-value < 0.00000000000000022
- alternative hypothesis: true correlation is not equal to 0
- 95 percent confidence interval:
- 0.1726625 0.1921962
- sample estimates:
- cor
- 0.1824473
- Pearson's product-moment correlation
- data: worker$wage and worker$age
- t = 9.3777, df = 37633, p-value < 0.00000000000000022
- alternative hypothesis: true correlation is not equal to 0
- 95 percent confidence interval:
- 0.03819950 0.05835859
- sample estimates:
- cor
- 0.04828396
# Three nested OLS wage regressions. Each fit is stored in `t`, overwriting the
# previous one, and its summary is printed straight away.
# NOTE(review): unlike sector/social_group/religion/state, `sex` is not
# converted to a factor in the visible setup, so it enters as a single numeric
# term if it is stored as a number -- confirm this is intended.

# Model 1: sex, age and years of education only.
t <- lm(wage ~ sex + age + years_edu, data = worker)
summary(t)

# Model 2: adds sector, social group, religion and quarter.
t <- lm(
  wage ~ sex + age + years_edu + sector + social_group + religion + quarter,
  data = worker
)
summary(t)

# Model 3: additionally includes state.
t <- lm(
  wage ~ sex + age + years_edu +
    sector + social_group + religion + quarter + state,
  data = worker
)
summary(t)
Call: lm(formula = wage ~ sex + age + years_edu, data = worker)
Residuals: Min 1Q Median 3Q Max -292.30 -74.80 -14.61 57.25 2170.72
Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 327.62380 3.44462 95.11 <0.0000000000000002 * sex -97.04291 1.67477 -57.94 <0.0000000000000002 * age 1.38151 0.05526 25.00 <0.0000000000000002 * years_edu 4.48268 0.15777 28.41 <0.0000000000000002 * — Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 121.9 on 37631 degrees of freedom Multiple R-squared: 0.1261, Adjusted R-squared: 0.126 F-statistic: 1809 on 3 and 37631 DF, p-value: < 0.00000000000000022
Call: lm(formula = wage ~ sex + age + years_edu + sector + social_group +
religion + quarter, data = worker)
Residuals: Min 1Q Median 3Q Max -315.65 -72.11 -14.15 52.51 2140.20
Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 257.94757 3.99129 64.628 < 0.0000000000000002 * sex -86.56036 1.65149 -52.414 < 0.0000000000000002 * age 1.23305 0.05423 22.735 < 0.0000000000000002 * years_edu 3.76046 0.15523 24.225 < 0.0000000000000002 * sector2 30.32294 1.41191 21.477 < 0.0000000000000002 * social_group2 29.29921 2.21158 13.248 < 0.0000000000000002 * social_group3 38.85215 2.10791 18.432 < 0.0000000000000002 * social_group9 36.39078 2.37599 15.316 < 0.0000000000000002 * religion2 15.28020 1.88933 8.088 0.000000000000000626 * religion3 70.06425 3.05735 22.917 < 0.0000000000000002 * religion4 0.90753 4.34322 0.209 0.834486 religion5 9.92952 16.27422 0.610 0.541774 religion6 -27.61440 5.46808 -5.050 0.000000443610331942 * religion7 -28.12453 44.72117 -0.629 0.529427 religion9 22.53928 5.90700 3.816 0.000136 * quarterQ2 11.19254 1.98958 5.626 0.000000018618898647 * quarterQ3 15.15791 1.89871 7.983 0.000000000000001465 * quarterQ4 25.67351 1.85931 13.808 < 0.0000000000000002 * quarterQ5 32.23325 2.39793 13.442 < 0.0000000000000002 * — Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 118.3 on 37616 degrees of freedom Multiple R-squared: 0.1778, Adjusted R-squared: 0.1774 F-statistic: 451.8 on 18 and 37616 DF, p-value: < 0.00000000000000022
Call: lm(formula = wage ~ sex + age + years_edu + sector + social_group +
religion + quarter + state, data = worker)
Residuals: Min 1Q Median 3Q Max -421.28 -53.87 -3.68 47.22 1919.02
Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 444.01115 4.77338 93.018 < 0.0000000000000002 * sex -90.39577 1.35581 -66.673 < 0.0000000000000002 * age 0.09864 0.04447 2.218 0.026552 * years_edu 0.78564 0.12845 6.116 0.00000000096630021 * sector2 20.38448 1.17589 17.335 < 0.0000000000000002 * social_group2 8.16327 1.89780 4.301 0.00001701239040995 * social_group3 7.00001 1.82389 3.838 0.000124 * social_group9 14.48824 2.02081 7.170 0.00000000000076649 * religion2 3.06706 1.61097 1.904 0.056937 . religion3 3.99327 3.09503 1.290 0.196982 religion4 1.21196 4.75423 0.255 0.798784 religion5 18.31698 13.00836 1.408 0.159112 religion6 11.40087 4.59791 2.480 0.013158 * religion7 1.18490 35.69842 0.033 0.973522 religion9 22.95917 4.86976 4.715 0.00000243003135992 * quarterQ2 10.21411 1.58877 6.429 0.00000000013002120 * quarterQ3 16.82258 1.51923 11.073 < 0.0000000000000002 * quarterQ4 25.14926 1.49016 16.877 < 0.0000000000000002 * quarterQ5 33.08983 1.95044 16.965 < 0.0000000000000002 * state02 -68.16250 5.62831 -12.111 < 0.0000000000000002 * state03 -96.24984 5.63379 -17.084 < 0.0000000000000002 * state04 -71.16512 23.87015 -2.981 0.002872 * state05 -115.06098 7.55209 -15.236 < 0.0000000000000002 ** state06 -69.65578 5.27142 -13.214 < 0.0000000000000002 * state07 -46.63160 13.00126 -3.587 0.000335 * state08 -93.14096 4.86947 -19.128 < 0.0000000000000002 * state09 -130.45617 4.11177 -31.728 < 0.0000000000000002 * state1 -14.79943 6.61379 -2.238 0.025249 * state10 -111.30326 4.22934 -26.317 < 0.0000000000000002 * state11 -30.89194 13.71763 -2.252 0.024329 * state12 -69.11195 8.17680 -8.452 < 0.0000000000000002 * state13 -23.54944 16.54163 -1.424 0.154557 state14 -58.14358 5.19732 -11.187 < 0.0000000000000002 * state15 -67.54526 6.62181 -10.200 < 0.0000000000000002 * state16 -45.92342 5.48738 -8.369 < 0.0000000000000002 * state17 -53.92123 6.67236 -8.081 0.00000000000000066 * state18 -86.09270 4.86428 -17.699 < 0.0000000000000002 * state19 -150.31537 3.89883 
-38.554 < 0.0000000000000002 * state2 -56.38856 11.04817 -5.104 0.00000033437294375 * state20 -137.77298 4.59079 -30.011 < 0.0000000000000002 * state21 -147.27217 4.45380 -33.067 < 0.0000000000000002 * state22 -172.27146 4.70857 -36.587 < 0.0000000000000002 * state23 -161.52791 4.12939 -39.117 < 0.0000000000000002 * state24 -145.44044 4.54569 -31.995 < 0.0000000000000002 * state25 -63.12764 47.32958 -1.334 0.182281 state26 -102.84861 30.08812 -3.418 0.000631 * state27 -142.91329 3.89854 -36.658 < 0.0000000000000002 * state28 -84.41173 4.12350 -20.471 < 0.0000000000000002 * state29 -100.69647 4.08499 -24.650 < 0.0000000000000002 * state3 -121.25472 6.44927 -18.801 < 0.0000000000000002 * state30 25.50491 11.26961 2.263 0.023632 * state31 112.13284 13.52131 8.293 < 0.0000000000000002 * state32 146.37176 4.10836 35.628 < 0.0000000000000002 * state33 -40.33733 3.97872 -10.138 < 0.0000000000000002 * state34 -61.09002 6.25575 -9.765 < 0.0000000000000002 * state35 17.72228 8.72443 2.031 0.042227 * state36 -85.02720 4.53862 -18.734 < 0.0000000000000002 * state4 -68.28980 27.50122 -2.483 0.013027 * state5 -144.20165 8.92408 -16.159 < 0.0000000000000002 * state6 -71.93804 8.39931 -8.565 < 0.0000000000000002 * state7 -41.36610 17.62915 -2.346 0.018958 * state8 -103.71783 6.46870 -16.034 < 0.0000000000000002 * state9 -152.19543 5.27778 -28.837 < 0.0000000000000002 * — Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 94.36 on 37572 degrees of freedom Multiple R-squared: 0.4771, Adjusted R-squared: 0.4763 F-statistic: 553 on 62 and 37572 DF, p-value: < 0.00000000000000022
# Residual check: fit the full specification with wage in levels and in logs,
# then plot the observed response against the residuals for each model.
library(data.table)
# ggplot(), aes(), geom_point() and facet_wrap() below come from ggplot2, which
# must be attached for this cell to run in a fresh session.
library(ggplot2)

worker <- readRDS("plfsdata/plfsacjdata.rds")
worker$wage <- worker$standardwage
worker$social_group <- factor(worker$social_group)
worker$religion <- factor(worker$religion)
# Convert state codes to numbers before building the factor (as the first
# setup cell does). Otherwise codes that differ only by a leading zero, e.g.
# "02" and "2", become separate levels and the same state gets two dummies.
# as.character() first keeps this correct if `state` is already a factor.
worker$state <- factor(as.numeric(as.character(worker$state)))
worker$sector <- factor(worker$sector)
t9 <- worker

# Same regressors in both models; only the response differs (wage vs log wage).
t <- lm(
  wage ~ sex + age + years_edu + sector + social_group + religion +
    quarter + state,
  data = t9
)
t2 <- lm(
  log(wage) ~ sex + age + years_edu + sector + social_group + religion +
    quarter + state,
  data = t9
)

# Take the observed response from each model frame rather than from `t9`, so
# it stays aligned with the residuals even if lm() dropped incomplete rows.
a <- rbind(
  data.frame(
    yvar = model.response(model.frame(t)),
    residuals = residuals(t),
    variable = "model1"
  ),
  data.frame(
    yvar = model.response(model.frame(t2)),
    residuals = residuals(t2),
    variable = "model2"
  )
)

# One panel per model; free scales because levels and logs differ in range.
p <- ggplot(a, aes(x = residuals, y = yvar, group = variable))
p + geom_point() + facet_wrap(. ~ variable, scales = "free")
# Share of workers in each education category, computed within each sex.
# copy() is needed because plain assignment does not copy a data.table: the
# `:=` steps below would otherwise modify `worker` by reference.
t <- copy(worker)
t[, years_edu := as.numeric(years_edu)]

# Education category, derived once:
#   1 = 0 years of education, 2 = fewer than 12 years, 3 = 12 years or more.
# Rows with missing years_edu get NA. (The earlier 3/2/1 assignments were
# overwritten by this coding and have been removed.)
t[, category := ifelse(years_edu == 0, 1, ifelse(years_edu < 12, 2, 3))]

# Count rows per (category, sex), excluding sex code 3; the count column keeps
# the name V1 so the printed table is unchanged.
t <- t[sex != 3, .(V1 = .N), by = .(category, sex)]

# Proportion of each category within its sex group.
t[, prop := V1 / sum(V1), by = sex]
t
category | sex | V1 | prop |
---|---|---|---|
1 | 2 | 3697 | 0.529959862385321 |
1 | 1 | 7066 | 0.230515773333768 |
2 | 2 | 3047 | 0.436783256880734 |
2 | 1 | 20363 | 0.664306919387988 |
3 | 1 | 3224 | 0.105177307278244 |
3 | 2 | 232 | 0.033256880733945 |