You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

11 KiB

Correlation and Regressions

  # Load the worker data and prepare the variables used by the correlation
  # tests and regressions that follow.
  library(data.table)
  worker <- readRDS("plfsdata/plfsacjdata.rds")
  worker$wage <- worker$standardwage

  # Store the categorical codes as factors so lm() expands them into dummies.
  worker$social_group <- factor(worker$social_group)
  worker$religion <- factor(worker$religion)
  # Convert via as.character() first: if state is stored as a factor,
  # as.numeric() alone would return the internal level indices instead of the
  # state codes. For character or integer-valued numeric input the result is
  # the same as calling as.numeric() directly.
  worker$state <- factor(as.numeric(as.character(worker$state)))
  worker$sector <- factor(worker$sector)

  # Pearson correlation of the wage with years of education and with age.
  cor.test(worker$wage, worker$years_edu)
  cor.test(worker$wage, worker$age)
- Pearson's product-moment correlation
- data:  worker$wage and worker$years_edu
- t = 35.998, df = 37633, p-value < 0.00000000000000022
- alternative hypothesis: true correlation is not equal to 0
- 95 percent confidence interval:
- 0.1726625 0.1921962
- sample estimates:
- cor 
- 0.1824473
- Pearson's product-moment correlation
- data:  worker$wage and worker$age
- t = 9.3777, df = 37633, p-value < 0.00000000000000022
- alternative hypothesis: true correlation is not equal to 0
- 95 percent confidence interval:
- 0.03819950 0.05835859
- sample estimates:
- cor 
- 0.04828396
  # Model 1: wage regressed on sex, age and years of education.
  t <- lm(wage ~ sex + age + years_edu, data = worker)
  summary(t)

  # Model 2: adds sector, social group, religion and quarter.
  t <- lm(
    wage ~ sex + age + years_edu + sector + social_group + religion + quarter,
    data = worker
  )
  summary(t)

  # Model 3: additionally includes the state factor.
  t <- lm(
    wage ~ sex + age + years_edu + sector + social_group + religion + quarter +
      state,
    data = worker
  )
  summary(t)
- Call:
- lm(formula = wage ~ sex + age + years_edu, data = worker)
- Residuals:
- Min      1Q  Median      3Q     Max 
- -292.30  -74.80  -14.61   57.25 2170.72 
- Coefficients:
- Estimate Std. Error t value            Pr(>|t|)    
- (Intercept) 327.62380    3.44462   95.11 <0.0000000000000002 ***
- sex         -97.04291    1.67477  -57.94 <0.0000000000000002 ***
- age           1.38151    0.05526   25.00 <0.0000000000000002 ***
- years_edu     4.48268    0.15777   28.41 <0.0000000000000002 ***
- ---
- Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
- Residual standard error: 121.9 on 37631 degrees of freedom
- Multiple R-squared:  0.1261,	Adjusted R-squared:  0.126 
- F-statistic:  1809 on 3 and 37631 DF,  p-value: < 0.00000000000000022
- Call:
- lm(formula = wage ~ sex + age + years_edu + sector + social_group + religion + quarter, data = worker)
- Residuals:
- Min      1Q  Median      3Q     Max 
- -315.65  -72.11  -14.15   52.51 2140.20 
- Coefficients:
- Estimate Std. Error t value             Pr(>|t|)    
- (Intercept)   257.94757    3.99129  64.628 < 0.0000000000000002 ***
- sex           -86.56036    1.65149 -52.414 < 0.0000000000000002 ***
- age             1.23305    0.05423  22.735 < 0.0000000000000002 ***
- years_edu       3.76046    0.15523  24.225 < 0.0000000000000002 ***
- sector2        30.32294    1.41191  21.477 < 0.0000000000000002 ***
- social_group2  29.29921    2.21158  13.248 < 0.0000000000000002 ***
- social_group3  38.85215    2.10791  18.432 < 0.0000000000000002 ***
- social_group9  36.39078    2.37599  15.316 < 0.0000000000000002 ***
- religion2      15.28020    1.88933   8.088 0.000000000000000626 ***
- religion3      70.06425    3.05735  22.917 < 0.0000000000000002 ***
- religion4       0.90753    4.34322   0.209             0.834486    
- religion5       9.92952   16.27422   0.610             0.541774    
- religion6     -27.61440    5.46808  -5.050 0.000000443610331942 ***
- religion7     -28.12453   44.72117  -0.629             0.529427    
- religion9      22.53928    5.90700   3.816             0.000136 ***
- quarterQ2      11.19254    1.98958   5.626 0.000000018618898647 ***
- quarterQ3      15.15791    1.89871   7.983 0.000000000000001465 ***
- quarterQ4      25.67351    1.85931  13.808 < 0.0000000000000002 ***
- quarterQ5      32.23325    2.39793  13.442 < 0.0000000000000002 ***
- ---
- Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
- Residual standard error: 118.3 on 37616 degrees of freedom
- Multiple R-squared:  0.1778,	Adjusted R-squared:  0.1774 
- F-statistic: 451.8 on 18 and 37616 DF,  p-value: < 0.00000000000000022
- Call:
- lm(formula = wage ~ sex + age + years_edu + sector + social_group + religion + quarter + state, data = worker)
- Residuals:
- Min      1Q  Median      3Q     Max 
- -421.28  -53.87   -3.68   47.22 1919.02 
- Coefficients:
- Estimate Std. Error t value             Pr(>|t|)    
- (Intercept)    444.01115    4.77338  93.018 < 0.0000000000000002 ***
- sex            -90.39577    1.35581 -66.673 < 0.0000000000000002 ***
- age              0.09864    0.04447   2.218             0.026552 *  
- years_edu        0.78564    0.12845   6.116  0.00000000096630021 ***
- sector2         20.38448    1.17589  17.335 < 0.0000000000000002 ***
- social_group2    8.16327    1.89780   4.301  0.00001701239040995 ***
- social_group3    7.00001    1.82389   3.838             0.000124 ***
- social_group9   14.48824    2.02081   7.170  0.00000000000076649 ***
- religion2        3.06706    1.61097   1.904             0.056937 .  
- religion3        3.99327    3.09503   1.290             0.196982    
- religion4        1.21196    4.75423   0.255             0.798784    
- religion5       18.31698   13.00836   1.408             0.159112    
- religion6       11.40087    4.59791   2.480             0.013158 *  
- religion7        1.18490   35.69842   0.033             0.973522    
- religion9       22.95917    4.86976   4.715  0.00000243003135992 ***
- quarterQ2       10.21411    1.58877   6.429  0.00000000013002120 ***
- quarterQ3       16.82258    1.51923  11.073 < 0.0000000000000002 ***
- quarterQ4       25.14926    1.49016  16.877 < 0.0000000000000002 ***
- quarterQ5       33.08983    1.95044  16.965 < 0.0000000000000002 ***
- state02        -68.16250    5.62831 -12.111 < 0.0000000000000002 ***
- state03        -96.24984    5.63379 -17.084 < 0.0000000000000002 ***
- state04        -71.16512   23.87015  -2.981             0.002872 ** 
- state05       -115.06098    7.55209 -15.236 < 0.0000000000000002 ***
- state06        -69.65578    5.27142 -13.214 < 0.0000000000000002 ***
- state07        -46.63160   13.00126  -3.587             0.000335 ***
- state08        -93.14096    4.86947 -19.128 < 0.0000000000000002 ***
- state09       -130.45617    4.11177 -31.728 < 0.0000000000000002 ***
- state1         -14.79943    6.61379  -2.238             0.025249 *  
- state10       -111.30326    4.22934 -26.317 < 0.0000000000000002 ***
- state11        -30.89194   13.71763  -2.252             0.024329 *  
- state12        -69.11195    8.17680  -8.452 < 0.0000000000000002 ***
- state13        -23.54944   16.54163  -1.424             0.154557    
- state14        -58.14358    5.19732 -11.187 < 0.0000000000000002 ***
- state15        -67.54526    6.62181 -10.200 < 0.0000000000000002 ***
- state16        -45.92342    5.48738  -8.369 < 0.0000000000000002 ***
- state17        -53.92123    6.67236  -8.081  0.00000000000000066 ***
- state18        -86.09270    4.86428 -17.699 < 0.0000000000000002 ***
- state19       -150.31537    3.89883 -38.554 < 0.0000000000000002 ***
- state2         -56.38856   11.04817  -5.104  0.00000033437294375 ***
- state20       -137.77298    4.59079 -30.011 < 0.0000000000000002 ***
- state21       -147.27217    4.45380 -33.067 < 0.0000000000000002 ***
- state22       -172.27146    4.70857 -36.587 < 0.0000000000000002 ***
- state23       -161.52791    4.12939 -39.117 < 0.0000000000000002 ***
- state24       -145.44044    4.54569 -31.995 < 0.0000000000000002 ***
- state25        -63.12764   47.32958  -1.334             0.182281    
- state26       -102.84861   30.08812  -3.418             0.000631 ***
- state27       -142.91329    3.89854 -36.658 < 0.0000000000000002 ***
- state28        -84.41173    4.12350 -20.471 < 0.0000000000000002 ***
- state29       -100.69647    4.08499 -24.650 < 0.0000000000000002 ***
- state3        -121.25472    6.44927 -18.801 < 0.0000000000000002 ***
- state30         25.50491   11.26961   2.263             0.023632 *  
- state31        112.13284   13.52131   8.293 < 0.0000000000000002 ***
- state32        146.37176    4.10836  35.628 < 0.0000000000000002 ***
- state33        -40.33733    3.97872 -10.138 < 0.0000000000000002 ***
- state34        -61.09002    6.25575  -9.765 < 0.0000000000000002 ***
- state35         17.72228    8.72443   2.031             0.042227 *  
- state36        -85.02720    4.53862 -18.734 < 0.0000000000000002 ***
- state4         -68.28980   27.50122  -2.483             0.013027 *  
- state5        -144.20165    8.92408 -16.159 < 0.0000000000000002 ***
- state6         -71.93804    8.39931  -8.565 < 0.0000000000000002 ***
- state7         -41.36610   17.62915  -2.346             0.018958 *  
- state8        -103.71783    6.46870 -16.034 < 0.0000000000000002 ***
- state9        -152.19543    5.27778 -28.837 < 0.0000000000000002 ***
- ---
- Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
- Residual standard error: 94.36 on 37572 degrees of freedom
- Multiple R-squared:  0.4771,	Adjusted R-squared:  0.4763 
- F-statistic:   553 on 62 and 37572 DF,  p-value: < 0.00000000000000022
  # Plot the response against the residuals for the wage model in levels
  # ("model1") and in logs ("model2"), one facet per model.
  library(data.table)
  library(ggplot2)  # provides ggplot(), aes(), geom_point() and facet_wrap()
  worker <- readRDS("plfsdata/plfsacjdata.rds")
  worker$wage <- worker$standardwage
  worker$social_group <- factor(worker$social_group)
  worker$religion <- factor(worker$religion)
  # Convert the state codes to numbers before building the factor so that
  # differently padded codes (e.g. "02" and "2") collapse into one level.
  # as.character() guards against state being stored as a factor, where
  # as.numeric() alone would return level indices.
  worker$state <- factor(as.numeric(as.character(worker$state)))
  worker$sector <- factor(worker$sector)
  t9 <- worker

  t <- lm(
    wage ~ sex + age + years_edu + sector + social_group + religion + quarter +
      state,
    data = t9
  )
  t2 <- lm(
    log(wage) ~ sex + age + years_edu + sector + social_group + religion +
      quarter + state,
    data = t9
  )

  # Take the response from each fitted model frame rather than from t9, so it
  # has the same rows as the residuals even if lm() dropped incomplete cases.
  a <- rbind(
    data.frame(
      yvar = model.response(model.frame(t)),
      residuals = residuals(t),
      variable = "model1"
    ),
    data.frame(
      yvar = model.response(model.frame(t2)),
      residuals = residuals(t2),
      variable = "model2"
    )
  )

  p <- ggplot(a, aes(x = residuals, y = yvar, group = variable))
  # Free scales: the two models are on different units (wage vs log wage).
  p + geom_point() + facet_wrap(. ~ variable, scales = "free")

/Courseware/quantitative-methods/src/commit/2611bf2a3c86f808c6f950e4268d96959a408489/bsample2.png

  # Share of workers in each education category, computed within each sex.
  # copy() is needed because := updates a data.table by reference; a plain
  # assignment would make the changes below show up in `worker` as well.
  t <- copy(worker)
  t[, years_edu := as.numeric(years_edu)]

  # Education categories: 1 = zero years of education, 2 = fewer than 12
  # years (but not zero), 3 = 12 years or more. Missing years_edu stays NA.
  t[, category := ifelse(years_edu == 0, 1, ifelse(years_edu < 12, 2, 3))]

  # Count rows per category and sex, leaving out rows with sex code 3; the
  # count column keeps the name V1. Then turn counts into proportions that
  # sum to one within each sex.
  t <- t[sex != 3, .(V1 = .N), .(category, sex)]
  t[, prop := V1 / sum(V1), sex]
  t
category sex V1 prop
1 2 3697 0.529959862385321
1 1 7066 0.230515773333768
2 2 3047 0.436783256880734
2 1 20363 0.664306919387988
3 1 3224 0.105177307278244
3 2 232 0.033256880733945