diff --git a/.gitignore b/.gitignore
index 827ea8b..551fcd0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,8 @@
*
!index.org
!acjlectures.org
+!acjlecturesday1.org
+!acjlecturesday2.org
!.gitignore
!.gitattributes
!graphics
diff --git a/README.md b/README.md
index 63ce5f6..09b2d76 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,6 @@
# quantitative-methods
-[Lectures on Quantitative Methods for ACJ Students](../blob/master/acjlectures.org)
+
+## Lectures on Quantitative Methods for ACJ Students
+
+[Day 1](../blob/master/acjlecturesday1.org)
+[Day 2](../blob/master/acjlecturesday2.org)
diff --git a/acjlectures.org b/acjlecturesday1.org
similarity index 84%
rename from acjlectures.org
rename to acjlecturesday1.org
index 89c7629..ff6783c 100644
--- a/acjlectures.org
+++ b/acjlecturesday1.org
@@ -4,7 +4,7 @@
#+SETUPFILE: https://fniessen.github.io/org-html-themes/setup/theme-readtheorg.setup
#+HTML_HEAD:
-* Title slide :slide:
+* Title slide :slide:
#+BEGIN_SRC emacs-lisp-slide
(org-show-animate '("Quantitative Methods, Part-II" "Vikas Rawal" "Prachi Bansal" "" "" ""))
#+END_SRC
@@ -14,24 +14,24 @@
(org-show-animate '("Why do financial journalists need to know quantitative methods?" "" "" ""))
#+END_SRC
-** What do we aim to achieve in this course? :slide:
+** What do we aim to achieve in this course? :slide:
**** Make friends with numbers
**** Learn how to read numbers, how to present them, and how to write about them
**** Learn how to use computers to work with numbers
-** Two Types of Statistics :slide:
+** Two Types of Statistics :slide:
*** Descriptive Statistics
**** Use summaries of data for the entire population to describe a population
**** Use summaries of sample data to describe a sample
*** Inferential Statistics
**** Use sample data to describe a population
-** Descriptive Statistics :slide:
+** Descriptive Statistics :slide:
+ Frequency
+ Measures of central tendency
+ Summary positions
+ Measures of dispersion
-*** Frequency :slide:
+*** Frequency :slide:
#+NAME: worker-code0
#+begin_src R :results value :export results :colnames yes :hline
@@ -93,13 +93,13 @@
| M | 7 |
| F | 9 |
-*** Measures of Central Tendency :slide:
+*** Measures of Central Tendency :slide:
#+NAME: mid-code
#+begin_src R :results value :export results :colnames yes :hline
workers[,.(mean_salary=round(mean(salary),1),
median_salary=quantile(salary,prob=0.5))]
-#+end_src
+#+end_src
#+RESULTS: mid-code
| mean_salary | median_salary |
@@ -118,7 +118,7 @@
| M | 172428.6 | 50000 |
| F | 46333.3 | 45000 |
-*** Measures of Position :slide:
+*** Measures of Position :slide:
+ First quartile
+ Second quartile (median)
@@ -128,9 +128,9 @@
+ Quintiles
+ Percentiles
-*** Measures of Dispersion :slide:
+*** Measures of Dispersion :slide:
-**** Range and other measures based on positions :slide:
+**** Range and other measures based on positions :slide:
$range=max-min$
@@ -147,7 +147,7 @@ $range=max-min$
range=max(salary)-min(salary))]
#+end_src
-**** Range and other measures based on positions :slide:
+**** Range and other measures based on positions :slide:
+ Distance between any two positions (Deciles, Quintiles, Percentiles) can be used as a measure of dispersion.
@@ -216,35 +216,35 @@ $cov=\frac{standard.deviation}{mean}$
** Graphical Displays of Quantitative Information: Common Pitfalls
-*** Common uses of statistical graphics :slide:
+*** Common uses of statistical graphics :slide:
+ To show trends over time
+ To show mid-point variations across categories
+ To show composition
+ (less commonly, though more usefully) to show/analyse dispersion
-*** Mis-representation :slide:
+*** Mis-representation :slide:
#+CAPTION: "and sometimes the fact that numbers have a magnitude as well as an order is simply forgotten"
[[file:graphics/tufte-insanity.png]]
-*** Mis-representation :slide:
+*** Mis-representation :slide:
#+CAPTION: Another example borrowed from Tufte
[[file:graphics/tufte-fuel.png]]
-*** Mis-representation :slide:
+*** Mis-representation :slide:
#+CAPTION: Tufte's graph on fuel economy of cars
#+attr_html: :width 400px
[[file:graphics/tufte-fuel2.png]]
-*** Mis-representation :slide:
+*** Mis-representation :slide:
#+CAPTION: Nobel prizes awarded in science (National Science Foundation, 1974)
#+attr_html: :width 300px
[[file:graphics/nobel-wrong.png]]
-*** Mis-representation :slide:
+*** Mis-representation :slide:
#+CAPTION: Nobel prizes awarded in science (corrected by Tufte)
#+attr_html: :width 300px
@@ -266,7 +266,7 @@ $cov=\frac{standard.deviation}{mean}$
[[file:graphics/piketty2_c.png]]
-*** The problem multiplied with the coming in of spreadsheets :slide:
+*** The problem multiplied with the coming in of spreadsheets :slide:
#+ATTR_html: :width 300px
[[file:graphics/chart1.png]]
@@ -277,8 +277,8 @@ $cov=\frac{standard.deviation}{mean}$
#+ATTR_html: :width 300px
[[file:graphics/chart3.png]]
-** Graphical Displays of Quantitative Information: Dispersion :slide:
-*** Histogram :slide:
+** Graphical Displays of Quantitative Information: Dispersion :slide:
+*** Histogram :slide:
#+RESULTS: ccpc-wheat-hist1
#+attr_html: :width 800px
@@ -292,7 +292,7 @@ $cov=\frac{standard.deviation}{mean}$
hist(b$yield,main="Histogram of wheat yields",ylim=c(0,4000))
#+END_SRC
-*** Histogram with relative densities :slide:
+*** Histogram with relative densities :slide:
#+RESULTS: ccpc-wheat-hist2
#+attr_html: :width 600px
@@ -306,13 +306,13 @@ $cov=\frac{standard.deviation}{mean}$
hist(b$yield,freq=F,main="Histogram of wheat yields",ylim=c(0,0.00040))
#+END_SRC
-*** Boxplot :slide:
+*** Boxplot :slide:
+ Invented by John Tukey in 1970
+ Many variations proposed since then, though the essential form and idea as remained intact.
-*** Boxplot of wheat yields :slide:
+*** Boxplot of wheat yields :slide:
#+RESULTS: ccpc-wheat-box1
[[file:boxplotyield1.png]]
@@ -325,7 +325,7 @@ $cov=\frac{standard.deviation}{mean}$
boxplot(b$yield,main="Boxplot of wheat yields")
#+END_SRC
-*** Violin plots :slide:
+*** Violin plots :slide:
#+RESULTS: ccpc-wheat-vio1
[[file:vioplotyield1.png]]
@@ -342,7 +342,7 @@ $cov=\frac{standard.deviation}{mean}$
-*** Boxplots: Useful to identify extreme values :slide:
+*** Boxplots: Useful to identify extreme values :slide:
#+RESULTS: ccpc-wheat-box2
@@ -355,7 +355,7 @@ $cov=\frac{standard.deviation}{mean}$
boxplot(b$yield,main="Magnified tail of the boxplot",ylim=c(7000,25000))
#+END_SRC
-*** Boxplots: Useful for comparisons across categories :slide:
+*** Boxplots: Useful for comparisons across categories :slide:
#+RESULTS: ccpc-crop-box3
[[file:boxplotyield3.png]]
@@ -369,7 +369,7 @@ $cov=\frac{standard.deviation}{mean}$
boxplot(yield~Crop_code,data=b,main="Boxplots of yields of various crops",las=3,ylim=c(0,8000),outline=F)
#+END_SRC
-*** Violin plots :slide:
+*** Violin plots :slide:
#+RESULTS: ccpc-crop-vio
[[file:vioplotyield3.png]]
@@ -389,4 +389,3 @@ $cov=\frac{standard.deviation}{mean}$
-* Day 2
diff --git a/acjlecturesday2.org b/acjlecturesday2.org
new file mode 100644
index 0000000..9131d9d
--- /dev/null
+++ b/acjlecturesday2.org
@@ -0,0 +1,236 @@
+#+TITLE: Quantitative Methods
+#+PROPERTY: header-args:R :session acj :eval never-export
+#+STARTUP: hideall inlineimages hideblocks
+#+HTML_HEAD:
+
+* Title slide :slide:
+#+BEGIN_SRC emacs-lisp-slide
+(org-show-animate '("Quantitative Methods, Part-II" "Vikas Rawal" "Prachi Bansal" "" "" ""))
+#+END_SRC
+* Day 2
+
+** Paul Krugman on Fiscal Austerity :slide:
+
+#+attr_html: :width 1200px
+[[file:krugman1.png]]
+
+** Paul Krugman on Fiscal Austerity :slide:
+
+"Here’s what fiscal policy should do: it should support demand when the economy is weak, and it should pull that support back when the economy is strong. As John Maynard Keynes said, “The boom, not the slump, is the right time for austerity.” And up until 2010 the U.S. more or less followed that prescription. Since then, however, fiscal policy has become perverse: first austerity despite high unemployment, now expansion despite low unemployment."
+
+** Unemployment and Fiscal Austerity :slide:
+
+#+RESULTS: graph
+[[file:krugman2.png]]
+#+attr_html: :width 500px
+[[file:krugman2.png]]
+
+
+#+NAME: graph
+#+BEGIN_SRC R :results output graphics :exports results :file krugman2.png :width 3774 :height 3774 :res 600
+ # Fiscal stimulus and unemployment over time, coloured by decade. Period is taken
+ # from the year: the old test date<"10-01-01" compared every date with year 10 AD.
+ library(data.table); library(ggplot2)
+ fread("~/ssercloud/acj2018/krugmandata.csv")->a
+ as.Date(a$date,format="%m/%d/%Y")->a$date
+ factor(ifelse(year(a$date)%%100<10,"2000-2009","2010-2018"))->a$Period # %%100: 2- or 4-digit years
+ melt(a,id.vars=c("date","Period"),measure.vars=c("impact","unemployment"))->t
+ levels(t$variable)<-c("Fiscal stimulus","Unemployment rate")
+ ggplot(t,aes(x=date,y=value,group=variable,colour=Period))+geom_line(size=1.2)->p
+ p+facet_wrap(~variable,scales="free_y",ncol=1)+scale_y_continuous("Per cent")+theme(legend.position="bottom")
+#+END_SRC
+
+
+** Sampling Distributions :slide:
+
+#+RESULTS: sampling
+[[file:bsample.png]]
+
+#+NAME: sampling
+#+BEGIN_SRC R :results output graphics :exports results :file bsample.png :width 4500 :height 3000 :res 600
+ # Sampling-distribution demo for the slide: draws the population wage density,
+ # then overlays the density of 10,000 sample means for each sample size listed
+ # in `overlays`, each with a colour-matched label (mean and stdev of the means).
+ # No seed is set, so the curves and labels vary slightly from run to run.
+ library(data.table)
+ library(ggplot2)
+
+ worker <- readRDS("plfsdata/plfsacjdata.rds")
+ worker$wage <- worker$standardwage
+ #read.table("~/ssercloud/acj2018/worker.csv",sep=",",header=T)->worker
+ worker$SamplingFrameOrder <- seq_len(nrow(worker))
+ worker <- worker[sex!=3,]
+
+ #' Means of repeated simple random samples (without replacement).
+ #'
+ #' @param x Numeric vector treated as the population.
+ #' @param size Number of observations in each sample.
+ #' @param reps Number of samples to draw.
+ #' @return Numeric vector of length `reps`, one sample mean per draw.
+ sample_means <- function(x, size, reps = 10000) {
+   # vapply fills a result of known length instead of growing a vector with
+   # c(), which re-copies the whole vector on every iteration.
+   vapply(
+     seq_len(reps),
+     function(i) mean(x[sample.int(length(x), size)]),
+     numeric(1)
+   )
+ }
+
+ #' Overlay one sampling distribution on a plot and label it.
+ #'
+ #' @param plot ggplot object to extend.
+ #' @param means Numeric vector of sample means.
+ #' @param colour Colour used for both the curve and its label.
+ #' @param label_prefix Text placed before "mean = ...; stdev = ..." in the label.
+ #' @param x,y Position of the label in data coordinates.
+ #' @return `plot` with a density layer and a text annotation added.
+ add_sampling_layer <- function(plot, means, colour, label_prefix, x, y) {
+   label <- paste0(
+     label_prefix,
+     "mean = ", round(mean(means)),
+     "; stdev = ", round(sd(means))
+   )
+   plot +
+     geom_density(
+       data = data.frame(meancol = means), aes(meancol),
+       colour = colour, size = 1
+     ) +
+     annotate("text", x = x, y = y, label = label, colour = colour)
+ }
+
+ # Population density; the middle x-axis break marks the population mean.
+ pop_mean <- mean(worker$wage)
+ p <- ggplot(worker, aes(wage)) +
+   geom_density(colour = "black", size = 1) +
+   scale_y_continuous(limits = c(0, 0.05)) +
+   scale_x_continuous(limits = c(0, 1000), breaks = c(0, pop_mean, 1000)) +
+   annotate(
+     "text", x = 520, y = 0.045,
+     label = paste0("Population mean = ", round(pop_mean))
+   ) +
+   theme_bw()
+ # p+facet_wrap(~sex)->p
+
+ # One row per overlay: sample size, curve colour, and label height.
+ overlays <- data.frame(
+   size = c(5, 20, 50, 200),
+   colour = c("blue", "darkolivegreen", "red", "pink"),
+   y = c(0.033, 0.036, 0.039, 0.042),
+   stringsAsFactors = FALSE
+ )
+ for (k in seq_len(nrow(overlays))) {
+   p <- add_sampling_layer(
+     p,
+     means = sample_means(worker$wage, overlays$size[k]),
+     colour = overlays$colour[k],
+     label_prefix = paste0(
+       "Distribution of sample means (", overlays$size[k], "): "
+     ),
+     x = 700,
+     y = overlays$y[k]
+   )
+ }
+
+ # Emit only the finished plot; a single-file PNG device keeps just the last
+ # page drawn, so printing the intermediate versions added nothing to the output.
+ p
+#+end_src
+
+#+NAME: sampling2
+#+BEGIN_SRC R :results output graphics :exports results :file bsample2.png :width 4500 :height 3000 :res 600
+ # Sampling-distribution demo, zoomed to wages 0-600: draws the population wage
+ # density and overlays the density of 10,000 sample means per sample size in
+ # `overlays`. No seed is set, so curves and labels vary slightly between runs.
+ library(data.table)
+ library(ggplot2)
+
+ worker <- readRDS("plfsdata/plfsacjdata.rds")
+ worker$wage <- worker$standardwage
+ #read.table("~/ssercloud/acj2018/worker.csv",sep=",",header=T)->worker
+ worker$SamplingFrameOrder <- seq_len(nrow(worker))
+ worker <- worker[sex!=3,]
+
+ #' Means of repeated simple random samples (without replacement).
+ #'
+ #' @param x Numeric vector treated as the population.
+ #' @param size Number of observations in each sample.
+ #' @param reps Number of samples to draw.
+ #' @return Numeric vector of length `reps`, one sample mean per draw.
+ sample_means <- function(x, size, reps = 10000) {
+   # vapply fills a result of known length instead of growing a vector with
+   # c(), which re-copies the whole vector on every iteration.
+   vapply(
+     seq_len(reps),
+     function(i) mean(x[sample.int(length(x), size)]),
+     numeric(1)
+   )
+ }
+
+ #' Overlay one sampling distribution on a plot and label it.
+ #'
+ #' @param plot ggplot object to extend.
+ #' @param means Numeric vector of sample means.
+ #' @param colour Colour used for both the curve and its label.
+ #' @param label_prefix Text placed before "mean = ...; stdev = ..." in the label.
+ #' @param x,y Position of the label in data coordinates.
+ #' @return `plot` with a density layer and a text annotation added.
+ add_sampling_layer <- function(plot, means, colour, label_prefix, x, y) {
+   label <- paste0(
+     label_prefix,
+     "mean = ", round(mean(means)),
+     "; stdev = ", round(sd(means))
+   )
+   plot +
+     geom_density(
+       data = data.frame(meancol = means), aes(meancol),
+       colour = colour, size = 1
+     ) +
+     annotate("text", x = x, y = y, label = label, colour = colour)
+ }
+
+ # Population density; x-axis breaks mark 0, the population mean and the upper
+ # limit (the last break was 1000, outside the 0-600 limits, so never drawn).
+ pop_mean <- mean(worker$wage)
+ p <- ggplot(worker, aes(wage)) +
+   geom_density(colour = "black", size = 1) +
+   scale_y_continuous(limits = c(0, 0.05)) +
+   scale_x_continuous(limits = c(0, 600), breaks = c(0, pop_mean, 600)) +
+   annotate(
+     "text", x = 380, y = 0.045,
+     label = paste0("Population mean = ", round(pop_mean))
+   ) +
+   annotate(
+     "text", x = 400, y = 0.042,
+     label = "Distribution of sample means:"
+   ) +
+   theme_bw()
+ # p+facet_wrap(~sex)->p
+
+ # One row per overlay: sample size, curve colour, and label height.
+ overlays <- data.frame(
+   size = c(5, 20, 50, 200),
+   colour = c("blue", "darkolivegreen", "red", "pink"),
+   y = c(0.030, 0.033, 0.036, 0.039),
+   stringsAsFactors = FALSE
+ )
+ for (k in seq_len(nrow(overlays))) {
+   p <- add_sampling_layer(
+     p,
+     means = sample_means(worker$wage, overlays$size[k]),
+     colour = overlays$colour[k],
+     label_prefix = paste0("Sample size ", overlays$size[k], ": "),
+     x = 450,
+     y = overlays$y[k]
+   )
+ }
+
+ # Emit only the finished plot; a single-file PNG device keeps just the last
+ # page drawn, so printing the intermediate versions added nothing to the output.
+ p
+#+end_src
+
+#+RESULTS: sampling2
+[[file:bsample2.png]]
+
+