OECD data, G7 Okun's Law

In this section we will examine OECD data sets and SDMX data transmission. For demonstration purposes we will use a well-known story: Okun's Law.

Load the required libraries:

library(OECD)
library(rsdmx)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggthemes)
library(gganimate)
library(ggExtra)
library(DT)
library(tidyreg)
library(stargazer)
library(skimr)
library(stringr)
library(lubridate)
library(writexl)
library(haven)
library(purrr)
library(broom)

And, if needed, inspect the information about our session:

devtools::session_info()
##  setting  value                       
##  version  R version 3.4.3 (2017-11-30)
##  system   i686, linux-gnu             
##  ui       X11                         
##  language en_US                       
##  collate  en_US.UTF-8                 
##  tz       Portugal                    
##  date     2018-11-13                  
## 
##  package     * version    date       source                              
##  assertthat    0.2.0      2017-04-11 CRAN (R 3.4.3)                      
##  backports     1.1.2      2017-12-13 CRAN (R 3.4.3)                      
##  base        * 3.4.3      2017-12-29 local                               
##  bindr         0.1.1      2018-03-13 cran (@0.1.1)                       
##  bindrcpp      0.2.2      2018-03-29 cran (@0.2.2)                       
##  bitops        1.0-6      2013-08-17 CRAN (R 3.4.3)                      
##  broom       * 0.5.0      2018-07-17 CRAN (R 3.4.3)                      
##  cluster       2.0.6      2017-03-10 CRAN (R 3.4.3)                      
##  colorspace    1.3-2      2016-12-14 CRAN (R 3.4.3)                      
##  compiler      3.4.3      2017-12-29 local                               
##  crayon        1.3.4      2017-09-16 CRAN (R 3.4.3)                      
##  datasets    * 3.4.3      2017-12-29 local                               
##  DEoptimR      1.0-8      2016-11-19 CRAN (R 3.4.3)                      
##  devtools      1.13.4     2017-11-09 CRAN (R 3.4.3)                      
##  digest        0.6.18     2018-10-10 cran (@0.6.18)                      
##  dplyr       * 0.7.8      2018-11-10 cran (@0.7.8)                       
##  DT          * 0.5        2018-11-05 CRAN (R 3.4.3)                      
##  evaluate      0.10.1     2017-06-24 CRAN (R 3.4.3)                      
##  farver        1.0.0.9999 2018-11-09 Github (thomasp85/farver@5439336)   
##  fit.models    0.5-14     2017-04-06 CRAN (R 3.4.3)                      
##  forcats       0.2.0      2017-01-23 CRAN (R 3.4.3)                      
##  gganimate   * 0.9.9.9999 2018-11-09 Github (thomasp85/gganimate@cc23618)
##  ggExtra     * 0.8        2018-04-04 CRAN (R 3.4.3)                      
##  ggplot2     * 3.1.0.9000 2018-11-12 Github (tidyverse/ggplot2@f5a88a7)  
##  ggthemes    * 3.5.0      2018-05-07 cran (@3.5.0)                       
##  gifski        0.8.6      2018-09-28 cran (@0.8.6)                       
##  glue          1.3.0      2018-07-17 CRAN (R 3.4.3)                      
##  graphics    * 3.4.3      2017-12-29 local                               
##  grDevices   * 3.4.3      2017-12-29 local                               
##  grid          3.4.3      2017-12-29 local                               
##  gtable        0.2.0      2016-02-26 CRAN (R 3.4.3)                      
##  haven       * 1.1.2      2018-06-27 CRAN (R 3.4.3)                      
##  hms           0.4.2      2018-03-10 cran (@0.4.2)                       
##  htmltools     0.3.6      2017-04-28 CRAN (R 3.4.3)                      
##  htmlwidgets   1.3        2018-09-30 cran (@1.3)                         
##  httpuv        1.4.4.2    2018-07-02 cran (@1.4.4.2)                     
##  knitr         1.20       2018-02-20 cran (@1.20)                        
##  later         0.7.3      2018-06-08 cran (@0.7.3)                       
##  lattice       0.20-35    2017-03-25 CRAN (R 3.4.3)                      
##  lazyeval      0.2.1      2017-10-29 CRAN (R 3.4.3)                      
##  lubridate   * 1.7.4      2018-04-11 CRAN (R 3.4.3)                      
##  magrittr      1.5        2014-11-22 CRAN (R 3.4.3)                      
##  MASS          7.3-47     2017-02-26 CRAN (R 3.4.3)                      
##  memoise       1.1.0      2017-04-21 CRAN (R 3.4.3)                      
##  methods     * 3.4.3      2017-12-29 local                               
##  mime          0.5        2016-07-07 CRAN (R 3.4.3)                      
##  miniUI        0.1.1.1    2018-05-18 cran (@0.1.1.1)                     
##  munsell       0.5.0      2018-06-12 CRAN (R 3.4.3)                      
##  mvtnorm       1.0-6      2017-03-02 CRAN (R 3.4.3)                      
##  nlme          3.1-131    2017-02-06 CRAN (R 3.4.3)                      
##  OECD        * 0.2.3.999  2018-11-10 Github (expersso/OECD@728ed21)      
##  pcaPP         1.9-72     2017-06-27 CRAN (R 3.4.3)                      
##  pillar        1.2.3      2018-05-25 cran (@1.2.3)                       
##  pkgconfig     2.0.2      2018-08-16 cran (@2.0.2)                       
##  plyr          1.8.4      2016-06-08 CRAN (R 3.4.3)                      
##  png           0.1-7      2013-12-03 CRAN (R 3.4.3)                      
##  prettydoc     0.2.1      2018-01-16 CRAN (R 3.4.3)                      
##  prettyunits   1.0.2      2015-07-13 cran (@1.0.2)                       
##  progress      1.2.0      2018-06-14 CRAN (R 3.4.3)                      
##  promises      1.0.1      2018-04-13 cran (@1.0.1)                       
##  purrr       * 0.2.5      2018-05-29 cran (@0.2.5)                       
##  R6            2.3.0      2018-10-04 cran (@2.3.0)                       
##  Rcpp          1.0.0      2018-11-07 cran (@1.0.0)                       
##  RCurl         1.95-4.11  2018-07-15 cran (@1.95-4.)                     
##  rlang         0.3.0.1    2018-10-25 cran (@0.3.0.1)                     
##  rmarkdown     1.10       2018-06-11 cran (@1.10)                        
##  robust        0.4-18     2017-04-27 CRAN (R 3.4.3)                      
##  robustbase    0.93-3     2018-09-21 cran (@0.93-3)                      
##  rprojroot     1.3-2      2018-01-03 cran (@1.3-2)                       
##  rrcov         1.4-4      2018-05-24 CRAN (R 3.4.3)                      
##  rsdmx       * 0.5-13     2018-09-21 cran (@0.5-13)                      
##  scales        1.0.0      2018-08-09 CRAN (R 3.4.3)                      
##  shiny         1.1.0      2018-05-17 cran (@1.1.0)                       
##  skimr       * 1.0.3      2018-06-07 CRAN (R 3.4.3)                      
##  stargazer   * 5.2.2      2018-05-30 CRAN (R 3.4.3)                      
##  stats       * 3.4.3      2017-12-29 local                               
##  stats4        3.4.3      2017-12-29 local                               
##  stringi       1.2.4      2018-07-20 cran (@1.2.4)                       
##  stringr     * 1.3.1      2018-05-10 cran (@1.3.1)                       
##  tibble        1.4.2      2018-01-22 cran (@1.4.2)                       
##  tidyr       * 0.8.2      2018-10-28 cran (@0.8.2)                       
##  tidyreg     * 0.0.1      2018-11-10 Github (mkearney/tidyreg@bce34a5)   
##  tidyselect    0.2.5      2018-10-11 cran (@0.2.5)                       
##  tools         3.4.3      2017-12-29 local                               
##  tweenr        0.1.5.9999 2018-11-09 Github (thomasp85/tweenr@48ea2fd)   
##  utils       * 3.4.3      2017-12-29 local                               
##  withr         2.1.2.9000 2018-11-06 Github (jimhester/withr@be57595)    
##  writexl     * 0.2        2017-09-06 CRAN (R 3.4.3)                      
##  XML           3.98-1.16  2018-08-19 cran (@3.98-1.)                     
##  xtable        1.8-3      2018-08-29 cran (@1.8-3)                       
##  yaml          2.2.0      2018-07-25 cran (@2.2.0)

Usually, we examine the session information when there is a problem to be solved. Otherwise, just skip it.

SDMX data download from OECD

We will get two datasets from the OECD database: GDP and the unemployment rate.

GDP comes from the Quarterly National Accounts section, QNA. Unemployment has its own section, and thus its own dataset/table.

In both cases we will use the SDMX interface, the standard method to transmit statistical data.

The whole process relies on a URL that we submit to the OECD server. We then read the server's response into a data frame in R.

url_une <- "https://stats.oecd.org/restsdmx/sdmx.ashx/GetData/STLABOUR/USA.LRHUTTTT.STSA.Q/all?startTime=1950-Q1"
sdmx    <- readSDMX(url_une)
UNE     <- as_data_frame(sdmx)
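
Note that as_data_frame() is soft-deprecated in recent tibble releases; if it refuses the S4 SDMX object on your setup (this depends on the installed versions), a safe alternative is rsdmx's own as.data.frame() coercion followed by as_tibble():

UNE <- sdmx %>% 
    as.data.frame() %>%   # coercion method provided by rsdmx
    as_tibble()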

The URL has one stable and one variable component.

The stable component is: https://stats.oecd.org/restsdmx/sdmx.ashx/GetData/

The variable component is: STLABOUR/USA.LRHUTTTT.STSA.Q/all?startTime=1950-Q1

As is easy to see, the stable component points to the OECD data server, while the variable component defines exactly which dataset is to be downloaded; a small helper that assembles such a URL is sketched after the list below.

So:

  1. STLABOUR is the name of the dataset; it contains all data about unemployment.

  2. Then come the specific details. A quantity like unemployment is given for many countries. It can be recorded in various flavors, like male/female unemployment or young/older unemployment. It can be measured in different ways, and the time series can be monthly, quarterly or yearly. This is defined in the COUNTRY.SUBJECT.MEASURE.TIME format. One can also give multiple values separated by the + symbol. For example, USA downloads data for USA, while USA+CAN downloads data for both USA and CAN.

  3. USA : Country definition

  4. LRHUTTTT : total unemployment, seasonally adjusted

  5. STSA : Quarter to quarter change

  6. Q : Quarterly time series

  7. startTime defines the starting period of our request, while endTime defines the ending period. In our example above, since we have not defined any ending period, the program will download data up to the most recent period available.
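
As a convenience, the two components can be glued together by a small helper function. This is only an illustrative sketch (make_oecd_url is not part of any package), with the dataset, filter and start period as arguments:

make_oecd_url <- function(dataset, filter, start = "1950-Q1") {
    # stable component + variable component, as described above
    paste0("https://stats.oecd.org/restsdmx/sdmx.ashx/GetData/",
           dataset, "/", filter, "/all?startTime=", start)
}

make_oecd_url("STLABOUR", "USA.LRHUTTTT.STSA.Q")
## [1] "https://stats.oecd.org/restsdmx/sdmx.ashx/GetData/STLABOUR/USA.LRHUTTTT.STSA.Q/all?startTime=1950-Q1"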

GDP

In the same way we get the GDP values from the QNA dataset:

url_qna <- "https://stats.oecd.org/restsdmx/sdmx.ashx/GetData/QNA/USA.B1_GE.LNBQRSA.Q/all?startTime=1950-Q1"
sdmx    <- readSDMX(url_qna)
QNA     <- as_data_frame(sdmx)

Check data

UNE %>% 
    select(LOCATION, SUBJECT, MEASURE) %>% 
    distinct()
## # A tibble: 1 x 3
##   LOCATION SUBJECT  MEASURE
##   <chr>    <chr>    <chr>  
## 1 USA      LRHUTTTT STSA
QNA %>% 
    filter(MEASURE == 'LNBQRSA' & FREQUENCY == 'Q') %>% 
    select(LOCATION, SUBJECT, MEASURE) %>% 
    distinct() 
## # A tibble: 1 x 3
##   LOCATION SUBJECT MEASURE
##   <chr>    <chr>   <chr>  
## 1 USA      B1_GE   LNBQRSA

Let us now keep the columns of interest and discard all others:

une <- UNE %>% 
    select(obsTime, obsValue) %>% 
    rename(Time = obsTime, une = obsValue)
gdp <- QNA %>% 
    select(obsTime, obsValue) %>% 
    rename(Time = obsTime, gdp = obsValue)

Plotting

We always plot our data before proceeding to further analysis. Always.

ggplot(une) +
    aes(x = Time, y = une, group = 1) +
    geom_line()

ggplot(gdp) +
    aes(x = Time, y = gdp, group = 1) +
    geom_line()

Calculate the differences

To study Okun's Law we need changes rather than levels: the quarterly change in the unemployment rate (a simple first difference) and the quarterly GDP growth rate (100 times the first difference of the log, which approximates the percentage change):

une <- une %>% 
    mutate(d_une = c(NA, diff(une)))
gdp <- gdp %>% 
    mutate(d_gdp = c(NA, 100*diff(log(gdp))))
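
A quick check of the two transformations on a made-up vector shows what they return:

x <- c(100, 102, 99)   # toy series
diff(x)                # first differences: 2, -3
100 * diff(log(x))     # approximate percentage changes: about 1.98 and -2.99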

Now we join the two datasets into one:

USA <- inner_join(une , gdp, by = "Time")

inner_join comes from the dplyr package. It merges two tables into one based on matching values of a common column. We may view the result:

datatable(USA)
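
As a toy illustration of inner_join() with made-up data (unrelated to the OECD series), only rows whose key value appears in both tables are kept:

a <- tibble(Time = c("2018-Q1", "2018-Q2", "2018-Q3"), une = c(4.1, 4.0, 3.9))
b <- tibble(Time = c("2018-Q2", "2018-Q3", "2018-Q4"), gdp = c(0.5, 0.7, 0.6))
inner_join(a, b, by = "Time")  # returns only the rows for 2018-Q2 and 2018-Q3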

OLS estimation of the Okun's Law coefficient

USA %>% 
    ggplot(aes(x = d_une, y = d_gdp)) +
    geom_point() +
    geom_smooth(method = "lm") +
    xlim(c(-1, 2)) + 
    ylim(c(-3, 4))

lm(d_gdp ~ d_une, data = USA) %>% summary()
## 
## Call:
## lm(formula = d_gdp ~ d_une, data = USA)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.73036 -0.41408 -0.00752  0.34945  2.51617 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.73945    0.04072   18.16   <2e-16 ***
## d_une       -1.60753    0.11643  -13.81   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6489 on 252 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.4307, Adjusted R-squared:  0.4284 
## F-statistic: 190.6 on 1 and 252 DF,  p-value: < 2.2e-16
lm(d_gdp ~ d_une, data = USA) %>% stargazer(type = "text") 
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                                d_gdp           
## -----------------------------------------------
## d_une                        -1.608***         
##                               (0.116)          
##                                                
## Constant                     0.739***          
##                               (0.041)          
##                                                
## -----------------------------------------------
## Observations                    254            
## R2                             0.431           
## Adjusted R2                    0.428           
## Residual Std. Error      0.649 (df = 252)      
## F Statistic          190.620*** (df = 1; 252)  
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
USA %>% 
    tidy_regression(d_gdp ~ d_une) %>% 
    tidy_summary()
## # A tidy model
## Model formula  : d_gdp ~ d_une
## Model type     : Ordinary Least Squares (OLS) regression
## Model pkg::fun : stats::lm()
## Model data     : 254 (observations) X 2 (variables)
## $fit
## fit_stat     n     df    estimate    p.value  stars
## F          254      1    190.620      <.001   ***
## R^2        254      -      0.431       -         
## Adj R^2    254      -      0.428       -         
## RMSE       254      -      0.649       -         
## AIC        254      -    505.095       -         
## BIC        254      -    515.707       -         
## 
## $coef
## term            est     s.e.     est.se    p.value  stars   std.est
## (Intercept)   0.739    0.041     18.161      <.001   ***      <.001
## d_une        -1.608    0.116    -13.807      <.001   ***     -0.563
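
Since broom is already loaded, the same fit can also be summarised with tidy() and glance(); this is simply another presentation of the numbers reported above:

fit <- lm(d_gdp ~ d_une, data = USA)
tidy(fit)    # coefficient table as a tibble
glance(fit)  # R-squared, F statistic, AIC and friends in one row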

G7 countries

The USA is fine, but what happens in other countries? Let us perform the same type of calculations for the G7 countries: the USA, Canada, the United Kingdom, Japan, Germany, France and Italy.

We have to modify the SDMX request accordingly:

url_une <- "https://stats.oecd.org/restsdmx/sdmx.ashx/GetData/STLABOUR/USA+CAN+GBR+DEU+FRA+ITA+JPN.LRHUTTTT.STSA.Q/all?startTime=1950-Q1"
sdmx    <- readSDMX(url_une)
UNE     <- as_data_frame(sdmx)

url_qna <- "https://stats.oecd.org/restsdmx/sdmx.ashx/GetData/QNA/USA+CAN+GBR+DEU+FRA+ITA+JPN.B1_GE.LNBQRSA.Q/all?startTime=1950-Q1"
sdmx    <- readSDMX(url_qna)
QNA     <- as_data_frame(sdmx)

A trick is needed: find the common time window for which data are available in all countries. First, let's construct two tables with each country's earliest and latest observation:

time_une <- 
    UNE %>% 
    group_by(LOCATION) %>% 
    summarise(min_T_une = min(obsTime), max_T_une = max(obsTime))
time_une
## # A tibble: 7 x 3
##   LOCATION min_T_une max_T_une
##   <chr>    <chr>     <chr>    
## 1 CAN      1955-Q1   2018-Q3  
## 2 DEU      1991-Q1   2018-Q3  
## 3 FRA      1983-Q1   2018-Q3  
## 4 GBR      1983-Q1   2018-Q2  
## 5 ITA      1983-Q1   2018-Q3  
## 6 JPN      1955-Q1   2018-Q3  
## 7 USA      1955-Q1   2018-Q3
time_qna <- 
    QNA %>% 
    group_by(LOCATION) %>% 
    summarise(min_T_qna = min(obsTime), max_T_qna = max(obsTime))
time_qna
## # A tibble: 7 x 3
##   LOCATION min_T_qna max_T_qna
##   <chr>    <chr>     <chr>    
## 1 CAN      1961-Q1   2018-Q2  
## 2 DEU      1991-Q1   2018-Q2  
## 3 FRA      1980-Q1   2018-Q3  
## 4 GBR      1955-Q1   2018-Q3  
## 5 ITA      1995-Q1   2018-Q3  
## 6 JPN      1994-Q1   2018-Q2  
## 7 USA      1950-Q1   2018-Q3

And proceed as follows:

time_window <- inner_join(time_une, time_qna, by="LOCATION") %>% 
    mutate(max_min_T = max(min_T_une, min_T_qna), 
           min_max_T = min(max_T_une, max_T_qna)) %>% 
    select(max_min_T, min_max_T) %>% 
    distinct()
time_window
## # A tibble: 1 x 2
##   max_min_T min_max_T
##   <chr>     <chr>    
## 1 1995-Q1   2018-Q2
min_time <- time_window[1, 1] %>% unlist() %>% as.character()
max_time <- time_window[1, 2] %>% unlist() %>% as.character()
min_time
## [1] "1995-Q1"
max_time
## [1] "2018-Q2"

Reshape the unemployment and GDP tables into wide format, one column per country:

une <- UNE %>% 
    filter(obsTime >= min_time & obsTime <= max_time) %>% 
    select(LOCATION, obsTime, obsValue) %>% 
    arrange(LOCATION, obsTime) %>% 
    rename(Country = LOCATION, Quarter = obsTime, une = obsValue) %>% 
    spread(Country, une)

gdp <- QNA %>% 
    filter(obsTime >= min_time & obsTime <= max_time) %>% 
    select(LOCATION, obsTime, obsValue) %>% 
    arrange(LOCATION, obsTime) %>% 
    rename(Country = LOCATION, Quarter = obsTime, gdp = obsValue) %>% 
    spread(Country, gdp)

View the results:

datatable(une)
datatable(gdp)

Define two functions to transform the data:

diff_1 <- function(x) {
    # simple first difference, padded with a leading NA to keep the original length
    r <- diff(x)
    r <- c(NA, r)
    return(r)
}

ret_1 <- function(x) { 
    # log return in percent; lag() is dplyr::lag here (a one-period shift)
    r <- 100 * log(x / lag(x))
    return(r)
}
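
A quick check on a made-up vector shows what each helper returns (ret_1 relies on dplyr::lag, which is on the search path because dplyr is loaded):

diff_1(c(5, 6, 8))      # NA  1  2
ret_1(c(100, 110, 99))  # NA  9.53  -10.54 (approximately)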

Create two more columns for date and time:

start_year <- gdp[1, 1] %>% 
    unlist() %>% 
    as.character() %>% 
    str_sub(1, 4) %>% 
    as.numeric()

start_date <- paste(start_year, "01", "01", sep = "-")
end_date   <- paste(year(now()) + 1, "01", "01", sep = "-") 

Time    <- seq(start_year, start_year + nrow(gdp)/4 - 1/4, by = 1/4)
Date    <- seq(ymd(start_date), ymd(end_date), by = '3 months')[1:nrow(gdp)]
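
Time is a fractional year advancing by 0.25 per quarter, while Date holds the corresponding calendar dates (the first day of each quarter) built with lubridate. To make the Time sequence concrete, here it is for made-up values (start year 1995, eight quarters):

seq(1995, 1995 + 8/4 - 1/4, by = 1/4)
## [1] 1995.00 1995.25 1995.50 1995.75 1996.00 1996.25 1996.50 1996.75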

Combine and view:

qtd <- gdp %>% 
    select(Quarter) %>% 
    mutate(Time = Time, Date = Date)
datatable(qtd)

Calculate the differences in the unemployment table:

une_dr <- une %>% 
    mutate_at(names(select(., -Quarter)), diff_1 )
datatable(une_dr)

Calculate the returns in the GDP table:

gdp_dr <- gdp %>% 
    mutate_at(names(select(., -Quarter)), ret_1 )
datatable(gdp_dr)
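
Both mutate_at() calls above can be written more compactly with across() in newer dplyr versions (1.0 or later; an assumption, since the session above used dplyr 0.7.8):

une_dr <- une %>% mutate(across(-Quarter, diff_1))
gdp_dr <- gdp %>% mutate(across(-Quarter, ret_1))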

Add the date/time columns and reshape the datasets into long (panel) format:

une_dr_spr <- inner_join(qtd, une_dr, by="Quarter")
gdp_dr_spr <- inner_join(qtd, gdp_dr, by="Quarter")

une_dr_gtr <- une_dr_spr %>% 
    gather(Country, une, -Quarter, -Time, -Date)

gdp_dr_gtr <- gdp_dr_spr %>% 
    gather(Country, gdp, -Quarter, -Time, -Date)
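
With tidyr 1.0 or later (an assumption; the session above used tidyr 0.8.2), the same reshape can be expressed with pivot_longer() instead of gather():

une_dr_gtr <- une_dr_spr %>% 
    pivot_longer(-c(Quarter, Time, Date), names_to = "Country", values_to = "une")

gdp_dr_gtr <- gdp_dr_spr %>% 
    pivot_longer(-c(Quarter, Time, Date), names_to = "Country", values_to = "gdp")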

Now we have:

datatable(une_dr_gtr)
datatable(gdp_dr_gtr)

Examine the descriptive statistics:

une_dr_spr %>% select(-Time) %>% skim()
## Skim summary statistics
##  n obs: 94 
##  n variables: 9 
## 
## ── Variable type:character ─────────────────────────────────────────────────────────
##  variable missing complete  n min max empty n_unique
##   Quarter       0       94 94   7   7     0       94
## 
## ── Variable type:Date ──────────────────────────────────────────────────────────────
##  variable missing complete  n        min        max     median n_unique
##      Date       0       94 94 1995-01-01 2018-04-01 2006-08-16       94
## 
## ── Variable type:numeric ───────────────────────────────────────────────────────────
##  variable missing complete  n    mean   sd    p0   p25    p50   p75 p100
##       CAN       1       93 94 -0.041  0.24 -0.53 -0.17 -0.067 0.067 1.27
##       DEU       1       93 94 -0.051  0.21 -0.53 -0.17 -0.1   0.1   0.4 
##       FRA       1       93 94 -0.029  0.22 -0.53 -0.13  0     0.1   0.77
##       GBR       1       93 94 -0.051  0.21 -0.43 -0.17 -0.067 0.033 0.73
##       ITA       1       93 94 -0.0065 0.28 -0.73 -0.23 -0.033 0.17  0.83
##       JPN       1       93 94 -0.0072 0.15 -0.27 -0.1  -0.033 0.067 0.53
##       USA       1       93 94 -0.017  0.29 -0.47 -0.2  -0.067 0.067 1.4 
##      hist
##  ▁▇▇▂▁▁▁▁
##  ▁▃▂▇▃▂▃▂
##  ▂▁▆▇▃▁▁▁
##  ▂▅▇▃▁▁▁▁
##  ▁▂▇▆▆▃▂▁
##  ▃▅▇▅▂▁▁▁
##  ▂▇▂▁▁▁▁▁
gdp_dr_spr %>% select(-Time) %>% skim()
## Skim summary statistics
##  n obs: 94 
##  n variables: 9 
## 
## ── Variable type:character ─────────────────────────────────────────────────────────
##  variable missing complete  n min max empty n_unique
##   Quarter       0       94 94   7   7     0       94
## 
## ── Variable type:Date ──────────────────────────────────────────────────────────────
##  variable missing complete  n        min        max     median n_unique
##      Date       0       94 94 1995-01-01 2018-04-01 2006-08-16       94
## 
## ── Variable type:numeric ───────────────────────────────────────────────────────────
##  variable missing complete  n mean   sd    p0    p25  p50  p75 p100
##       CAN       1       93 94 0.6  0.61 -2.31  0.28  0.65 1    1.8 
##       DEU       1       93 94 0.36 0.81 -4.6   0.073 0.4  0.82 2.04
##       FRA       1       93 94 0.4  0.46 -1.66  0.17  0.42 0.7  1.25
##       GBR       1       93 94 0.52 0.6  -2.2   0.28  0.57 0.84 1.91
##       ITA       1       93 94 0.15 0.7  -2.79 -0.1   0.24 0.49 1.63
##       JPN       1       93 94 0.24 0.98 -5    -0.11  0.26 0.8  2.42
##       USA       1       93 94 0.61 0.6  -2.19  0.36  0.63 0.92 1.81
##      hist
##  ▁▁▁▁▅▇▆▂
##  ▁▁▁▁▁▇▇▂
##  ▁▁▁▁▅▇▇▂
##  ▁▁▁▁▅▇▃▁
##  ▁▁▁▂▃▇▃▁
##  ▁▁▁▁▂▇▆▁
##  ▁▁▁▁▃▇▆▂

Combine everything into one single dataset:

une_1 <- une_dr %>% 
    gather(Country, une, -Quarter)
gdp_1 <- gdp_dr %>% 
    gather(Country, gdp, -Quarter)
une_gdp <- inner_join(une_1, gdp_1, by=c("Quarter", "Country"))
G7_une_gdp <- inner_join(qtd, une_gdp, by="Quarter")
datatable(G7_une_gdp)

If needed, we can export the tables to files on disk, in various formats:

write_xlsx(une_dr_spr, "../data/unemployment_by_country.xlsx")
## [1] "/home/astavrak/edu/erasmus/lectures/2018-Aveiro/data/unemployment_by_country.xlsx"
write_xlsx(une_dr_gtr, "../data/unemployment_panel.xlsx")
## [1] "/home/astavrak/edu/erasmus/lectures/2018-Aveiro/data/unemployment_panel.xlsx"
write_xlsx(gdp_dr_spr, "../data/gdp_by_country.xlsx")
## [1] "/home/astavrak/edu/erasmus/lectures/2018-Aveiro/data/gdp_by_country.xlsx"
write_xlsx(gdp_dr_gtr, "../data/gdp_panel.xlsx")
## [1] "/home/astavrak/edu/erasmus/lectures/2018-Aveiro/data/gdp_panel.xlsx"
write_xlsx(G7_une_gdp, "../data/G7_une_gdp.xlsx")
## [1] "/home/astavrak/edu/erasmus/lectures/2018-Aveiro/data/G7_une_gdp.xlsx"
write_dta(une_dr_spr, "../data/unemployment_by_country.dta")
write_dta(une_dr_gtr, "../data/unemployment_panel.dta")
write_dta(gdp_dr_spr, "../data/gdp_by_country.dta")
write_dta(gdp_dr_gtr, "../data/gdp_panel.dta")
write_dta(G7_une_gdp, "../data/G7_une_gdp.dta")

save(une_dr_spr, une_dr_gtr, gdp_dr_spr, gdp_dr_gtr, G7_une_gdp, 
     file="../data/G7.Rda")

Various plotting attempts

G7_une_gdp %>% 
    ggplot() +
    geom_line(aes(x = Time, y = une), color = "red") +   # colour set as a fixed value, not mapped inside aes()
    geom_line(aes(x = Time, y = gdp), color = "blue") +
    facet_wrap(~Country) + 
    theme_hc() + 
    labs(y = "% Change") +
    theme(legend.position = "none") 

G7_une_gdp %>% 
    ggplot(aes(x = une, y = gdp)) +
    geom_point() +
    geom_smooth(method = "lm", se = TRUE) +
    theme_stata() +
    labs(x = "Change in Unmployment Rate", y = "Change in GDP Growth") +
    theme(text = element_text(size = 14))

G7_une_gdp %>% 
    filter(Country == 'USA') %>% 
    ggplot(aes(x = une, y = gdp)) +
    geom_point() +
    geom_smooth(method = "lm", se = FALSE) +
    theme_stata() +
    labs(x = "Change in Unmployment Rate", y = "Change in GDP Growth")

G7_une_gdp %>% 
    ggplot(aes(x = une, y = gdp, group = Country, colour = Country)) +
    geom_point() +
    geom_smooth(method = "lm", se = FALSE) +
    theme_stata() +
    labs(x = "Change in Unmployment Rate", y = "Change in GDP Growth")

G7_une_gdp %>% 
    ggplot(aes(x = une, y = gdp, group = Country, colour = Country)) +
    geom_point() +
    geom_smooth(method = "lm", se = FALSE) +
    facet_wrap(~Country) +
    scale_color_discrete(guide = FALSE) +
    theme_hc() +
    labs(x = "Change in Unmployment Rate", y = "Change in GDP Growth")

Regression

G7_une_gdp %>% 
    filter(Country == 'USA') %>% 
    ols_regression(gdp ~ une, data = .) %>% 
    tidy_summary()
## # A tidy model
## Model formula  : gdp ~ une
## Model type     : Ordinary Least Squares (OLS) regression
## Model pkg::fun : stats::lm()
## Model data     : 93 (observations) X 2 (variables)
## $fit
## fit_stat     n     df    estimate    p.value  stars
## F           93      1     37.904      <.001   ***
## R^2         93      -      0.294       -         
## Adj R^2     93      -      0.286       -         
## RMSE        93      -      0.506       -         
## AIC         93      -    141.095       -         
## BIC         93      -    148.692       -         
## 
## $coef
## term            est     s.e.    est.se    p.value  stars   std.est
## (Intercept)   0.587    0.053    11.168      <.001   ***      <.001
## une          -1.104    0.179    -6.157      <.001   ***     -0.325
lm_fit <- G7_une_gdp %>% 
    group_by(Country) %>% 
    nest() %>% 
    mutate(model = map(data, ~lm(gdp ~ une, data = .))) 
lm_fit$model[[1]] %>% tidy_summary()
## NULL
## $fit
## fit_stat     n     df    estimate    p.value  stars
## F           93      1     52.110      <.001   ***
## R^2         93      -      0.364       -         
## Adj R^2     93      -      0.357       -         
## RMSE        93      -      0.489       -         
## AIC         93      -    134.834       -         
## BIC         93      -    142.432       -         
## 
## $coef
## term            est     s.e.    est.se    p.value  stars   std.est
## (Intercept)   0.536    0.051    10.425      <.001   ***      <.001
## une          -1.523    0.211    -7.219      <.001   ***     -0.368
G7_une_gdp %>%
    nest(-Country) %>% 
    mutate(
        fit = map(data, ~ tidy_regression(gdp ~ une, data = .)),
        tidied = map(fit, tidy)
    ) %>% 
    unnest(tidied) %>% 
    datatable(options = list(pageLength = 20))
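
As a closing variant that relies only on broom and purrr (no tidyreg), here is a sketch that pulls out the Okun coefficient, i.e. the slope on une, for every country:

G7_une_gdp %>% 
    group_by(Country) %>% 
    nest() %>% 
    mutate(model = map(data, ~ lm(gdp ~ une, data = .x)), 
           coefs = map(model, tidy)) %>% 
    unnest(coefs) %>% 
    filter(term == "une") %>% 
    select(Country, estimate, std.error, p.value)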