We load some required libraries:

library(tidyverse)
library(comtradr)
library(eurostat)
library(scales)
library(glue)
library(directlabels)
library(treemap)
library(igraph)
library(countrycode)
library(sf)
library(gt)
library(gridExtra)
library(ggthemes)

# original = imager::load.image("../data/UNcomtrade.png")

Retrieve data

Most easily we can retrieve data from web interface:

We can use the ct_search function for basic search of the comtrade database. For example, to retrieve the trade_value (imports and exports) of Greece we can write:

df <- ct_search(
  reporters = "Greece",
  partners = "World", 
  freq = "annual")

which can be analyzed as: 1. Retrieve values reported by Greece (the reporter) 2. Retrieve values for the whole World (Partner) 3. Retrieve one value for each year (annual data)

The UN comtrade API provides a large number of columns. There are not all relevant for any kind of query, here we do need most of the them:

str(df)
## 'data.frame':    66 obs. of  35 variables:
##  $ classification        : chr  "H5" "H5" "H5" "H5" ...
##  $ year                  : int  2017 2017 2018 2018 2019 2019 2020 2020 2021 2021 ...
##  $ period                : int  2017 2017 2018 2018 2019 2019 2020 2020 2021 2021 ...
##  $ period_desc           : chr  "2017" "2017" "2018" "2018" ...
##  $ aggregate_level       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ is_leaf_code          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trade_flow_code       : int  1 2 1 2 1 2 1 2 1 2 ...
##  $ trade_flow            : chr  "Import" "Export" "Import" "Export" ...
##  $ reporter_code         : int  300 300 300 300 300 300 300 300 300 300 ...
##  $ reporter              : chr  "Greece" "Greece" "Greece" "Greece" ...
##  $ reporter_iso          : chr  "GRC" "GRC" "GRC" "GRC" ...
##  $ partner_code          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ partner               : chr  "World" "World" "World" "World" ...
##  $ partner_iso           : chr  "WLD" "WLD" "WLD" "WLD" ...
##  $ second_partner_code   : logi  NA NA NA NA NA NA ...
##  $ second_partner        : chr  NA NA NA NA ...
##  $ second_partner_iso    : chr  NA NA NA NA ...
##  $ customs_proc_code     : chr  NA NA NA NA ...
##  $ customs               : chr  NA NA NA NA ...
##  $ mode_of_transport_code: chr  NA NA NA NA ...
##  $ mode_of_transport     : chr  NA NA NA NA ...
##  $ commodity_code        : chr  "TOTAL" "TOTAL" "TOTAL" "TOTAL" ...
##  $ commodity             : chr  "All Commodities" "All Commodities" "All Commodities" "All Commodities" ...
##  $ qty_unit_code         : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ qty_unit              : chr  "No Quantity" "No Quantity" "No Quantity" "No Quantity" ...
##  $ alt_qty_unit_code     : logi  NA NA NA NA NA NA ...
##  $ alt_qty_unit          : chr  NA NA NA NA ...
##  $ qty                   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ alt_qty               : logi  NA NA NA NA NA NA ...
##  $ netweight_kg          : int  NA NA NA 0 0 0 NA NA 0 NA ...
##  $ gross_weight_kg       : logi  NA NA NA NA NA NA ...
##  $ trade_value_usd       : num  5.53e+10 3.22e+10 6.51e+10 3.95e+10 6.22e+10 ...
##  $ cif_trade_value_usd   : logi  NA NA NA NA NA NA ...
##  $ fob_trade_value_usd   : logi  NA NA NA NA NA NA ...
##  $ flag                  : int  4 4 4 4 4 4 4 4 4 0 ...
##  - attr(*, "url")= chr "https://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=all&r=300&p=0&rg=all&cc=TOTAL&head=H&fmt=json"
##  - attr(*, "time_stamp")= POSIXct[1:1], format: "2023-01-17 13:19:21"
##  - attr(*, "req_duration")= num 4.25

But it is essential to know that the structure of returned table is always the same. Later, we will write more complex queries, we will use more columns. For now, we need only three columns:

df %>% 
  select(period, trade_flow, trade_value_usd) %>% 
  head()
##   period trade_flow trade_value_usd
## 1   2017     Import     55300597553
## 2   2017     Export     32154816405
## 3   2018     Import     65141447585
## 4   2018     Export     39490521437
## 5   2019     Import     62198045283
## 6   2019     Export     37885526504

Thus, the period (or year), the flow indicator (import or export) and the value (given in USD). We can easily plot the data to examine the evolution of import and export of Greece.

df %>% 
  select(period, trade_flow, trade_value_usd) %>% 
  ggplot(aes(x = period, y = trade_value_usd, color = trade_flow)) +
  geom_line() 

Or, bit better:

df %>% 
  select(period, trade_flow, trade_value_usd) %>% 
  mutate(trade_value_usd = trade_value_usd * 1e-9) %>% # to billions
  ggplot(aes(x = period, y = trade_value_usd, color = trade_flow)) +
  geom_line(linewidth = 1) +
  scale_color_brewer(type = "qual", palette = 2) +
  scale_x_continuous(breaks = pretty_breaks(10), expand = expansion(c(0, 0.1))) +
  scale_y_continuous(breaks = pretty_breaks(10)) +
  geom_dl(aes(label = trade_flow),
          method = list(dl.trans(x = x + 0.2, y = y - 0.3), "last.bumpup")) +
  labs(title = "Greece, Total Value of International Trade",
       subitle = "",
       x = "Year",
       y = "USD Billions",
       caption = "data from: UN comtrade database") +
  theme(legend.position = "none") +
  NULL

# ct_update_databases(commodity_type = "BEC")
# ct_commodity_db_type()

Retrieve trade by product category

Here, we retrieve export data for 2-digit categories of products, only for one year (2020):

df <- ct_search(
  reporters       = "Greece",
  partners        = "World",
  freq            = "annual",
  start_date      = 2020,
  end_date        = 2020,
  trade_direction = "exports",
  commod_codes    = "AG2"
)

97 rows of data:

nrow(df)
## [1] 97

because there are 97 categories of goods (only first six shown):

df %>% 
  select(commodity_code, commodity, trade_value_usd) %>% 
  head()
##   commodity_code                                                                                                        commodity trade_value_usd
## 1             01                                                                                                    Animals; live        16774786
## 2             02                                                                                       Meat and edible meat offal        97792607
## 3             03                                                   Fish and crustaceans, molluscs and other aquatic invertebrates       819858905
## 4             04 Dairy produce; birds' eggs; natural honey; edible products of animal origin, not elsewhere specified or included       915565017
## 5             05                                                  Animal originated products; not elsewhere specified or included        12186351
## 6             06                      Trees and other plants, live; bulbs, roots and the like; cut flowers and ornamental foliage        23785357

Making a simple plot of 97 categories is not easy. The best choice is to present data in a treemap:

df %>% 
  as_tibble() %>% 
  select(commodity_code, commodity, value = trade_value_usd) %>% 
  treemap(
    index     = "commodity",
    vSize     = "value",
    algorithm = "pivotSize",
    palette   = "Set2",
    title     = "Exports of Greece"
    )