Especially with the recent change with AR, we're probably going to eventually need to switch to their new format. We should investigate the differences and see if we can switch to using the new format.
library(tidyverse)
library(pacta.data.preparation)
ar_data_path <- "~/Documents/Data/Asset Resolution/2022-08-15_AR_2021Q4"
ar_advanced_company_indicators_path <- file.path(ar_data_path, "2022-08-24_AR_2021Q4_RMI-Company-Indicators.xlsx")
masterdata_debt_path <- file.path(ar_data_path, "2022-10-05_rmi_masterdata_debt_2021q4.csv")
masterdata_ownership_path <- file.path(ar_data_path, "2022-08-15_rmi_masterdata_ownership_2021q4.csv")
ar_advanced_company_indicators <- import_ar_advanced_company_indicators(ar_advanced_company_indicators_path, fix_names = TRUE)
masterdata_debt <- readr::read_csv(masterdata_debt_path, na = "", show_col_types = FALSE)
masterdata_ownership <- readr::read_csv(masterdata_ownership_path, na = "", show_col_types = FALSE)
# -------------------------------------------------------------------------
ownership_data <-
ar_advanced_company_indicators %>%
filter(consolidation_method == "Equity Ownership") %>%
filter(value_type == "production") %>%
filter(
asset_sector == "Aviation" & activity_unit %in% c("pkm", "tkm") |
asset_sector == "Cement" & activity_unit == "t cement" |
asset_sector == "Coal" & activity_unit == "t coal" |
asset_sector == "HDV" & activity_unit == "# vehicles" |
asset_sector == "LDV" & activity_unit == "# vehicles" |
asset_sector == "Oil&Gas" & activity_unit == "GJ" |
asset_sector == "Power" & activity_unit == "MW" |
asset_sector == "Shipping" & activity_unit == "dwt km" |
asset_sector == "Steel" & activity_unit == "t steel"
) %>%
pivot_wider(names_from = "year", values_fill = 0) %>%
group_by(asset_sector) %>%
summarise(new_2016 = sum(`2016`, na.rm = TRUE), new_n = length(unique(company_id)))
masterdata_ownership %>%
filter(company_id %in% ar_advanced_company_indicators$company_id) %>%
group_by(sector) %>%
summarise(old_2016 = sum(`_2016`, na.rm = TRUE), old_n = length(unique(company_id))) %>%
full_join(ownership_data, by = c(sector = "asset_sector")) %>%
mutate(
new_2016 = if_else(is.na(new_2016), as.numeric(0), as.numeric(new_2016)),
new_n = if_else(is.na(new_n), as.numeric(0), as.numeric(new_n))
) %>%
mutate(diff = old_2016 - new_2016) %>%
mutate(percent_diff = round(diff / old_2016 * 100, digits = 2))
#> # A tibble: 9 × 7
#> sector old_2016 old_n new_2016 new_n diff percent_diff
#> <chr> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 Aviation 6.01e12 1820 6.01e12 1820 -3.12e-2 0
#> 2 Cement 5.01e 9 2070 4.67e 9 2035 3.46e+8 6.91
#> 3 Coal 1.09e10 2051 1.09e10 2061 -1.47e-2 0
#> 4 HDV 0 575 0 575 0 NaN
#> 5 LDV 1.39e 8 394 1.39e 8 435 -3.73e-1 0
#> 6 Oil&Gas 7.11e11 4563 7.11e11 4563 -5.32e-2 0
#> 7 Power 1.30e 7 36846 1.30e 7 36875 -3.26e-1 0
#> 8 Shipping 2.56e14 11167 2.56e14 11169 2.95e+9 0
#> 9 Steel 2.73e 9 1105 2.73e 9 1105 -5.76e-3 0
# -------------------------------------------------------------------------
fin_control_data <-
ar_advanced_company_indicators %>%
filter(consolidation_method == "Financial Control") %>%
filter(value_type == "production") %>%
filter(
asset_sector == "Aviation" & activity_unit %in% c("pkm", "tkm") |
asset_sector == "Cement" & activity_unit == "t cement" |
asset_sector == "Coal" & activity_unit == "t coal" |
asset_sector == "HDV" & activity_unit == "# vehicles" |
asset_sector == "LDV" & activity_unit == "# vehicles" |
asset_sector == "Oil&Gas" & activity_unit == "GJ" |
asset_sector == "Power" & activity_unit == "MW" |
asset_sector == "Shipping" & activity_unit == "dwt km" |
asset_sector == "Steel" & activity_unit == "t steel"
) %>%
pivot_wider(names_from = "year", values_fill = 0) %>%
group_by(asset_sector) %>%
summarise(new_2016 = sum(`2016`, na.rm = TRUE), new_n = length(unique(company_id)))
masterdata_debt %>%
filter(company_id %in% ar_advanced_company_indicators$company_id) %>%
group_by(sector) %>%
summarise(old_2016 = sum(`_2016`, na.rm = TRUE), old_n = length(unique(company_id))) %>%
full_join(fin_control_data, by = c(sector = "asset_sector")) %>%
mutate(
new_2016 = if_else(is.na(new_2016), as.numeric(0), as.numeric(new_2016)),
new_n = if_else(is.na(new_n), as.numeric(0), as.numeric(new_n))
) %>%
mutate(diff = old_2016 - new_2016) %>%
mutate(percent_diff = round(diff / old_2016 * 100, digits = 2))
#> # A tibble: 9 × 7
#> sector old_2016 old_n new_2016 new_n diff percent_diff
#> <chr> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 Aviation 3.24e12 1302 5.94e12 1609 -2.70e12 -83.3
#> 2 Cement 2.89e 9 1459 4.27e 9 1685 -1.39e 9 -48.0
#> 3 Coal 6.31e 9 1349 1.12e10 1629 -4.87e 9 -77.2
#> 4 HDV 0 283 0 426 0 NaN
#> 5 LDV 9.36e 7 194 1.16e 8 300 -2.24e 7 -23.9
#> 6 Oil&Gas 3.06e11 2864 6.85e11 3864 -3.79e11 -124.
#> 7 Power 6.12e 6 29582 1.28e 7 34012 -6.68e 6 -109.
#> 8 Shipping 1.70e14 9844 2.53e14 10518 -8.30e13 -48.9
#> 9 Steel 1.66e 9 757 2.70e 9 924 -1.03e 9 -62.1