Skip to content

Commit

Permalink
Merge pull request #28 from epiforecasts/update-mappings
Browse files Browse the repository at this point in the history
Update mappings (#20)
  • Loading branch information
Sophie Meakin authored Sep 22, 2022
2 parents e526c2a + 6d26e89 commit c5d3a0f
Show file tree
Hide file tree
Showing 8 changed files with 361 additions and 181 deletions.
18 changes: 13 additions & 5 deletions R/get_names.R
Original file line number Diff line number Diff line change
@@ -1,20 +1,28 @@
#' Link Mapping to Trust and Geography Names
#'
#' @param mapping data.frame containing trust_code, p_trust, geo_code and p_geo.
#' Defaults to `trust_utla_mapping` if not supplied.
#' @param geo_names A dataframe containing `geo_code` and `geo_name`. Used to
#' @param mapping A data.frame containing geo_code, trust_code, p_geo and p_trust.
#' @param geo_names A data.frame containing `geo_code` and `geo_name`. Used to
#' assign meaningful to geographies.
#' @return A data.frame containing a UTLA to trust level admissions map combined
#' meaningful names.
#' @export
#' @importFrom dplyr left_join select
get_names <- function(mapping, geo_names) {
if (missing(geo_names)) {
geo_names <- covid19.nhs.data::utla_names

if (missing(mapping)) {
stop("Missing mapping - please specify a LTLA- or UTLA-Trust mapping.")
} else {
# Geography names checks
if (missing(geo_names)) {
stop("Missing geo_names - please specify appropriate geography names.")
}
}

out <- mapping %>%
left_join(covid19.nhs.data::trust_names, by = "trust_code") %>%
left_join(geo_names, by = "geo_code") %>%
select(trust_code, trust_name, geo_code, geo_name, p_trust, p_geo)

return(out)

}
45 changes: 45 additions & 0 deletions R/load_mappings.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#' Load local authority to Trust mappings
#'
#' @description Load LTLA- or UTLA-Trust mappings from two data sources: HES (Hospital Episodes Statistics) until September 2020, and linked COVID-19 cases and admissions June 2020 - May 2021. Replaces previous datasets trust_ltla_mapping and trust_utla_mapping (the HES mappings).
#'
#' @param scale Character string defining the geographical scale. Supported options are "ltla" (lower-tier local authority) and "utla" (upper-tier local authority).
#' @param source Character string defining the souce of the mapping. supported options are "link" (linked COVID-19 cases and admissions) and "sus" (Secondary Uses Service, originally the only mapping available).
#' @importFrom dplyr filter select
#'
#' @return A data.frame with the following columns: `geo_code` (9-digit LTLA or UTLA ID); `trust_code`; `n` (the number of reported admissions); `p_geo` (the proportion of all admissions from a LTLA/UTLA that go to a given Trust); `p_trust` (the proportion of all admissions to a Trust that come from a given LTLA/UTLA); `source` ("SUS" or "Link"); and `level` ("ltla" or "utla").
#' @export
#'
load_mapping <- function(scale, source) {

# Checks for geographical scale
if(missing(scale)) {
message("Parameter missing - specify geographical scale.")
} else {
scale <- match.arg(scale, choices = c("ltla", "utla"))
}

# Checks for data source
if(missing(source)) {
message("Parameter missing - specify mapping source.")
} else {

source <- tolower(source)
source <- match.arg(source, choices = c("sus", "link"))

if(source == "sus") {
source <- "SUS"
} else if (source == "link") {
source <- "Link"
}

}

# Return mapping
out <- covid19.nhs.data::mappings %>%
filter(map_source == source,
map_level == scale) %>%
select(-c(map_source, map_level))

return(out)

}
5 changes: 2 additions & 3 deletions R/map_admissions.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ map_admissions <- function(admissions, shapefile, date, scale_fill) {
admissions <- admissions %>%
filter(date %in% max_date)
}

g <- shapefile %>%
inner_join(admissions, by = "geo_code") %>%
ggplot() +
Expand All @@ -45,7 +45,6 @@ map_admissions <- function(admissions, shapefile, date, scale_fill) {
theme_void() +
guides(fill = guide_colorbar(title = "Admissions")) +
theme(legend.position = "bottom", legend.justification = "left")

return(g)
}

22 changes: 13 additions & 9 deletions R/summarise_mapping.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,23 @@
#' @export
summarise_mapping <- function(trust = NULL, geography = NULL, mapping, shapefile, geo_names) {

# Trust/geography checks
if (is.null(trust) & is.null(geography)) {
stop("Either a trust or a geography must be specified")
}

if (missing(shapefile)){
shapefile <- covid19.nhs.data::england_utla_shape
}
# Mapping checks
if (missing(mapping)) {
mapping <- covid19.nhs.data::trust_utla_mapping
}
if (missing(geo_names)) {
geo_names <- covid19.nhs.data::utla_names
stop("Missing mapping - please specify a LTLA- or UTLA-Trust mapping.")
} else {
# Shapefile checks
if (missing(shapefile)) {
stop("Missing shapefile - please specify an appropriate shapefile.")
}
# Geography names checks
if (missing(geo_names)) {
stop("Missing geo_names - please specify appropriate geography names.")
}
}

if(!is.null(trust)){
Expand Down Expand Up @@ -71,7 +76,7 @@ summarise_mapping <- function(trust = NULL, geography = NULL, mapping, shapefile
} else if (!is.null(geography)){

geography <- toupper(geography)

## Table summary of mapping
tb <- mapping %>%
filter(geo_code == geography) %>%
Expand All @@ -82,4 +87,3 @@ summarise_mapping <- function(trust = NULL, geography = NULL, mapping, shapefile
return(list(summary_table = tb))
}
}

Binary file added R/sysdata.rda
Binary file not shown.
201 changes: 201 additions & 0 deletions data-raw/make_mappings.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@

pacman::p_load(
here,
rio,
magrittr,
janitor,
dplyr,
stringr,

usethis
)

pacman::p_load_gh(
"epiforecasts/hospitalcatchment.utils"
)



# SUS mapping (until September 2020) --------------------------------------

# Load raw data

## Site-Trust lookup
site_trust_lookup <- import(file = here("data-raw", "raw", "england_trusts", "trust_list.csv")) %>%
select(trust_code = V1,
stp_code = V4) %>%
distinct()

# Trust mergers
trust_mergers <- hospitalcatchment.utils::download_nhs_mergers() %>%
filter(!(org_code_old == "RW6" & org_code == "R0A"))

## LTLA-UTLA lookup
ltla_utla_lookup <- import(file = here("data-raw", "raw", "england_ltla", "ltla_utla_list.csv")) %>%
select(ltla_code = LTLA19CD,
utla_code = UTLA19CD)

## NHS site-LTLA mapping
nhs_mapping_raw <- import(file = here("data-raw", "trust-ltla-mapping", "mapping_raw.csv")) %>%
clean_names() %>%
rename(site_code = der_provider_site_code,
ltla_code = der_postcode_dist_unitary_auth,
n = spells)


# Make LTLA-Trust mapping (private)
ltla_trust_sus_private <- nhs_mapping_raw %>%
mutate(trust_code = str_sub(site_code, 1, 3)) %>%
filter(substr(trust_code, 1, 1) == "R") %>%
# Trust changes (mergers)
left_join(trust_mergers %>%
filter(date_effective < as.Date("2020-10-01")),
by = c("trust_code" = "org_code_old")) %>%
mutate(trust_code = ifelse(!is.na(org_code), org_code, trust_code)) %>%
#
group_by(ltla_code, trust_code) %>%
summarise(n = sum(n, na.rm = TRUE),
.groups = "drop")

# Make LTLA-Trust mapping (public)
ltla_trust_sus <- ltla_trust_sus_private %>%
# Drop pairs where there are fewer than 10 admissions
filter(n >= 10) %>%
# Get % LTLA to Trust
group_by(ltla_code) %>%
mutate(p_geo = n/sum(n)) %>%
ungroup() %>%
# Get % Trust from LTLA
group_by(trust_code) %>%
mutate(p_trust = n/sum(n)) %>%
ungroup() %>%
arrange(ltla_code, trust_code) %>%
rename(geo_code = ltla_code) %>%
mutate(map_source = "SUS",
map_level = "ltla")

# Make UTLA-Trust mapping (public)
utla_trust_sus <- ltla_trust_sus_private %>%
left_join(ltla_utla_lookup, by = "ltla_code") %>%
group_by(utla_code, trust_code) %>%
summarise(n = sum(n),
.groups = "drop") %>%
# Drop pairs where there are fewer than 10 admissions
filter(n >= 10) %>%
# Get % LTLA to Trust
group_by(utla_code) %>%
mutate(p_geo = n/sum(n)) %>%
ungroup() %>%
# Get % Trust from LTLA
group_by(trust_code) %>%
mutate(p_trust = n/sum(n)) %>%
ungroup() %>%
arrange(utla_code, trust_code) %>%
rename(geo_code = utla_code) %>%
mutate(map_source = "SUS",
map_level = "utla")




# Linked COVID-19 cases-admissions ----------------------------------------

# Load and link case and admissions data
dat_pil <- import(file = here("data-raw",
"trust-ltla-mapping",
"english_pillars_raw.rds")) %>%
select(finalid, age_pil = age, sex_pil = sex,
utla_code, ltla_code,
home_cat = cat,
date_onset = onsetdate,
date_specimen_pil = date_specimen,
date_report = lab_report_date) %>%
mutate(date_onset = lubridate::dmy(date_onset))

dat_adm <- import(file = here("data-raw",
"trust-ltla-mapping",
"english_hospitals_raw.rds")) %>%
select(finalid = final_id, age_adm = agegrp, sex_adm = sex,
date_specimen_adm = specimen_date,
trust_code = provider_code, trust_type, hospital_in, hospital_out) %>%
left_join(trust_mergers, by = c("trust_code" = "org_code_old")) %>%
mutate(trust_code = ifelse(!is.na(org_code) & hospital_in >= date_effective,
org_code,
trust_code)) %>%
select(-c(org_code, date_effective))


# Combine
dat <- dat_pil %>%
left_join(dat_adm, by = "finalid") %>%
distinct()


# Clean data (first admission, subject to admission delay constraints)
dat_clean <- dat %>%
filter(substr(trust_code, 1, 1) == "R") %>%
filter(!is.na(ltla_code),
hospital_out > date_specimen_adm,
hospital_in <= date_specimen_adm + 28) %>%
filter(home_cat %in% c("Residential dwelling (including houses, flats, sheltered accommodation)")) %>%
filter(date_specimen_adm >= as.Date("2020-06-01"),
date_specimen_adm < as.Date("2021-06-01")) %>%
group_by(finalid) %>%
filter(hospital_in == pmin(hospital_in)) %>%
ungroup()


# Make LTLA-Trust mapping (private)
ltla_trust_link_private <- dat_clean %>%
group_by(ltla_code, trust_code) %>%
summarise(n = n(),
.groups = "drop")

# Make LTLA-Trust mapping (public)
ltla_trust_link <- ltla_trust_link_private %>%
# Drop pairs where there are fewer than 10 admissions
filter(n >= 10) %>%
# Get % LTLA to Trust
group_by(ltla_code) %>%
mutate(p_geo = n/sum(n)) %>%
ungroup() %>%
# Get % Trust from LTLA
group_by(trust_code) %>%
mutate(p_trust = n/sum(n)) %>%
ungroup() %>%
arrange(ltla_code, trust_code) %>%
rename(geo_code = ltla_code) %>%
mutate(map_source = "Link",
map_level = "ltla")

# Make UTLA-Trust mapping (public)
utla_trust_link <- ltla_trust_link_private %>%
left_join(ltla_utla_lookup, by = "ltla_code") %>%
group_by(utla_code, trust_code) %>%
summarise(n = sum(n),
.groups = "drop") %>%
# Drop pairs where there are fewer than 10 admissions
filter(n >= 10) %>%
# Get % LTLA to Trust
group_by(utla_code) %>%
mutate(p_geo = n/sum(n)) %>%
ungroup() %>%
# Get % Trust from LTLA
group_by(trust_code) %>%
mutate(p_trust = n/sum(n)) %>%
ungroup() %>%
arrange(utla_code, trust_code) %>%
rename(geo_code = utla_code) %>%
mutate(map_source = "Link",
map_level = "utla")



# Combine all mappings ----------------------------------------------------

mappings <- ltla_trust_sus %>%
bind_rows(utla_trust_sus) %>%
bind_rows(ltla_trust_link) %>%
bind_rows(utla_trust_link)

usethis::use_data(mappings, internal = TRUE, overwrite = TRUE)
Loading

0 comments on commit c5d3a0f

Please sign in to comment.