Main (data management and disproportionality) script template
You may want to organize your research folder using small scripts, each designed for a part of your analysis plan.
Inspired by Reproducible Research in R
- /folder
- ../data
- ../scripts
- ../etc.
The main script might be located in scripts, under the name “main.R”.
- ../scripts/01_main.R
If you haven’t, visit vignette("getting_started")
,
vignette("basic_workflow")
and
vignette("descriptive")
for details on what this template
does.
Paths
You may create a separate script with paths, that you would call at the beginning of your main script.
library(vigicaen)
library(rlang)
library(here) # if you like the here syntax for file paths
library(dplyr)
# #### PATHS #### ####
path_base <- "../vigibase_ecl/main"
# #### IMPORT #### ####
demo <- dt_parquet(path_base, "demo", in_memory = FALSE)
adr <- dt_parquet(path_base, "adr", in_memory = FALSE)
drug <- dt_parquet(path_base, "drug", in_memory = FALSE)
link <- dt_parquet(path_base, "link", in_memory = FALSE)
out <- dt_parquet(path_base, "out", in_memory = FALSE)
srce <- dt_parquet(path_base, "srce", in_memory = FALSE)
followup <- dt_parquet(path_base, "followup", in_memory = FALSE)
suspdup <- dt_parquet(path_base, "suspdup", in_memory = FALSE)
source("00_dict.R") # from template_dictionary
Demo
# ---- Deduplicating ---- ####
demo <- demo |>
filter(!(UMCReportId %in% suspdup$SuspectedduplicateReportId))
# ---- Drugs ---- ####
# From a list of drugs
demo <-
demo |>
add_drug(
d_code = d_drecno,
drug_data = drug
)
# From ATC
demo <-
demo |>
add_drug(
d_code = atc_drecno,
drug_data = drug
)
# ---- Reactions ---- ####
demo <-
demo |>
add_adr(
a_code = a_llt,
adr_data = adr
)
# ---- Demographics ---- ####
# Age, sex
demo <-
demo |>
mutate(
age = cut(as.integer(AgeGroup),
breaks = c(0,4,5,6,7,8),
include.lowest = TRUE, right = TRUE,
labels = c("<18", "18-45","45-64", "65-74", "75+")),
sex = case_when(Gender == "1" ~ 1,
Gender == "2" ~ 2,
Gender %in% c("-","0","9") ~ NA_real_,
TRUE ~ NA_real_)
)
# Death + outcome availability
demo <-
demo |>
mutate(death =
ifelse(UMCReportId %in% out$UMCReportId,
UMCReportId %in%
(out |>
filter(Seriousness == "1") |>
pull(UMCReportId)
),
NA)
)
# follow-up, seriousness
demo <-
demo |>
mutate(
fup = if_else(UMCReportId %in% followup$UMCReportId, 1, 0),
serious =
ifelse(
UMCReportId %in% out$UMCReportId,
UMCReportId %in%
(out |>
filter(Serious == "Y") |>
pull(UMCReportId)
),
NA)
)
# year
demo <-
demo |>
mutate(
year = as.numeric(substr(FirstDateDatabase, start = 1, stop = 4))
)
# type of reporter
demo <-
demo |>
left_join(
srce |> transmute(UMCReportId, type_reporter = Type),
by = "UMCReportId")
# explicit multi-level vars
demo <-
demo |>
mutate(
Type = factor(Type, levels = c("1", "2", "3", "4", "5")),
type_reporter = factor(type_reporter,
levels = c("1", "2", "3", "4", "5")),
Region = factor(Region, levels = c("1", "2", "3", "4", "5", "6"))
)
levels(demo$Type) <-
c("Spontaneous",
"Report from study",
"Other",
"Not available to sender (unknown)",
"PMS/Special monitoring")
levels(demo$type_reporter) <-
c("Physician",
"Pharmacist",
"Other Health Professional",
"Lawyer",
"Consumer or other non health professional")
levels(demo$Region) <-
c("African Region",
"Region of the Americas",
"South-East Asia Region",
"European Region",
"Eastern Mediterranean Region",
"Western Pacific Region"
)
# ---- Check ---- ####
demo |>
check_dm(cols = c(names(d_drecno), names(a_llt), "fup"))
#> [,1]
#> ipilimumab 86
#> atezolizumab 69
#> durvalumab 68
#> nivolumab 225
#> pembrolizumab 298
#> avelumab 43
#> cemiplimab 27
#> tremelimumab 27
#> a_embolism 9
#> a_colitis 104
#> a_pneumonitis 103
#> fup 292
Basic models
# ---- Bivariate ---- ####
rb <-
demo |>
compute_dispro(
y = names(a_llt),
x = names(d_drecno)
)
# remove the hashes to save your results
## write.csv2(rb, here("outputs", "rb.csv"), row.names = FALSE)
# ---- Multivariate ---- ####
mod <-
glm(a_colitis ~ nivolumab + age + sex,
family = "binomial",
data = demo)
rm <-
summary(mod)$coefficients |>
compute_or_mod(
estimate = Estimate,
std_er = Std..Error
)
# remove the hashes to save your results
## write.csv2(rm, here("outputs", "rm.csv"), row.names = FALSE)
Basic descriptive
# ---- General description ---- ####
r_desc <-
demo |>
desc_facvar(
vf = c("age", "sex",
"type_reporter",
"Type",
"year",
names(d_drecno),
names(a_llt),
"serious", "death"),
ncat_max = 20
)
# ---- Time to onset ---- ####
r_tto <-
desc_tto(
link,
adr_s = names(a_llt),
drug_s = names(d_drecno)
)
# ---- Dechallenge ---- ####
r_dch <-
desc_dch(
link,
adr_s = names(a_llt),
drug_s = names(d_drecno)
)
# ---- Rechallenge ---- ####
r_rch <-
desc_rch(
link,
demo_data = demo,
adr_s = names(a_llt),
drug_s = names(d_drecno)
)