Skip to contents

Main (data management and disproportionality) script template

You may want to organize your research folder using small scripts, each designed for a part of your analysis plan.

Inspired by Reproducible Research in R

  • /folder
  • ../data
  • ../scripts
  • ../etc.

The main script might be located in scripts, under the name “main.R”.

  • ../scripts/01_main.R

If you haven’t, visit vignette("getting_started"), vignette("basic_workflow") and vignette("descriptive") for details on what this template does.

Paths

You may create a separate script with paths, that you would call at the beginning of your main script.

library(vigicaen)
library(rlang)
library(here) # if you like the here syntax for file paths
library(dplyr)
# #### PATHS #### ####

path_base   <- "../vigibase_ecl/main"

# #### IMPORT #### ####

demo     <- dt_parquet(path_base, "demo", in_memory = FALSE)
adr      <- dt_parquet(path_base, "adr",  in_memory = FALSE)
drug     <- dt_parquet(path_base, "drug", in_memory = FALSE)
link     <- dt_parquet(path_base, "link", in_memory = FALSE)
out      <- dt_parquet(path_base, "out",  in_memory = FALSE)
srce     <- dt_parquet(path_base, "srce", in_memory = FALSE)
followup <- dt_parquet(path_base, "followup", in_memory = FALSE)

suspdup  <- dt_parquet(path_base, "suspdup", in_memory = FALSE)

source("00_dict.R") # from template_dictionary

Demo

# ---- Deduplicating ---- ####

demo <- demo |> 
  filter(!(UMCReportId %in% suspdup$SuspectedduplicateReportId))

# ---- Drugs ---- ####

# From a list of drugs

demo <-
  demo |>
  add_drug(
    d_code = d_drecno,
    drug_data = drug
  )

# From ATC

demo <-
  demo |>
  add_drug(
    d_code = atc_drecno,
    drug_data = drug
  )

# ---- Reactions ---- ####

demo <-
  demo |>
  add_adr(
    a_code = a_llt,
    adr_data = adr
  )

# ---- Demographics ---- ####

# Age, sex

demo <-
  demo |>
  mutate(
    age = cut(as.integer(AgeGroup),
              breaks = c(0,4,5,6,7,8),
              include.lowest = TRUE, right = TRUE,
              labels = c("<18", "18-45","45-64", "65-74", "75+")),

    sex = case_when(Gender == "1" ~ 1,
                    Gender == "2" ~ 2,
                    Gender %in% c("-","0","9") ~ NA_real_,
                    TRUE ~ NA_real_)
  )

# Death + outcome availability

demo <- 
  demo |> 
  mutate(death = 
           ifelse(UMCReportId %in% out$UMCReportId,
                  UMCReportId %in% 
                    (out |> 
                    filter(Seriousness == "1") |> 
                    pull(UMCReportId)
                    ),
                  NA)
         )

# follow-up, seriousness

demo <-
  demo |>
  mutate(
    fup = if_else(UMCReportId %in% followup$UMCReportId, 1, 0),
    serious = 
      ifelse(
        UMCReportId %in% out$UMCReportId,
        UMCReportId %in% 
          (out |> 
          filter(Serious == "Y") |> 
          pull(UMCReportId)
          ),
        NA)
  )

# year

demo <- 
  demo |> 
  mutate(
    year = as.numeric(substr(FirstDateDatabase, start = 1, stop = 4))
    )

# type of reporter

demo <-
  demo |>
  left_join(
    srce |> transmute(UMCReportId, type_reporter = Type),
    by = "UMCReportId")

# explicit multi-level vars

demo <-
  demo |> 
  mutate(
    Type = factor(Type, levels = c("1", "2", "3", "4", "5")),
    type_reporter = factor(type_reporter,
                           levels = c("1", "2", "3", "4", "5")),
    Region = factor(Region, levels = c("1", "2", "3", "4", "5", "6"))
  )

levels(demo$Type) <-
  c("Spontaneous",                     
     "Report from study",                        
     "Other",   
     "Not available to sender (unknown)",   
     "PMS/Special monitoring")

levels(demo$type_reporter) <-
  c("Physician",
    "Pharmacist",
    "Other Health Professional",
    "Lawyer",
    "Consumer or other non health professional")

levels(demo$Region) <-
  c("African Region",                                    
    "Region of the Americas",                            
    "South-East Asia Region",                            
    "European Region",                                   
    "Eastern Mediterranean Region",                      
    "Western Pacific Region"  
  )

# ---- Check ---- ####

demo |>
  check_dm(cols = c(names(d_drecno), names(a_llt), "fup"))
#>               [,1]
#> ipilimumab      86
#> atezolizumab    69
#> durvalumab      68
#> nivolumab      225
#> pembrolizumab  298
#> avelumab        43
#> cemiplimab      27
#> tremelimumab    27
#> a_embolism       9
#> a_colitis      104
#> a_pneumonitis  103
#> fup            292
link <-
  link |> 
    add_drug(
      d_code = d_drecno,
      drug_data = drug
    ) |> 
    add_adr(
      a_code = a_llt,
      adr_data = adr
    )

Basic models

# ---- Bivariate ---- ####

rb <- 
  demo |> 
  compute_dispro(
    y = names(a_llt),
    x = names(d_drecno)
  )

# remove the hashes to save your results
## write.csv2(rb, here("outputs", "rb.csv"), row.names = FALSE)

# ---- Multivariate ---- ####

mod <-
  glm(a_colitis ~ nivolumab + age + sex,
      family = "binomial",
      data = demo)

rm <-
  summary(mod)$coefficients |>
  compute_or_mod(
    estimate = Estimate,
    std_er = Std..Error
    )

# remove the hashes to save your results
## write.csv2(rm, here("outputs", "rm.csv"), row.names = FALSE)

Basic descriptive

# ---- General description ---- ####

r_desc <- 
  demo |> 
  desc_facvar(
    vf = c("age", "sex",
           "type_reporter",
           "Type",
           "year",
           names(d_drecno),
           names(a_llt),
           "serious", "death"),
    ncat_max = 20
  )

# ---- Time to onset ---- ####

r_tto <-
  desc_tto(
    link,
    adr_s = names(a_llt),
    drug_s = names(d_drecno)
    ) 

# ---- Dechallenge ---- ####

r_dch <-
  desc_dch(
    link,
    adr_s = names(a_llt),
    drug_s = names(d_drecno)
    )

# ---- Rechallenge ---- ####

r_rch <-
  desc_rch(
    link,
    demo_data = demo,
    adr_s = names(a_llt),
    drug_s = names(d_drecno)
  )