req.packcages <- c("labelled", "data.table", "dplyr", "tidyr", "ggplot2", "scales", "gridExtra", "grid", "gtable") for(pack in req.packcages) if(!(pack %in% rownames(installed.packages()))) install.packages(pack) # Load required packages -------------------------------------------------- require(labelled) # Manipulating Labelled Data require(data.table) # Enhanced data.frame require(dplyr) # dplyr: a grammar of data manipulation require(tidyr) # Easily Tidy Data with 'spread()' and 'gather()' Functions require(ggplot2) # Create Elegant Data Visualisations Using the Grammar of Graphics require(scales) # Generic plot scaling methods require(gridExtra) # To arrange multiple grid-based plots on a page, and draw tables require(grid) # A rewrite of the graphics layout capabilities, plus some support for interaction require(gtable) # Extract label info from plot # Generate ADSL data ----------------------------------------------------- set.seed(1234) adsl_gen <- function(project, trial, n, trt, n_countries = 9) { country_base <- tibble(country = c("IND", "JPN", "BEL", "BGR", "CZE", "DEU", "EST", "FIN", "GBR", "HRV", "HUN", "ISR", "ITA", "LTU", "LVA", "POL", "RUS", "SRB", "SVK", "TUR", "UKR", "CAN", "USA"), region = c("ASIA", "ASIA", "EUROPE", "EUROPE", "EUROPE", "EUROPE", "EUROPE", "EUROPE", "EUROPE", "EUROPE", "EUROPE", "EUROPE", "EUROPE", "EUROPE", "EUROPE", "EUROPE", "EUROPE", "EUROPE", "EUROPE", "EUROPE", "EUROPE", "NORTH AMERICA", "NORTH AMERICA")) country_base_trial <- sample_n(country_base, n_countries) %>% mutate(country_num = 1:n_countries, fill_c = sapply(2-nchar(as.character(country_num)), function(x) paste(rep(0, x), collapse = ""))) base <- sample_n(country_base_trial, n, replace = TRUE) %>% group_by(country) %>% mutate(siteid = paste0(country_num, fill_c, sample(1:3, n(), replace = TRUE))) %>% group_by(siteid) %>% arrange(country_num, siteid) %>% mutate(studyid = paste(project, trial, sep = "-"), subjid_1 = 1:n(), n0 = 3-nchar(as.character(subjid_1)), fill = sapply(n0, function(x) paste(rep(0, x), collapse = "")), subjid = paste0(siteid, fill, subjid_1), usubjid = paste(studyid, subjid, sep = "/")) %>% select(studyid, usubjid, subjid, siteid, country, region) # generate tretament adsl_1 <- base %>% group_by(siteid) %>% mutate(trtp = sample(names(trt), n(), replace = TRUE), trtpn = trt[trtp], trta = trtp, trtan = trtpn, age = round(runif(n(), 18, 79)), ageu = "years", sex = sample(c("M", "F"), n(), replace = TRUE)) # Generate study status and dates lambda_rand = -log(1-0.2)/14 survival = 1-(1-exp(-lambda_rand*14)) adsl_2 <- adsl_1 %>% group_by(country) %>% mutate(scdt = as.Date("2015/01/01") + floor(rexp(n(), rate=runif(1, 1/110, 1/90))), screen_time = floor(rexp(n(), rate=-log(1-0.2)/14)), randdt = case_when(screen_time >= 14 ~ scdt + 14), study_time = case_when( trtpn == 1 ~ floor(rexp(n(), rate=-log(1-0.08)/182)), TRUE ~ floor(rexp(n(), rate=-log(1-0.08)/182))), eosdt = case_when( study_time >= 182 ~ randdt + 182, TRUE ~ randdt + study_time), eosstt = case_when( screen_time < 14 ~ "SCREEN FAILURE", study_time < 182 ~ "DISCONTINUED", TRUE ~ "COMPLETED")) # Generate treatment status and dates adsl <- adsl_2 %>% group_by(eosstt) %>% mutate( trt_time = case_when( trtpn == 1 ~ floor(rexp(n(), rate=-log(1-(1-(1-0.3)/(1-0.08)))/182)), TRUE ~ floor(rexp(n(), rate=-log(1-(1-(1-0.2)/(1-0.08)))/182))), eotdt = case_when( study_time < 182 ~ eosdt, trt_time >= 182 ~ randdt + 182, TRUE ~ randdt + trt_time), eotstt = case_when( screen_time < 14 ~ "SCREEN FAILURE", study_time < 182 | trt_time < 182 ~ "DISCONTINUED", TRUE ~ "COMPLETED"), fasfl = ifelse(screen_time >= 14, "Y", "N"), saffl = fasfl) %>% select(-screen_time, -study_time, -trt_time) labels <- c("Study Identifier", "Unique Subject Identifier", "Subject Identifier for the Study", "Study Site Identifier", "Country", "Geographic Region Grouping", "Treatment planned", "Treatment planned (N)", "Treatment actual", "Treatment actual (N)", "Age", "Age Units", "Sex", "Date of screening", "Date of randomization", "End of Study Date", "End of Study Status", "End of Treatment Decision Date", "End of Treatment Status", "Full Analysis Set Population Flag", "Safety Population Flag" ) names(labels) <- colnames(adsl) for (name in names(labels)) var_label(adsl[[name]]) <- labels[[name]] adsl } adsl.4321 <- adsl_gen(project = "nn4654" , trial = "4321", n = 1000, n_countries = 12, trt = structure(1:2, names=c("new drug", "old drug"))) adsl.5678 <- adsl_gen(project = "nn4654" , trial = "5678", n = 600, n_countries = 9, trt = structure(1:2, names=c("new drug", "old drug"))) adsl <- bind_rows(adsl.4321, adsl.5678) # Generate ADAE --------------------------------------------- sample_base <- fread("C:/Users/sffl/Desktop/R course/data/adverse_events.csv", sep = ";") %>% sample_n(100) %>% mutate(rate_trt1 = exp(rnorm(n(), log(1/5000), sd=0.2)), diff = sample(c(1,0), n(), replace = TRUE, prob=c(0.05,0.95)), rate_trt2 = case_when( diff == 1 ~ exp(log(rate_trt1)+rnorm(1, 0.8, sd= 1)), diff == 0 ~ rate_trt1 ), true_rate = (rate_trt1*exp(-rate_trt1))/(rate_trt2*exp(-rate_trt2))) adae <- bind_cols(adsl[rep(seq_len(nrow(adsl)), each= nrow(sample_base)),], sample_base[rep(seq_len(nrow(sample_base)), nrow(adsl)),]) %>% group_by(aehlgt) %>% mutate( wait_time1 = case_when( trtpn == 1 ~ floor(rexp(n(), rate_trt1)), trtpn == 2 ~ floor(rexp(n(), rate_trt2))), wait_time2 = wait_time1 + case_when( trtpn == 1 ~ floor(rexp(n(), rate_trt1)), trtpn == 2 ~ floor(rexp(n(), rate_trt2))), wait_time3 = wait_time2 + case_when( trtpn == 1 ~ floor(rexp(n(), rate_trt1)), trtpn == 2 ~ floor(rexp(n(), rate_trt2)))) %>% select(-rate_trt1,-rate_trt1,-rate_trt2) %>% gather(event, time, wait_time1:wait_time3) %>% arrange(usubjid, aehlgt) %>% filter(time <= eosdt - scdt) %>% mutate(astdt = scdt + time, trtemfl = ifelse(time < 14, "N", "Y")) %>% select(-event, -time) #save(adsl, file = "C:/R/Development/nnpackages/stdRate/data/adsl.RData") #save(adae, file = "C:/R/Development/nnpackages/stdRate/data/adae.RData")