Spanish Flu Data

Spanish Flu in Chicago (from Kaggle)

# Load the raw Chicago Spanish-flu records and cache them unchanged.
spanish_rawdf = read.csv("./data/spanish_flu_chicago.csv")
save(spanish_rawdf, file = "rdas/spanish_rawdf.rda")

# Weekly mortality: `mortality` is the number of input rows recorded for each
# value of `week` (column names are normalised by clean_names() first).
spanish_df = summarise(
  group_by(clean_names(spanish_rawdf), week),
  mortality = n()
)
save(spanish_df, file = "rdas/spanish_df.rda")

SARS-CoV-1 Data

SARS-CoV-1 Global and U.S. Data (from Kaggle)

# SARS totals per day offset from 2003/03/17, summed over every row of the
# dataset that shares the same report date.
sars_global =
  read.csv("./data/sars_2003_complete_dataset_clean.csv") %>%
  clean_names() %>%
  # Keep only the day offset and the two counters, under project-wide names.
  transmute(
    pandemic_duration = as.duration("2003/03/17" %--% as.Date(date)) / ddays(1),
    sars_cumulative_cases = cumulative_number_of_case_s,
    sars_cumulative_deaths = number_of_deaths
  ) %>%
  group_by(pandemic_duration) %>%
  summarise(
    across(c(sars_cumulative_cases, sars_cumulative_deaths), sum)
  ) %>%
  # Change relative to the previous row; the first row has no predecessor,
  # so lag() leaves it NA.
  mutate(
    sars_new_cases = sars_cumulative_cases - lag(sars_cumulative_cases),
    sars_new_deaths = sars_cumulative_deaths - lag(sars_cumulative_deaths)
  )

save(sars_global, file = "rdas/sars_global.rda")

# "United States" rows of the SARS dataset, with the day offset from
# 2003/03/17 and row-over-row changes in the two counters.
sars_us =
  read.csv("./data/sars_2003_complete_dataset_clean.csv") %>%
  clean_names() %>%
  filter(country == "United States") %>%
  select(-number_recovered) %>%
  rename(
    sars_cumulative_cases = cumulative_number_of_case_s,
    sars_cumulative_deaths = number_of_deaths
  ) %>%
  mutate(
    date = as.Date(date),
    pandemic_interval = "2003/03/17" %--% date,
    pandemic_duration = as.duration(pandemic_interval) / ddays(1)
  ) %>%
  # Differences follow the row order of the file (no explicit sort is
  # applied); the first row is NA because lag() has nothing before it.
  mutate(
    sars_new_cases = sars_cumulative_cases - lag(sars_cumulative_cases),
    sars_new_deaths = sars_cumulative_deaths - lag(sars_cumulative_deaths)
  )

save(sars_us, file = "rdas/sars_us.rda")

SARS-CoV-1 Global (from Kaggle)

# Per-country running total of `number_of_deaths`, in file row order.
# NOTE(review): sars_global renames `number_of_deaths` to
# `sars_cumulative_deaths`, i.e. treats it as already cumulative. If that is
# right, the cumsum() here accumulates a cumulative series -- confirm intent.
sars_df =
  "./data/sars_2003_complete_dataset_clean.csv" %>%
  read_csv() %>%
  janitor::clean_names() %>%
  select(-number_recovered) %>%
  group_by(country) %>%
  mutate(deaths_cum = cumsum(number_of_deaths))

# The saved tibble is still grouped by `country`.
save(sars_df, file = "rdas/sars_df.rda")

H1N1 Data

H1N1 Global and U.S. Mortality Data (from Kaggle)

# H1N1 records with parsed dates and the day offset from 2009/04/24,
# sorted by that offset. Shared input for the global and U.S. death tables.
h1n1_kaggle =
  read.csv("./data/h1n1_data.csv") %>%
  clean_names() %>%
  select(-link) %>%
  rename(
    h1n1_cumulative_cases = cumulative_no_of_cases,
    h1n1_cumulative_deaths = cumulative_no_of_deaths
  ) %>%
  mutate(
    date = as.Date(date, "%m/%d/%y"),
    pandemic_interval = "2009/04/24" %--% date,
    pandemic_duration = as.duration(pandemic_interval) / ddays(1)
  ) %>%
  arrange(pandemic_duration)

# H1N1 deaths per day offset, summed over all rows of h1n1_kaggle that share
# the offset (missing values are skipped), plus the change from the previous
# offset.
h1n1_global_deaths =
  h1n1_kaggle %>%
  group_by(pandemic_duration) %>%
  summarise(
    across(
      c(h1n1_cumulative_cases, h1n1_cumulative_deaths),
      ~ sum(.x, na.rm = TRUE)
    )
  ) %>%
  # Only the death columns are kept; the first row's new-death value is NA
  # because lag() has no earlier row.
  transmute(
    pandemic_duration,
    h1n1_new_deaths = h1n1_cumulative_deaths - lag(h1n1_cumulative_deaths),
    h1n1_cumulative_deaths
  )

save(h1n1_global_deaths, file = "rdas/h1n1_global_deaths.rda")

# H1N1 deaths for the rows labelled "United States of America", keeping the
# day offset, the row-over-row change and the cumulative count.
h1n1_us_deaths =
  h1n1_kaggle %>%
  filter(country == "United States of America") %>%
  # The first row's new-death value is NA because lag() has no earlier row.
  transmute(
    pandemic_duration,
    h1n1_new_deaths = h1n1_cumulative_deaths - lag(h1n1_cumulative_deaths),
    h1n1_cumulative_deaths
  )

save(h1n1_us_deaths, file = "rdas/h1n1_us_deaths.rda")

H1N1 Global and U.S. Case Data (from WHO)

# H1N1 case counts from the global FluNet export, summed over every country
# row and indexed by the number of days between 2009/04/06 and each row's
# `edate`.
h1n1_global_cases = 
  read.csv("./data/global_FluNetInteractiveReport.csv", skip = 3) %>% 
  clean_names() %>% 
  select(country, sdate, edate, h1n1_new_cases = ah1n12009) %>% 
  mutate(
    sdate = as.Date(sdate, "%m/%d/%y"),
    edate = as.Date(edate, "%m/%d/%y"),
    pandemic_interval = "2009/04/06" %--% edate,
    pandemic_duration = as.duration(pandemic_interval) / ddays(1)
  ) %>% 
  # Drop rows whose end date falls before the reference date.
  filter(pandemic_duration >= 0) %>% 
  arrange(pandemic_duration) %>% 
  # One total per day offset; missing counts are skipped, not propagated.
  group_by(pandemic_duration) %>% 
  summarize(
    h1n1_new_cases = sum(h1n1_new_cases, na.rm = TRUE)
  ) %>% 
  # Running total in day-offset order, using base R's cumsum().
  mutate(
    h1n1_cumulative_cases = cumsum(h1n1_new_cases)
  )

save(h1n1_global_cases, file = "rdas/h1n1_global_cases.rda")

# U.S. H1N1 case counts from the FluNet workbook, restricted to rows whose
# start date lies strictly between 2009/04/06 and 2010/04/06, indexed by the
# number of days between 2009/04/06 and `start_date`.
h1n1_us_cases = 
  read_excel("./data/USA_FluNetInteractiveReport.xlsx", skip = 5) %>% 
  clean_names() %>% 
  select(start_date:end_date, h1n1_new_cases = a_h1n1_pdm09) %>% 
  mutate(
    start_date = as.Date(start_date),
    end_date = as.Date(end_date),
    pandemic_interval = "2009/04/06" %--% start_date,
    pandemic_duration = as.duration(pandemic_interval) / ddays(1)
  ) %>%  
  filter(start_date > "2009/04/06") %>% 
  filter(start_date < "2010/04/06") %>% 
  # Coerce the count to numeric, then accumulate it in row order with base R's
  # cumsum(). An NA count makes every later cumulative value NA.
  mutate(
    h1n1_new_cases = as.numeric(h1n1_new_cases),
    h1n1_cumulative_cases = cumsum(h1n1_new_cases)
  ) %>% 
  select(pandemic_duration, h1n1_new_cases, h1n1_cumulative_cases)

save(h1n1_us_cases, file = "rdas/h1n1_us_cases.rda")

Influenza B Data

Influenza B Global and U.S. Data (from WHO)

# Influenza B case counts from the global FluNet export, summed over every
# country row and indexed by the number of days between 2009/04/06 and each
# row's `edate`.
influenza_global_cases = 
  read_csv("./data/global_FluNetInteractiveReport.csv", skip = 3) %>% 
  clean_names() %>% 
  select(country, sdate, edate, influenza_b_new_cases = inf_b) %>% 
  mutate(
    sdate = as.Date(sdate, "%m/%d/%y"),
    edate = as.Date(edate, "%m/%d/%y"),
    pandemic_interval = "2009/04/06" %--% edate,
    pandemic_duration = as.duration(pandemic_interval) / ddays(1)
  ) %>% 
  # Drop rows whose end date falls before the reference date.
  filter(pandemic_duration >= 0) %>% 
  arrange(pandemic_duration) %>% 
  # One total per day offset; missing counts are skipped, not propagated.
  group_by(pandemic_duration) %>% 
  summarize(
    influenza_b_new_cases = sum(influenza_b_new_cases, na.rm = TRUE)
  ) %>% 
  # Running total in day-offset order, using base R's cumsum().
  mutate(
    influenza_b_cumulative_cases = cumsum(influenza_b_new_cases)
  )

save(influenza_global_cases, file = "rdas/influenza_global_cases.rda")

# U.S. influenza B counts (`b_total`) from the FluNet workbook, restricted to
# rows whose start date lies strictly between 2019/09/30 and 2020/09/30,
# indexed by the number of days between 2019/09/30 and `end_date`.
influenza_us_cases = 
  read_excel("./data/USA_FluNetInteractiveReport.xlsx", skip = 5) %>% 
  clean_names() %>% 
  select(start_date:end_date, b_total) %>% 
  mutate(start_date = as.Date(start_date),
         end_date = as.Date(end_date),
         pandemic_interval = "2019/09/30" %--% end_date, 
         pandemic_duration = as.duration(pandemic_interval) / ddays(1)) %>% 
  filter(start_date > "2019/09/30") %>% 
  filter(start_date < "2020/09/30") %>% 
  # Rows with a missing value in any remaining column are removed before the
  # running total is taken.
  drop_na() %>% 
  # Coerce the count to numeric, then accumulate it in row order with base R's
  # cumsum().
  mutate(
    influenza_new_cases = as.numeric(b_total),
    influenza_cumulative_cases = cumsum(influenza_new_cases)
  ) %>% 
  select(pandemic_duration, influenza_new_cases, influenza_cumulative_cases)

save(influenza_us_cases, file = "rdas/influenza_us_cases.rda")

COVID-19 Data

COVID-19 Global Data (from WHO)

# COVID-19 counters per day offset from 2020/01/03, summed over every row
# that shares the same reported date.
covid_global =
  read_csv("./data/covid19_global.csv") %>%
  clean_names() %>%
  select(-c(country_code, who_region)) %>%
  rename(
    covid_new_cases = new_cases,
    covid_cumulative_cases = cumulative_cases,
    covid_new_deaths = new_deaths,
    covid_cumulative_deaths = cumulative_deaths
  ) %>%
  mutate(
    date = as.Date(date_reported, "%m/%d/%Y"),
    pandemic_interval = "2020/01/03" %--% date,
    pandemic_duration = as.duration(pandemic_interval) / ddays(1)
  ) %>%
  group_by(pandemic_duration) %>%
  # Sums every column positioned from covid_new_cases through
  # covid_cumulative_deaths; NA values are not removed.
  summarise(across(covid_new_cases:covid_cumulative_deaths, sum))

save(covid_global, file = "rdas/covid_global.rda")

COVID-19 U.S. Data (from CDC)

# Download every record from a paged JSON endpoint.
#
# Requests pages of `chunk_size` rows ordered by `cdc_report_dt`, using the
# `$limit` / `$offset` query parameters, until a page comes back with fewer
# rows than requested.
#
# url: endpoint to query.
# Returns a list with one tibble per downloaded page, in request order.
# Signals an error if any request returns an HTTP error status.
get_all_cases = function(url) {
  
  all_cases = vector("list", length = 0)
  
  loop_index = 1
  chunk_size = 99999
  DO_NEXT = TRUE
  
  while (DO_NEXT) {
    message("Getting data, page ", loop_index)
    
    response = 
      GET(url,
          query = list(`$order` = "cdc_report_dt",
                       `$limit` = chunk_size,
                       `$offset` = as.integer((loop_index - 1) * chunk_size)
                       )
          )
    
    # Fail here rather than parse an error payload as if it were a data page.
    stop_for_status(response)
    
    all_cases[[loop_index]] = 
      response %>%
      content("text") %>%
      fromJSON() %>%
      as_tibble()
    
    # A short (or empty) page means the last record has been reached.
    DO_NEXT = dim(all_cases[[loop_index]])[1] == chunk_size
    loop_index = loop_index + 1
  }
  
  all_cases
  
}

url = "https://data.cdc.gov/resource/vbim-akqf.json"

# One row per day offset from 2020/01/22. Each downloaded record is numbered
# in report-date order, so the per-day count of distinct ids is that day's
# record count and the largest id seen on a day is the running total.
covid_us_cases =
  bind_rows(get_all_cases(url)) %>%
  mutate(
    cdc_report_dt = as.Date(cdc_report_dt),
    pandemic_interval = "2020/01/22" %--% cdc_report_dt,
    pandemic_duration = as.duration(pandemic_interval) / ddays(1)
  ) %>%
  arrange(cdc_report_dt) %>%
  mutate(case_id = row_number()) %>%
  relocate(case_id, pandemic_duration) %>%
  group_by(pandemic_duration) %>%
  summarise(
    covid_new_cases = n_distinct(case_id),
    covid_cumulative_cases = max(case_id)
  )

save(covid_us_cases, file = "rdas/covid_us_cases.rda")

# COVID-19 death counts for the rows labelled "United States", indexed by the
# number of days between 2020/01/22 and each row's `start_week`.
covid_us_deaths = 
  GET("https://data.cdc.gov/resource/r8kw-7aab.json", query = list("$limit" = 10000)) %>% 
  content("text") %>% 
  fromJSON() %>% 
  as_tibble() %>% 
  clean_names() %>% 
  filter(state == "United States") %>% 
  # `covid_deaths` is coerced to numeric before use. The running total is
  # taken in the order the rows were returned, with base R's cumsum(); an NA
  # count makes every later cumulative value NA.
  mutate(start_week = as.Date(start_week),
         covid_new_deaths = as.numeric(covid_deaths), 
         pandemic_interval = "2020/01/22" %--% start_week, 
         pandemic_duration = as.duration(pandemic_interval) / ddays(1), 
         covid_cumulative_deaths = cumsum(covid_new_deaths)) %>% 
  select(pandemic_duration, covid_new_deaths, covid_cumulative_deaths)

save(covid_us_deaths, file = "rdas/covid_us_deaths.rda")

COVID-19 Chicago Data (from cityofchicago.org)

# Chicago COVID-19 records with a non-zero `deaths_total` and parsed dates.
covid19_chicago_df =
  read.csv("./data/covid19_chicago.csv") %>%
  clean_names() %>%
  filter(deaths_total != 0) %>%
  mutate(date = as.Date(date, "%m/%d/%y"))

# Sum `deaths_total` over the rows of covid19_chicago_df dated from
# `first_day` to `last_day` (both inclusive) and label the result with
# `week_no`. Returns a one-row data frame with columns `week` and `mortality`.
chicago_week_mortality = function(week_no, first_day, last_day) {
  from = as.Date(first_day)
  to = as.Date(last_day)

  # `!!` injects the argument values so they cannot be shadowed by a column
  # of the same name.
  covid19_chicago_df %>%
    filter(date >= !!from, date <= !!to) %>%
    summarize(
      week = !!week_no,
      mortality = sum(deaths_total)
    )
}

# Seven consecutive 7-day windows starting on 2020-04-07.
row1 = chicago_week_mortality(1, "2020-04-07", "2020-04-13")
row2 = chicago_week_mortality(2, "2020-04-14", "2020-04-20")
row3 = chicago_week_mortality(3, "2020-04-21", "2020-04-27")
row4 = chicago_week_mortality(4, "2020-04-28", "2020-05-04")
row5 = chicago_week_mortality(5, "2020-05-05", "2020-05-11")
row6 = chicago_week_mortality(6, "2020-05-12", "2020-05-18")
row7 = chicago_week_mortality(7, "2020-05-19", "2020-05-25")

covid19_chicago = bind_rows(row1, row2, row3, row4, row5, row6, row7)

save(covid19_chicago, file = "rdas/covid19_chicago.rda")

Merged Data

Global Cases

# Combine the global case tables on the shared day offset. The key is given
# explicitly so the join cannot silently pick up another column that two
# inputs happen to share.
global_cases_first = 
  full_join(covid_global, h1n1_global_cases, by = "pandemic_duration")
global_cases_second = 
  full_join(global_cases_first, influenza_global_cases, by = "pandemic_duration")

# Long format: one row per day offset, disease and trend.
global_cases = 
  full_join(global_cases_second, sars_global, by = "pandemic_duration") %>% 
  # Remove the underscore inside "influenza_b" so that splitting on the first
  # "_" below yields the disease name and the trend.
  rename(influenzab_new_cases = influenza_b_new_cases, 
         influenzab_cumulative_cases = influenza_b_cumulative_cases) %>% 
  select(pandemic_duration, h1n1_new_cases, h1n1_cumulative_cases, covid_new_cases, covid_cumulative_cases, sars_new_cases, sars_cumulative_cases, influenzab_new_cases, influenzab_cumulative_cases) %>%
  arrange(pandemic_duration) %>% 
  pivot_longer(
    h1n1_new_cases:influenzab_cumulative_cases,
    names_to = "class",
    values_to = "n"
  ) %>% 
  # extra = "merge" keeps everything after the first "_" together, e.g.
  # "h1n1_new_cases" -> disease "h1n1", trend "new_cases".
  separate(class, into = c("disease", "trend"), sep = "_", extra = "merge")

save(global_cases, file = "rdas/global_cases.rda")

Global Deaths

# Combine the global death tables on the shared day offset. The key is given
# explicitly so the join cannot silently pick up another column that two
# inputs happen to share.
global_deaths = 
  full_join(h1n1_global_deaths, covid_global, by = "pandemic_duration")

# Long format: one row per day offset, disease and trend.
global_deaths =
  full_join(global_deaths, sars_global, by = "pandemic_duration") %>% 
  select(pandemic_duration, h1n1_new_deaths, h1n1_cumulative_deaths, covid_new_deaths, covid_cumulative_deaths, sars_new_deaths, sars_cumulative_deaths) %>% 
  arrange(pandemic_duration) %>% 
  pivot_longer(
    h1n1_new_deaths:sars_cumulative_deaths,
    names_to = "class",
    values_to = "n"
  ) %>% 
  # extra = "merge" keeps everything after the first "_" together, e.g.
  # "h1n1_new_deaths" -> disease "h1n1", trend "new_deaths".
  separate(class, into = c("disease", "trend"), sep = "_", extra = "merge")

save(global_deaths, file = "rdas/global_deaths.rda")

U.S. Cases

# Combine the U.S. case tables on the shared day offset. The key is given
# explicitly so the join cannot silently pick up another column that two
# inputs happen to share.
us_cases_first = 
  full_join(covid_us_cases, h1n1_us_cases, by = "pandemic_duration")
us_cases_second = 
  full_join(us_cases_first, influenza_us_cases, by = "pandemic_duration")

# Long format: one row per day offset, disease and trend.
us_cases = 
  full_join(us_cases_second, sars_us, by = "pandemic_duration") %>% 
  select(pandemic_duration, h1n1_new_cases, h1n1_cumulative_cases, covid_new_cases, covid_cumulative_cases, sars_new_cases, sars_cumulative_cases, influenza_new_cases, influenza_cumulative_cases) %>% 
  arrange(pandemic_duration) %>% 
  pivot_longer(
    h1n1_new_cases:influenza_cumulative_cases,
    names_to = "class",
    values_to = "n"
  ) %>% 
  # extra = "merge" keeps everything after the first "_" together, e.g.
  # "h1n1_new_cases" -> disease "h1n1", trend "new_cases".
  separate(class, into = c("disease", "trend"), sep = "_", extra = "merge")

save(us_cases, file = "rdas/us_cases.rda")

Basic Reproductive Rate

r0_data_url = "https://www.thelancet.com/journals/laninf/article/PIIS1473-3099(20)30484-9/fulltext"
reproductive_rate_html = read_html(r0_data_url)

# Take the first <table> on the page, keep four of its rows, and give the
# pathogen columns the names used elsewhere in the project.
table_r0 =
  reproductive_rate_html %>%
  html_nodes(css = "table") %>%
  first() %>%
  html_table() %>%
  clean_names() %>%
  select(-interpretation) %>%
  filter(
    x %in% c(
      "Transmissibility, R0",
      "Incubation period, days",
      "Interval between symptom onset and maximum infectivity, days",
      "Proportion with mild illness"
    )
  ) %>%
  rename(
    covid_19 = sars_co_v_2,
    sars_2003 = sars_co_v,
    spanish_flu = pandemic_influenza_1918,
    h1n1_2009 = pandemic_influenza_2009
  ) %>%
  # Relabel the R0 row; every other label is left as scraped.
  mutate(x = replace(x, x == "Transmissibility, R0", "Basic Reproductive rate, R0")) %>%
  as_tibble()

# NOTE(review): every other save() in this script writes to "rdas/...";
# this one targets a different directory -- confirm that is intended.
save(table_r0, file = "covid19_future.github.io/rdas/table_r0.rda")