Visualizing Cumulative Growth of COVID-19 Cases by Day
Mar 12, 2020 #eda #time-series
library(tidyverse)
library(plotly)
# Download the wide table of confirmed COVID-19 cases published by JHU CSSE.
# NOTE(review): upstream may have renamed or archived this file since the
# script was written -- confirm the URL still resolves.
raw <- read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv")

# Tidy the wide table into one row per country and date:
#   * drop the province and coordinate columns,
#   * exclude China and the "Others" entry,
#   * turn every remaining column (treated as a date) into a row,
#   * parse the former column names as m/d/yy dates,
#   * add up all rows that share a country and date.
df <- raw %>%
  select(-c(`Province/State`, Lat, Long)) %>%
  rename(country = `Country/Region`) %>%
  filter(!country %in% c("China", "Mainland China", "Others")) %>%
  # pivot_longer() is the maintained replacement for the superseded gather().
  # The row order it produces differs, but the grouped summarise() below
  # sorts the result by country and date either way.
  pivot_longer(cols = -country, names_to = "date", values_to = "cases") %>%
  mutate(date = as.Date(date, format = "%m/%d/%y")) %>%
  group_by(country, date) %>%
  summarise(cases = sum(cases)) %>%
  ungroup()
# One data frame per country, as a list named by country.
split_by_country <- split(df, df$country)

# Named positions (within split_by_country) of the countries whose case count
# reached 100 or more on at least one date.
ind <- which(
  map_lgl(
    split_by_country,
    function(country_df) max(country_df$cases, na.rm = TRUE) >= 100
  )
)
# add_nday: reusable pipeline meant to be applied to one country's data frame.
# It sorts the rows by date, keeps only the rows with at least 100 cases, and
# numbers the surviving rows 1, 2, 3, ... in a new `nday` column, so `nday`
# counts days since the country first reached 100 cases.
add_nday <- . %>%
  arrange(date) %>%
  filter(cases >= 100) %>%
  # seq_along() gives an empty vector when no row passes the filter;
  # seq(1, length(date)) would give c(1, 0) there and make mutate() fail.
  mutate(nday = seq_along(date))
# Line colours keyed by country name. Countries not listed here are drawn
# with the "Default" entry.
pals <- c(
  "US" = "#374c80",
  "Korea, South" = "#bc5090",
  "Singapore" = "#ef5675",
  "Italy" = "#ffa600",
  "Default" = "#003f5c"
)
# Static chart: one line per country that reached 100 cases, plotted against
# the number of days since it reached 100 cases.
#   * Countries named in `pals` keep their own colour key; every other
#     country is assigned the "Default" key.
#   * `alpha` is 1 for the named countries and 0.9 for the rest, and is
#     mapped as an aesthetic rather than set as a constant.
#   * The y axis is log10 with fixed breaks; the view covers days 1 to 20.
p <- ggplot(
  data = bind_rows(map(split_by_country[ind], add_nday)) %>%
    mutate(
      col = if_else(country %in% names(pals), country, "Default"),
      alpha = if_else(country %in% names(pals), 1, 0.9)
    ),
  mapping = aes(
    x = nday,
    y = cases,
    colour = col,
    alpha = alpha,
    group = country
  )
) +
  geom_line(size = 1.3, show.legend = FALSE) +
  scale_y_log10(breaks = c(100, 200, 500, 1000, 2000, 5000, 10000)) +
  scale_colour_manual(values = pals) +
  coord_cartesian(xlim = c(1, 20)) +
  theme_minimal(base_family = "Menlo", base_size = 11) +
  labs(x = "", y = "")
# Convert the ggplot to an interactive plotly widget whose tooltip shows only
# the y value, then add a left-aligned two-line title (headline plus a smaller
# subtitle) and request a horizontal legend.
ggplotly(p, tooltip = c("y")) %>%
  layout(
    title = list(
      text = paste0(
        'A True Test to Nation\'s Leadership & Crisis Management',
        '<br>',
        '<sup>',
        'COVID-19 cumulative growth since 100th case by day',
        '</sup>'
      ),
      x = 0, y = 0.98, xanchor = "left",
      font = list(size = 12)
    ),
    # The legend position is a numeric attribute in plotly, so pass a number
    # rather than the string "0".
    legend = list(orientation = "h", y = 0)
  )