// Load the JHU CSSE global COVID-19 confirmed-case time series, keep only
// Italy, US and Spain, and reshape each country's wide row (one CSV column
// per date) into an array of { date, cumsum } points.
//
// FIX: the original declared the URL as a single string literal broken across
// four physical lines — an unterminated-string syntax error in JavaScript.
// Concatenation keeps the source readable without embedding newlines.
const url =
  "https://raw.githubusercontent.com/CSSEGISandData/" +
  "COVID-19/master/csse_covid_19_data/" +
  "csse_covid_19_time_series/" +
  "time_series_covid19_confirmed_global.csv";

d3.csv(url, function (rawdata) {
  // Drop columns the chart never uses; only the country name and the
  // per-date columns remain on each row.
  rawdata.forEach(function (d) {
    delete d['Province/State'];
    delete d['Lat'];
    delete d['Long'];
  });

  // Group rows by country (d3 v4/v5 "nest" API).
  let data = d3.nest()
    .key(function (d) { return d['Country/Region']; })
    .entries(rawdata);

  // Keep only the countries we plot.
  data = data.filter(function (d) {
    return d.key === "Italy" ||
      d.key === "US" ||
      d.key === "Spain";
  });

  // Turn each country's first (wide) row into an array of { date, cumsum }.
  // NOTE(review): only parsedData[0] is read, so a country with several
  // Province/State rows would use just its first row — Italy, US and Spain
  // each have a single row in this dataset, but confirm if the list grows.
  data.forEach(function (d) {
    const parsedData = d.values;
    parsedData.forEach(function (row) {
      delete row['Country/Region'];
    });
    const keys = Object.keys(parsedData[0]);     // date strings, e.g. "1/22/20"
    const values = Object.values(parsedData[0]); // cumulative counts
    // FIX: the original declared `var i` here, shadowing the forEach index
    // parameter `i` of the same callback; a loop-local `let k` avoids that.
    for (let k = 0; k < keys.length; k++) {
      // parseTime2 is defined elsewhere on this page/file.
      d.values[k] = { date: parseTime2(keys[k]), cumsum: +values[k] };
    }
  });
});
If you're looking for R code to do the same thing, try this:
library("dplyr")
data = read.csv("https://raw.githubusercontent.com/CSSEGISandData/
COVID-19/master/csse_covid_19_data/
csse_covid_19_time_series/
time_series_covid19_confirmed_global.csv")
data = data %>%
select(-Province.State, -Lat, -Long) %>%
melt(id.vars = "Country.Region")
names(data) = c("country", "date", "cumsum")
data = data %>%
mutate(date = as.character(date)) %>%
mutate(date = gsub("X", "", date)) %>%
mutate(date = as.Date(date, format = "%m.%d.%y")) %>%
group_by(country) %>%
arrange(date) %>%
unique() %>%
ungroup() %>%
group_by(country, date) %>%
mutate(cumsum = sum(cumsum)) %>%
unique() %>%
group_by(country) %>%
mutate(nthday = seq(1, length(cumsum), 1)) %>%
filter(!is.na(cumsum)) %>%
mutate(lag = c(0, diff(cumsum))) %>%
ungroup() %>%
filter(country %in% c("US", "Italy", "Spain", "China"))