Time series
This link has many data sources suitable for time-series CEIC data base
Tableau pivot function is equivalent to the R Dplyr ::pivotlong
## 3) View my Tableau dashboard here to see the heatmap of cyber attacks.
packages = c('scales', 'viridis', 'lubridate', 'ggthemes', 'gridExtra',
'tidyverse', 'readxl', 'knitr', 'data.table')
for(p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p, character.only = T)
}
attacks <- read_csv("data/eventlog.csv")
kable(head(attacks))
| timestamp | source_country | tz |
|---|---|---|
| 2015-03-12 15:59:16 | CN | Asia/Shanghai |
| 2015-03-12 16:00:48 | FR | Europe/Paris |
| 2015-03-12 16:02:26 | CN | Asia/Shanghai |
| 2015-03-12 16:02:38 | US | America/Chicago |
| 2015-03-12 16:03:22 | CN | Asia/Shanghai |
| 2015-03-12 16:03:45 | CN | Asia/Shanghai |
Group by the timezone to count the events. Use levels to sequence the days by the predefined sequence wkday. Also convert into factor to allow counting.
wkday_levels <- c('Sunday', 'Monday',
'Tuesday', 'Wednesday',
'Thursday', 'Friday',
'Saturday')
attacks <- attacks %>%
group_by(tz) %>%
do(make_hr_wkday(.$timestamp,
.$source_country,
.$tz ) ) %>%
ungroup() %>%
mutate(wkday = factor(wkday,
levels = wkday_levels),
hour = factor(hour,
levels = 0:23))
head (attacks, 5)
# A tibble: 5 x 4
tz source_country wkday hour
<chr> <chr> <fct> <fct>
1 Africa/Cairo BG Saturday 20
2 Africa/Cairo TW Sunday 6
3 Africa/Cairo TW Sunday 8
4 Africa/Cairo CN Sunday 11
5 Africa/Cairo US Sunday 15
Use geom_tile to plot heatmap. Viridis is a colour scheme
# This is to pull some data/ counting required, after finishing, ungroup the data to reuse later on.
grouped <- attacks %>%
count(wkday, hour) %>%
ungroup()
ggplot(grouped, aes(hour, wkday, fill = n)) +
geom_tile(color = "white", size = 0.1) +
theme_tufte(base_family = "Helvetica") +
coord_equal() +
scale_fill_viridis(name = "# of Events", label = comma) +
labs(x = NULL, y = NULL,
title = "Events per day of week & time of day") +
theme(axis.ticks = element_blank(),
plot.title = element_text(hjust = 0.5),
legend.title = element_text(size = 8),
legend.text = element_text(size = 6) )
air <- read_excel("data/arrivals_by_air.xlsx")
From the existing field month-year, using lubriate month and year function to pull out the year and month into 2 new columns. Set the numbers to factors as 1 to 12 and label the labels as the month abbreviation.
air$month <- factor(month(air$`Month-Year`),
levels=1:12,
labels=month.abb,
ordered=TRUE)
#ymd tells lubridate this is the existing format of data in month-year
air$year <- year(ymd(air$`Month-Year`))
colnames(air)
[1] "Month-Year" "Republic of South Africa"
[3] "Canada" "USA"
[5] "Bangladesh" "Brunei"
[7] "China" "Hong Kong SAR (China)"
[9] "India" "Indonesia"
[11] "Japan" "South Korea"
[13] "Kuwait" "Malaysia"
[15] "Myanmar" "Pakistan"
[17] "Philippines" "Saudi Arabia"
[19] "Sri Lanka" "Taiwan"
[21] "Thailand" "United Arab Emirates"
[23] "Vietnam" "Belgium & Luxembourg"
[25] "CIS" "Finland"
[27] "France" "Germany"
[29] "Ireland" "Italy"
[31] "Netherlands" "Spain"
[33] "Switzerland" "United Kingdom"
[35] "Australia" "New Zealand"
[37] "month" "year"
Extract the country you want, the month and the year for cycle plot
New_Zealand <- air %>%
select(`New Zealand`,
month,
year) %>%
filter(year >= 2010)
The reference line needs to be calculated, add a hline using the mean.
hline.data <- New_Zealand %>%
group_by(month) %>%
summarise(avgvalue = mean(`New Zealand`)) %>%
ungroup()
ggplot() +
geom_line(data=New_Zealand, aes(x=year, y=`New Zealand`, group=month),
colour="black") +
geom_hline(aes(yintercept=avgvalue),
data=hline.data, linetype=6, colour="red", size=0.5) +
facet_grid(~month) +
labs(axis.text.x = element_blank()) +
xlab("") +
ylab("No. of Visitors")
Compare with the one done using Tableau here