Take-home Exercise 2 - Age-sex Pyramid

Exercise to replicate Age-Sex pyramid in R

Frostbear https://sg.linkedin.com/in/farahfoo (SMU Masters in IT Business (Fintech and Analytics)) https://scis.smu.edu.sg/master-it-business
2022-02-05

Installing and loading package

# Packages this exercise depends on.
packages <- c("tidyverse", "readxl", "ggthemes")

# Install each package only when it is not already available, then attach it.
# requireNamespace() checks availability without attaching, so library() is
# the single place where the package is actually loaded.
for (p in packages) {
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}

Loading data

# Read the population extract from the project's data folder.
pop_data <- read_csv("data/respopagesextod2021.csv")

# Inspect the loaded tibble. Pass `pop_data` itself: a bare `data` resolves to
# the utils::data() function, so glimpse() would print a function signature
# instead of the column overview.
glimpse(pop_data)

Summarising data

# Keep only the age group, sex and population columns.
cut_data <- pop_data %>%
  select(AG, Sex, Pop)

# Total population for every age group / sex combination.
# `.groups = "drop"` returns an ungrouped tibble directly, which replaces the
# separate ungroup() step and silences the regrouping message summarise()
# emits when more than one grouping variable is in play.
summary_sex <- pop_data %>%
  group_by(AG, Sex) %>%
  summarise(Pop = sum(Pop), .groups = "drop")

# Preview the first five rows of the summary.
head(summary_sex, 5)
# A tibble: 5 x 3
  AG       Sex        Pop
  <chr>    <chr>    <dbl>
1 0_to_4   Females  87730
2 0_to_4   Males    91400
3 10_to_14 Females  97980
4 10_to_14 Males   102330
5 15_to_19 Females 100190

Plotting double geom_bar Age-sex pyramid

# Age-sex pyramid: male counts are negated so their bars extend to the left of
# zero and female bars to the right once the axes are flipped.
ggplot(summary_sex, aes(x = AG)) +
  geom_bar(
    data = summary_sex[summary_sex$Sex == "Males", ],
    aes(y = Pop * -1),
    stat = "identity",
    fill = "blue"
  ) +
  geom_bar(
    data = summary_sex[summary_sex$Sex == "Females", ],
    aes(y = Pop),
    stat = "identity",
    fill = "pink"
  ) +
  # White divider along the zero line between the two halves.
  geom_hline(yintercept = 0, colour = "white", lwd = 1) +
  coord_flip() +
  # Male values are plotted as negatives; show magnitudes on the tick labels
  # so the axis does not report negative populations.
  scale_y_continuous(
    breaks = seq(-160000, 160000, 40000),
    labels = function(v) format(abs(v), scientific = FALSE, trim = TRUE)
  ) +
  # The x aesthetic carries the age groups (AG), so label it as such.
  labs(y = "Population", x = "Age group") +
  ggtitle("                        Male                                                Female")

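Because AG is a character column, the age bands in the pyramid above are sorted alphabetically (for example, 10_to_14 comes before 5_to_9) rather than by age. To fix this, we convert AG to a factor with explicitly ordered levels and draw the graph again.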
# Age bands listed from youngest to oldest; used as the factor level order.
order <- c(
  "0_to_4", "5_to_9", "10_to_14", "15_to_19",
  "20_to_24", "25_to_29", "30_to_34", "35_to_39",
  "40_to_44", "45_to_49", "50_to_54", "55_to_59",
  "60_to_64", "65_to_69", "70_to_74", "75_to_79",
  "80_to_84", "85_to_89", "90_and_over"
)

# Recode AG as a factor with those levels, then sort the rows by age band.
summary_sex1 <- arrange(
  mutate(summary_sex, AG = factor(AG, levels = order)),
  AG
)

# Preview the first five rows of the re-ordered summary.
head(summary_sex1, n = 5)
# A tibble: 5 x 3
  AG       Sex        Pop
  <fct>    <chr>    <dbl>
1 0_to_4   Females  87730
2 0_to_4   Males    91400
3 5_to_9   Females  97120
4 5_to_9   Males   102390
5 10_to_14 Females  97980

Replotting a sorted pyramid

# Age-sex pyramid drawn from the factor-ordered summary, so the age bands
# follow the factor level order. Male counts are negated so their
# bars extend to the left of zero and female bars to the right.
ggplot(summary_sex1, aes(x = AG)) +
  geom_bar(
    data = summary_sex1[summary_sex1$Sex == "Males", ],
    aes(y = Pop * -1),
    stat = "identity",
    fill = "blue"
  ) +
  geom_bar(
    data = summary_sex1[summary_sex1$Sex == "Females", ],
    aes(y = Pop),
    stat = "identity",
    fill = "pink"
  ) +
  # White divider along the zero line between the two halves.
  geom_hline(yintercept = 0, colour = "white", lwd = 1) +
  coord_flip() +
  # Male values are plotted as negatives; show magnitudes on the tick labels
  # so the axis does not report negative populations.
  scale_y_continuous(
    breaks = seq(-160000, 160000, 40000),
    labels = function(v) format(abs(v), scientific = FALSE, trim = TRUE)
  ) +
  # The x aesthetic carries the age groups (AG), so label it as such.
  labs(y = "Population", x = "Age group") +
  ggtitle("                        Male                                                Female")