What kinds of households own cars in the Triangle?

1 Introduction

Let’s find out what proportion of households in the Triangle within each of the U.S. Census ACS household income brackets own 0, 1, 2, and 3+ cars!

2 Get some data

Let’s pull in the necessary census data. Vehicles by Income is not tabulated in the ACS area releases, so we’ll need to use the microdata (2019 in this case). Thus, we’ll define the Triangle as the Census Public Use Microdata Areas (PUMAs) that intersect the Raleigh-Durham-Cary Combined Statistical Area, keeping only those PUMAs with more than 50% of their area inside it:

show the code

# Interactive map: the PUMAs kept for the analysis, with the CSA boundary
# drawn on top as an unfilled red outline (alpha.regions = 0).
puma_layer <- mapview(PUMAs, layer.name = "PUMAs in analysis")
csa_outline <- mapview(
  csa,
  color = "red",
  lwd = 2,
  alpha.regions = 0,
  col.region = "red",
  layer.name = "Raleigh-Durham-Cary CSA"
)
puma_layer + csa_outline

3 Summarize data

Now we summarize the household-level data by breaking the household income variable into 11 brackets, and the vehicles variable into categories of 0, 1, 2, and 3+ vehicles. We also plot the results.

show the code

# Vehicle ownership by household income bracket ----
# Reads the cached PUMS extract, keeps one record per household inside the
# study-area PUMAs, bins income and vehicle counts, sums the WGTP weights for
# each (income bracket, vehicle category) pair, and draws a 100%-stacked bar
# chart of vehicle categories per income bracket.

#write_csv(data,"data.csv")

data <- read_csv("data.csv")

# Bracket labels in ascending income order. Defined once and reused as the
# factor levels so the labels and the plot ordering cannot drift apart.
income_levels <- c(
  "<$10k", "$10k-$15k", "$15k-$25k", "$25k-$35k", "$35k-$50k", "$50k-$75k",
  "$75k-$100k", "$100k-$125k", "$125k-$150k", "$150k-$200k", ">$200k"
)

# PUMAs with more than 50% of their area inside the CSA (`geoids` is built
# earlier in the document).
study_pumas <- geoids$PUMACE10[which(geoids$perc_puma_area > 50)]

data2 <- data %>%
  # Keep the SPORDER == 1 record only; presumably one person row per
  # household, so each household's weight is counted once.
  filter(SPORDER == 1) %>%
  filter(PUMA %in% study_pumas) %>%
  mutate(
    # case_when() evaluates its conditions in order, so each branch only needs
    # an upper bound. This also closes the gap the previous version had, where
    # incomes from 20,000 up to (but not including) 25,000 matched no branch,
    # became NA, and were silently dropped. A missing HINCP still yields NA.
    household_income_bracket = case_when(
      HINCP < 10000 ~ "<$10k",
      HINCP < 15000 ~ "$10k-$15k",
      HINCP < 25000 ~ "$15k-$25k",
      HINCP < 35000 ~ "$25k-$35k",
      HINCP < 50000 ~ "$35k-$50k",
      HINCP < 75000 ~ "$50k-$75k",
      HINCP < 100000 ~ "$75k-$100k",
      HINCP < 125000 ~ "$100k-$125k",
      HINCP < 150000 ~ "$125k-$150k",
      HINCP < 200000 ~ "$150k-$200k",
      HINCP >= 200000 ~ ">$200k"
    ),
    vehicles = case_when(
      VEH == 0 ~ "0",
      VEH == 1 ~ "1",
      VEH == 2 ~ "2",
      VEH > 2 ~ "3+"
    )
  )

plot_data <- data2 %>%
  group_by(household_income_bracket, vehicles) %>%
  # `proportion` holds the summed WGTP weight for the cell (a weighted count,
  # despite the name); the within-bracket share is `vehicle_percentage`.
  summarize(proportion = sum(WGTP), .groups = "drop") %>%
  group_by(household_income_bracket) %>%
  mutate(vehicle_percentage = 100 * proportion / sum(proportion)) %>%
  ungroup() %>%
  # Drop households whose income could not be assigned to a bracket.
  filter(!is.na(household_income_bracket))

# Order the stack from "3+" down to "0" and the x axis by ascending income.
plot_data$vehicles <- factor(
  plot_data$vehicles,
  levels = c("3+", "2", "1", "0")
)

plot_data$household_income_bracket <- factor(
  plot_data$household_income_bracket,
  levels = income_levels
)

# Stacked + percent
# position = "fill" rescales each bar to 1, so the weighted counts in
# `proportion` are displayed as within-bracket shares.
ggplot(plot_data, aes(fill = vehicles, y = proportion, x = household_income_bracket)) +
  geom_bar(position = "fill", stat = "identity")

---
title: "What kinds of households own cars in the Triangle?"
format:
  html:
    title-block-banner: true
    toc: true
    toc-location: left
    toc-depth: 3
    html-math-method: katex
    code-fold: true
    code-summary: "show the code"
    code-overflow: wrap
    code-tools: true
    number-sections: true
    theme:
      dark: slate
      light: flatly
    fig-width: 9
    fig-height: 6
editor: visual
---

## Introduction

Let's find out what proportion of households in the Triangle within each of the U.S. Census ACS household income brackets own 0, 1, 2, and 3+ cars!

## Get some data

Let's pull in the census data necessary. Vehicles by Income is not tabulated in the ACS area releases so we'll need to use the microdata (2019 in this case). Thus, we'll define the Triangle as all Census Public Use Microdata Areas that intersect the Raleigh-Durham-Cary Combined Statistical Area:

```{r, output=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
library(tidyverse)
library(tidycensus)
library(survey)
library(srvyr)
library(sf)
library(mapview)
library(viridis)

# No API key is stored in this document. tidycensus reads the key from the
# CENSUS_API_KEY environment variable (set it once in ~/.Renviron); it is only
# needed if the commented-out get_pums() call below is re-run.
# NOTE(review): a key was previously hard-coded here and re-installed on every
# render; that key should be treated as leaked and revoked.

# All NC PUMAs with their full area, used to measure overlap with the CSA.
pumas <- tigris::pumas(state = "NC", year = 2019) %>%
  mutate(area_puma = as.numeric(st_area(.)))

csa <- tigris::combined_statistical_areas(year = 2019) %>%
  filter(GEOID == "450") %>% # get Raleigh combined statistical area
  mutate(area_csa = as.numeric(st_area(.)))

# Intersect the CSA with each PUMA and record what percentage of the PUMA's
# area falls inside the CSA.
geoids <- st_intersection(csa, pumas) %>%
  mutate(
    int_area = as.numeric(st_area(.)),
    perc_puma_area = 100 * int_area / area_puma
  )

# Keep PUMAs with more than 50% of their area inside the CSA.
PUMAs <- pumas %>%
  filter(PUMACE10 %in% geoids$PUMACE10[which(geoids$perc_puma_area > 50)])

mapview(PUMAs, layer.name = "PUMAs in analysis") +
  mapview(csa, color = "red", lwd = 2, alpha.regions = 0, col.region = "red", layer.name = "Raleigh-Durham-Cary CSA")

v20 <- pums_variables %>%
  filter(year == 2019, survey == "acs5")

# data <- get_pums(
#   variables = c("PUMA", "HINCP", "FHINCP", "VEH", "FVEHP"),
#   state = "NC",
#   survey = "acs5",
#   recode = TRUE,
#   year = 2019,
#   rep = "housing"
# )
```

```{r, results="asis"}
mapview(PUMAs, layer.name = "PUMAs in analysis") +
  mapview(csa, color = "red", lwd = 2, alpha.regions = 0, col.region = "red", layer.name = "Raleigh-Durham-Cary CSA")
```

## Summarize data

Now we summarize the household level data by breaking the household income variable into 11 brackets, and the vehicles variable into categories of 0,1,2, and 3+ vehicles. We also plot the results.

```{r, results="asis", message=FALSE, warning=FALSE}
#write_csv(data,"data.csv")

data <- read_csv("data.csv")

# Bracket labels in ascending income order, reused below as factor levels.
income_levels <- c(
  "<$10k", "$10k-$15k", "$15k-$25k", "$25k-$35k", "$35k-$50k", "$50k-$75k",
  "$75k-$100k", "$100k-$125k", "$125k-$150k", "$150k-$200k", ">$200k"
)

data2 <- data %>%
  filter(SPORDER == 1) %>%
  filter(PUMA %in% geoids$PUMACE10[which(geoids$perc_puma_area > 50)]) %>%
  mutate(
    # case_when() evaluates conditions in order, so each branch only needs an
    # upper bound. The earlier version tested HINCP < 20000 for the
    # "$15k-$25k" bracket, which dropped incomes from 20,000 up to 25,000.
    household_income_bracket = case_when(
      HINCP < 10000 ~ "<$10k",
      HINCP < 15000 ~ "$10k-$15k",
      HINCP < 25000 ~ "$15k-$25k",
      HINCP < 35000 ~ "$25k-$35k",
      HINCP < 50000 ~ "$35k-$50k",
      HINCP < 75000 ~ "$50k-$75k",
      HINCP < 100000 ~ "$75k-$100k",
      HINCP < 125000 ~ "$100k-$125k",
      HINCP < 150000 ~ "$125k-$150k",
      HINCP < 200000 ~ "$150k-$200k",
      HINCP >= 200000 ~ ">$200k"
    ),
    vehicles = case_when(
      VEH == 0 ~ "0",
      VEH == 1 ~ "1",
      VEH == 2 ~ "2",
      VEH > 2 ~ "3+"
    )
  )

plot_data <- data2 %>%
  group_by(household_income_bracket, vehicles) %>%
  # `proportion` is the summed WGTP weight for the cell (a weighted count).
  summarize(proportion = sum(WGTP), .groups = "drop") %>%
  group_by(household_income_bracket) %>%
  mutate(vehicle_percentage = 100 * proportion / sum(proportion)) %>%
  ungroup() %>%
  filter(!is.na(household_income_bracket))

plot_data$vehicles <- factor(plot_data$vehicles, levels = c("3+", "2", "1", "0"))

plot_data$household_income_bracket <- factor(
  plot_data$household_income_bracket,
  levels = income_levels
)

# Stacked + percent
ggplot(plot_data, aes(fill = vehicles, y = proportion, x = household_income_bracket)) +
  geom_bar(position = "fill", stat = "identity")
```