library("broom")
library("dplyr")
library("ggplot2")
library("ggrepel")
library("modelsummary")
library("ggdag")
library("purrr")
library("stringr")
library("ggpubr")
# For Maps
library("sf")
library("stars")
library("rnaturalearth")
library("rnaturalearthdata")
library("ggspatial")
library("lemon")
# For Animation
library("gifski")
library("av")
library("magick")
library("gganimate")
# Removing previous files and setting the
# directory
rm(list = ls())
setwd("/Users/bgpopescu/Dropbox/john_cabot/teaching/stats/week13/lab/")
1 Intro
Open a new Quarto document. Select all text by pressing Ctrl + A (Windows) or Cmd + A (Mac), then press Delete. Next, copy and paste the following preamble.
---
title: "Lab 13"
author: "Your Name"
date: "September 13, 2024"
format:
html:
toc: true
number-sections: true
colorlinks: true
smooth-scroll: true
embed-resources: true
---
Render the document and save it under “week13” in your “stats” folder.
2 Background
In this hypothetical situation (the data is made up), the World Bank is planning on launching a program designed to reduce the risk of malaria in Malawi by providing insecticide-treated bed nets. So they provided insecticide-treated bed nets only to city B from 2017 to 2020. The World Bank selected 24 individuals (over 3 years) from city B and they want to investigate whether receiving such nets has any effect on people’s risk of malaria.
In this document, we want to calculate \(\beta_3\) from:
\[ Y_{it} = \beta_0 + \beta_1 \text{Group}_i + \beta_2 \text{Time}_t +\beta_3 (\text{Group}_i \times \text{Time}_t) + \epsilon_{it} \]
where:
Group = 1 if this is the treatment group
Time = 1 if this is the period after intervention
\(\beta_0\) - mean of the control group in the pre-treatment period
\(\beta_1\) - the change in outcome across groups
\(\beta_2\) - the change in outcome within groups.
\(\beta_3\) - the differences in differences
In our case, this is equivalent to:
\[ \text{Malaria Risk}_{it} = \beta_0 + \beta_1 \text{City B}_i + \beta_2 \text{Post-2017}_t +\beta_3 (\text{City B}_i \times \text{Post-2017}_t) + \epsilon_{it} \]
Preliminaries: Create a folder within the relevant week called “lab”. Within it, create another folder called “data” and another one, called “graphs”.
In a first instance, we want to draw a causal model which explains the effect of the insecticide-treated bed nets on participant risk of malaria, given the time and group. See the DAG models below (together with the code to produce them).
Step 1 Code
<- dagify(malaria_risk ~ net + year + city,
malaria_dag ~ year + city,
net exposure = "net",
outcome = "malaria_risk",
labels = c(malaria_risk = "Malaria Risk",
net = "Mosquito Net",
year = "Year",
city = "City",
pre_malaria_risk = "Pre Malaria Risk"),
coords = list(x = c(malaria_risk = 3, net = 1, year = 2, city = 2),
y = c(malaria_risk = 2, net = 2, year = 3, city = 1)))
<-data.frame(tidy_dagitty(malaria_dag))
bigger_dag $type[bigger_dag$name=="malaria_risk"]<-"Outcome"
bigger_dag$type[bigger_dag$name=="net"]<-"Intervention"
bigger_dag<-min(bigger_dag$x)
min_lon_dag_x<-max(bigger_dag$x)
max_lon_dag_x<-min(bigger_dag$y)
min_lat_dag_y<-max(bigger_dag$y) max_lat_dag_y
Step 2 Code
= c("Outcome"="green3",
col "Intervention"="red",
"Confounder"="grey60")
<-c("Outcome", "Intervention", "Confounder")
order_col
<-ggplot(data = bigger_dag, aes(x = x, y = y, xend = xend, yend = yend, color=type)) +
example_daggeom_dag_point() +
geom_dag_edges() +
coord_sf(xlim = c(min_lon_dag_x-1, max_lon_dag_x+1), ylim = c(min_lat_dag_y-1, max_lat_dag_y+1))+
scale_colour_manual(values = col,
name = "Group",
breaks = order_col)+
geom_label_repel(data = subset(bigger_dag, !duplicated(bigger_dag$label)),
aes(label = label), fill = alpha(c("white"),0.8))+
theme_bw()+
labs(x = "", y="")
#We now save the graph in a folder called "graphs"
ggsave(example_dag, file = "./graphs/example_dag1.jpg",
height = 20, width = 25,
units = "cm", dpi = 200)
example_dag
3 Loading the Data
Go to the following links and download the following:
Place all the data in the data folder.
3.1 Loading the Shape Files
st_read
reads the shapefiles from your hardrive. read_stars
reads a tif file that shows the elevation for your area of interest. ne_countries
loads a shapefile with all the countries around the world.
#Country shapefile
<- st_read("./data/malawi-latest-free/gadm41_MWI_0.shp")
mwi #Region 1 shapefile
<- st_read("./data/malawi-latest-free/gadm41_MWI_1.shp")
mwi1 #Region 3 shapefile
<- st_read("./data/malawi-latest-free/gadm41_MWI_3.shp")
mwi3 #Selected communes shapefile
<- st_read("./data/malawi-latest-free/selected_communes3.shp")
select_comm #Raster with elevation
<- read_stars("./data/elev.tif")
elev #World Shapefile
<- ne_countries(scale = "medium", returnclass = "sf")
world #Switched off spherical geometry (s2)
::sf_use_s2(FALSE) sf
3.2 Simplifying the shapefiles for better rendering
st_simplify
simplifies the polygons for quicker rendering.
#Simplifying polygons for quicker rendering
#Note: you have to play around with dTolerance to figure out the appropriate value
#Too little - your computer will need more time to render the file
#Too much - the country polygons will be too simple
<-st_simplify(mwi, dTolerance = 0.005)
mwi<-st_simplify(mwi1, dTolerance = 0.005)
mwi1<-st_simplify(mwi3, dTolerance = 0.005)
mwi3<-st_simplify(select_comm, dTolerance = 0.0005)
select_comm<-st_simplify(world, dTolerance = 0.005) world
3.3 Calculating the appropriate zoom level for Malawi
We now calculate the zoom level for the entire country of Malawi.
#Calculating the appropriate zoom level
<- st_bbox(mwi)
mwi_extent
<-mwi_extent["xmin"]
min_lon_x<-mwi_extent["xmax"]
max_lon_x
<-mwi_extent["ymin"]
min_lat_y<-mwi_extent["ymax"] max_lat_y
We now create a new polygon to emphasize where Malawi is on the World Map.
#Creating a dataframe to emphasize Malawi and adding some error
= 5
error
= data.frame("lon" = c(min_lon_x - error, max_lon_x + error,
sp_df - error, max_lon_x + error),
min_lon_x "lat" = c(min_lat_y - error, min_lat_y - error,
+ error, max_lat_y + error)) max_lat_y
We now make a spatial polygon out of it so that we can highlight where Malawi is on the world map.
#Creating polygon to indicate zoom
<- sp_df %>%
poly st_as_sf(coords = c("lon", "lat"),
crs = st_crs(mwi)) %>%
st_bbox() %>%
st_as_sfc()
#Examining what we just created
ggplot() +
geom_sf(data = mwi, fill="red", color = "red", linewidth = 0.9)+
geom_sf(data = poly, fill=NA, color = "red", linewidth = 0.3)
3.4 Cropping the raster
We now crop our raster only to the area of interest. Again, if we have too much, our computer will have a hard time rendering the file.
#Step1: Reducing raster resolution: Higher dx/day values mean lower resolution
<-st_warp(elev, st_as_stars(st_bbox(elev), dx = 0.05, dy = 0.05))
elev2#Step2: Removing really high pixel values for good contrast in MWI
> 2000] <- NA
elev2[elev2 #Step3: Cropping
<-st_crop(elev2, poly) dem_mwi
<-ggplot() +
map_wordgeom_sf(data = world, fill=NA, color = "black", linewidth = 0.3)+
geom_sf(data = mwi, fill="red", color = "red", linewidth = 0.7)+
geom_sf(data = poly, fill=NA, color = "red", linewidth = 0.9)+
theme_bw()+
labs(x = "Longitude", y="Latitude")+
ggtitle("Malawi on the World Map")+
theme(axis.text.x = element_text(size=14),
axis.text.y = element_text(size=14),
axis.title=element_text(size=14),
plot.title = element_text(hjust = 0.5),
legend.position.inside = c(1, 0),
#Legend.position values should be between 0 and 1. c(0,0) corresponds to the "bottom left"
#and c(1,1) corresponds to the "top right" position.
legend.box.background = element_rect(fill='white'),
legend.background = element_blank(),
legend.text=element_text(size=12))
#ggspatial::annotation_scale(location = 'tr')
map_word
ggsave(map_word, file="./graphs/map_world.jpg", height=15, width=30, units = "cm", dpi=300)
3.5 Mapping the region of interest - Malawi
#Selecting only the region of interest
<-subset(mwi1, NAME_1=="Nkhata Bay")
mwi1_lab#Defining colors for raster
<- colorRampPalette(c("grey90", "grey30"))
grey_palette # Generate a sequence of colors based on the number of breaks desired
<- 10 # Adjust this based on your data
num_breaks <- grey_palette(num_breaks) grey_colors
This is the region of Nkhata Bay where the World Bank conducts the study. Within the region of Nkhata Bay, the World Bank chose city B to provide nets and examined its effects from 2017 onwards. We now want to draw a new map to emphasize where Nkhata Bay is.
<-0.2
error<-ggplot() +
map_malawigeom_stars(data=elev2, show.legend=FALSE) +
scale_fill_gradientn(colours = grey_colors)+
geom_sf(data = select_comm, fill="green", color = "green", linewidth = 0.4)+
geom_sf(data = mwi1, fill=NA, color = "blue", linewidth = 0.2)+
geom_sf(data = mwi1_lab, fill=NA, color = "blue", linewidth = 0.9)+
geom_sf_text(data=mwi1_lab, aes(label = NAME_1), size=3.5)+
theme_bw()+
coord_sf(xlim = c(min_lon_x-5*error, max_lon_x+5*error),
ylim = c(min_lat_y-error, max_lat_y+error))+
labs(x = "Longitude", y="Latitude")+
ggtitle("Nkhata Bay in Malawi")+
theme(axis.text.x = element_text(size=14),
axis.text.y = element_text(size=14),
axis.title=element_text(size=14),
plot.title = element_text(hjust = 0.5),
legend.position.inside = c(1, 0),
#legend.position.inside values should be between 0 and 1. c(0,0) corresponds to the "bottom left"
#and c(1,1) corresponds to the "top right" position.
legend.box.background = element_rect(fill='white'),
legend.background = element_blank(),
legend.text=element_text(size=12))+
::annotation_scale(location = 'tr')
ggspatial#Saving the map
ggsave(map_malawi, file="./graphs/map_malawi.jpg", height=15, width=15, units = "cm", dpi=300)
map_malawi
3.6 Calculating the appropriate zoom level for the Selected Communes
We now calculate the zoom level for the region of interest - Nkhata Bay.
#Calculating the appropriate zoom level for the selected commune
<- st_bbox(select_comm)
mwi_extent
<-mwi_extent["xmin"]
min_lon_x<-mwi_extent["xmax"]
max_lon_x
<-mwi_extent["ymin"]
min_lat_y<-mwi_extent["ymax"] max_lat_y
#Re-reading the shapefiles.
#We now want more accurate polygons since we are zooming in.
#This is why we load them again
<- st_read("./data/malawi-latest-free/gadm41_MWI_0.shp")
mwi <- st_read("./data/malawi-latest-free/gadm41_MWI_1.shp")
mwi1 <- st_read("./data/malawi-latest-free/gadm41_MWI_3.shp")
mwi3 <- st_read("./data/malawi-latest-free/selected_communes3.shp")
select_comm <- read_stars("./data/elev.tif")
dem
#Step1: Reducing raster resolution: Higher dx/day values mean lower resolution
<-st_warp(dem, st_as_stars(st_bbox(elev), dx = 0.02, dy = 0.02))
elev2#Step2: Removing really high pixel values for good contrast in MWI
> 2000] <- NA
elev2[elev2
#Step3: Creating a dataframe to emphasize Malawi and adding some error
= 0.5
error = data.frame("lon" = c(min_lon_x - error, max_lon_x + error,
sp_df - error, max_lon_x + error),
min_lon_x "lat" = c(min_lat_y - error, min_lat_y - error,
+ error, max_lat_y + error))
max_lat_y
#Step4: Creating polygon to indicate zoom
<- sp_df %>%
poly st_as_sf(coords = c("lon", "lat"),
crs = st_crs(mwi)) %>%
st_bbox() %>%
st_as_sfc()
#Step5: Cropping
<-st_crop(elev2, poly)
dem_mwi
<- ne_countries(scale = "medium", returnclass = "sf")
world #Turning off spherical geometries
::sf_use_s2(FALSE) sf
#We also want to include roads in our map, to provide more context.
#Reading the OSM roads data
<-st_read("./data/malawi-latest-free/gis_osm_roads_free_1_crop2.shp")
roads#Step1: Dissolving polylines
<- st_combine(roads)
roads2 #Step2: Turning roads2 into an sf object
<-st_as_sf(roads2)
roads3#Step3: Only the roads of interest
<- sf::st_intersection(roads3, select_comm) roads_crop1
3.7 Mapping the Individuals
Let us first look at our data.
<- read.csv(file = './data/did_data_geo_malawi.csv')
diff_data glimpse(diff_data, n=10)
Rows: 8,000
Columns: 10
$ year <int> 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2013, 201…
$ income <dbl> 1284.551, 1285.832, 1285.997, 1285.157, 1286.313, 1287.24…
$ age <int> 33, 34, 35, 36, 37, 38, 39, 40, 51, 52, 53, 54, 55, 56, 5…
$ sex <chr> "Woman", "Woman", "Woman", "Woman", "Woman", "Woman", "Wo…
$ malaria_risk <dbl> 36.26529, 35.10382, 73.17664, 28.98390, 51.18721, 51.9071…
$ id <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, …
$ lat <dbl> -11.27476, -11.27476, -11.27476, -11.27476, -11.27476, -1…
$ lon <dbl> 34.03006, 34.03006, 34.03006, 34.03006, 34.03006, 34.0300…
$ after <int> 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, …
$ city <chr> "City A", "City A", "City A", "City A", "City A", "City A…
<-0.05
error<-ggplot() +
figure_animatedgeom_sf(data = mwi1_lab, fill=NA, color = "blue", linewidth = 0.9)+
geom_point(data = diff_data, aes(x=lon, y=lat, color = city, size = malaria_risk, alpha=0.3))+
theme_bw()+
labs(x = "Longitude", y="Latitude")+
ggtitle("The Three Districts Selected in Nkhata Bay, Malawi\n Location of 1000 Individuals\n For the Experiment")+
theme(axis.text.x = element_text(size=14),
axis.text.y = element_text(size=14),
axis.title=element_text(size=14),
plot.title = element_text(hjust = 0.5),
legend.box.background = element_rect(fill='white'),
legend.background = element_blank(),
legend.text=element_text(size=12),
legend.position="bottom")+
coord_sf(xlim = c(min_lon_x-3*error, max_lon_x+3*error), ylim = c(min_lat_y-error, max_lat_y+error))+
::annotation_scale(location = 'tr')+
ggspatiallabs(title = 'Year: {frame_time}', x = 'lon', y = 'lat')+
transition_states(year)+
transition_time(as.integer(year))
figure_animated