#Removing previous datasets in memory
#This deletes every object returned by ls() in the current (global) environment;
#packages that are already attached are not affected.
rm(list = ls())
#Loading the relevant libraries
library(ggplot2)
library(gridExtra)
library(dplyr)
library(purrr)
library(sf)
library(rnaturalearth)
library(rnaturalearthdata)
library(ggrepel)
1 Intro
In this lab session, we delve into the practical application of T-tests, correlations, and mapping techniques.
From our lecture, we gained insights into Z-tests, which assess differences between two groups when the standard deviation or variance is known. Typically, a sample size larger than 30 is recommended to perform a z-test. However, in many instances, the population standard deviation is unknown, leading us to rely more frequently on t-tests. To elucidate the mechanics of t-tests, let’s revisit our sample of Latin American countries and compare them with global data.
2 Loading the Data
Let us go back to the old Life Expectancy dataset. If you don’t have it anymore, you can download Life Expectancy and urbanization from the following links:
Let us re-examine the distribution of our life expectancy dataset by making a histogram. We need to first load the data:
#Setting path
#NOTE(review): this absolute path is specific to the author's machine; change it
#to the folder that contains your own `data/` directory before running.
setwd("/Users/bgpopescu/Dropbox/john_cabot/teaching/stats/week7/lab/")
#Step1: Loading the data
#Both files are read relative to the working directory set above.
life_expectancy_df <- read.csv(file = './data/life-expectancy.csv')
urbanization_df <- read.csv(file = './data/share-of-population-urban.csv')
3 Cleaning the Data
In the next few lines we keep only the years after 1900 (that is, 1901 onward), average life expectancy by country (the original dataset is a panel of countries and years), and drop entities that have no country code or an unusual one. Averaging by country removes the time dimension from the data.
#Step1: Selecting after 1900
#Keeps only the rows whose Year is strictly greater than 1900 (1901 onward).
life_expectancy_df <- subset(life_expectancy_df, Year > 1900)
#Step2: Calculating the mean
#One row per (Entity, Code) pair, holding the mean life expectancy over the
#remaining years. summarize() drops only the last grouping variable, so the
#result is still grouped by Entity.
life_expectancy_df2 <- life_expectancy_df %>%
  dplyr::group_by(Entity, Code) %>%
  dplyr::summarize(life_exp_mean = mean(Life.expectancy.at.birth..historical.))
#Step3: Cleaning the Data
#Codes to exclude: the two OWID_-prefixed codes and the empty string
#(entities that have no code at all).
weird_labels <- c("OWID_KOS", "OWID_WRL", "")
clean_life_expectancy_df <- subset(life_expectancy_df2, !(Code %in% weird_labels))
Let us look at our data. So far, we have learned to use `head`.
head(clean_life_expectancy_df, n=5)
We can also use a potentially more powerful function to look at our data, called `glimpse`.
glimpse(clean_life_expectancy_df)
Rows: 235
Columns: 3
Groups: Entity [235]
$ Entity <chr> "Afghanistan", "Albania", "Algeria", "American Samoa", "…
$ Code <chr> "AFG", "ALB", "DZA", "ASM", "AND", "AGO", "AIA", "ATG", …
$ life_exp_mean <dbl> 45.38333, 68.28611, 57.53013, 68.63750, 77.04861, 45.084…
As you can see, this tells us exactly how many observations (235) and how many columns/variables (3) we have, the first values that these variables take, and the type of each variable: in this case, character (`<chr>`) and double-precision numeric (`<dbl>`). We should consider using `glimpse` from now on to understand our data better.
4 Mapping the Data
We will now try to map the data. The package `rnaturalearth` provides the geographic data for all countries in the world. Use `ne_countries` to pull country data and choose the scale for how detailed you want your maps to be. Larger scales take more space and time to process, while smaller scales take less space and less time to process (`rnaturalearthhires` is necessary for `scale = "large"`).
#Downloading medium-resolution country polygons as an sf object
world <- ne_countries(scale = "medium", returnclass = "sf")
We are now working with a different kind of object. The data that we just cleaned, `clean_life_expectancy_df`, carries four classes at once: it is a grouped data frame (`grouped_df`), a tibble (`tbl_df` and `tbl`), and a base data frame (`data.frame`).
class(clean_life_expectancy_df)
[1] "grouped_df" "tbl_df" "tbl" "data.frame"
Tibble is the central data structure for the set of packages known as the `tidyverse`, which include `dplyr`, `ggplot2`, `tidyr`, and `readr`. The general ethos is that tibbles are lazy and surly: they do less and complain more than base data.frames. This forces problems to be tackled earlier and more explicitly, typically leading to code that is more expressive and robust.
If we look at the class for our new object world, it is a different class.
class(world)
[1] "sf" "data.frame"
glimpse(world)
Rows: 242
Columns: 169
$ featurecla <chr> "Admin-0 country", "Admin-0 country", "Admin-0 country", "A…
$ scalerank <int> 1, 1, 1, 3, 5, 6, 1, 1, 1, 3, 5, 3, 3, 3, 3, 1, 5, 3, 3, 3,…
$ labelrank <int> 3, 3, 3, 2, 3, 6, 4, 3, 4, 6, 6, 6, 6, 6, 4, 5, 2, 4, 5, 6,…
$ sovereignt <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ sov_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_dif <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ level <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
$ type <chr> "Sovereign country", "Sovereign country", "Sovereign countr…
$ tlc <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",…
$ admin <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ adm0_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ geou_dif <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ geounit <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ gu_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ su_dif <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ subunit <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ su_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ brk_diff <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,…
$ name <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ name_long <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ brk_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ brk_name <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ brk_group <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ abbrev <chr> "Zimb.", "Zambia", "Yem.", "Viet.", "Ven.", "Vat.", "Van.",…
$ postal <chr> "ZW", "ZM", "YE", "VN", "VE", "V", "VU", "UZ", "UY", "FSM",…
$ formal_en <chr> "Republic of Zimbabwe", "Republic of Zambia", "Republic of …
$ formal_fr <chr> NA, NA, NA, NA, "República Bolivariana de Venezuela", NA, N…
$ name_ciawf <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Hol…
$ note_adm0 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "U.S.A.", "U.S.…
$ note_brk <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ name_sort <chr> "Zimbabwe", "Zambia", "Yemen, Rep.", "Vietnam", "Venezuela,…
$ name_alt <chr> NA, NA, NA, NA, NA, "Holy See", NA, NA, NA, NA, NA, NA, NA,…
$ mapcolor7 <int> 1, 5, 5, 5, 1, 1, 6, 2, 1, 5, 2, 4, 4, 4, 4, 4, 4, 6, 6, 6,…
$ mapcolor8 <int> 5, 8, 3, 6, 3, 3, 3, 3, 2, 2, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6,…
$ mapcolor9 <int> 3, 5, 3, 5, 1, 4, 7, 5, 2, 4, 5, 1, 1, 1, 1, 1, 1, 6, 6, 6,…
$ mapcolor13 <int> 9, 13, 11, 4, 4, 2, 3, 4, 10, 13, 3, 1, 1, 1, 1, 1, 1, 3, 3…
$ pop_est <dbl> 14645468, 17861030, 29161922, 96462106, 28515829, 825, 2998…
$ pop_rank <int> 14, 14, 15, 16, 15, 2, 10, 15, 12, 9, 8, 8, 9, 9, 8, 12, 17…
$ pop_year <int> 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019,…
$ gdp_md <int> 21440, 23309, 22581, 261921, 482359, -99, 934, 57921, 56045…
$ gdp_year <int> 2019, 2019, 2019, 2019, 2014, 2019, 2019, 2019, 2019, 2018,…
$ economy <chr> "5. Emerging region: G20", "7. Least developed region", "7.…
$ income_grp <chr> "5. Low income", "4. Lower middle income", "4. Lower middle…
$ fips_10 <chr> "ZI", "ZA", "YM", "VM", "VE", "VT", "NH", "UZ", "UY", "FM",…
$ iso_a2 <chr> "ZW", "ZM", "YE", "VN", "VE", "VA", "VU", "UZ", "UY", "FM",…
$ iso_a2_eh <chr> "ZW", "ZM", "YE", "VN", "VE", "VA", "VU", "UZ", "UY", "FM",…
$ iso_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ iso_a3_eh <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ iso_n3 <chr> "716", "894", "887", "704", "862", "336", "548", "860", "85…
$ iso_n3_eh <chr> "716", "894", "887", "704", "862", "336", "548", "860", "85…
$ un_a3 <chr> "716", "894", "887", "704", "862", "336", "548", "860", "85…
$ wb_a2 <chr> "ZW", "ZM", "RY", "VN", "VE", "-99", "VU", "UZ", "UY", "FM"…
$ wb_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "-99", "VUT", "UZB", "UR…
$ woe_id <int> 23425004, 23425003, 23425002, 23424984, 23424982, 23424986,…
$ woe_id_eh <int> 23425004, 23425003, 23425002, 23424984, 23424982, 23424986,…
$ woe_note <chr> "Exact WOE match as country", "Exact WOE match as country",…
$ adm0_iso <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_diff <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ adm0_tlc <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_us <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_fr <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_ru <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_es <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_cn <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_tw <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_in <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_np <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_pk <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_de <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_gb <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_br <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_il <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_ps <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_sa <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_eg <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_ma <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_pt <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_ar <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_jp <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_ko <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_vn <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_tr <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_id <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_pl <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_gr <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_it <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_nl <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_se <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_bd <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_ua <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_un <int> -99, -99, -99, -99, -99, -99, -99, -99, -99, -99, -99, -99,…
$ adm0_a3_wb <int> -99, -99, -99, -99, -99, -99, -99, -99, -99, -99, -99, -99,…
$ continent <chr> "Africa", "Africa", "Asia", "Asia", "South America", "Europ…
$ region_un <chr> "Africa", "Africa", "Asia", "Asia", "Americas", "Europe", "…
$ subregion <chr> "Eastern Africa", "Eastern Africa", "Western Asia", "South-…
$ region_wb <chr> "Sub-Saharan Africa", "Sub-Saharan Africa", "Middle East & …
$ name_len <int> 8, 6, 5, 7, 9, 7, 7, 10, 7, 10, 12, 14, 15, 4, 14, 11, 24, …
$ long_len <int> 8, 6, 5, 7, 9, 7, 7, 10, 7, 30, 16, 24, 28, 4, 14, 11, 13, …
$ abbrev_len <int> 5, 6, 4, 5, 4, 4, 4, 4, 4, 6, 6, 6, 11, 4, 9, 4, 6, 10, 6, …
$ tiny <int> -99, -99, -99, 2, -99, 4, 2, 5, -99, -99, 2, 3, 3, 2, 3, -9…
$ homepart <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -99, -99, -99, -99, -99, 1…
$ min_zoom <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ min_label <dbl> 2.5, 3.0, 3.0, 2.0, 2.5, 5.0, 4.0, 3.0, 3.0, 5.0, 5.0, 5.0,…
$ max_label <dbl> 8.0, 8.0, 8.0, 7.0, 7.5, 10.0, 9.0, 8.0, 8.0, 10.0, 10.0, 1…
$ label_x <dbl> 29.92544, 26.39530, 45.87438, 105.38729, -64.59938, 12.4534…
$ label_y <dbl> -18.911640, -14.660804, 15.328226, 21.715416, 7.182476, 41.…
$ ne_id <dbl> 1159321441, 1159321439, 1159321425, 1159321417, 1159321411,…
$ wikidataid <chr> "Q954", "Q953", "Q805", "Q881", "Q717", "Q237", "Q686", "Q2…
$ name_ar <chr> "زيمبابوي", "زامبيا", "اليمن", "فيتنام", "فنزويلا", "الفاتي…
$ name_bn <chr> "জিম্বাবুয়ে", "জাম্বিয়া", "ইয়েমেন", "ভিয়েতনাম", "ভেনেজুয়েলা", "…
$ name_de <chr> "Simbabwe", "Sambia", "Jemen", "Vietnam", "Venezuela", "Vat…
$ name_en <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ name_es <chr> "Zimbabue", "Zambia", "Yemen", "Vietnam", "Venezuela", "Ciu…
$ name_fa <chr> "زیمبابوه", "زامبیا", "یمن", "ویتنام", "ونزوئلا", "واتیکان"…
$ name_fr <chr> "Zimbabwe", "Zambie", "Yémen", "Viêt Nam", "Venezuela", "Ci…
$ name_el <chr> "Ζιμπάμπουε", "Ζάμπια", "Υεμένη", "Βιετνάμ", "Βενεζουέλα", …
$ name_he <chr> "זימבבואה", "זמביה", "תימן", "וייטנאם", "ונצואלה", "קריית ה…
$ name_hi <chr> "ज़िम्बाब्वे", "ज़ाम्बिया", "यमन", "वियतनाम", "वेनेज़ुएला", "वैटिकन नग…
$ name_hu <chr> "Zimbabwe", "Zambia", "Jemen", "Vietnám", "Venezuela", "Vat…
$ name_id <chr> "Zimbabwe", "Zambia", "Yaman", "Vietnam", "Venezuela", "Vat…
$ name_it <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Cit…
$ name_ja <chr> "ジンバブエ", "ザンビア", "イエメン", "ベトナム", "ベネズエ…
$ name_ko <chr> "짐바브웨", "잠비아", "예멘", "베트남", "베네수엘라", "바티…
$ name_nl <chr> "Zimbabwe", "Zambia", "Jemen", "Vietnam", "Venezuela", "Vat…
$ name_pl <chr> "Zimbabwe", "Zambia", "Jemen", "Wietnam", "Wenezuela", "Wat…
$ name_pt <chr> "Zimbábue", "Zâmbia", "Iémen", "Vietname", "Venezuela", "Va…
$ name_ru <chr> "Зимбабве", "Замбия", "Йемен", "Вьетнам", "Венесуэла", "Ват…
$ name_sv <chr> "Zimbabwe", "Zambia", "Jemen", "Vietnam", "Venezuela", "Vat…
$ name_tr <chr> "Zimbabve", "Zambiya", "Yemen", "Vietnam", "Venezuela", "Va…
$ name_uk <chr> "Зімбабве", "Замбія", "Ємен", "В'єтнам", "Венесуела", "Вати…
$ name_ur <chr> "زمبابوے", "زیمبیا", "یمن", "ویتنام", "وینیزویلا", "ویٹیکن …
$ name_vi <chr> "Zimbabwe", "Zambia", "Yemen", "Việt Nam", "Venezuela", "Th…
$ name_zh <chr> "津巴布韦", "赞比亚", "也门", "越南", "委内瑞拉", "梵蒂冈",…
$ name_zht <chr> "辛巴威", "尚比亞", "葉門", "越南", "委內瑞拉", "梵蒂岡", "…
$ fclass_iso <chr> "Admin-0 country", "Admin-0 country", "Admin-0 country", "A…
$ tlc_diff <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_tlc <chr> "Admin-0 country", "Admin-0 country", "Admin-0 country", "A…
$ fclass_us <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_fr <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_ru <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_es <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_cn <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_tw <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_in <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_np <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_pk <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_de <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_gb <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_br <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_il <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_ps <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_sa <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_eg <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_ma <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_pt <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_ar <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_jp <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_ko <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_vn <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_tr <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_id <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_pl <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_gr <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_it <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_nl <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_se <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_bd <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_ua <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((31.28789 -2..., MULTIPOLYGON (…
At its most basic, an sf object is a collection of simple features that includes attributes and geometries in the form of a data frame. In other words, it is a data frame (or tibble) with rows of features, columns of attributes, and a special geometry column that contains the spatial aspects of the features.
We can use this format to make maps.
#Drawing the country polygons stored in the sf object `world`
ggplot(data = world) +
  geom_sf()
But let us also look more closely at the attribute table of this spatial dataframe.
glimpse(world)
Rows: 242
Columns: 169
$ featurecla <chr> "Admin-0 country", "Admin-0 country", "Admin-0 country", "A…
$ scalerank <int> 1, 1, 1, 3, 5, 6, 1, 1, 1, 3, 5, 3, 3, 3, 3, 1, 5, 3, 3, 3,…
$ labelrank <int> 3, 3, 3, 2, 3, 6, 4, 3, 4, 6, 6, 6, 6, 6, 4, 5, 2, 4, 5, 6,…
$ sovereignt <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ sov_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_dif <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ level <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
$ type <chr> "Sovereign country", "Sovereign country", "Sovereign countr…
$ tlc <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",…
$ admin <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ adm0_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ geou_dif <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ geounit <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ gu_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ su_dif <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ subunit <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ su_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ brk_diff <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,…
$ name <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ name_long <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ brk_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ brk_name <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ brk_group <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ abbrev <chr> "Zimb.", "Zambia", "Yem.", "Viet.", "Ven.", "Vat.", "Van.",…
$ postal <chr> "ZW", "ZM", "YE", "VN", "VE", "V", "VU", "UZ", "UY", "FSM",…
$ formal_en <chr> "Republic of Zimbabwe", "Republic of Zambia", "Republic of …
$ formal_fr <chr> NA, NA, NA, NA, "República Bolivariana de Venezuela", NA, N…
$ name_ciawf <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Hol…
$ note_adm0 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "U.S.A.", "U.S.…
$ note_brk <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ name_sort <chr> "Zimbabwe", "Zambia", "Yemen, Rep.", "Vietnam", "Venezuela,…
$ name_alt <chr> NA, NA, NA, NA, NA, "Holy See", NA, NA, NA, NA, NA, NA, NA,…
$ mapcolor7 <int> 1, 5, 5, 5, 1, 1, 6, 2, 1, 5, 2, 4, 4, 4, 4, 4, 4, 6, 6, 6,…
$ mapcolor8 <int> 5, 8, 3, 6, 3, 3, 3, 3, 2, 2, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6,…
$ mapcolor9 <int> 3, 5, 3, 5, 1, 4, 7, 5, 2, 4, 5, 1, 1, 1, 1, 1, 1, 6, 6, 6,…
$ mapcolor13 <int> 9, 13, 11, 4, 4, 2, 3, 4, 10, 13, 3, 1, 1, 1, 1, 1, 1, 3, 3…
$ pop_est <dbl> 14645468, 17861030, 29161922, 96462106, 28515829, 825, 2998…
$ pop_rank <int> 14, 14, 15, 16, 15, 2, 10, 15, 12, 9, 8, 8, 9, 9, 8, 12, 17…
$ pop_year <int> 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019,…
$ gdp_md <int> 21440, 23309, 22581, 261921, 482359, -99, 934, 57921, 56045…
$ gdp_year <int> 2019, 2019, 2019, 2019, 2014, 2019, 2019, 2019, 2019, 2018,…
$ economy <chr> "5. Emerging region: G20", "7. Least developed region", "7.…
$ income_grp <chr> "5. Low income", "4. Lower middle income", "4. Lower middle…
$ fips_10 <chr> "ZI", "ZA", "YM", "VM", "VE", "VT", "NH", "UZ", "UY", "FM",…
$ iso_a2 <chr> "ZW", "ZM", "YE", "VN", "VE", "VA", "VU", "UZ", "UY", "FM",…
$ iso_a2_eh <chr> "ZW", "ZM", "YE", "VN", "VE", "VA", "VU", "UZ", "UY", "FM",…
$ iso_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ iso_a3_eh <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ iso_n3 <chr> "716", "894", "887", "704", "862", "336", "548", "860", "85…
$ iso_n3_eh <chr> "716", "894", "887", "704", "862", "336", "548", "860", "85…
$ un_a3 <chr> "716", "894", "887", "704", "862", "336", "548", "860", "85…
$ wb_a2 <chr> "ZW", "ZM", "RY", "VN", "VE", "-99", "VU", "UZ", "UY", "FM"…
$ wb_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "-99", "VUT", "UZB", "UR…
$ woe_id <int> 23425004, 23425003, 23425002, 23424984, 23424982, 23424986,…
$ woe_id_eh <int> 23425004, 23425003, 23425002, 23424984, 23424982, 23424986,…
$ woe_note <chr> "Exact WOE match as country", "Exact WOE match as country",…
$ adm0_iso <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_diff <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ adm0_tlc <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_us <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_fr <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_ru <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_es <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_cn <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_tw <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_in <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_np <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_pk <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_de <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_gb <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_br <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_il <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_ps <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_sa <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_eg <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_ma <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_pt <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_ar <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_jp <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_ko <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_vn <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_tr <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_id <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_pl <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_gr <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_it <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_nl <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_se <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_bd <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_ua <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ adm0_a3_un <int> -99, -99, -99, -99, -99, -99, -99, -99, -99, -99, -99, -99,…
$ adm0_a3_wb <int> -99, -99, -99, -99, -99, -99, -99, -99, -99, -99, -99, -99,…
$ continent <chr> "Africa", "Africa", "Asia", "Asia", "South America", "Europ…
$ region_un <chr> "Africa", "Africa", "Asia", "Asia", "Americas", "Europe", "…
$ subregion <chr> "Eastern Africa", "Eastern Africa", "Western Asia", "South-…
$ region_wb <chr> "Sub-Saharan Africa", "Sub-Saharan Africa", "Middle East & …
$ name_len <int> 8, 6, 5, 7, 9, 7, 7, 10, 7, 10, 12, 14, 15, 4, 14, 11, 24, …
$ long_len <int> 8, 6, 5, 7, 9, 7, 7, 10, 7, 30, 16, 24, 28, 4, 14, 11, 13, …
$ abbrev_len <int> 5, 6, 4, 5, 4, 4, 4, 4, 4, 6, 6, 6, 11, 4, 9, 4, 6, 10, 6, …
$ tiny <int> -99, -99, -99, 2, -99, 4, 2, 5, -99, -99, 2, 3, 3, 2, 3, -9…
$ homepart <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -99, -99, -99, -99, -99, 1…
$ min_zoom <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ min_label <dbl> 2.5, 3.0, 3.0, 2.0, 2.5, 5.0, 4.0, 3.0, 3.0, 5.0, 5.0, 5.0,…
$ max_label <dbl> 8.0, 8.0, 8.0, 7.0, 7.5, 10.0, 9.0, 8.0, 8.0, 10.0, 10.0, 1…
$ label_x <dbl> 29.92544, 26.39530, 45.87438, 105.38729, -64.59938, 12.4534…
$ label_y <dbl> -18.911640, -14.660804, 15.328226, 21.715416, 7.182476, 41.…
$ ne_id <dbl> 1159321441, 1159321439, 1159321425, 1159321417, 1159321411,…
$ wikidataid <chr> "Q954", "Q953", "Q805", "Q881", "Q717", "Q237", "Q686", "Q2…
$ name_ar <chr> "زيمبابوي", "زامبيا", "اليمن", "فيتنام", "فنزويلا", "الفاتي…
$ name_bn <chr> "জিম্বাবুয়ে", "জাম্বিয়া", "ইয়েমেন", "ভিয়েতনাম", "ভেনেজুয়েলা", "…
$ name_de <chr> "Simbabwe", "Sambia", "Jemen", "Vietnam", "Venezuela", "Vat…
$ name_en <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ name_es <chr> "Zimbabue", "Zambia", "Yemen", "Vietnam", "Venezuela", "Ciu…
$ name_fa <chr> "زیمبابوه", "زامبیا", "یمن", "ویتنام", "ونزوئلا", "واتیکان"…
$ name_fr <chr> "Zimbabwe", "Zambie", "Yémen", "Viêt Nam", "Venezuela", "Ci…
$ name_el <chr> "Ζιμπάμπουε", "Ζάμπια", "Υεμένη", "Βιετνάμ", "Βενεζουέλα", …
$ name_he <chr> "זימבבואה", "זמביה", "תימן", "וייטנאם", "ונצואלה", "קריית ה…
$ name_hi <chr> "ज़िम्बाब्वे", "ज़ाम्बिया", "यमन", "वियतनाम", "वेनेज़ुएला", "वैटिकन नग…
$ name_hu <chr> "Zimbabwe", "Zambia", "Jemen", "Vietnám", "Venezuela", "Vat…
$ name_id <chr> "Zimbabwe", "Zambia", "Yaman", "Vietnam", "Venezuela", "Vat…
$ name_it <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Cit…
$ name_ja <chr> "ジンバブエ", "ザンビア", "イエメン", "ベトナム", "ベネズエ…
$ name_ko <chr> "짐바브웨", "잠비아", "예멘", "베트남", "베네수엘라", "바티…
$ name_nl <chr> "Zimbabwe", "Zambia", "Jemen", "Vietnam", "Venezuela", "Vat…
$ name_pl <chr> "Zimbabwe", "Zambia", "Jemen", "Wietnam", "Wenezuela", "Wat…
$ name_pt <chr> "Zimbábue", "Zâmbia", "Iémen", "Vietname", "Venezuela", "Va…
$ name_ru <chr> "Зимбабве", "Замбия", "Йемен", "Вьетнам", "Венесуэла", "Ват…
$ name_sv <chr> "Zimbabwe", "Zambia", "Jemen", "Vietnam", "Venezuela", "Vat…
$ name_tr <chr> "Zimbabve", "Zambiya", "Yemen", "Vietnam", "Venezuela", "Va…
$ name_uk <chr> "Зімбабве", "Замбія", "Ємен", "В'єтнам", "Венесуела", "Вати…
$ name_ur <chr> "زمبابوے", "زیمبیا", "یمن", "ویتنام", "وینیزویلا", "ویٹیکن …
$ name_vi <chr> "Zimbabwe", "Zambia", "Yemen", "Việt Nam", "Venezuela", "Th…
$ name_zh <chr> "津巴布韦", "赞比亚", "也门", "越南", "委内瑞拉", "梵蒂冈",…
$ name_zht <chr> "辛巴威", "尚比亞", "葉門", "越南", "委內瑞拉", "梵蒂岡", "…
$ fclass_iso <chr> "Admin-0 country", "Admin-0 country", "Admin-0 country", "A…
$ tlc_diff <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_tlc <chr> "Admin-0 country", "Admin-0 country", "Admin-0 country", "A…
$ fclass_us <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_fr <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_ru <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_es <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_cn <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_tw <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_in <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_np <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_pk <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_de <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_gb <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_br <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_il <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_ps <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_sa <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_eg <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_ma <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_pt <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_ar <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_jp <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_ko <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_vn <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_tr <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_id <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_pl <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_gr <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_it <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_nl <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_se <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_bd <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fclass_ua <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((31.28789 -2..., MULTIPOLYGON (…
Our data has a lot of variables that we don’t need. The variable `adm0_a3` looks like a good one to keep: we will use it to merge in our clean data frame. It might also be helpful to keep `admin`, `continent`, and `sovereignt`. These will help us identify some smaller geographic units (islands) which fall under larger jurisdictions.
#Keeping only the attributes needed for merging and for identifying territories
world2 <- subset(world, select = c(admin, adm0_a3, sovereignt, continent))
#glimpse() reports 5 columns: the four selected ones plus geometry
glimpse(world2)
Rows: 242
Columns: 5
$ admin <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ adm0_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ sovereignt <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ continent <chr> "Africa", "Africa", "Asia", "Asia", "South America", "Europ…
$ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((31.28789 -2..., MULTIPOLYGON (…
We now have a spatial dataframe with fewer variables. As mentioned, a good variable to merge on is `adm0_a3`. Let us look at the country codes in both datasets:
#Country codes present in the spatial dataframe
ctries_sp <- world2$adm0_a3
#Printing all of them
ctries_sp
[1] "ZWE" "ZMB" "YEM" "VNM" "VEN" "VAT" "VUT" "UZB" "URY" "FSM" "MHL" "MNP"
[13] "VIR" "GUM" "ASM" "PRI" "USA" "SGS" "IOT" "SHN" "PCN" "AIA" "FLK" "CYM"
[25] "BMU" "VGB" "TCA" "MSR" "JEY" "GGY" "IMN" "GBR" "ARE" "UKR" "UGA" "TKM"
[37] "TUR" "TUN" "TTO" "TON" "TGO" "TLS" "THA" "TZA" "TJK" "TWN" "SYR" "CHE"
[49] "SWE" "SWZ" "SUR" "SDS" "SDN" "LKA" "ESP" "KOR" "ZAF" "SOM" "SOL" "SLB"
[61] "SVK" "SVN" "SGP" "SLE" "SYC" "SRB" "SEN" "SAU" "STP" "SMR" "WSM" "VCT"
[73] "LCA" "KNA" "RWA" "RUS" "ROU" "QAT" "PRT" "POL" "PHL" "PER" "PRY" "PNG"
[85] "PAN" "PLW" "PAK" "OMN" "NOR" "PRK" "NGA" "NER" "NIC" "NZL" "NIU" "COK"
[97] "NLD" "ABW" "CUW" "NPL" "NRU" "NAM" "MOZ" "MAR" "SAH" "MNE" "MNG" "MDA"
[109] "MCO" "MEX" "MUS" "MRT" "MLT" "MLI" "MDV" "MYS" "MWI" "MDG" "MKD" "LUX"
[121] "LTU" "LIE" "LBY" "LBR" "LSO" "LBN" "LVA" "LAO" "KGZ" "KWT" "KOS" "KIR"
[133] "KEN" "KAZ" "JOR" "JPN" "JAM" "ITA" "ISR" "PSX" "IRL" "IRQ" "IRN" "IDN"
[145] "IND" "ISL" "HUN" "HND" "HTI" "GUY" "GNB" "GIN" "GTM" "GRD" "GRC" "GHA"
[157] "DEU" "GEO" "GMB" "GAB" "FRA" "SPM" "WLF" "MAF" "BLM" "PYF" "NCL" "ATF"
[169] "ALD" "FIN" "FJI" "ETH" "EST" "ERI" "GNQ" "SLV" "EGY" "ECU" "DOM" "DMA"
[181] "DJI" "GRL" "FRO" "DNK" "CZE" "CYN" "CYP" "CUB" "HRV" "CIV" "CRI" "COD"
[193] "COG" "COM" "COL" "CHN" "MAC" "HKG" "CHL" "TCD" "CAF" "CPV" "CAN" "CMR"
[205] "KHM" "MMR" "BDI" "BFA" "BGR" "BRN" "BRA" "BWA" "BIH" "BOL" "BTN" "BEN"
[217] "BLZ" "BEL" "BLR" "BRB" "BGD" "BHR" "BHS" "AZE" "AUT" "AUS" "IOA" "HMD"
[229] "NFK" "ATC" "ARM" "ARG" "ATG" "AGO" "AND" "DZA" "ALB" "AFG" "KAS" "ATA"
[241] "SXM" "TUV"
#Country codes present in the spatial dataframe
ctries_sp <- world2$adm0_a3
#Compact summary: type, length, and first values
glimpse(ctries_sp)
chr [1:242] "ZWE" "ZMB" "YEM" "VNM" "VEN" "VAT" "VUT" "UZB" "URY" "FSM" ...
#Country codes present in the cleaned life expectancy data
ctries_df <- clean_life_expectancy_df$Code
#Compact summary: type, length, and first values
glimpse(ctries_df)
chr [1:235] "AFG" "ALB" "DZA" "ASM" "AND" "AGO" "AIA" "ATG" "ARG" "ARM" ...
Let us now examine whether there are differences in what is included in the two lists. We can do this with the help of the `setdiff` function, which returns the elements of a vector or data frame X that are not present in a vector or data frame Y.
# Which elements of ctries_sp do not exist in ctries_df?
countries_dif <- setdiff(ctries_sp, ctries_df)
# Printing them
countries_dif
[1] "SGS" "IOT" "PCN" "SDS" "SOL" "SAH" "KOS" "PSX" "BLM" "ATF" "ALD" "CYN"
[13] "IOA" "HMD" "NFK" "ATC" "KAS" "ATA"
These are countries that are in the spatial dataframe, but not in our dataframe. What are these countries?
# Within the dataframe world2, we keep the rows whose adm0_a3 code
# matches one of the elements in countries_dif
world3 <- subset(world2, adm0_a3 %in% countries_dif)
# We investigate the first 10 entries
head(world3, n = 10)
# We print the names of those territories for better viewing
world3$admin
[1] "South Georgia and the Islands" "British Indian Ocean Territory"
[3] "Pitcairn Islands" "South Sudan"
[5] "Somaliland" "Western Sahara"
[7] "Kosovo" "Palestine"
[9] "Saint Barthelemy" "French Southern and Antarctic Lands"
[11] "Aland" "Northern Cyprus"
[13] "Indian Ocean Territories" "Heard Island and McDonald Islands"
[15] "Norfolk Island" "Ashmore and Cartier Islands"
[17] "Siachen Glacier" "Antarctica"
Let us now perform a left merge so that we can have the data that we cleaned onto the spatial dataframe.
# Performing a left join: every row of world2 is kept and the cleaned
# life expectancy columns are attached where adm0_a3 equals Code
merged <- left_join(world2, clean_life_expectancy_df,
                    by = c("adm0_a3" = "Code"))
# Investigating the structure of the result (glimpse() has no `n` argument)
glimpse(merged)
Rows: 242
Columns: 7
$ admin <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "…
$ adm0_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", …
$ sovereignt <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "…
$ continent <chr> "Africa", "Africa", "Asia", "Asia", "South America", "Eu…
$ Entity <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "…
$ life_exp_mean <dbl> 54.24722, 51.56389, 51.64861, 65.73194, 65.33421, 75.331…
$ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((31.28789 -2..., MULTIPOLYGO…
This looks good. Let us now make our first map.
# First choropleth: each country is filled by its mean life expectancy
ggplot(data = merged) +
  geom_sf(mapping = aes(fill = life_exp_mean)) +
  labs(title = "Avg. Life Expectancy 1901-2021")
We have just made a choropleth map, which indicates the intensity of a variable. Let us now add a few embellishments to make our map nicer.
# Same choropleth with a black-and-white theme, axis labels and
# latitude breaks every 10 degrees between -90 and 90
ggplot(data = merged) +
  geom_sf(mapping = aes(fill = life_exp_mean)) +
  scale_y_continuous(limits = c(-90, 90), breaks = seq(-90, 90, by = 10)) +
  labs(
    x = "X - Longitude",
    y = "Y - Latitude",
    title = "Avg. Life Expectancy 1901-2021"
  ) +
  theme_bw()
A map like this has exactly the same structure as a regular ggplot graph.
5 Performing a t-test
Let us now go back to our original task, that of calculating a t-test. Because we have this new variable - continent, we no longer need to manually ascribe a continent to our countries. Let us quickly see our labels for the continents.
# Distinct continent labels available in the merged dataframe
unique(merged[["continent"]])
[1] "Africa" "Asia"
[3] "South America" "Europe"
[5] "Oceania" "North America"
[7] "Seven seas (open ocean)" "Antarctica"
We can use the “South America” label. Note that this does not capture all the countries which we previously referred to as “Latin America.” For example, Mexico is not included here, as it is part of North America. This is to say that Latin America can be referred to as both a geographic and a linguistic space.
# Keep only the rows whose continent label is "South America"
sample_latam <- subset(merged, continent == "South America")
Let us quickly map South America.
# Choropleth of mean life expectancy restricted to the South America sample
ggplot(data = sample_latam) +
  geom_sf(mapping = aes(fill = life_exp_mean)) +
  labs(
    x = "X - Longitude",
    y = "Y - Latitude",
    title = "Avg. Life Expectancy 1901-2021"
  ) +
  theme_bw()
We are now ready to perform a t-test to see if the life expectancy in Latin America is different from life expectancy around the world. We should perform one more step: excluding Latin America from our world sample so that we are not comparing the Latin American sample to the world sample (which includes Latin America).
# Choose everything that is not South America
world_nolatam <- subset(merged, continent != "South America")
# Examining the result
glimpse(world_nolatam)
Rows: 229
Columns: 7
$ admin <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Vatican", "Va…
$ adm0_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VAT", "VUT", "UZB", "FSM", …
$ sovereignt <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Vatican", "Va…
$ continent <chr> "Africa", "Africa", "Asia", "Asia", "Europe", "Oceania",…
$ Entity <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Vatican", "Va…
$ life_exp_mean <dbl> 54.24722, 51.56389, 51.64861, 65.73194, 75.33194, 61.269…
$ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((31.28789 -2..., MULTIPOLYGO…
Note that our sample got reduced from 242 (size of world sample including Latin America) to 229 (size of world sample excluding Latin America). Let us now perform a t-test.
# One-sample, one-sided t-test: is the mean life expectancy of the South
# America sample greater than the mean of the rest of the world?
# t.test() estimates the standard deviation from the sample itself; it has no
# `sigma.x` argument (z-test functions such as BSDA::z.test use one), so the
# value previously passed was silently ignored and is dropped here.
test_greater <- t.test(sample_latam$life_exp_mean,
                       mu = mean(world_nolatam$life_exp_mean, na.rm = TRUE),
                       alternative = "greater")
test_greater
One Sample t-test
data: sample_latam$life_exp_mean
t = 0.79922, df = 12, p-value = 0.2198
alternative hypothesis: true mean is greater than 62.32984
95 percent confidence interval:
61.1916 Inf
sample estimates:
mean of x
63.25522
The result indicates that we fail to reject the H0 because the p-value (0.2198) is larger than 0.05. In other words, there isn’t sufficient evidence to conclude that the true mean life expectancy for Latin America (as defined here) is greater than that of the rest of the world, 62.3298423.
But is it less?
# One-sample, one-sided t-test: is the mean life expectancy of the South
# America sample less than the mean of the rest of the world?
# `sigma.x` is not a t.test() argument (it was silently ignored), so it is
# dropped here.
test_less <- t.test(sample_latam$life_exp_mean,
                    mu = mean(world_nolatam$life_exp_mean, na.rm = TRUE),
                    alternative = "less")
test_less
One Sample t-test
data: sample_latam$life_exp_mean
t = 0.79922, df = 12, p-value = 0.7802
alternative hypothesis: true mean is less than 62.32984
95 percent confidence interval:
-Inf 65.31885
sample estimates:
mean of x
63.25522
No, it does not seem to be the case. There isn’t sufficient evidence to conclude that the true mean life expectancy is less than 62.3298423.
Is it equal to that of the world?
# Two-sample, two-sided t-test comparing the South America sample with
# the rest of the world (the printed result is a Welch test)
test_twosided <- t.test(sample_latam$life_exp_mean,
                        world_nolatam$life_exp_mean,
                        alternative = "two.sided")
test_twosided
Welch Two Sample t-test
data: sample_latam$life_exp_mean and world_nolatam$life_exp_mean
t = 0.70627, df = 19.589, p-value = 0.4883
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.811395 3.662154
sample estimates:
mean of x mean of y
63.25522 62.32984
# One-sample, two-sided t-test against the mean of the rest of the world.
# `sigma.x` is not a t.test() argument (it was silently ignored), so it is
# dropped here.
test_twosided <- t.test(sample_latam$life_exp_mean,
                        mu = mean(world_nolatam$life_exp_mean, na.rm = TRUE),
                        alternative = "two.sided")
test_twosided
One Sample t-test
data: sample_latam$life_exp_mean
t = 0.79922, df = 12, p-value = 0.4397
alternative hypothesis: true mean is not equal to 62.32984
95 percent confidence interval:
60.73248 65.77796
sample estimates:
mean of x
63.25522
Yes, statistically it is similar. This test prevents us from rejecting the H0. Thus, there isn’t sufficient evidence to conclude that the true mean life expectancy differs significantly from the mean proposed, 62.3298423.
Another good way to visualize the similarity between the two groups is by using a boxplot.
# Creating a new grouping variable: every row starts as "Rest of the World"
merged$sample <- "Rest of the World"
# Replace the value with "South America" if continent == "South America"
merged$sample[merged$continent == "South America"] <- "South America"
# Boxplot of mean life expectancy for the two groups
ggplot(merged, aes(x = sample, y = life_exp_mean, color = sample)) +
  geom_boxplot() +
  theme_bw()
And this is the interpretation of a box plot.
We can also add the individual observations to the boxplot by using geom_jitter, as shown below:
# Boxplot per group with the individual observations jittered on top
ggplot(
  data = merged,
  mapping = aes(x = sample, y = life_exp_mean, color = sample)
) +
  theme_bw() +
  geom_boxplot() +
  geom_jitter()
6 Mapping Life Expectancy in Europe
What if we wanted to examine life expectancy in Europe over time? The first step is to fix the coordinates and add limits to our graph. This time around we need to choose a maximum Y (North Point), minimum Y (South Point), a maximum X (East Point), and minimum X (West Point). One good way would be to take these coordinates from the existing countries. Let us choose four countries that would allow us to decide on the coordinates for Europe: Norway, Portugal, Ukraine, and Greece.
# Step1: Selecting the country
norway <- subset(merged, Entity == "Norway")
# Step2: Extracting its bounding box (xmin, ymin, xmax, ymax)
st_bbox(norway)
xmin ymin xmax ymax
-9.098877 58.020947 33.629297 80.477832
For our map of Europe we are interested in the Northern most point in Norway. This will be our maximum Y.
# Upper latitude limit: ymax of Norway's bounding box
max_lat_y <- st_bbox(norway)["ymax"]
max_lat_y
ymax
80.47783
Let us now get another country that is in the south: Greece.
# Step1: Selecting the country
greece <- subset(merged, Entity == "Greece")
# Step2: Extracting its bounding box (xmin, ymin, xmax, ymax)
st_bbox(greece)
xmin ymin xmax ymax
19.64648 34.93447 28.23184 41.74380
For our map of Europe we are interested in the Southern most point in Greece. This will be the minimum Y.
# Lower latitude limit: ymin of Greece's bounding box
min_lat_y <- st_bbox(greece)["ymin"]
min_lat_y
ymin
34.93447
Let us now get a country in the East: Ukraine.
# Step1: Selecting the country
ukraine <- subset(merged, Entity == "Ukraine")
# Step2: Extracting its bounding box (xmin, ymin, xmax, ymax)
st_bbox(ukraine)
xmin ymin xmax ymax
22.13184 45.23413 40.12832 52.35356
For our map of Europe we are interested in the Eastern most point in Ukraine. This will be the maximum X.
# Upper longitude limit: xmax of Ukraine's bounding box
max_lon_x <- st_bbox(ukraine)["xmax"]
max_lon_x
xmax
40.12832
Let’s choose now a country in the West: Portugal
# Step1: Selecting the country
portugal <- subset(merged, Entity == "Portugal")
# Step2: Extracting its bounding box (xmin, ymin, xmax, ymax)
st_bbox(portugal)
xmin ymin xmax ymax
-31.28296 32.64829 -6.21250 42.13740
For our map of Europe we are interested in the Western most point in Portugal. This will be the minimum X.
# Lower longitude limit: xmin of Portugal's bounding box
min_lon_x <- st_bbox(portugal)["xmin"]
min_lon_x
xmin
-31.28296
We are now ready:
# Map of mean life expectancy cropped to the longitude/latitude limits
# taken from the four bounding boxes
figure1 <- ggplot() +
  geom_sf() +
  geom_sf(data = merged, aes(fill = life_exp_mean)) +
  theme_bw() +
  xlab("X - Longitude") + ylab("Y - Latitude") +
  coord_sf(xlim = c(min_lon_x, max_lon_x), ylim = c(min_lat_y, max_lat_y)) +
  ggtitle("Avg. Life Expectancy 1901-2021")
# Saving the map; `filename` and `plot` are named in full so that ggsave()
# does not rely on partial matching of `file` to `filename`
ggsave(filename = "./graphs/figure1.jpg", plot = figure1,
       height = 20, width = 20,
       units = "cm", dpi = 300)
figure1
Let us also visualize the points that we calculated. But to make things easier, we should create a dataframe.
# One row per corner of the bounding box: longitude, latitude and a label
euro_extreme <- data.frame(
  x_lon = c(min_lon_x, max_lon_x, min_lon_x, max_lon_x),
  y_lat = c(min_lat_y, max_lat_y, max_lat_y, min_lat_y),
  name = c("South-West", "North-East", "North-West", "South-East")
)
euro_extreme
Let us visualize the four points that we calculated.
# Same map with the four corner points and their labels drawn on top;
# the limits are widened by 3 degrees on each side
figure1 <- ggplot() +
  geom_sf() +
  geom_sf(data = merged, aes(fill = life_exp_mean)) +
  geom_point(data = euro_extreme, aes(x = x_lon, y = y_lat),
             fill = "blue", color = "darkred", size = 3) +
  geom_label_repel(data = euro_extreme,
                   aes(x = x_lon, y = y_lat, label = name),
                   fill = alpha(c("white"), 0.8)) +
  theme_bw() +
  xlab("X - Longitude") + ylab("Y - Latitude") +
  coord_sf(xlim = c(min_lon_x - 3, max_lon_x + 3),
           ylim = c(min_lat_y - 3, max_lat_y + 3)) +
  ggtitle("Avg. Life Expectancy 1901-2021")
figure1
This looks pretty good. Next step is to redo the merge in our data. Remember that we calculated average life expectancy for the entire period: historical years up to the present day. Thus, we need to do a new merge in which we merge the spatial dataframe to the specific year on life expectancy.
# Making a more friendly label for life expectancy (4th column)
names(life_expectancy_df)[4] <- "life_exp_mean"
# Selecting only 1950
life_expectancy_1950 <- subset(life_expectancy_df, Year == 1950)
# Merging countries (life_expectancy_1950) to the shapefile (world2)
world_1950 <- left_join(world2, life_expectancy_1950,
                        by = c("adm0_a3" = "Code"))
# Examining the result
glimpse(world_1950)
Rows: 242
Columns: 8
$ admin <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "…
$ adm0_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", …
$ sovereignt <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "…
$ continent <chr> "Africa", "Africa", "Asia", "Asia", "South America", "Eu…
$ Entity <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "…
$ Year <int> 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 19…
$ life_exp_mean <dbl> 49.6, 45.1, 31.7, 49.2, 51.1, 65.9, 44.5, 54.8, 65.6, 49…
$ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((31.28789 -2..., MULTIPOLYGO…
What if we were interested in life expectancy in Europe in 1950?
Let us map life expectancy in 1950.
# Map of life expectancy in 1950, cropped to the European limits
figure2 <- ggplot() +
  geom_sf(data = world_1950, aes(fill = life_exp_mean)) +
  theme_bw() +
  xlab("X - Longitude") + ylab("Y - Latitude") +
  coord_sf(xlim = c(min_lon_x, max_lon_x), ylim = c(min_lat_y, max_lat_y)) +
  ggtitle("Life Expectancy in 1950")
figure2
Let us map life expectancy in 2020.
# Selecting only 2020
life_expectancy_2020 <- subset(life_expectancy_df, Year == 2020)
# Merging countries (life_expectancy_2020) to the shapefile (world2)
world_2020 <- left_join(world2, life_expectancy_2020,
                        by = c("adm0_a3" = "Code"))
# Examining the result
glimpse(world_2020)
Rows: 242
Columns: 8
$ admin <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "…
$ adm0_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", …
$ sovereignt <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "…
$ continent <chr> "Africa", "Africa", "Asia", "Asia", "South America", "Eu…
$ Entity <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "…
$ Year <int> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 20…
$ life_exp_mean <dbl> 61.1, 62.4, 64.7, 75.4, 71.1, 83.0, 70.3, 70.3, 78.4, 70…
$ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((31.28789 -2..., MULTIPOLYGO…
# Map of life expectancy in 2020, cropped to the European limits
figure3 <- ggplot() +
  geom_sf() +
  geom_sf(data = world_2020, aes(fill = life_exp_mean)) +
  theme_bw() +
  xlab("X - Longitude") + ylab("Y - Latitude") +
  coord_sf(xlim = c(min_lon_x, max_lon_x),
           ylim = c(min_lat_y, max_lat_y)) +
  ggtitle("Life Expectancy in 2020")
figure3
Let us put them side by side.
# Placing the 1950 and 2020 maps in two columns
gridExtra::grid.arrange(figure2, figure3, ncol = 2)
We don’t see much of a difference because we also need to fix the color scheme so that both maps share the same minimum and maximum. This is how we do this. We first identify the minimum and maximum value in the variable that we want to plot.
# Largest life expectancy value in the dataset (upper limit of the fill scale)
vmax <- max(life_expectancy_df$life_exp_mean, na.rm = TRUE)
vmax
[1] 86.5
# Smallest life expectancy value in the dataset (lower limit of the fill scale)
vmin <- min(life_expectancy_df$life_exp_mean, na.rm = TRUE)
vmin
[1] 12
Let us now replot the maps
library(ggpubr)
# Both maps use the same fill limits (vmin, vmax) so that a given colour
# stands for the same life expectancy in 1950 and in 2020
figure2 <- ggplot() +
  geom_sf() +
  geom_sf(data = world_1950, aes(fill = life_exp_mean)) +
  theme_bw() +
  xlab("X - Longitude") + ylab("Y - Latitude") +
  coord_sf(xlim = c(min_lon_x, max_lon_x), ylim = c(min_lat_y, max_lat_y)) +
  scale_fill_gradient(limits = c(vmin, vmax), name = "Life Expectancy") +
  ggtitle("Life Expectancy in 1950")
figure3 <- ggplot() +
  geom_sf() +
  geom_sf(data = world_2020, aes(fill = life_exp_mean)) +
  theme_bw() +
  xlab("X - Longitude") + ylab("Y - Latitude") +
  coord_sf(xlim = c(min_lon_x, max_lon_x),
           ylim = c(min_lat_y, max_lat_y)) +
  scale_fill_gradient(limits = c(vmin, vmax), name = "Life Expectancy") +
  ggtitle("Life Expectancy in 2020")
# Side by side, sharing a single legend
ggarrange(figure2, figure3, ncol = 2, common.legend = TRUE)
Now, the differences are more visible. What if we were interested in plotting the change in life expectancy between 1950 and 2020?
# Step1: Selecting only 1950 and 2020
life_expectancy_1950_2020 <- subset(life_expectancy_df,
                                    Year %in% c(1950, 2020))
# Step2: Calculating the difference between 2020 (life_exp_mean) and
# 1950 (lag(life_exp_mean)) by country. Rows are sorted by Code and Year so
# that lag() always refers to the earlier year. mutate() is used because the
# result has one row per input row; summarise() returning several rows per
# group is deprecated in recent dplyr versions.
life_expectancy_1950_2020b <- life_expectancy_1950_2020 %>%
  dplyr::arrange(Code, Year) %>%
  dplyr::group_by(Code) %>%
  dplyr::mutate(differ = life_exp_mean - dplyr::lag(life_exp_mean)) %>%
  dplyr::select(Code, differ)
# Step3: Removing rows with an empty code and rows where the calculated
# difference is NA (the first row of each group has nothing to lag from)
life_expectancy_1950_2020c <- subset(life_expectancy_1950_2020b,
                                     Code != "" & !is.na(differ))
# Step4: Examining the result
glimpse(life_expectancy_1950_2020c)
Rows: 237
Columns: 2
Groups: Code [237]
$ Code <chr> "ABW", "AFG", "AGO", "AIA", "ALB", "AND", "ARE", "ARG", "ARM", …
$ differ <dbl> 18.5, 34.9, 26.0, 21.6, 32.3, 14.4, 37.8, 14.7, 12.9, 11.4, 21.…
Let us now plot the change.
# Performing a left join: attach the 2020-1950 difference to the shapefile
world_change <- left_join(world2, life_expectancy_1950_2020c,
                          by = c("adm0_a3" = "Code"))
# Investigating the result
glimpse(world_change)
Rows: 242
Columns: 6
$ admin <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ adm0_a3 <chr> "ZWE", "ZMB", "YEM", "VNM", "VEN", "VAT", "VUT", "UZB", "UR…
$ sovereignt <chr> "Zimbabwe", "Zambia", "Yemen", "Vietnam", "Venezuela", "Vat…
$ continent <chr> "Africa", "Africa", "Asia", "Asia", "South America", "Europ…
$ differ <dbl> 11.5, 17.3, 33.0, 26.2, 20.0, 17.1, 25.8, 15.5, 12.8, 21.3,…
$ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((31.28789 -2..., MULTIPOLYGON (…
# Map of the change in life expectancy between 1950 and 2020,
# cropped to the European limits
figure4 <- ggplot() +
  geom_sf() +
  geom_sf(data = world_change, aes(fill = differ)) +
  theme_bw() +
  xlab("X - Longitude") + ylab("Y - Latitude") +
  coord_sf(xlim = c(min_lon_x, max_lon_x), ylim = c(min_lat_y, max_lat_y)) +
  ggtitle("Life Expectancy Difference: 2020-1950")
figure4
What is the country in Europe with the highest change in life expectancy between 1950 and 2020?
# Step1: Selecting Europe
world_change_res <- subset(world_change, continent == "Europe")
# Step2: Selecting the row(s) holding the maximum value of differ
# (difference between 2020 and 1950)
world_change_resx <- subset(
  world_change_res,
  differ == max(world_change_res$differ, na.rm = TRUE)
)
# Step3: Identifying the country
world_change_resx$admin
[1] "Albania"
What is the country in Europe with the lowest change in life expectancy between 1950 and 2020?
# Step2: Selecting the row(s) holding the minimum value of differ
# (difference between 2020 and 1950)
world_change_resx <- subset(
  world_change_res,
  differ == min(world_change_res$differ, na.rm = TRUE)
)
# Step3: Identifying the country
world_change_resx$admin
[1] "Latvia"
7 Correlation between two Variables
Let us now calculate the correlation between urbanization and life expectancy.
# Providing more friendly names (4th column of the urbanization data)
names(urbanization_df)[4] <- "urb_mean"
# Selecting only relevant variables
urbanization_df2 <- subset(urbanization_df, select = c(Code, urb_mean, Year))
# Performing a left join on country code and year
# NOTE(review): rows with an empty Code would match each other across
# different entities in the same year - confirm whether urbanization_df
# contains such rows and filter them out beforehand if so
merged_df <- left_join(life_expectancy_df, urbanization_df2,
                       by = c("Code", "Year"))
# Removing rows with NA values
merged_df2 <- na.omit(merged_df)
What is the correlation between life expectancy and urbanization for the entire dataset?
# Correlation between life expectancy and urbanization over all
# complete country-year rows
with(merged_df2, cor(life_exp_mean, urb_mean))
[1] 0.4579985