library(readxl) # use package readxl
HDR25_Statistical_Annex_HDI_Table <- read_excel("HDR25_Statistical_Annex_HDI_Table.xlsx") # read and name file
View(HDR25_Statistical_Annex_HDI_Table)
HDR25_Statistical_Annex_HDI_Table # view file
str(HDR25_Statistical_Annex_HDI_Table) # view data types for each column
## tibble [278 × 15] (S3: tbl_df/tbl/data.frame)
## $ ...1 : chr [1:278] NA NA NA NA ...
## $ Table 1. Human Development Index and its components: chr [1:278] NA NA NA NA ...
## $ ...3 : chr [1:278] NA NA NA "Human Development Index (HDI)" ...
## $ ...4 : logi [1:278] NA NA NA NA NA NA ...
## $ ...5 : chr [1:278] NA "SDG3" NA "Life expectancy at birth" ...
## $ ...6 : chr [1:278] NA NA NA NA ...
## $ ...7 : chr [1:278] NA "SDG4.3" NA "Expected years of schooling" ...
## $ ...8 : chr [1:278] NA NA NA NA ...
## $ ...9 : chr [1:278] NA "SDG4.4" NA "Mean years of schooling" ...
## $ ...10 : chr [1:278] NA NA NA NA ...
## $ ...11 : chr [1:278] NA "SDG8.5" NA "Gross national income (GNI) per capita" ...
## $ ...12 : chr [1:278] NA NA NA NA ...
## $ ...13 : chr [1:278] NA NA NA "GNI per capita rank minus HDI rank" ...
## $ ...14 : chr [1:278] NA NA NA NA ...
## $ ...15 : chr [1:278] NA NA NA "HDI rank" ...
head(HDR25_Statistical_Annex_HDI_Table) # view first 6 rows of data
tail(HDR25_Statistical_Annex_HDI_Table) # view last 6 rows of data
as.list(HDR25_Statistical_Annex_HDI_Table[4,]) # get a list of the fourth row which is where the real column names are
## $...1
## [1] NA
##
## $`Table 1. Human Development Index and its components`
## [1] NA
##
## $...3
## [1] "Human Development Index (HDI)"
##
## $...4
## [1] NA
##
## $...5
## [1] "Life expectancy at birth"
##
## $...6
## [1] NA
##
## $...7
## [1] "Expected years of schooling"
##
## $...8
## [1] NA
##
## $...9
## [1] "Mean years of schooling"
##
## $...10
## [1] NA
##
## $...11
## [1] "Gross national income (GNI) per capita"
##
## $...12
## [1] NA
##
## $...13
## [1] "GNI per capita rank minus HDI rank"
##
## $...14
## [1] NA
##
## $...15
## [1] "HDI rank"
colnames(HDR25_Statistical_Annex_HDI_Table) <- as.list(HDR25_Statistical_Annex_HDI_Table[4,]) # use the fourth row to rename the columns
names(HDR25_Statistical_Annex_HDI_Table)[2]=c("Country") # change the name of the second column to country since it was in a different row than the others
## Warning: The `value` argument of `names<-()` can't be empty as of tibble 3.0.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
HDR25_Statistical_Annex_HDI_Table <- HDR25_Statistical_Annex_HDI_Table[-c(1:7),] # remove the first 7 rows
HDR25_Statistical_Annex_HDI_Table <-
HDR25_Statistical_Annex_HDI_Table[,-c(1,4,6,8,10,12,14,15)] # delete these columns that have nothing in them
HDR25_Statistical_Annex_HDI_Table # view data
HDR25_Statistical_Annex_HDI_Table <- HDR25_Statistical_Annex_HDI_Table[-c(75,126,170,197:271),] # delete the rows at the bottom that are not needed
colnames(HDR25_Statistical_Annex_HDI_Table) # view column names
## [1] "Country"
## [2] "Human Development Index (HDI)"
## [3] "Life expectancy at birth"
## [4] "Expected years of schooling"
## [5] "Mean years of schooling"
## [6] "Gross national income (GNI) per capita"
## [7] "GNI per capita rank minus HDI rank"
names(HDR25_Statistical_Annex_HDI_Table)[2]=c("Human_Dev_Index") # change column name to remove spaces
names(HDR25_Statistical_Annex_HDI_Table)[3]=c("Life_Exp") # change column name to remove spaces
names(HDR25_Statistical_Annex_HDI_Table)[4]=c("School_Exp") # change column name to remove spaces
names(HDR25_Statistical_Annex_HDI_Table)[5]=c("Mean_Yrs_School") # change column name to remove spaces
names(HDR25_Statistical_Annex_HDI_Table)[6]=c("Gross_Nat_Income_per_cap") # change column name to remove spaces
names(HDR25_Statistical_Annex_HDI_Table)[7]=c("Rank_GNI_minus_HDI") # change column name to remove spaces
colnames(HDR25_Statistical_Annex_HDI_Table) # view column names
## [1] "Country" "Human_Dev_Index"
## [3] "Life_Exp" "School_Exp"
## [5] "Mean_Yrs_School" "Gross_Nat_Income_per_cap"
## [7] "Rank_GNI_minus_HDI"
HDR25_Statistical_Annex_HDI_Table$Human_Dev_Index=as.numeric(HDR25_Statistical_Annex_HDI_Table$Human_Dev_Index) # change column to numeric data type
HDR25_Statistical_Annex_HDI_Table$Life_Exp=as.numeric(HDR25_Statistical_Annex_HDI_Table$Life_Exp) # change column to numeric data type
HDR25_Statistical_Annex_HDI_Table$School_Exp=as.numeric(HDR25_Statistical_Annex_HDI_Table$School_Exp) # change column to numeric data type
HDR25_Statistical_Annex_HDI_Table$Mean_Yrs_School=as.numeric(HDR25_Statistical_Annex_HDI_Table$Mean_Yrs_School) # change column to numeric data type
HDR25_Statistical_Annex_HDI_Table$Gross_Nat_Income_per_cap=as.numeric(HDR25_Statistical_Annex_HDI_Table$Gross_Nat_Income_per_cap) # change column to numeric data type
HDR25_Statistical_Annex_HDI_Table$Rank_GNI_minus_HDI=as.numeric(HDR25_Statistical_Annex_HDI_Table$Rank_GNI_minus_HDI) # change column to numeric data type
str(HDR25_Statistical_Annex_HDI_Table) # view data types for each column
## tibble [193 × 7] (S3: tbl_df/tbl/data.frame)
## $ Country : chr [1:193] "Iceland" "Norway" "Switzerland" "Denmark" ...
## $ Human_Dev_Index : num [1:193] 0.972 0.97 0.97 0.962 0.959 0.959 0.958 0.955 0.955 0.951 ...
## $ Life_Exp : num [1:193] 82.7 83.3 84 81.9 81.4 ...
## $ School_Exp : num [1:193] 18.9 18.8 16.7 18.7 17.3 ...
## $ Mean_Yrs_School : num [1:193] 13.9 13.1 13.9 13 14.3 ...
## $ Gross_Nat_Income_per_cap: num [1:193] 69117 112710 81949 76008 64053 ...
## $ Rank_GNI_minus_HDI : num [1:193] 12 0 5 4 13 10 14 4 6 9 ...
summary(HDR25_Statistical_Annex_HDI_Table) # view statistical summary
## Country Human_Dev_Index Life_Exp School_Exp
## Length:193 Min. :0.3880 Min. :54.46 Min. : 5.635
## Class :character 1st Qu.:0.6220 1st Qu.:67.39 1st Qu.:11.505
## Mode :character Median :0.7620 Median :73.49 Median :13.336
## Mean :0.7408 Mean :73.11 Mean :13.585
## 3rd Qu.:0.8620 3rd Qu.:78.34 3rd Qu.:15.888
## Max. :0.9720 Max. :85.71 Max. :20.846
## Mean_Yrs_School Gross_Nat_Income_per_cap Rank_GNI_minus_HDI
## Min. : 1.412 Min. : 688.3 Min. :-54.0000
## 1st Qu.: 6.780 1st Qu.: 5746.6 1st Qu.: -7.0000
## Median : 9.933 Median : 15866.5 Median : 1.0000
## Mean : 9.173 Mean : 24620.7 Mean : 0.2021
## 3rd Qu.:11.642 3rd Qu.: 36793.0 3rd Qu.: 10.0000
## Max. :14.296 Max. :166811.7 Max. : 38.0000
library(dplyr) # use package dplyr
HDR25_Statistical_Annex_HDI_Table |>
group_by() |>
summarize(Life_Exp_Mean = mean(Life_Exp),
School_Exp_Mean = mean(School_Exp),
Mean_Yrs_School_Mean = mean(Mean_Yrs_School),
Gross_Nat_Income_per_cap_Mean = mean(Gross_Nat_Income_per_cap)) # Calculate and show the mean of each column listed
saveRDS(HDR25_Statistical_Annex_HDI_Table,"HDR_Cleaned_Formatted.RDS") # code is saving the file as an RDS file
HDR_Cleaned_FormattedRDS=readRDS("HDR_Cleaned_Formatted.RDS") # code is reading the RDS file
str(HDR_Cleaned_FormattedRDS) # code is checking the data types of the RDS file
## tibble [193 × 7] (S3: tbl_df/tbl/data.frame)
## $ Country : chr [1:193] "Iceland" "Norway" "Switzerland" "Denmark" ...
## $ Human_Dev_Index : num [1:193] 0.972 0.97 0.97 0.962 0.959 0.959 0.958 0.955 0.955 0.951 ...
## $ Life_Exp : num [1:193] 82.7 83.3 84 81.9 81.4 ...
## $ School_Exp : num [1:193] 18.9 18.8 16.7 18.7 17.3 ...
## $ Mean_Yrs_School : num [1:193] 13.9 13.1 13.9 13 14.3 ...
## $ Gross_Nat_Income_per_cap: num [1:193] 69117 112710 81949 76008 64053 ...
## $ Rank_GNI_minus_HDI : num [1:193] 12 0 5 4 13 10 14 4 6 9 ...
write.csv(HDR25_Statistical_Annex_HDI_Table,"HDR_Cleaned_Formatted.csv", row.names = FALSE) # code is saving the data frame as a csv file
HDR_Cleaned_FormattedCSV=read.csv("HDR_Cleaned_Formatted.csv") # code is reading the csv file
str(HDR_Cleaned_FormattedCSV) # code is checking the data types of the csv file
## 'data.frame': 193 obs. of 7 variables:
## $ Country : chr "Iceland" "Norway" "Switzerland" "Denmark" ...
## $ Human_Dev_Index : num 0.972 0.97 0.97 0.962 0.959 0.959 0.958 0.955 0.955 0.951 ...
## $ Life_Exp : num 82.7 83.3 84 81.9 81.4 ...
## $ School_Exp : num 18.9 18.8 16.7 18.7 17.3 ...
## $ Mean_Yrs_School : num 13.9 13.1 13.9 13 14.3 ...
## $ Gross_Nat_Income_per_cap: num 69117 112710 81949 76008 64053 ...
## $ Rank_GNI_minus_HDI : int 12 0 5 4 13 10 14 4 6 9 ...