##################################################################################################### # Direct estimates # This is an example using eu-silc 2017. # In this code I find the numbers of minors in each household # and then I create 2 datasets, one at households level and one at "unit" level. # At the end, I compute the "at risk of poverty rate" at provincial level (please, note that the survey # is designed to obtain good results at regional level...) # Please, consider that you have to change the name of all the files with your files. ##################################################################################################### rm(list=ls()) library(MASS) library(xlsx) reg.fam <- read.table(file = "19401.IST180.A.2017.1.csv", header = TRUE, sep = ",") h.file <- read.table(file = "19370.IST180.A.2017.1.csv", header = TRUE, sep = ",") d.file <- read.table(file = "19363.IST180.A.2017.1.csv", header = TRUE, sep = ",") r.file <- read.table(file = "19368.IST180.A.2017.1.csv", header = TRUE, sep = ",") # # # #HH EQUIV SIZE # # r.file$age <- 2017-r.file$RB080 # with this command you can find the age of per people # inside each households. Then you can count the number # of minors summary(r.file$age) # # With the following command you can find equivalised size for each family hhid <- (unique(r.file$RB040)) for(i in 1:length(hhid)){ sel <- r.file$RB040 == hhid[i] r.file.hh <- r.file[sel,] r.file[sel,"hhsizeequiv"] <- 1+(sum(r.file.hh$age >= 14)-1)*0.5+sum(r.file.hh$age < 14)*0.3 } # (table(r.file$hhsizeequiv)) # # #merge h and r file r.file2 <- merge(r.file, h.file, by.x = "RB040", by.y = "HB030") r.file2$eqhhincome <- r.file2$HY020/r.file2$hhsizeequiv summary(r.file2$eqhhincome) boxplot(r.file2$eqhhincome) boxplot(r.file2$eqhhincome+12850, log = "y") # # #merge r2 and reg.fam # # length(unique(reg.fam$RB030)) # # length(unique(r.file2$RB030)) eusilc <- merge(r.file2, reg.fam, by.x = "RB030", by.y = "RB030") sum(eusilc$RB050) # save(eusilc, file = "eusilc2017_persons.RData") # data at persons level # eusilc$hhmember <- as.numeric(substr(eusilc$RB030, start = 9, stop = 10)) sel <- which(eusilc$hhmember == 1) eusilchh <- eusilc[sel,] hhid <- unique(eusilc$RB040) sel <- which(eusilc$RB040 == hhid[1]) eusilchh <- eusilc[sel[1],] for(i in 2:length(hhid)){ sel <- which(eusilc$RB040 == hhid[i]) eusilchh <- rbind(eusilchh, eusilc[sel[1],]) } save(eusilchh, file = "eusilc2017_hh.RData") # data at households level #####Direct estimation load("eusilc2017_persons.RData") load("eusilc2017_hh.RData") #sample sizes at provncial level summary(as.numeric(table(eusilc$RILPROV))) summary(as.numeric(table(eusilchh$RILPROV))) ###Using laeken library(laeken) #PL npl <- arpt(inc = eusilchh$eqhhincome, weights = eusilchh$RB050, p = 0.6) #hcr hcr <- arpr(inc = eusilchh$eqhhincome, weights = eusilchh$RB050, breakdown = eusilchh$RILPROV, p = 0.6) var.hcr <- bootVar(inc = eusilchh$eqhhincome, weights = eusilchh$RB050, breakdown = eusilchh$RILPROV, indicator = hcr, R = 100, bootType = "naive") ###Using survey library(survey) eusilc$relpoor <- 0 eusilc$relpoor[eusilc$eqhhincome <= npl] <- 1 table(eusilc$relpoor) design1 <- svydesign(ids=~RB040,strat=~REGIONE,weight=~RB050,data=eusilc,nest=TRUE) means <- svyby(~cbind(eqhhincome),~REGIONE,design=design1, svymean) names(means)=c("Regione","mean.HH.EQ.INC","sd.mean.HH.EQ.INC") hcr2 <- svyby(~cbind(relpoor),~REGIONE,design=design1, svymean) names(hcr2)=c("Regione","HCR","sd.HCR") hcr2 hcr var.hcr