#####################################################################################################
# Direct estimates


# This is an example using eu-silc 2017.
# In this code I find the numbers of minors in each household
# and then I create 2 datasets, one at households level and one at "unit" level. 
# At the end, I compute the "at risk of poverty rate" at provincial level (please, note that the survey 
# is designed to obtain good results at regional level...)

# Please, consider that you have to change the name of all the files with your files.
#####################################################################################################
rm(list=ls())

library(MASS)
library(xlsx)

 reg.fam <- read.table(file = "19401.IST180.A.2017.1.csv", header = TRUE, sep = ",") 
 h.file <- read.table(file = "19370.IST180.A.2017.1.csv", header = TRUE, sep = ",") 
 d.file <- read.table(file = "19363.IST180.A.2017.1.csv", header = TRUE, sep = ",")
 r.file <- read.table(file = "19368.IST180.A.2017.1.csv", header = TRUE, sep = ",") 
# 
# 
# #HH EQUIV SIZE
# #
 
 r.file$age <- 2017-r.file$RB080  # with this command you can find the age of per people
                                  # inside each households. Then you can count the number
                                  # of minors
 summary(r.file$age)
# 
# With the following command you can find equivalised size for each family
 
hhid <- (unique(r.file$RB040))
 for(i in 1:length(hhid)){
   sel <- r.file$RB040 == hhid[i]
   r.file.hh <- r.file[sel,]
   r.file[sel,"hhsizeequiv"] <- 1+(sum(r.file.hh$age >= 14)-1)*0.5+sum(r.file.hh$age < 14)*0.3
 }
# 
 (table(r.file$hhsizeequiv))
# 
 
# #merge h and r file
 r.file2 <- merge(r.file, h.file, by.x = "RB040", by.y = "HB030")
 r.file2$eqhhincome <- r.file2$HY020/r.file2$hhsizeequiv
 summary(r.file2$eqhhincome)
 boxplot(r.file2$eqhhincome)
 boxplot(r.file2$eqhhincome+12850, log = "y")
# 
# #merge r2 and reg.fam
# # length(unique(reg.fam$RB030))
# # length(unique(r.file2$RB030))
 eusilc <- merge(r.file2, reg.fam, by.x = "RB030", by.y = "RB030")
 sum(eusilc$RB050)
# 
 save(eusilc, file = "eusilc2017_persons.RData")  # data at persons level
# 

 eusilc$hhmember <- as.numeric(substr(eusilc$RB030, start = 9, stop = 10))
 sel <- which(eusilc$hhmember == 1)
 eusilchh <- eusilc[sel,]


 hhid <- unique(eusilc$RB040)
 sel <- which(eusilc$RB040 == hhid[1])
 eusilchh <- eusilc[sel[1],]
 for(i in 2:length(hhid)){
   sel <- which(eusilc$RB040 == hhid[i])
   eusilchh <- rbind(eusilchh, eusilc[sel[1],])
 }

 save(eusilchh, file = "eusilc2017_hh.RData")    # data at households level

#####Direct estimation
load("eusilc2017_persons.RData")
load("eusilc2017_hh.RData")

#sample sizes at provncial level
summary(as.numeric(table(eusilc$RILPROV)))
summary(as.numeric(table(eusilchh$RILPROV)))


###Using laeken
library(laeken)
#PL
npl <- arpt(inc = eusilchh$eqhhincome, weights = eusilchh$RB050, p = 0.6)

#hcr
hcr <- arpr(inc = eusilchh$eqhhincome, weights = eusilchh$RB050, breakdown = eusilchh$RILPROV, p = 0.6)
var.hcr <- bootVar(inc = eusilchh$eqhhincome, weights = eusilchh$RB050, breakdown = eusilchh$RILPROV, indicator = hcr, R = 100, bootType = "naive")


###Using survey
library(survey)
eusilc$relpoor <- 0
eusilc$relpoor[eusilc$eqhhincome <= npl] <- 1
table(eusilc$relpoor)
design1 <- svydesign(ids=~RB040,strat=~REGIONE,weight=~RB050,data=eusilc,nest=TRUE)
means <- svyby(~cbind(eqhhincome),~REGIONE,design=design1, svymean)
names(means)=c("Regione","mean.HH.EQ.INC","sd.mean.HH.EQ.INC")
hcr2 <- svyby(~cbind(relpoor),~REGIONE,design=design1, svymean)
names(hcr2)=c("Regione","HCR","sd.HCR")
hcr2
hcr
var.hcr