inseefr / gustave Goto Github PK
View Code? Open in Web Editor NEWGustave: a User-oriented Statistical Toolkit for Analytical Variance Estimation
Home Page: https://CRAN.R-project.org/package=gustave
Gustave: a User-oriented Statistical Toolkit for Analytical Variance Estimation
Home Page: https://CRAN.R-project.org/package=gustave
It is currently impossible to retrieve outputs, other than 'var', computed in the variance function, inside the display function, to add them to the final output of the Gustave variance estimation.
In the MWE below 'addtional_output' cannot be retrieved in 'display_function', neither directly if added as an argument to the function, nor through metadata or any other default argument of 'display_function'.
if(!require("gustave")){
install.packages("gustave")
library("gustave")
}
data <- data.frame(variable = rep(1, 1000), weight = rep(1, 1000), id =seq(from = 1, to = 1000))
Any_statistic_wrapper <- define_statistic_wrapper(
statistic_function = function(Var_int, weight){
return(list(lin = Var_int, point = 5))
},
arg_type = list(data = c('Var_int'), weight = 'weight'),
display_function = function(point, var, metadata, alpha){
output_df = data.frame(variance = var, estimation = point)
}
)
Compute_Var <- function(y){
return(list(var = 1, addtional_output = 2))
}
Var_computation <- define_variance_wrapper(
variance_function = Compute_Var,
reference_id = data$id,
reference_weight = data$weight,
default_id = 'id'
)
# Current output
Var_computation(data, Any_statistic_wrapper(variable))
# Desired output
output_df = data.frame(variance = 1, estimation = 5, addtional_output = 2)
print(output_df)
At least in simple cases, the output of the variance function should be of length the number of columns of the Y variable in input.
MWE (thanks to @ThomasDeroyon) :
data <- data.frame(id = 1:100, sex= c(rep(1, 50), rep(2, 50)),
y = c(rnorm(50, 1, 3), rnorm(50, 2, 4)), weight = rep(10, 100))
# Dont't work
variance_function <- function(y) 1
variance_wrapper <- gustave::define_variance_wrapper(variance_function = variance_function,
reference_id = data$id,
reference_weight = data$weight,
default_id = "id")
variance_wrapper(data, as.factor(sex))
# Works
variance_function <- function(y) rep(1, NCOL(y))
variance_wrapper <- gustave::define_variance_wrapper(variance_function = variance_function,
reference_id = data$id,
reference_weight = data$weight,
default_id = "id")
variance_wrapper(data, as.factor(sex))
# Example taken from documentation (define_variance_wrapper) :
var_lfs <- function(y, ind, dwel, area){
variance <- list()
# Variance associated with the sampling of the dwellings
y <- sum_by(y, ind$id_dwel)
variance[["dwel"]] <- var_srs(
y = y, pik = dwel$pik_dwel, strata = dwel$id_area,
w = (1 / dwel$pik_area^2 - dwel$q_area)
)
# Variance associated with the sampling of the areas
y <- sum_by(y = y, by = dwel$id_area, w = 1 / dwel$pik_dwel)
variance[["area"]] <- varDT(y = y, precalc = area)
Reduce(`+`, variance)
}
technical_data_lfs <- list()
technical_data_lfs$area <- varDT(
y = NULL,
pik = lfs_samp_area$pik_area,
x = as.matrix(lfs_samp_area[c("pik_area", "income")]),
id = lfs_samp_area$id_area
)
lfs_samp_dwel$q_area <- with(technical_data_lfs$area, setNames(diago, id))[lfs_samp_dwel$id_area]
technical_data_lfs$dwel <- lfs_samp_dwel[c("id_dwel", "pik_dwel", "id_area", "pik_area", "q_area")]
technical_data_lfs$ind <- lfs_samp_ind[c("id_ind", "id_dwel", "sampling_weight")]
precision_lfs <- define_variance_wrapper(
variance_function = var_lfs,
technical_data = technical_data_lfs,
reference_id = technical_data_lfs$ind$id_ind,
reference_weight = technical_data_lfs$ind$sampling_weight,
default_id = "id_ind"
)
precision_lfs(lfs_samp_ind, as.factor(unemp))
call mod n est variance std cv lower upper
1 total(y = as.factor(unemp)) FALSE 116 12865.413 532925.1 730.0172 5.674262 11434.605 14296.220
2 total(y = as.factor(unemp)) TRUE 116 1082.138 137355.3 370.6147 34.248393 355.746 1808.529
Warning messages:
1: In Matrix::sparseMatrix(x = unname(y), i = seq_along(y), j = rep(1, :
'giveCsparse' has been deprecated; setting 'repr = "T"' for you
2: In Matrix::sparseMatrix(x = unname(y), i = seq_along(y), j = rep(1, :
'giveCsparse' has been deprecated; setting 'repr = "T"' for you
3: In Matrix::sparseMatrix(x = unname(y), i = seq_along(y), j = rep(1, :
'giveCsparse' has been deprecated; setting 'repr = "T"' for you
4: In Matrix::sparseMatrix(x = unname(y), i = seq_along(y), j = rep(1, :
'giveCsparse' has been deprecated; setting 'repr = "T"' for you
5: In Matrix::sparseMatrix(x = unname(y), i = seq_along(y), j = rep(1, :
'giveCsparse' has been deprecated; setting 'repr = "T"' for you
Some things to change due to the transfer to InseeFr :
The variance wrappers created with gustave have difficulties with qualitative variables with one modality. This problem can also occur when using the by or where parameters in case the qualitative variable of interest has only one modality in the modality of the where variable, or in one modality of the by variable :
# Simple variance function
compute_variance <- function(y){
rep(1, ncol(y))
}
# Data generation
set.seed(215)
d <- data.frame(id = 1 : 1000,
weight = c(rep(10, 1000)),
x = 1000 * rnorm(1000, mean = 10, sd = 2),
a = c(rep('a', 500), rep('b', 500)),
y = rep('c', 1000),
z = c(rep('m1', 250), rep('m2', 250), rep('m3', 500))
)
# Variance wrapper definition
precision <-
gustave::define_variance_wrapper(variance_function = compute_variance,
reference_id = d$id,
reference_weight = d$weight,
default_id = 'id')
# The wrapper is working :
precision(d, mean(x))
# But not with one modality qualitative variables :
precision(d, mean(y))
precision(d, mean(z), by = a)
precision(d, mean(z), where = (a == 'b'))
The problem comes from inner function discretize_qualitative_var, which cannot handel qualitative variables with one modality. It is sufficient to add a default behaviour in case the qualitative variable discretize_qualitative_var is applied to has only one modality, and to modify the version of discretize_qualitative_var in the parent environements of the precision function to correct the problem :
discretize<- function (var, logical = FALSE){
var <- droplevels(as.factor(var))
result <- Matrix(nrow = length(var), ncol = length(levels(var)))
if (length(levels(var)) == 1){
result[!is.na(var), ] <- 1
} else {
result[!is.na(var), ] <- Matrix::sparse.model.matrix(~var -
1)
}
result[is.na(var), ] <- NA
if (!logical) result <- result * 1
rownames(result) <- names(var)
colnames(result) <- levels(var)
result
}
parent.env(environment(precision))$discretize_qualitative_var <- discretize
environment(parent.env(environment(precision))$total)$discretize_qualitative_var <- discretize
parent.env(environment(parent.env(environment(precision))$total))$discretize_qualitative_var <- discretize
environment(parent.env(environment(precision))$mean)$discretize_qualitative_var <- discretize
parent.env(environment(parent.env(environment(precision))$mean))$discretize_qualitative_var <- discretize
precision(d, mean(y))
precision(d, mean(z), where = a == 'b')
precision(d, mean(z), by = a)
A declarative, efficient, and flexible JavaScript library for building user interfaces.
๐ Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. ๐๐๐
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Some thing interesting about web. New door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google โค๏ธ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.