bedapub / designit Goto Github PK
View Code? Open in Web Editor NEW
Blocking and randomization for experimental design
Home Page: https://bedapub.github.io/designit/
License: Other
Blocking and randomization for experimental design
Home Page: https://bedapub.github.io/designit/
License: Other
This example data did not do well using the standard options for the plate scoring function in the multi_plate_wrapper.
samples <- tibble::tribble(
~OriginalSampleID, ~StudyID, ~Diagnosis, ~SeverityGroup, ~Status, ~Gender, ~Age,
1L, "S4", "Disease", "Severe", "high", "Male", 91,
2L, "S4", "Disease", "MildModerate", "normal", "Female", 80,
3L, "S4", "Disease", "MildModerate", "normal", "Male", 82,
4L, "S4", "Disease", "MildModerate", "normal", "Female", 66,
5L, "S4", "Disease", "MildModerate", "normal", "Male", 89,
6L, "S4", "Disease", "Severe", "normal", "Male", 64,
7L, "S4", "Disease", "Severe", "normal", "Male", 61,
8L, "S4", "Disease", "MildModerate", "normal", "Female", 77,
9L, "S4", "Disease", "Severe", "high", "Male", 86,
10L, "S4", "Disease", "MildModerate", "normal", "Male", 65,
11L, "S4", "Disease", "MildModerate", "normal", "Male", 73,
12L, "S4", "Disease", "MildModerate", "normal", "Male", 72,
13L, "S4", "Disease", "MildModerate", "normal", "Male", 76,
14L, "S4", "Disease", "MildModerate", "normal", "Male", 62,
15L, "S4", "Disease", "MildModerate", "normal", "Female", 78,
16L, "S4", "Control", "Control", "normal", "Female", 70,
17L, "S4", "Control", "Control", "normal", "Male", 75,
18L, "S4", "Control", "Control", "normal", "Female", 75,
19L, "S4", "Control", "Control", "normal", "Female", 73,
20L, "S4", "Control", "Control", "normal", "Female", 75,
21L, "S4", "Control", "Control", "normal", "Male", 57,
22L, "S4", "Control", "Control", "normal", "Female", 69,
23L, "S4", "Control", "Control", "normal", "Female", 74,
24L, "S4", "Control", "Control", "normal", "Female", 72,
25L, "S4", "Control", "Control", "normal", "Male", 63,
26L, "S4", "Control", "Control", "normal", "Male", 55,
27L, "S4", "Control", "Control", "normal", "Female", 55,
28L, "S4", "Control", "Control", "normal", "Female", 63,
29L, "S4", "Control", "Control", "normal", "Female", 66,
30L, "S3", "Control", "Control", "high", "Female", 48,
31L, "S3", "Control", "Control", "high", "Male", 63,
32L, "S2", "Control", "Control", "high", "Female", 67,
33L, "S2", "Control", "Control", "high", "Female", 82,
34L, "S2", "Control", "Control", "high", "Female", 81,
35L, "S1", "Control", "Control", "high", "Male", 72,
36L, "S1", "Control", "Control", "high", "Female", 63,
37L, "S2", "Control", "Control", "normal", "Female", 79,
38L, "S2", "Control", "Control", "normal", "Male", 72,
39L, "S2", "Control", "Control", "normal", "Male", 66,
40L, "S2", "Control", "Control", "normal", "Female", 77,
41L, "S2", "Control", "Control", "normal", "Male", 62,
42L, "S2", "Control", "Control", "normal", "Male", 81,
43L, "S2", "Control", "Control", "normal", "Male", 75,
44L, "S3", "Disease", "MildModerate", "normal", "Female", 47,
45L, "S3", "Disease", "MildModerate", "normal", "Male", 55,
46L, "S2", "Disease", "MildModerate", "normal", "Male", 75,
47L, "S2", "Disease", "MildModerate", "normal", "Female", 87,
48L, "S2", "Disease", "MildModerate", "normal", "Female", 53,
49L, "S2", "Disease", "MildModerate", "normal", "Female", 71,
50L, "S2", "Disease", "MildModerate", "normal", "Female", 78,
51L, "S2", "Disease", "MildModerate", "normal", "Female", 76,
52L, "S2", "Disease", "MildModerate", "normal", "Male", 73,
53L, "S2", "Disease", "MildModerate", "normal", "Female", 65,
54L, "S3", "Disease", "MildModerate", "high", "Female", 52,
55L, "S3", "Disease", "MildModerate", "high", "Male", 45,
56L, "S3", "Disease", "MildModerate", "high", "Male", 48,
57L, "S3", "Disease", "MildModerate", "high", "Male", 47,
58L, "S2", "Disease", "MildModerate", "high", "Female", 75,
59L, "S2", "Disease", "MildModerate", "high", "Male", 63,
60L, "S2", "Disease", "MildModerate", "high", "Male", 62,
61L, "S2", "Disease", "MildModerate", "high", "Female", 77,
62L, "S1", "Disease", "MildModerate", "high", "Female", 69,
63L, "S1", "Disease", "MildModerate", "high", "Male", 83,
64L, "S1", "Disease", "MildModerate", "high", "Female", 67,
65L, "S3", "Disease", "Severe", "normal", "Female", 56,
66L, "S3", "Disease", "Severe", "normal", "Male", 54,
67L, "S3", "Disease", "Severe", "normal", "Male", 50,
68L, "S2", "Disease", "Severe", "high", "Female", 74,
69L, "S2", "Disease", "Severe", "normal", "Male", 70,
70L, "S2", "Disease", "Severe", "normal", "Female", 75,
71L, "S2", "Disease", "Severe", "normal", "Male", 78,
72L, "S2", "Disease", "Severe", "normal", "Female", 57,
73L, "S2", "Disease", "Severe", "normal", "Male", 70,
74L, "S2", "Disease", "Severe", "normal", "Male", 72,
75L, "S2", "Disease", "Severe", "normal", "Female", 76,
76L, "S2", "Disease", "Severe", "high", "Male", 76,
77L, "S2", "Disease", "Severe", "high", "Male", 86,
78L, "S2", "Disease", "Severe", "high", "Male", 74,
79L, "S2", "Disease", "Severe", "high", "Male", 59,
80L, "S2", "Disease", "Severe", "high", "Female", 73,
81L, "S3", "Disease", "Severe", "high", "Male", 65,
82L, "S3", "Disease", "Severe", "high", "Female", 66,
83L, "S2", "Disease", "Severe", "high", "Female", 53,
84L, "S2", "Disease", "Severe", "high", "Male", 73,
85L, "S1", "Disease", "Severe", "high", "Female", 57,
86L, "S1", "Disease", "Severe", "high", "Male", 77
)
# One plate with 8 rows and 11 columns.
bc <- BatchContainer$new(
  dimensions = list(plate = 1, row = 8, column = 11)
)
assign_random(bc, samples)

# Build the plate scoring functions for every balance variable and flatten
# the per-variable results into a single list named after the variables.
scoring_funcs <- unlist(
  purrr::map(
    params$balance_variables,
    function(balance_var) {
      mk_plate_scoring_functions(
        bc,
        row = "row", column = "column", group = balance_var,
        p = 2, penalize_lines = "soft"
      )
    }
  )
)
names(scoring_funcs) <- params$balance_variables
bc$scoring_f <- scoring_funcs

traces <- optimize_design(
  bc,
  max_iter = 5000,
  quiet = TRUE,
  # Restricting the shuffling to plate 1 is not actually needed here,
  # since there is only one plate.
  shuffle_proposal_func = mk_subgroup_shuffling_function(
    subgroup_vars = "plate",
    restrain_on_subgroup_levels = 1
  ),
  acceptance_func = accept_leftmost_improvement
)
Changing the mk_plate_scoring_functions options p = 1, penalize_lines = "none"
works a lot better.
Please DO NOT use any confidential data when submitting an issue.
For Roche internal data use code.roche.com.
Please briefly describe your problem and what output you expect.
If you have a question, please don't use this form. Instead, ask in the Discussions.
When possible, please include a minimal reproducible example (AKA a reprex).
If you've never heard of a reprex before, start by reading https://www.tidyverse.org/help/#reprex.
Brief description of the problem
# insert reprex here
Would it be possible to give names to the optimization indices?
In case of the plate wrapper they could be the plate names.
Similarly, if we think about the "optimize_design_by " function, it could just be a column for the "by" variable levels.
It adds another column to the score tables but if they are made factors, maybe that's not too bad?
It could simplify trace plots if not all "scores" were used in all optimization runs:
Currently, we have some code to cache slow parts of vignettes.
This code is ugly and relies on global variables; it also causes some warnings during R check.
Instead we would like to put slow code into a child markdown document, which can be rendered manually when needed.
After discussing with @banfai we decided to put all the child documents (`Rmd`s and `md`s) into `vignettes/cached`.
In the current `optimize_multi_plate_design` function, a plate-specific scoring function is generated for each plate with `mk_plate_scoring_functions`. For these scoring functions, a plate-specific distance matrix is pre-calculated and reused every time the scoring is done. Such pre-calculation would not be possible if we wanted to use the scoring functions in a 'group_by' manner and apply them on the fly to a given subset (plate) of all samples.
TODO: expand this
Stan's book, p. 253
After samples are assigned randomly to the BC and plotted,
Please DO NOT use any confidential data when submitting an issue.
For Roche internal data use code.roche.com.
Please briefly describe your problem and what output you expect.
If you have a question, please don't use this form. Instead, ask in the Discussions.
When possible, please include a minimal reproducible example (AKA a reprex).
If you've never heard of a reprex before, start by reading https://www.tidyverse.org/help/#reprex.
Brief description of the problem
# insert reprex here
At the moment `accept_strict_improvement()` is marked as internal. This means that it's not exported and a user cannot read the documentation.
I propose exporting it. @ingitwetrust what do you think?
A colleague had a case which requires shuffling constraints (some plate locations are for controls) and multiple group variables for plate optimization.
We do not have, I believe, an example for this.
This would require a manual two-step approach with:
shuffle_with_constraints()
mk_plate_scoring_functions()
Please DO NOT use any confidential data when submitting an issue.
For Roche internal data use code.roche.com.
Please briefly describe your problem and what output you expect.
If you have a question, please don't use this form. Instead, ask in the Discussions.
When possible, please include a minimal reproducible example (AKA a reprex).
If you've never heard of a reprex before, start by reading https://www.tidyverse.org/help/#reprex.
Brief description of the problem
# insert reprex here
`optimize_multi_plate_design` does not really optimize the within-plate distribution of covariates. Instead, clusters form.
Replacing the parameters with p = 1 and penalize_lines = "none" fixes the issue, but these cannot be set in `optimize_multi_plate_design`.
Example
samples <- structure(list(Subject.Number = c(1, 2, 3, 3, 4, 4, 5, 5, 6,
7, 7, 8, 8, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15, 15, 16, 17,
17, 18, 18, 19, 19, 20, 21, 21, 22, 22, 23, 24, 25, 26, 26, 27,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
42, 43, 43, 44, 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54,
55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68), Gender = c("Male",
"Male", "Female", "Female", "Male", "Male", "Male", "Male", "Male",
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male",
"Female", "Female", "Female", "Female", "Female", "Male", "Male",
"Female", "Female", "Female", "Female", "Female", "Female", "Female",
"Female", "Male", "Male", "Male", "Male", "Male", "Male", "Male",
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male",
"Male", "Female", "Female", "Male", "Female", "Male", "Male",
"Male", "Female", "Male", "Male", "Male", "Female", "Female",
"Female", "Female", "Male", "Female", "Female", "Male", "Female",
"Male", "Male", "Male", "Female", "Female", "Male", "Male", "Male",
"Female", "Female", "Male", "Male", "Female", "Male", "Male",
"Male", "Male", "Female", "Female", "Female"), Timepoint = c("DAY 1",
"DAY 1", "DAY 1", "DAY 168", "DAY 1", "DAY 168", "DAY 1", "DAY 112",
"DAY 1", "DAY 1", "DAY 112", "DAY 1", "DAY 112", "DAY 1", "DAY 1",
"DAY 112", "DAY 1", "DAY 1", "DAY 1", "DAY 168", "DAY 1", "DAY 112",
"DAY 1", "DAY 112", "DAY 1", "DAY 1", "DAY 112", "DAY 1", "DAY 112",
"DAY 1", "DAY 112", "DAY 1", "DAY 1", "DAY 112", "DAY 1", "DAY 112",
"DAY 1", "DAY 1", "DAY 1", "DAY 1", "DAY 112", "DAY 1", "DAY 112",
"DAY 1", "DAY 1", "DAY 1", "DAY 1", "DAY 1", "DAY 1", "DAY 1",
"DAY 1", "DAY 1", "DAY 1", "DAY 1", "DAY 1", "DAY 1", "DAY 1",
"DAY 1", "DAY 168", "DAY 1", "DAY 112", "DAY 1", "DAY 112", "DAY 1",
"DAY 1", "DAY 112", "DAY 1", "DAY 1", "DAY 1", "DAY 1", "DAY 1",
"DAY 1", "DAY 1", "DAY 1", "DAY 1", "DAY 1", "DAY 1", "DAY 1",
"DAY 1", "DAY 1", "DAY 1", "DAY 1", "DAY 1", "DAY 1", "DAY 1",
"DAY 1", "DAY 1", "DAY 1"), Site = c("350545", "350545", "350545",
"350545", "350545", "350545", "350545", "350545", "350545", "350545",
"350545", "350545", "350545", "350545", "350545", "350545", "350545",
"350545", "350545", "350545", "350545", "350545", "350545", "350545",
"350545", "350545", "350545", "350545", "350545", "350545", "350545",
"350545", "350545", "350545", "350545", "350545", "350545", "350545",
"350545", "350545", "350545", "350545", "350545", "350545", "350545",
"350545", "350545", "350545", "350545", "350545", "350545", "350545",
"350545", "356312", "356312", "356312", "356312", "356312", "356312",
"356312", "356312", "356312", "356312", "356312", "356312", "356312",
"356312", "356312", "356312", "356312", "356312", "356312", "356312",
"356312", "356312", "356312", "356312", "356312", "356312", "356312",
"356312", "356312", "356312", "356312", "356312", "356312", "356312",
"356312"), Previous_treatment = c("Tx naive", "Pre-treated",
"Tx naive", "Tx naive", "Pre-treated", "Pre-treated", "Pre-treated",
"Pre-treated", "Pre-treated", "Pre-treated", "Pre-treated", "Pre-treated",
"Pre-treated", "Tx naive", "Pre-treated", "Pre-treated", "Pre-treated",
"Pre-treated", "Tx naive", "Tx naive", "Tx naive", "Tx naive",
"Tx naive", "Tx naive", "Tx naive", "Tx naive", "Tx naive", "Tx naive",
"Tx naive", "Tx naive", "Tx naive", "Tx naive", "Tx naive", "Tx naive",
"Pre-treated", "Pre-treated", "Tx naive", "Tx naive", "Pre-treated",
"Tx naive", "Tx naive", "Tx naive", "Tx naive", "Tx naive", "Tx naive",
"Tx naive", "Pre-treated", "Pre-treated", "Pre-treated", "Pre-treated",
"Pre-treated", "Pre-treated", "Tx naive", "Tx naive", "Pre-treated",
"Pre-treated", "Pre-treated", "Pre-treated", "Pre-treated", "Pre-treated",
"Pre-treated", "Pre-treated", "Pre-treated", "Pre-treated", "Pre-treated",
"Pre-treated", "Pre-treated", "Tx naive", "Pre-treated", "Pre-treated",
"Tx naive", "Tx naive", "Tx naive", "Tx naive", "Tx naive", NA,
"Pre-treated", "Pre-treated", "Pre-treated", "Tx naive", "Tx naive",
"Pre-treated", "Pre-treated", "Pre-treated", "Pre-treated", "Pre-treated",
"Pre-treated", "Tx naive")), row.names = c(NA, -88L), class = "data.frame")
# Make a batch container: one plate with rows 1..8 and columns 1..11.
bc <- BatchContainer$new(
  dimensions = list(
    "plate" = 1,
    "row" = list(values = 1:8),
    "column" = list(values = 1:11)
  )
)

# Initial assignment: samples are placed in the order given.
bc <- assign_in_order(bc, samples)

# Factors to balance.
balance_variables <- c("Site", "Timepoint", "Previous_treatment", "Gender")

# Run the wrapper; its result is kept separately as `bc1`.
bc1 <- optimize_multi_plate_design(bc,
  within_plate_variables = balance_variables,
  plate = "plate",
  row = "row",
  column = "column",
  n_shuffle = 2,
  max_iter = 3000,
  quiet = TRUE
)

# Plot site for the wrapper result. Plot `bc1` (the optimized container),
# not the input `bc`, so the figure reflects what the wrapper produced.
print(plot_plate(
  bc1$get_samples(remove_empty_locations = FALSE),
  column = column, row = row,
  .color = Site
))

# Run it manually: build one set of scoring functions per balance variable,
# using p = 1 and no line penalty instead of the wrapper's settings.
scoring_funcs <- purrr::map(
  balance_variables,
  ~ mk_plate_scoring_functions(
    bc,
    row = "row", column = "column", group = .x,
    p = 1, penalize_lines = "none"
  )
) %>% unlist()
names(scoring_funcs) <- balance_variables

bc2 <- optimize_design(
  bc,
  scoring = scoring_funcs,
  max_iter = 3000,
  quiet = TRUE,
  acceptance_func = accept_leftmost_improvement
)

# Plot site for the manual result (`bc2`), again not the input `bc`.
print(plot_plate(
  bc2$get_samples(remove_empty_locations = FALSE),
  column = column, row = row,
  .color = Site
))
Please DO NOT use any confidential data when submitting an issue.
For Roche internal data use code.roche.com.
Please briefly describe your problem and what output you expect.
If you have a question, please don't use this form. Instead, ask in the Discussions.
When possible, please include a minimal reproducible example (AKA a reprex).
If you've never heard of a reprex before, start by reading https://www.tidyverse.org/help/#reprex.
Brief description of the problem
# insert reprex here
Line 3 in 55bca85
add this:
authors:
footer:
roles: [cre, aut]
@idavydov @ingitwetrust @julianesiebourg
#NCS2022
E.g. formula to generate design matrix for analysis
code.roche.com/PMDA/packages/designit/issues
for confidential issues
Do a quick check if anything in randomization.R is still needed; otherwise remove the file.
Plumber API so the pkg can be used from other tools
TODO populate issue with details
https://bedapub.github.io/designit/articles/osat.html
This vignette can only be rendered if `OSAT` package is installed
Rewrite mk_plate_scoring_functions in such a way that it can create all temporary objects during the first call.
E.g.,
# Illustration of the requested usage: the scoring functions are created
# without passing a batch container and are then handed to optimize_design()
# through its `scoring` argument.
# NOTE(review): this shows the interface proposed in the surrounding text;
# confirm whether mk_plate_scoring_functions() accepts this call form.
scoring_f <- mk_plate_scoring_functions(
row = "row", column = "column", group = "Sex"
)
bc <- optimize_design(bc, scoring = scoring_f, max_iter = 100)
line 49 and following
# Optimize one plate at a time, carrying the updated container forward.
for (curr_plate in plate_levels) {
  # Progress output only when not quiet and there is more than one plate.
  show_progress <- !quiet && length(plate_levels) > 1
  if (show_progress) {
    cat(curr_plate, "... ")
  }

  # Shuffling proposals are restricted to the plate currently being optimized.
  plate_shuffler <- mk_subgroup_shuffling_function(
    subgroup_vars = plate,
    restrain_on_subgroup_levels = curr_plate
  )

  bc <- optimize_design(
    bc,
    scoring = scoring_funcs, # TODO: Here we should just use the scores of the current plate
    max_iter = max_iter,
    quiet = TRUE,
    shuffle_proposal_func = plate_shuffler,
    acceptance_func = accept_leftmost_improvement
  )
}
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Some thing interesting about web. New door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.