Chapter 3 Introduction to the DataSheet Class in R-Instat
The DataSheet class in R-Instat is designed to handle data management within the R-Instat environment. This class leverages the R6 system to provide an object-oriented approach to data handling. Below is a detailed explanation of the various components and functionalities of the DataSheet class.
3.1 Class Definition
The DataSheet class is defined using the R6Class function. It includes public and private methods and fields, as well as active bindings.
# R6 class generator for DataSheet. This is an overview listing: method and
# binding bodies are elided ("...").
DataSheet <- R6::R6Class("DataSheet",
# Public members; only the constructor is listed in this overview.
public = list(
initialize = function(data = data.frame(), data_name = "",
variables_metadata = data.frame(), metadata = list(),
imported_from = "",
messages = TRUE, convert=TRUE, create = TRUE,
start_point=1, filters = list(), column_selections = list(),
objects = list(), calculations = list(), keys = list(),
comments = list(), keep_attributes = TRUE) {
...
}
),
# Private fields and their initial values.
private = list(
data = data.frame(),
filters = list(),
column_selections = list(),
objects = list(),
keys = list(),
comments = list(),
calculations = list(),
changes = list(),
# The next five fields are the storage read and written by the active
# bindings current_filter, current_column_selection, data_changed,
# metadata_changed and variables_metadata_changed.
.current_filter = list(),
.current_column_selection = list(),
.data_changed = FALSE,
.metadata_changed = FALSE,
.variables_metadata_changed = FALSE,
.last_graph = NULL
),
# Active bindings. Each function takes one optional argument: it acts as the
# getter when the argument is missing and as the setter otherwise.
active = list(
data_changed = function(new_value) { ... },
metadata_changed = function(new_value) { ... },
variables_metadata_changed = function(new_value) { ... },
current_filter = function(filter) { ... },
current_column_selection = function(column_selection) { ... }
)
)
3.1.1 Public Methods
3.1.1.1 initialize
The constructor method for the DataSheet class. It initializes the object with default or user-provided values.
# Constructor. Stores the data first, then installs filters, column
# selections, metadata, objects, calculations, keys and comments through the
# corresponding setters.
#
# data, messages: passed to set_data().
# filters, column_selections, objects, calculations, keys, comments: passed to
#   the matching set_*() method.
# metadata, keep_attributes: control what is passed to set_meta() (see below).
# data_name, variables_metadata, imported_from, convert, create, start_point:
#   not used in the visible part; presumably consumed in the elided tail
#   ("...") - TODO confirm.
initialize = function(data = data.frame(), data_name = "",
variables_metadata = data.frame(), metadata = list(),
imported_from = "",
messages = TRUE, convert=TRUE, create = TRUE,
start_point=1, filters = list(), column_selections = list(),
objects = list(), calculations = list(), keys = list(),
comments = list(), keep_attributes = TRUE) {
# Set up the data object
self$set_data(data, messages)
# NOTE(review): this runs after set_data(), which records a change entry;
# presumably set_changes(list()) resets the change list - confirm.
self$set_changes(list())
self$set_filters(filters)
self$set_column_selections(column_selections)
if(keep_attributes) {
# Keep the attributes already on the stored data and append the supplied
# metadata to them.
self$set_meta(c(attributes(private$data), metadata))
} else {
# Use only the supplied metadata and clear the variables metadata.
self$set_meta(metadata)
self$clear_variables_metadata()
}
self$add_defaults_meta()
self$add_defaults_variables_metadata(self$get_column_names())
self$set_objects(objects)
self$set_calculations(calculations)
self$set_keys(keys)
self$set_comments(comments)
...
}
3.1.1.2 set_data
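Sets the data for the DataSheet object. Matrices, tibbles, data.tables, time series, arrays and non-list vectors are first converted to a data frame; any other value that is not a data frame causes an error. When check_names is TRUE, a column named "T" is renamed to ".T" and column names are made syntactically valid and unique, with a warning if any name has to change.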
# Store `new_data` as the sheet's data frame.
#
# new_data: a data frame, or a matrix, tibble, data.table, time series, array
#   or non-list vector, which is converted to a data frame first.
# messages: if TRUE, emit a message when the resulting data has length zero.
# check_names: if TRUE, rename a column called "T" to ".T" and force all
#   column names to be syntactically valid and unique (warning if any change).
# Stops if the value is still not a data frame after conversion.
DataSheet$set("public", "set_data", function(new_data, messages = TRUE, check_names = TRUE) {
  if (is.matrix(new_data) || tibble::is_tibble(new_data) || data.table::is.data.table(new_data)) {
    new_data <- as.data.frame(new_data)
  } else if (is.ts(new_data)) {
    # A time series becomes two columns: its index and its values.
    time_index <- zoo::index(new_data)
    new_data <- data.frame(index = time_index, value = new_data)
  } else if (is.array(new_data) || (is.vector(new_data) && !is.list(new_data))) {
    new_data <- as.data.frame(new_data)
  }

  if (!is.data.frame(new_data)) {
    stop("Data set must be of type: data.frame")
  }

  if (length(new_data) == 0 && messages) {
    message("data is empty. Data will be an empty data frame.")
  }

  if (check_names) {
    if ("T" %in% names(new_data)) {
      names(new_data)[names(new_data) == "T"] <- ".T"
    }
    ascii_names <- iconv(names(new_data), to = "ASCII//TRANSLIT", sub = ".")
    valid_names <- make.names(ascii_names, unique = TRUE)
    if (any(names(new_data) != valid_names)) {
      warning("Not all column names are syntactically valid or unique. make.names() and iconv() will be used to force them to be valid and unique.")
      names(new_data) <- valid_names
    }
  }

  private$data <- new_data
  self$append_to_changes(list(Set_property, "data"))
  self$data_changed <- TRUE
  self$variables_metadata_changed <- TRUE
})
3.1.1.3 set_meta
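Sets the metadata for the DataSheet object. The existing metadata is cleared, and each named element of the supplied list is then added with append_to_metadata(). The argument must be a list.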
# Replace the sheet's metadata with the entries of `new_meta`.
#
# new_meta: a named list; each element is stored under its name via
#   append_to_metadata().
# Stops if `new_meta` is not a list. The check runs before anything is
# cleared, so a rejected call leaves the existing metadata untouched.
DataSheet$set("public", "set_meta", function(new_meta) {
  # Evaluate the argument before clearing: initialize() passes an expression
  # built from attributes(private$data), which clear_metadata() strips.
  meta_data_copy <- new_meta
  if (!is.list(meta_data_copy)) stop("new_meta must be of type: list")
  self$clear_metadata()
  for (name in names(meta_data_copy)) {
    self$append_to_metadata(name, meta_data_copy[[name]])
  }
  self$metadata_changed <- TRUE
  self$append_to_changes(list(Set_property, "meta data"))
})
3.1.1.4 clear_metadata
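Clears the metadata associated with the DataSheet object. Every attribute of the stored data is removed except the data type label, the data name label, "row.names" and "names"; the default metadata is then added back.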
# Remove every attribute of the stored data except a protected set, then
# re-add the default metadata and record the change.
DataSheet$set("public", "clear_metadata", function() {
  protected <- c(data_type_label, data_name_label, "row.names", "names")
  attribute_names <- names(attributes(private$data))
  # NOTE(review): "class" is not in the protected set, so unless one of the
  # label constants equals "class" the class attribute is removed here too -
  # confirm this is intended.
  for (attribute_name in attribute_names[!attribute_names %in% protected]) {
    attr(private$data, attribute_name) <- NULL
  }
  self$add_defaults_meta()
  self$metadata_changed <- TRUE
  self$append_to_changes(list(Set_property, "meta data"))
})
3.1.1.5 set_filters
Sets the filters for the DataSheet object.
# Replace the sheet's filters with `new_filters` (must be a list).
# If the new list has no entry named "no_filter", one is added and made the
# current filter.
DataSheet$set("public", "set_filters", function(new_filters) {
  if (!is.list(new_filters)) {
    stop("Filters must be of type: list")
  }
  self$append_to_changes(list(Set_property, "filters"))
  private$filters <- new_filters

  has_no_filter <- "no_filter" %in% names(private$filters)
  if (!has_no_filter) {
    self$add_filter(
      filter = list(), filter_name = "no_filter", replace = TRUE,
      set_as_current = TRUE, na.rm = FALSE, is_no_filter = TRUE
    )
  }
})
3.1.1.6 set_column_selections
Sets the column selections for the DataSheet object.
# Replace the sheet's column selections with `new_column_selections`.
#
# new_column_selections: a list of column selections. Stops with an explicit
#   message if it is not a list, matching the other setters of this class.
# If the new list has no entry named ".everything", one is added and made the
# current column selection.
DataSheet$set("public", "set_column_selections", function(new_column_selections) {
  if (!is.list(new_column_selections)) stop("new_column_selections must be of type: list")
  self$append_to_changes(list(Set_property, "column selections"))
  private$column_selections <- new_column_selections
  if (!".everything" %in% names(private$column_selections)) {
    self$add_column_selection(
      column_selection = list(), name = ".everything", replace = TRUE,
      set_as_current = TRUE, is_everything = TRUE
    )
  }
})
3.1.1.7 set_objects
Sets the objects for the DataSheet object.
# Replace the sheet's stored objects with `new_objects` (must be a list) and
# record the change.
DataSheet$set("public", "set_objects", function(new_objects) {
  if (!is.list(new_objects)) {
    stop("new_objects must be of type: list")
  }
  self$append_to_changes(list(Set_property, "objects"))
  private$objects <- new_objects
})
3.1.1.8 set_calculations
Sets the calculations for the DataSheet object.
# Replace the sheet's stored calculations with `new_calculations` (must be a
# list) and record the change.
DataSheet$set("public", "set_calculations", function(new_calculations) {
  if (!is.list(new_calculations)) {
    stop("new_calculations must be of type: list")
  }
  self$append_to_changes(list(Set_property, "calculations"))
  private$calculations <- new_calculations
})
3.1.2 Private Methods
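Private members are used internally within the DataSheet class and are not accessible directly from outside the class. In the class definition shown above, the private list holds fields rather than methods: the data frame itself, the filters, column selections, objects, keys, comments, calculations and the list of changes, plus dot-prefixed fields such as .data_changed and .current_filter that store the values behind the active bindings. Together these fields hold the state that the public methods and active bindings read and modify.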
3.1.3 Active Bindings
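Active bindings provide a way to define properties with custom getters and setters. In the DataSheet class, active bindings are used to manage the change flags for data, metadata, and variables metadata, as well as the current filter and the current column selection. Each binding is a single function: called without an argument it acts as the getter, and when a value is assigned to the binding that value is passed as the argument and the function acts as the setter.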
3.1.3.1 data_changed
# Active binding for the "data changed" flag.
# Read (no argument): returns the stored flag.
# Write: validates the value, stores it and records the change.
# Stops with "new_value must be TRUE or FALSE" for any other value.
data_changed = function(new_value) {
  if (missing(new_value)) {
    return(private$.data_changed)
  }
  # Check length and NA first so that NULL, NA or vector input reaches the
  # intended error below instead of failing inside `if`.
  is_flag <- length(new_value) == 1 && !is.na(new_value) &&
    (new_value == TRUE || new_value == FALSE)
  if (!is_flag) stop("new_value must be TRUE or FALSE")
  private$.data_changed <- new_value
  self$append_to_changes(list(Set_property, "data_changed"))
}
3.1.3.2 metadata_changed
# Active binding for the "metadata changed" flag.
# Read (no argument): returns the stored flag.
# Write: validates the value, stores it and records the change.
# Stops with "new_value must be TRUE or FALSE" for any other value.
metadata_changed = function(new_value) {
  if (missing(new_value)) {
    return(private$.metadata_changed)
  }
  # Check length and NA first so that NULL, NA or vector input reaches the
  # intended error below instead of failing inside `if`.
  is_flag <- length(new_value) == 1 && !is.na(new_value) &&
    (new_value == TRUE || new_value == FALSE)
  if (!is_flag) stop("new_value must be TRUE or FALSE")
  private$.metadata_changed <- new_value
  self$append_to_changes(list(Set_property, "metadata_changed"))
}
3.1.3.3 variables_metadata_changed
# Active binding for the "variables metadata changed" flag.
# Read (no argument): returns the stored flag.
# Write: validates the value, stores it and records the change.
# Stops with "new_value must be TRUE or FALSE" for any other value.
variables_metadata_changed = function(new_value) {
  if (missing(new_value)) {
    return(private$.variables_metadata_changed)
  }
  # Check length and NA first so that NULL, NA or vector input reaches the
  # intended error below instead of failing inside `if`.
  is_flag <- length(new_value) == 1 && !is.na(new_value) &&
    (new_value == TRUE || new_value == FALSE)
  if (!is_flag) stop("new_value must be TRUE or FALSE")
  private$.variables_metadata_changed <- new_value
  # NOTE(review): the recorded label is "variable_data_changed", which does
  # not match the binding's name - confirm whether this is intended.
  self$append_to_changes(list(Set_property, "variable_data_changed"))
}
3.1.3.4 current_filter
# Active binding for the current filter.
# Write: stores the filter, marks the data as changed and records the change.
# Read (no argument): returns get_filter_as_logical() for the stored filter's
# name.
current_filter = function(filter) {
  if (!missing(filter)) {
    private$.current_filter <- filter
    self$data_changed <- TRUE
    self$append_to_changes(list(Set_property, "current_filter"))
  } else {
    self$get_filter_as_logical(private$.current_filter$name)
  }
}
3.1.3.5 current_column_selection
# Active binding for the current column selection.
# Read (no argument): returns the column names of the stored selection, or
# all column names of the data when the stored selection is NULL.
# Write: stores the selection, marks the data as changed and records the
# change.
current_column_selection = function(column_selection) {
  if (missing(column_selection)) {
    stored_selection <- private$.current_column_selection
    if (is.null(stored_selection)) {
      return(names(private$data))
    }
    return(self$get_column_selection_column_names(stored_selection$name))
  }
  private$.current_column_selection <- column_selection
  self$data_changed <- TRUE
  self$append_to_changes(list(Set_property, "current_column_selection"))
}
3.2 Conclusion
The DataSheet class in R-Instat is a comprehensive tool for data management, providing various methods to handle data, metadata, filters, column selections, and more. By using the R6 system, it offers an object-oriented approach that enhances the flexibility and functionality of data handling in R-Instat.