Chapter 3 Introduction to the DataSheet Class in R-Instat

The DataSheet class in R-Instat is designed to handle data management within the R-Instat environment. This class leverages the R6 system to provide an object-oriented approach to data handling. Below is a detailed explanation of the various components and functionalities of the DataSheet class.

3.1 Class Definition

The DataSheet class is defined using the R6Class function. It includes public and private methods and fields, as well as active bindings.

# Skeleton of the DataSheet R6 class. Method and binding bodies are elided
# with `...` in this listing; only the declared members are shown.
DataSheet <- R6::R6Class("DataSheet",
  public = list(
    # Constructor. Every argument has a default, so DataSheet$new() can be
    # called with no arguments.
    initialize = function(data = data.frame(), data_name = "", 
                          variables_metadata = data.frame(), metadata = list(), 
                          imported_from = "", 
                          messages = TRUE, convert=TRUE, create = TRUE, 
                          start_point=1, filters = list(), column_selections = list(), 
                          objects = list(), calculations = list(), keys = list(), 
                          comments = list(), keep_attributes = TRUE) {
      ...
    }
  ),
  private = list(
    # Stored state. Each field starts as an empty data frame or empty list.
    data = data.frame(),
    filters = list(),
    column_selections = list(),
    objects = list(),
    keys = list(),
    comments = list(),
    calculations = list(),
    changes = list(), 
    # Dot-prefixed fields back the active bindings of the same name
    # (without the leading dot) declared in `active` below.
    .current_filter = list(),
    .current_column_selection = list(),
    .data_changed = FALSE,
    .metadata_changed = FALSE, 
    .variables_metadata_changed = FALSE,
    # No accessor for .last_graph appears in this listing.
    .last_graph = NULL
  ),
  active = list(
    # Each binding is a one-argument function: called without a value it acts
    # as a getter, called with a value it acts as a setter.
    data_changed = function(new_value) { ... },
    metadata_changed = function(new_value) { ... },
    variables_metadata_changed = function(new_value) { ... },
    current_filter = function(filter) { ... },
    current_column_selection = function(column_selection) { ... }
  )
)

3.1.1 Public Methods

3.1.1.1 initialize

The constructor method for the DataSheet class. It initializes the object with default or user-provided values.

# Constructor for DataSheet (excerpt: the end of the body is elided with `...`).
# In the portion shown, the arguments used are data, messages, filters,
# column_selections, keep_attributes, metadata, objects, calculations, keys
# and comments; the remaining arguments are not referenced in the visible code.
initialize = function(data = data.frame(), data_name = "", 
                      variables_metadata = data.frame(), metadata = list(), 
                      imported_from = "", 
                      messages = TRUE, convert=TRUE, create = TRUE, 
                      start_point=1, filters = list(), column_selections = list(), 
                      objects = list(), calculations = list(), keys = list(), 
                      comments = list(), keep_attributes = TRUE) {
  # Set up the data object
  # The data must be stored first: the metadata step below reads
  # attributes(private$data).
  self$set_data(data, messages)
  # Start with an empty change log.
  self$set_changes(list())
  self$set_filters(filters)
  self$set_column_selections(column_selections)
  if(keep_attributes) {
    # Combine the attributes currently on the stored data with the supplied
    # metadata list and set the result as the metadata.
    self$set_meta(c(attributes(private$data), metadata))
  } else {
    # Use only the supplied metadata and clear the variables metadata.
    self$set_meta(metadata)
    self$clear_variables_metadata()
  }
  # Add default metadata, then default variables metadata for the current columns.
  self$add_defaults_meta()
  self$add_defaults_variables_metadata(self$get_column_names())
  # Store the remaining collections through their setters.
  self$set_objects(objects)
  self$set_calculations(calculations)
  self$set_keys(keys)
  self$set_comments(comments)
  ...
}

3.1.1.2 set_data

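Sets the data for the DataSheet object. Matrices, tibbles, data.tables, time series, arrays, and atomic vectors are first converted to a data frame; any other input that is not a data frame raises an error. When check_names is TRUE, a column named "T" is renamed to ".T", and column names that are not syntactically valid or unique are corrected (with a warning) using iconv() and make.names().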

# Store `new_data` as the sheet's data frame.
#
# new_data:    a data frame, or a matrix, tibble, data.table, ts, array or
#              atomic vector, which is converted to a data frame first.
# messages:    if TRUE, emit a message when the resulting data has length 0.
# check_names: if TRUE, rename a column called "T" to ".T" and force column
#              names to be syntactically valid and unique.
#
# Stops if the (converted) input is not a data frame. On success it records
# the change and flags both the data and the variables metadata as changed.
DataSheet$set("public", "set_data", function(new_data, messages=TRUE, check_names = TRUE) {
  # Coerce supported inputs; the tests run in the same order as a chain of
  # if / else-if branches would, and only the first match applies.
  if (is.matrix(new_data) || tibble::is_tibble(new_data) || data.table::is.data.table(new_data)) {
    new_data <- as.data.frame(new_data)
  } else if (is.ts(new_data)) {
    # Keep the time index alongside the series values.
    time_index <- zoo::index(new_data)
    new_data <- data.frame(index = time_index, value = new_data)
  } else if (is.array(new_data) || (is.vector(new_data) && !is.list(new_data))) {
    new_data <- as.data.frame(new_data)
  }

  if (!is.data.frame(new_data)) {
    stop("Data set must be of type: data.frame")
  }

  if (length(new_data) == 0 && messages) {
    message("data is empty. Data will be an empty data frame.")
  }

  if (check_names) {
    current_names <- names(new_data)
    # A column literally named "T" is renamed to ".T" before validation.
    if ("T" %in% current_names) {
      current_names[current_names == "T"] <- ".T"
      names(new_data) <- current_names
    }
    clean_names <- make.names(iconv(current_names, to = "ASCII//TRANSLIT", sub = "."), unique = TRUE)
    if (!all(current_names == clean_names)) {
      warning("Not all column names are syntactically valid or unique. make.names() and iconv() will be used to force them to be valid and unique.")
      names(new_data) <- clean_names
    }
  }

  private$data <- new_data
  self$append_to_changes(list(Set_property, "data"))
  self$data_changed <- TRUE
  self$variables_metadata_changed <- TRUE
})

3.1.1.3 set_meta

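Sets the metadata for the DataSheet object. The existing metadata is cleared, and each named element of the supplied list is then appended as metadata. The argument must be a list.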

# Replace the sheet's metadata with the named elements of `new_meta`.
#
# new_meta: a list; each named element is passed to append_to_metadata().
#
# Stops with "new_meta must be of type: list" if `new_meta` is not a list.
# The check runs BEFORE the existing metadata is cleared, so a bad argument
# leaves the object untouched instead of wiping its metadata and then failing.
DataSheet$set("public", "set_meta", function(new_meta) {
  # Force the argument now. R evaluates arguments lazily, and a caller may pass
  # an expression that reads attributes(private$data) (initialize does); it
  # must be evaluated before clear_metadata() removes those attributes.
  meta_data_copy <- new_meta
  if (!is.list(meta_data_copy)) {
    stop("new_meta must be of type: list")
  }
  self$clear_metadata()
  # NOTE(review): `[[name]]` returns the first element with that name, so if
  # the list has duplicate names the first value is appended each time.
  # Confirm this is the intended precedence.
  for (name in names(meta_data_copy)) {
    self$append_to_metadata(name, meta_data_copy[[name]])
  }
  self$metadata_changed <- TRUE
  self$append_to_changes(list(Set_property, "meta data"))
})

3.1.1.4 clear_metadata

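Clears the metadata associated with the DataSheet object. Every attribute of the data is removed except the column names, the row names, and the data type and data name labels; the default metadata is then added back.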

# Remove all attributes from the stored data except a small protected set,
# then re-add the default metadata and record the change.
DataSheet$set("public", "clear_metadata", function() {
  # Attributes that survive the reset.
  # NOTE(review): "class" is not in this set, so the class attribute of the
  # stored data is removed as well. Confirm callers rely on or restore this.
  protected <- c(data_type_label, data_name_label, "row.names", "names")
  removable <- setdiff(names(attributes(private$data)), protected)
  for (attr_name in removable) {
    attr(private$data, attr_name) <- NULL
  }
  self$add_defaults_meta()
  self$metadata_changed <- TRUE
  self$append_to_changes(list(Set_property, "meta data"))
})

3.1.1.5 set_filters

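Sets the filters for the DataSheet object. The argument must be a list. If the list does not contain a filter named "no_filter", one is added and set as the current filter.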

# Replace the stored filters with `new_filters` (must be a list).
# If the new list has no "no_filter" entry, one is added via add_filter()
# and set as the current filter.
DataSheet$set("public", "set_filters", function(new_filters) {
  if (!is.list(new_filters)) {
    stop("Filters must be of type: list")
  }
  # The change is logged before the field is overwritten.
  self$append_to_changes(list(Set_property, "filters"))
  private$filters <- new_filters
  has_no_filter <- "no_filter" %in% names(private$filters)
  if (!has_no_filter) {
    self$add_filter(
      filter = list(),
      filter_name = "no_filter",
      replace = TRUE,
      set_as_current = TRUE,
      na.rm = FALSE,
      is_no_filter = TRUE
    )
  }
})

3.1.1.6 set_column_selections

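Sets the column selections for the DataSheet object. The argument must be a list. If the list does not contain a selection named ".everything", one is added and set as the current column selection.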

# Replace the stored column selections with `new_column_selections`
# (must be a list). If the new list has no ".everything" entry, one is added
# via add_column_selection() and set as the current selection.
DataSheet$set("public", "set_column_selections", function(new_column_selections) {
  stopifnot(is.list(new_column_selections))
  # The change is logged before the field is overwritten.
  self$append_to_changes(list(Set_property, "column selections"))
  private$column_selections <- new_column_selections
  has_everything <- ".everything" %in% names(private$column_selections)
  if (!has_everything) {
    self$add_column_selection(
      column_selection = list(),
      name = ".everything",
      replace = TRUE,
      set_as_current = TRUE,
      is_everything = TRUE
    )
  }
})

3.1.1.7 set_objects

Sets the objects for the DataSheet object.

# Replace the stored objects with `new_objects`.
# Stops if `new_objects` is not a list; otherwise logs the change and
# overwrites the private field.
DataSheet$set("public", "set_objects", function(new_objects) {
  if (!is.list(new_objects)) {
    stop("new_objects must be of type: list")
  }
  self$append_to_changes(list(Set_property, "objects"))
  private$objects <- new_objects
})

3.1.1.8 set_calculations

Sets the calculations for the DataSheet object.

# Replace the stored calculations with `new_calculations`.
# Stops if `new_calculations` is not a list; otherwise logs the change and
# overwrites the private field.
DataSheet$set("public", "set_calculations", function(new_calculations) {
  if (!is.list(new_calculations)) {
    stop("new_calculations must be of type: list")
  }
  self$append_to_changes(list(Set_property, "calculations"))
  private$calculations <- new_calculations
})

3.1.1.9 set_keys

Sets the keys for the DataSheet object.

# Replace the stored keys with `new_keys`.
# Stops if `new_keys` is not a list; otherwise logs the change and
# overwrites the private field.
DataSheet$set("public", "set_keys", function(new_keys) {
  if (!is.list(new_keys)) {
    stop("new_keys must be of type: list")
  }
  self$append_to_changes(list(Set_property, "keys"))
  private$keys <- new_keys
})

3.1.1.10 set_comments

Sets the comments for the DataSheet object.

# Replace the stored comments with `new_comments`.
# Stops if `new_comments` is not a list; otherwise logs the change and
# overwrites the private field.
DataSheet$set("public", "set_comments", function(new_comments) {
  if (!is.list(new_comments)) {
    stop("new_comments must be of type: list")
  }
  self$append_to_changes(list(Set_property, "comments"))
  private$comments <- new_comments
})

3.1.2 Private Methods

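The private members are used internally within the DataSheet class and are not accessible directly from outside the class. In the definition shown above they are fields rather than methods: they hold the data, filters, column selections, objects, keys, comments, calculations, and the change log, together with the dot-prefixed fields that back the active bindings. They are read and modified only through the public methods and active bindings.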

3.1.3 Active Bindings

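Active bindings provide a way to define properties with custom getters and setters. In the DataSheet class, active bindings are used to manage the changed flags for data, metadata, and variables metadata, and to get and set the current filter and the current column selection. Each binding is a function of one argument: when called without a value it returns the current state, and when called with a value it updates the state and records the change.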

3.1.3.1 data_changed

# Active binding for the "data changed" flag.
#
# Getter (no value supplied): returns private$.data_changed.
# Setter: validates `new_value`, stores it and logs the change.
#
# Accepted values are unchanged: any single, non-missing value that compares
# equal to TRUE or FALSE. NA, NULL and vectors of other lengths are now
# rejected with the intended message instead of failing inside `if`/`&&`
# with an unrelated error.
data_changed = function(new_value) {
  if (missing(new_value)) {
    return(private$.data_changed)
  }
  # Check length and NA first so the comparisons below always yield a
  # single TRUE/FALSE.
  is_flag <- length(new_value) == 1L && !is.na(new_value) &&
    (new_value == TRUE || new_value == FALSE)
  if (!is_flag) {
    stop("new_value must be TRUE or FALSE")
  }
  private$.data_changed <- new_value
  self$append_to_changes(list(Set_property, "data_changed"))
}

3.1.3.2 metadata_changed

# Active binding for the "metadata changed" flag.
#
# Getter (no value supplied): returns private$.metadata_changed.
# Setter: validates `new_value`, stores it and logs the change.
#
# Accepted values are unchanged: any single, non-missing value that compares
# equal to TRUE or FALSE. NA, NULL and vectors of other lengths are now
# rejected with the intended message instead of failing inside `if`/`&&`
# with an unrelated error.
metadata_changed = function(new_value) {
  if (missing(new_value)) {
    return(private$.metadata_changed)
  }
  # Check length and NA first so the comparisons below always yield a
  # single TRUE/FALSE.
  is_flag <- length(new_value) == 1L && !is.na(new_value) &&
    (new_value == TRUE || new_value == FALSE)
  if (!is_flag) {
    stop("new_value must be TRUE or FALSE")
  }
  private$.metadata_changed <- new_value
  self$append_to_changes(list(Set_property, "metadata_changed"))
}

3.1.3.3 variables_metadata_changed

# Active binding for the "variables metadata changed" flag.
#
# Getter (no value supplied): returns private$.variables_metadata_changed.
# Setter: validates `new_value`, stores it and logs the change.
#
# Accepted values are unchanged: any single, non-missing value that compares
# equal to TRUE or FALSE. NA, NULL and vectors of other lengths are now
# rejected with the intended message instead of failing inside `if`/`&&`
# with an unrelated error.
variables_metadata_changed = function(new_value) {
  if (missing(new_value)) {
    return(private$.variables_metadata_changed)
  }
  # Check length and NA first so the comparisons below always yield a
  # single TRUE/FALSE.
  is_flag <- length(new_value) == 1L && !is.na(new_value) &&
    (new_value == TRUE || new_value == FALSE)
  if (!is_flag) {
    stop("new_value must be TRUE or FALSE")
  }
  private$.variables_metadata_changed <- new_value
  # NOTE(review): the logged label is "variable_data_changed", which differs
  # from the binding's name. Kept as is; confirm whether anything reads it.
  self$append_to_changes(list(Set_property, "variable_data_changed"))
}

3.1.3.4 current_filter

# Active binding for the current filter.
#
# Setter: stores `filter` in private$.current_filter, flags the data as
# changed and logs the change.
# Getter (no value supplied): returns the result of get_filter_as_logical()
# for the stored filter's name.
current_filter = function(filter) {
  if (!missing(filter)) {
    private$.current_filter <- filter
    self$data_changed <- TRUE
    self$append_to_changes(list(Set_property, "current_filter"))
  } else {
    active_name <- private$.current_filter$name
    self$get_filter_as_logical(active_name)
  }
}

3.1.3.5 current_column_selection

# Active binding for the current column selection.
#
# Setter: stores `column_selection` in private$.current_column_selection,
# flags the data as changed and logs the change.
# Getter (no value supplied): returns the column names for the stored
# selection's name, or all column names of the data when the stored
# selection is NULL.
current_column_selection = function(column_selection) {
  if (!missing(column_selection)) {
    private$.current_column_selection <- column_selection
    self$data_changed <- TRUE
    self$append_to_changes(list(Set_property, "current_column_selection"))
  } else if (is.null(private$.current_column_selection)) {
    names(private$data)
  } else {
    selection_name <- private$.current_column_selection$name
    self$get_column_selection_column_names(selection_name)
  }
}

3.2 Conclusion

The DataSheet class in R-Instat is a comprehensive tool for data management, providing various methods to handle data, metadata, filters, column selections, and more. By using the R6 system, it offers an object-oriented approach that enhances the flexibility and functionality of data handling in R-Instat.