Source code for doenut.models.averaged_model_set

import logging

import pandas as pd

import doenut.utils
from doenut.data import ModifiableDataSet
from doenut.models.model_set import ModelSet
from doenut.models.averaged_model import AveragedModel

# Module-level logger for this file, created through doenut's own logging
# helper with this module's name and the DEBUG level passed in.
logger = doenut.utils.initialise_log(__name__, logging.DEBUG)


class AveragedModelSet(ModelSet):
    """Class to train and hold a group of related (averaged) models.

    When constructing the AveragedModelSet, you can define default values.
    Then when adding a new model to the set you only have to specify the
    parameters which differ from the default.

    Parameters
    ----------
    default_inputs: pd.DataFrame, optional
        The default inputs to the model
    default_responses: pd.DataFrame, optional
        The default responses for the model
    default_scale_data: bool, optional
        Whether to scale the data before adding to the model by default
    default_scale_run_data: bool, optional
        Whether to scale the data for each train/test set by default
    default_fit_intercept: bool, optional
        Whether to fit the model's intercept to the axis by default
    default_response_key: list or str, optional
        The default column(s) to pick from the responses. When omitted
        (``None``) a fresh ``[0]`` is used.
    default_drop_duplicates: {'no', 'yes', 'averages'}, optional
        What to do with duplicates in the inputs, by default
    default_input_selector: List, optional
        What columns from the input data to select by default. When omitted
        (``None``) a fresh empty list is used, which means "no filtering".
    """

    @classmethod
    def multiple_response_columns(
        cls,
        inputs: pd.DataFrame = None,
        responses: pd.DataFrame = None,
        scale_data: bool = True,
        scale_run_data: bool = True,
        fit_intercept: bool = True,
        drop_duplicates: str = "yes",
        input_selector: list = None,
    ) -> "AveragedModelSet":
        """Build a set containing one model per column of ``responses``.

        The arguments become the defaults of the new set, and then
        ``add_model`` is called once for every column in
        ``responses.columns`` with that column as the ``response_key``.

        Parameters
        ----------
        inputs: pd.DataFrame
            The inputs shared by every model in the set
        responses: pd.DataFrame
            The responses; one model is added per column
        scale_data: bool, optional
            Whether to scale the data before adding to the model
        scale_run_data: bool, optional
            Whether to scale the data for each train/test set
        fit_intercept: bool, optional
            Whether to fit the model's intercept to the axis
        drop_duplicates: {'no', 'yes', 'averages'}, optional
            What to do with duplicates in the inputs
        input_selector: List, optional
            What columns from the input data to select. ``None`` (the
            default) is treated as an empty list.

        Returns
        -------
        AveragedModelSet
            The populated model set

        Raises
        ------
        ValueError
            If ``responses`` is ``None``; its columns are required to know
            which models to create.
        """
        if responses is None:
            # Fail early with a clear message instead of an AttributeError
            # on ``None.columns`` further down.
            raise ValueError(
                "responses must be provided to build one model per "
                "response column"
            )

        logger.info("Generating AveragedModelSet")
        # ``cls`` rather than the hard-coded class name, so that subclasses
        # calling this alternate constructor get an instance of themselves.
        result = cls(
            inputs,
            responses,
            scale_data,
            scale_run_data,
            fit_intercept,
            [],  # no default response key: each model gets its own below
            drop_duplicates,
            input_selector,
        )
        for column in responses.columns:
            logger.debug(f"Adding model for response key {column}")
            result.add_model(response_key=column)
        return result

    def __init__(
        self,
        default_inputs: pd.DataFrame = None,
        default_responses: pd.DataFrame = None,
        default_scale_data: bool = True,
        default_scale_run_data: bool = True,
        default_fit_intercept: bool = True,
        default_response_key: list = None,
        default_drop_duplicates: str = "yes",
        default_input_selector: list = None,
    ):
        """Store the default values used by later ``add_model`` calls.

        See the class docstring for the meaning of each parameter. The
        inputs, responses, scale_data and fit_intercept defaults are handed
        to the ``ModelSet`` constructor; the remaining ones are stored on
        this instance.
        """
        super().__init__(
            default_inputs,
            default_responses,
            default_scale_data,
            default_fit_intercept,
        )
        self.default_scale_run_data = default_scale_run_data
        # Build the list defaults per instance. Using ``[0]`` / ``[]``
        # directly in the signature would share one list object between
        # every AveragedModelSet ever created.
        self.default_response_key = (
            [0] if default_response_key is None else default_response_key
        )
        self.default_drop_duplicates = default_drop_duplicates
        self.default_input_selector = (
            [] if default_input_selector is None else default_input_selector
        )

    def add_model(
        self,
        inputs=None,
        responses=None,
        scale_data=None,
        scale_run_data=None,
        fit_intercept=None,
        response_key=None,
        drop_duplicates=None,
        input_selector=None,
    ):
        """Add a new AveragedModel to the set

        Every argument is resolved through ``self._validate_value`` (defined
        on ``ModelSet``), which is expected to fall back to the matching
        ``default_*`` attribute when the argument is left as ``None``.

        Parameters
        ----------
        inputs: pd.DataFrame, optional
            The inputs to the model
        responses: pd.DataFrame, optional
            The responses for the model
        scale_data: bool, optional
            Whether to scale the data before adding to the model
        scale_run_data: bool, optional
            Whether to scale the data for each train/test set
        fit_intercept: bool, optional
            Whether to fit the model's intercept to the axis
        response_key: list or str, optional
            The column(s) to pick from the responses
        drop_duplicates: {'no', 'yes', 'averages'}, optional
            What to do with duplicates in the inputs
        input_selector: List, optional
            What columns from the input data to select

        Returns
        -------
        doenut.models.AveragedModel
            The generated model
        """
        inputs = self._validate_value("inputs", inputs)
        responses = self._validate_value("responses", responses)
        scale_data = self._validate_value("scale_data", scale_data)
        scale_run_data = self._validate_value("scale_run_data", scale_run_data)
        fit_intercept = self._validate_value("fit_intercept", fit_intercept)
        response_key = self._validate_value("response_key", response_key)
        drop_duplicates = self._validate_value(
            "drop_duplicates", drop_duplicates
        )
        input_selector = self._validate_value("input_selector", input_selector)

        data = ModifiableDataSet(inputs, responses)
        # NOTE: the data set is deliberately not scaled here; ``scale_data``
        # is forwarded to AveragedModel below instead.
        if input_selector:
            # An empty selector means "keep every input column".
            data.filter(input_selector)

        model = AveragedModel(
            data,
            scale_data,
            scale_run_data,
            fit_intercept,
            response_key,
            drop_duplicates,
        )
        # ``self.models`` is not created in this class; presumably it is
        # initialised by ModelSet.__init__ — verify against the base class.
        self.models.append(model)
        return model