"""Source code for ``lightwood.encoder.numeric.ts_numeric``."""

from typing import Union, List, Dict

import torch
import numpy as np
import pandas as pd

from lightwood.encoder.numeric import NumericEncoder


class TsNumericEncoder(NumericEncoder):
    """
    Variant of the vanilla numerical encoder that supports dynamic mean re-scaling,
    using per-group normalizers for time series data.
    """
    # marker checked elsewhere to identify time-series-aware encoders
    is_timeseries_encoder: bool = True

    def __init__(self, is_target: bool = False, positive_domain: bool = False, grouped_by=None):
        """
        :param is_target: whether this encoder targets the predicted column
        :param positive_domain: if True, decoded values are forced to be non-negative
        :param grouped_by: columns that partition the data into separate time series
        """
        super().__init__(is_target=is_target, positive_domain=positive_domain)
        # time series normalization params; populated externally after construction
        self.normalizers = None
        self.group_combinations = None
        self.dependencies = grouped_by
        self.output_size = 1
[docs] def encode(self, data: Union[np.ndarray, pd.Series], dependency_data: Dict[str, List[pd.Series]] = {}): """ :param data: A pandas series containing the numbers to be encoded :param dependency_data: dict with grouped_by column info, to retrieve the correct normalizer for each datum :returns: A torch tensor with the representations of each number """ # noqa if not self.is_prepared: raise Exception('You need to call "prepare" before calling "encode" or "decode".') if not dependency_data: dependency_data = {'__default': [None] * len(data)} if isinstance(data, pd.Series): data = data.values # get array of series-wise observed means if self.normalizers is None: means = np.full((len(data)), fill_value=self._abs_mean) else: # use global mean as default for novel series means = np.full((len(data)), fill_value=self.normalizers['__default'].abs_mean) def _get_group_mean(group) -> float: if (group, ) in self.normalizers: return self.normalizers[(group, )].abs_mean else: return self.normalizers['__default'].abs_mean for i, group in enumerate(list(zip(*dependency_data.values()))): # TODO: support multigroup if group[0] is not None: means = np.vectorize(_get_group_mean, otypes=[float])(group[0].values) if len(data.shape) > 1 and data.shape[1] > 1: if len(means.shape) == 1: means = np.expand_dims(means, 1) means = np.repeat(means, data.shape[1], axis=1) def _norm_fn(x: float, mean: float) -> float: return x / mean # nones = np.vectorize(self._none_fn, otypes=[float])(data) # TODO encoded = np.vectorize(_norm_fn, otypes=[float])(data, means) # encoded[nones] = 0 # if measurement is None, it is zeroed out # TODO # TODO: mask for where mean is 0, then pass real as-is return torch.Tensor(encoded).unsqueeze(1)
[docs] def decode(self, encoded_values: torch.Tensor, decode_log: bool = None, dependency_data=None): if not self.is_prepared: raise Exception('You need to call "prepare" before calling "encode" or "decode".') assert isinstance(encoded_values, torch.Tensor), 'It is not a tensor!' # TODO: debug purposes assert not decode_log # TODO: debug purposes if not dependency_data: dependency_data = {'__default': [None] * len(encoded_values)} # force = True prevents side effects on the original encoded_values ev = encoded_values.numpy(force=True) # set global mean as default ret = np.full((ev.shape[0],), dtype=float, fill_value=self._abs_mean) # TODO: perhaps capture nan, infs, etc and set to pow(10,63)? # set means array if self.normalizers is None: means = np.full((ev.shape[0],), fill_value=self._abs_mean) else: means = np.full((len(encoded_values)), fill_value=self.normalizers['__default'].abs_mean) for i, group in enumerate(list(zip(*dependency_data.values()))): if group is not None: if tuple(group) in self.normalizers: means[i] = self.normalizers[tuple(group)].abs_mean else: means[i] = self.normalizers['__default'].abs_mean else: means[i] = self._abs_mean # set real value real_value = np.multiply(ev[:].reshape(-1,), means) valid_mask = np.ones_like(real_value, dtype=bool) # final filters if self.positive_domain: real_value = abs(real_value) ret[valid_mask] = real_value[valid_mask] # TODO probably not needed return ret.tolist()