Source code for base
from typing import Dict
from dataclasses import dataclass
from dataclasses_json import dataclass_json
@dataclass_json
@dataclass
class TypeInformation:
    """
    For a dataset, provides information on column types, how they're used, and any other potential identifiers.

    ``TypeInformation`` is generated within :py:func:`infer.infer_types`, where small samples of each column are evaluated in a custom framework to determine each column's data type. The user may override data types, but it is recommended to do so within a JSON-AI config file.

    :param dtypes: For each column's name, the associated data type inferred.
    :param additional_info: Any possible sub-categories or additional descriptive information.
    :param identifiers: Columns within the dataset highly suspected of being identifiers or IDs. These do not contain informative value, therefore will be ignored in subsequent training/analysis procedures unless manually indicated.
    """ # noqa

    dtypes: Dict[str, str]
    additional_info: Dict[str, object]
    identifiers: Dict[str, str]

    def __init__(self, dtypes=None, additional_info=None, identifiers=None):
        """
        Build a ``TypeInformation``; every field left as ``None`` starts out as a new empty dict.

        :param dtypes: Optional mapping of column name to inferred data type.
        :param additional_info: Optional mapping of extra descriptive information.
        :param identifiers: Optional mapping of the columns suspected of being identifiers.
        """
        # A hand-written ``__init__`` replaces the one ``@dataclass`` would generate.
        # The fields are therefore accepted as optional keyword arguments so the object
        # can still be rebuilt from its field values (``cls(**fields)``, which is how
        # deserialisation helpers such as ``from_dict`` construct instances).
        # ``None`` is the sentinel so that instances never share a mutable default.
        self.dtypes = dict() if dtypes is None else dtypes
        self.additional_info = dict() if additional_info is None else additional_info
        self.identifiers = dict() if identifiers is None else identifiers
class BaseEngine:
    """Common base for type-inference engines; subclasses provide :py:meth:`infer`."""

    def __init__(self, stable=True):
        """
        :param stable: Whether the engine is stable (``True``) or experimental (``False``).
        """
        self.stable = stable

    def infer(self, df) -> TypeInformation:
        """
        Infer the type of every column of a dataframe.

        :param df: The dataframe whose columns are to be typed.
        :returns: A ``TypeInformation`` object describing the columns.
        :raises NotImplementedError: Always, in this base class; subclasses must override.
        """
        raise NotImplementedError()
class ENGINES:
    """Namespace of string names for the available inference engines."""

    RULE_BASED = "rule_based"