Source code for lightwood.helpers.general

from typing import Iterable

import numpy as np
from type_infer.helpers import is_nan_numeric


def is_none(value):
    """
    Return ``True`` when ``value`` should be treated as a missing cell.

    Pandas cannot guarantee a stable type for a column: it may change the
    column type based on the values it holds, and may change the values based
    on the column type. Lightwood relies on ``None`` to mark cells that are
    "missing" or "corrupt", but a ``None`` assigned into a dataframe can come
    back as ``nan`` or some other value. This function recognises ``None``
    itself and the values a ``pd.DataFrame`` might turn it into.

    It also accepts a few extra values (such as ``''``) that pandas never
    produces from ``None``, but which lightwood still considers missing.

    Rules, in order:

    * an ``np.ndarray`` with ``size == 0`` is missing;
    * a 0-d ``np.ndarray`` is unwrapped and judged by the scalar it holds;
    * any other non-``str`` iterable is missing only if it is empty; iterables
      with no usable ``len()`` (generators, iterators, ...) are never missing;
    * ``None``, numeric NaN (per ``is_nan_numeric``), and anything whose
      ``str()`` is ``''``, ``'None'``, ``'nan'``, ``'NaN'``, ``'np.nan'``,
      ``'NAN'`` or ``'NONE'`` is missing.

    :param value: the cell value to inspect; any object is accepted.
    :returns: ``True`` if ``value`` represents a missing value, else ``False``.
    """
    # TODO: consider removing this helper entirely, arguably should move into dataprep_ml

    # Start dispatch with types that are expensive to e.g. cast into strings.
    # Exact-type check on purpose: ndarray subclasses go through the generic
    # iterable branch below.
    if type(value) is np.ndarray:
        if value.size == 0:
            return True
        if value.ndim == 0:
            # A 0-d array is iterable by protocol but has no len(); it wraps
            # a single scalar, so evaluate that scalar instead of raising.
            value = value.item()

    # Exact-type check on purpose: only plain ``str`` skips this branch, so
    # str subclasses are handled as sized iterables.
    if type(value) is not str and isinstance(value, Iterable):
        try:
            return len(value) == 0
        except TypeError:
            # Iterable does not imply Sized. Emptiness of a generator or
            # iterator cannot be checked without consuming it, so treat it
            # as a present (non-missing) value.
            return False

    if value is None:
        return True

    if is_nan_numeric(value):
        return True

    return str(value) in ('', 'None', 'nan', 'NaN', 'np.nan', 'NAN', 'NONE')