python - Is there a Numpy or Pandas setting to issue a warning whenever a NaN value is created -
I'm spending a lot of time working with pandas, which uses NumPy arrays to store numbers.
In my use case, there should never be any NaN values - they are indicative that something has gone wrong (usually a pandas-related screwup such as incorrectly joined DataFrames, badly loaded data, etc.).
It would be helpful if pandas or NumPy had a setting to issue a warning if a NaN value ever appeared in a Series or in a DataFrame. (This question is not about NaN replacement or imputation. Just warnings.)
Yes, one could write lots of local checks at each stage (do a thing; check whether it created any NaNs; do some other thing; check again whether it created any NaNs; etc.), but that's horribly verbose and inefficient. I want to tell pandas that if it ever puts a NaN value in a DataFrame, it should issue a warning - once, as a global setting at the top of a Jupyter notebook.
Does anyone know whether such a global setting exists?
If you want to raise a warning, you can check whether the DataFrame contains any NaN using df.isnull().values.any(), and then you can use the warnings module to raise a warning.
Here's a working example:
>>> from StringIO import StringIO
>>> import pandas as pd
>>> st = """
... col1|col2
... 1|
... 2|3
... """
>>> df = pd.read_csv(StringIO(st), sep="|")
>>> df.head()
   col1  col2
0     1   NaN
1     2     3
>>> import warnings
>>> if df.isnull().values.any():
...     warnings.warn("There is NaN")
...
__main__:2: UserWarning: There is NaN
>>>

If you are looking for a general setting within pandas: based on the source code here, the check that the DataFrame class performs when constructing a DataFrame doesn't include a way to raise a warning if there is a NaN. So, core pandas would have to be updated to add that. Here's an extract of the full check done by the DataFrame class.
def __init__(self, data=None, index=None, columns=None, dtype=None,
             copy=False):
    if data is None:
        data = {}
    if dtype is not None:
        dtype = self._validate_dtype(dtype)

    if isinstance(data, DataFrame):
        data = data._data

    if isinstance(data, BlockManager):
        mgr = self._init_mgr(data, axes=dict(index=index, columns=columns),
                             dtype=dtype, copy=copy)
    elif isinstance(data, dict):
        mgr = self._init_dict(data, index, columns, dtype=dtype)
    elif isinstance(data, ma.MaskedArray):
        import numpy.ma.mrecords as mrecords
        # masked recarray
        if isinstance(data, mrecords.MaskedRecords):
            mgr = _masked_rec_array_to_mgr(data, index, columns, dtype,
                                           copy)

        # a masked array
        else:
            mask = ma.getmaskarray(data)
            if mask.any():
                data, fill_value = maybe_upcast(data, copy=True)
                data[mask] = fill_value
            else:
                data = data.copy()
            mgr = self._init_ndarray(data, index, columns, dtype=dtype,
                                     copy=copy)

    elif isinstance(data, (np.ndarray, Series, Index)):
        if data.dtype.names:
            data_columns = list(data.dtype.names)
            data = dict((k, data[k]) for k in data_columns)
            if columns is None:
                columns = data_columns
            mgr = self._init_dict(data, index, columns, dtype=dtype)
        elif getattr(data, 'name', None) is not None:
            mgr = self._init_dict({data.name: data}, index, columns,
                                  dtype=dtype)
        else:
            mgr = self._init_ndarray(data, index, columns, dtype=dtype,
                                     copy=copy)
    elif isinstance(data, (list, types.GeneratorType)):
        if isinstance(data, types.GeneratorType):
            data = list(data)
        if len(data) > 0:
            if is_list_like(data[0]) and getattr(data[0], 'ndim', 1) == 1:
                if is_named_tuple(data[0]) and columns is None:
                    columns = data[0]._fields
                arrays, columns = _to_arrays(data, columns, dtype=dtype)
                columns = _ensure_index(columns)

                # set the index
                if index is None:
                    if isinstance(data[0], Series):
                        index = _get_names_from_index(data)
                    elif isinstance(data[0], Categorical):
                        index = _default_index(len(data[0]))
                    else:
                        index = _default_index(len(data))

                mgr = _arrays_to_mgr(arrays, columns, index, columns,
                                     dtype=dtype)
            else:
                mgr = self._init_ndarray(data, index, columns, dtype=dtype,
                                         copy=copy)
        else:
            mgr = self._init_dict({}, index, columns, dtype=dtype)
    elif isinstance(data, collections.Iterator):
        raise TypeError("data argument can't be an iterator")
    else:
        try:
            arr = np.array(data, dtype=dtype, copy=copy)
        except (ValueError, TypeError) as e:
            exc = TypeError('DataFrame constructor called with '
                            'incompatible data and dtype: %s' % e)
            raise_with_traceback(exc)

        if arr.ndim == 0 and index is not None and columns is not None:
            if isinstance(data, compat.string_types) and dtype is None:
                dtype = np.object_
            if dtype is None:
                dtype, data = infer_dtype_from_scalar(data)

            values = np.empty((len(index), len(columns)), dtype=dtype)
            values.fill(data)
            mgr = self._init_ndarray(values, index, columns, dtype=dtype,
                                     copy=False)
        else:
            raise ValueError('DataFrame constructor not properly called!')

    NDFrame.__init__(self, mgr, fastpath=True)

So, you would need to file a feature request to have this added to pandas.
Comments
Post a Comment