remove: V10 momentum system - backtest proved it adds no value
- Removed v10 TradingView indicator (moneyline_v10_momentum_dots.pinescript)
- Removed v10 penalty system from signal-quality.ts (-30/-25 point penalties)
- Removed backtest result files (sweep_*.csv)
- Updated copilot-instructions.md to remove v10 references
- Simplified direction-specific quality thresholds (LONG 90+, SHORT 80+)

Rationale:
- 1,944 parameter combinations tested in backtest
- All top results IDENTICAL (568 trades, $498 P&L, 61.09% WR)
- Momentum parameters had ZERO impact on trade selection
- Profit factor 1.027 too low (barely profitable after fees)
- Max drawdown -$1,270 vs +$498 profit = terrible risk-reward
- v10 penalties were blocking good trades (bug: applied to wrong positions)

Keeping v9 as production system - simpler, proven, effective.
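For context on the "barely profitable" claim: with only net P&L and profit factor reported, the gross figures can be backed out arithmetically. A minimal sketch (Python), assuming profit factor = gross profit / gross loss; the helper gross_from_net is illustrative, not part of the backtest code:

def gross_from_net(net_pnl: float, profit_factor: float) -> tuple[float, float]:
    # PF = GP / GL and net = GP - GL  =>  GL = net / (PF - 1), GP = GL * PF
    gross_loss = net_pnl / (profit_factor - 1.0)
    return gross_loss * profit_factor, gross_loss

gp, gl = gross_from_net(498.0, 1.027)
print(f"gross profit ~${gp:,.0f} vs gross loss ~${gl:,.0f}")  # ~$18,942 vs ~$18,444
print(f"edge per trade ~${498.0 / 568:.2f}")                  # about $0.88/trade
print(f"net profit / max drawdown = {498.0 / 1270.0:.2f}")    # 0.39, poor risk-reward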
.venv/lib/python3.7/site-packages/pandas/__init__.py (new file, 286 lines)
@@ -0,0 +1,286 @@
# flake8: noqa

__docformat__ = "restructuredtext"

# Let users know if they're missing any of our hard dependencies
hard_dependencies = ("numpy", "pytz", "dateutil")
missing_dependencies = []

for dependency in hard_dependencies:
    try:
        __import__(dependency)
    except ImportError as e:
        missing_dependencies.append(f"{dependency}: {e}")

if missing_dependencies:
    raise ImportError(
        "Unable to import required dependencies:\n" + "\n".join(missing_dependencies)
    )
del hard_dependencies, dependency, missing_dependencies

# numpy compat
from pandas.compat import (
    np_version_under1p18 as _np_version_under1p18,
    is_numpy_dev as _is_numpy_dev,
)

try:
    from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib
except ImportError as e:  # pragma: no cover
    # hack but overkill to use re
    module = str(e).replace("cannot import name ", "")
    raise ImportError(
        f"C extension: {module} not built. If you want to import "
        "pandas from the source directory, you may need to run "
        "'python setup.py build_ext --force' to build the C extensions first."
    ) from e

from pandas._config import (
    get_option,
    set_option,
    reset_option,
    describe_option,
    option_context,
    options,
)

# let init-time option registration happen
import pandas.core.config_init

from pandas.core.api import (
    # dtype
    Int8Dtype,
    Int16Dtype,
    Int32Dtype,
    Int64Dtype,
    UInt8Dtype,
    UInt16Dtype,
    UInt32Dtype,
    UInt64Dtype,
    Float32Dtype,
    Float64Dtype,
    CategoricalDtype,
    PeriodDtype,
    IntervalDtype,
    DatetimeTZDtype,
    StringDtype,
    BooleanDtype,
    # missing
    NA,
    isna,
    isnull,
    notna,
    notnull,
    # indexes
    Index,
    CategoricalIndex,
    Int64Index,
    UInt64Index,
    RangeIndex,
    Float64Index,
    MultiIndex,
    IntervalIndex,
    TimedeltaIndex,
    DatetimeIndex,
    PeriodIndex,
    IndexSlice,
    # tseries
    NaT,
    Period,
    period_range,
    Timedelta,
    timedelta_range,
    Timestamp,
    date_range,
    bdate_range,
    Interval,
    interval_range,
    DateOffset,
    # conversion
    to_numeric,
    to_datetime,
    to_timedelta,
    # misc
    Flags,
    Grouper,
    factorize,
    unique,
    value_counts,
    NamedAgg,
    array,
    Categorical,
    set_eng_float_format,
    Series,
    DataFrame,
)

from pandas.core.arrays.sparse import SparseDtype

from pandas.tseries.api import infer_freq
from pandas.tseries import offsets

from pandas.core.computation.api import eval

from pandas.core.reshape.api import (
    concat,
    lreshape,
    melt,
    wide_to_long,
    merge,
    merge_asof,
    merge_ordered,
    crosstab,
    pivot,
    pivot_table,
    get_dummies,
    cut,
    qcut,
)

import pandas.api
from pandas.util._print_versions import show_versions

from pandas.io.api import (
    # excel
    ExcelFile,
    ExcelWriter,
    read_excel,
    # parsers
    read_csv,
    read_fwf,
    read_table,
    # pickle
    read_pickle,
    to_pickle,
    # pytables
    HDFStore,
    read_hdf,
    # sql
    read_sql,
    read_sql_query,
    read_sql_table,
    # misc
    read_clipboard,
    read_parquet,
    read_orc,
    read_feather,
    read_gbq,
    read_html,
    read_xml,
    read_json,
    read_stata,
    read_sas,
    read_spss,
)

from pandas.io.json import _json_normalize as json_normalize

from pandas.util._tester import test
import pandas.testing
import pandas.arrays

# use the closest tagged version if possible
from pandas._version import get_versions

v = get_versions()
__version__ = v.get("closest-tag", v["version"])
__git_version__ = v.get("full-revisionid")
del get_versions, v


# GH 27101
def __getattr__(name):
    import warnings

    if name == "datetime":
        warnings.warn(
            "The pandas.datetime class is deprecated "
            "and will be removed from pandas in a future version. "
            "Import from datetime module instead.",
            FutureWarning,
            stacklevel=2,
        )

        from datetime import datetime as dt

        return dt

    elif name == "np":

        warnings.warn(
            "The pandas.np module is deprecated "
            "and will be removed from pandas in a future version. "
            "Import numpy directly instead",
            FutureWarning,
            stacklevel=2,
        )
        import numpy as np

        return np

    elif name in {"SparseSeries", "SparseDataFrame"}:
        warnings.warn(
            f"The {name} class is removed from pandas. Accessing it from "
            "the top-level namespace will also be removed in the next version",
            FutureWarning,
            stacklevel=2,
        )

        return type(name, (), {})

    elif name == "SparseArray":

        warnings.warn(
            "The pandas.SparseArray class is deprecated "
            "and will be removed from pandas in a future version. "
            "Use pandas.arrays.SparseArray instead.",
            FutureWarning,
            stacklevel=2,
        )
        from pandas.core.arrays.sparse import SparseArray as _SparseArray

        return _SparseArray

    raise AttributeError(f"module 'pandas' has no attribute '{name}'")


# module level doc-string
__doc__ = """
pandas - a powerful data analysis and manipulation library for Python
=====================================================================

**pandas** is a Python package providing fast, flexible, and expressive data
structures designed to make working with "relational" or "labeled" data both
easy and intuitive. It aims to be the fundamental high-level building block for
doing practical, **real world** data analysis in Python. Additionally, it has
the broader goal of becoming **the most powerful and flexible open source data
analysis / manipulation tool available in any language**. It is already well on
its way toward this goal.

Main Features
-------------
Here are just a few of the things that pandas does well:

  - Easy handling of missing data in floating point as well as non-floating
    point data.
  - Size mutability: columns can be inserted and deleted from DataFrame and
    higher dimensional objects
  - Automatic and explicit data alignment: objects can be explicitly aligned
    to a set of labels, or the user can simply ignore the labels and let
    `Series`, `DataFrame`, etc. automatically align the data for you in
    computations.
  - Powerful, flexible group by functionality to perform split-apply-combine
    operations on data sets, for both aggregating and transforming data.
  - Make it easy to convert ragged, differently-indexed data in other Python
    and NumPy data structures into DataFrame objects.
  - Intelligent label-based slicing, fancy indexing, and subsetting of large
    data sets.
  - Intuitive merging and joining data sets.
  - Flexible reshaping and pivoting of data sets.
  - Hierarchical labeling of axes (possible to have multiple labels per tick).
  - Robust IO tools for loading data from flat files (CSV and delimited),
    Excel files, databases, and saving/loading data from the ultrafast HDF5
    format.
  - Time series-specific functionality: date range generation and frequency
    conversion, moving window statistics, date shifting and lagging.
"""
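The module-level __getattr__ above (GH 27101) relies on PEP 562, which lets a package intercept lookups of names missing from its namespace. A minimal standalone sketch of the same pattern, using a hypothetical module "mylib" rather than pandas itself:

# contents of mylib.py (hypothetical)
import warnings

def __getattr__(name):
    # Called only when `name` is not found in the module namespace.
    if name == "old_alias":
        warnings.warn(
            "mylib.old_alias is deprecated; import the target directly.",
            FutureWarning,
            stacklevel=2,
        )
        import math  # stand-in for the real target object
        return math
    raise AttributeError(f"module 'mylib' has no attribute '{name}'")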
(5 binary files not shown)
.venv/lib/python3.7/site-packages/pandas/_config/__init__.py (new file, 28 lines)
@@ -0,0 +1,28 @@
"""
pandas._config is considered explicitly upstream of everything else in pandas,
should have no intra-pandas dependencies.

importing `dates` and `display` ensures that keys needed by _libs
are initialized.
"""
__all__ = [
    "config",
    "detect_console_encoding",
    "get_option",
    "set_option",
    "reset_option",
    "describe_option",
    "option_context",
    "options",
]
from pandas._config import config
from pandas._config import dates  # noqa:F401
from pandas._config.config import (
    describe_option,
    get_option,
    option_context,
    options,
    reset_option,
    set_option,
)
from pandas._config.display import detect_console_encoding
(5 binary files not shown)
.venv/lib/python3.7/site-packages/pandas/_config/config.py (new file, 875 lines)
@@ -0,0 +1,875 @@
"""
The config module holds package-wide configurables and provides
a uniform API for working with them.

Overview
========

This module supports the following requirements:
- options are referenced using keys in dot.notation, e.g. "x.y.option - z".
- keys are case-insensitive.
- functions should accept partial/regex keys, when unambiguous.
- options can be registered by modules at import time.
- options can be registered at init-time (via core.config_init)
- options have a default value, and (optionally) a description and
  validation function associated with them.
- options can be deprecated, in which case referencing them
  should produce a warning.
- deprecated options can optionally be rerouted to a replacement
  so that accessing a deprecated option reroutes to a differently
  named option.
- options can be reset to their default value.
- all option can be reset to their default value at once.
- all options in a certain sub - namespace can be reset at once.
- the user can set / get / reset or ask for the description of an option.
- a developer can register and mark an option as deprecated.
- you can register a callback to be invoked when the option value
  is set or reset. Changing the stored value is considered misuse, but
  is not verboten.

Implementation
==============

- Data is stored using nested dictionaries, and should be accessed
  through the provided API.

- "Registered options" and "Deprecated options" have metadata associated
  with them, which are stored in auxiliary dictionaries keyed on the
  fully-qualified key, e.g. "x.y.z.option".

- the config_init module is imported by the package's __init__.py file.
  placing any register_option() calls there will ensure those options
  are available as soon as pandas is loaded. If you use register_option
  in a module, it will only be available after that module is imported,
  which you should be aware of.

- `config_prefix` is a context_manager (for use with the `with` keyword)
  which can save developers some typing, see the docstring.

"""

from __future__ import annotations

from collections import namedtuple
from contextlib import (
    ContextDecorator,
    contextmanager,
)
import re
from typing import (
    Any,
    Callable,
    Iterable,
    cast,
)
import warnings

from pandas._typing import F

DeprecatedOption = namedtuple("DeprecatedOption", "key msg rkey removal_ver")
RegisteredOption = namedtuple("RegisteredOption", "key defval doc validator cb")

# holds deprecated option metadata
_deprecated_options: dict[str, DeprecatedOption] = {}

# holds registered option metadata
_registered_options: dict[str, RegisteredOption] = {}

# holds the current values for registered options
_global_config: dict[str, Any] = {}

# keys which have a special meaning
_reserved_keys: list[str] = ["all"]


class OptionError(AttributeError, KeyError):
    """
    Exception for pandas.options, backwards compatible with KeyError
    checks
    """


#
# User API


def _get_single_key(pat: str, silent: bool) -> str:
    keys = _select_options(pat)
    if len(keys) == 0:
        if not silent:
            _warn_if_deprecated(pat)
        raise OptionError(f"No such keys(s): {repr(pat)}")
    if len(keys) > 1:
        raise OptionError("Pattern matched multiple keys")
    key = keys[0]

    if not silent:
        _warn_if_deprecated(key)

    key = _translate_key(key)

    return key


def _get_option(pat: str, silent: bool = False):
    key = _get_single_key(pat, silent)

    # walk the nested dict
    root, k = _get_root(key)
    return root[k]


def _set_option(*args, **kwargs) -> None:
    # must at least 1 arg deal with constraints later
    nargs = len(args)
    if not nargs or nargs % 2 != 0:
        raise ValueError("Must provide an even number of non-keyword arguments")

    # default to false
    silent = kwargs.pop("silent", False)

    if kwargs:
        kwarg = list(kwargs.keys())[0]
        raise TypeError(f'_set_option() got an unexpected keyword argument "{kwarg}"')

    for k, v in zip(args[::2], args[1::2]):
        key = _get_single_key(k, silent)

        o = _get_registered_option(key)
        if o and o.validator:
            o.validator(v)

        # walk the nested dict
        root, k = _get_root(key)
        root[k] = v

        if o.cb:
            if silent:
                with warnings.catch_warnings(record=True):
                    o.cb(key)
            else:
                o.cb(key)


def _describe_option(pat: str = "", _print_desc: bool = True):

    keys = _select_options(pat)
    if len(keys) == 0:
        raise OptionError("No such keys(s)")

    s = "\n".join(_build_option_description(k) for k in keys)

    if _print_desc:
        print(s)
    else:
        return s


def _reset_option(pat: str, silent: bool = False) -> None:

    keys = _select_options(pat)

    if len(keys) == 0:
        raise OptionError("No such keys(s)")

    if len(keys) > 1 and len(pat) < 4 and pat != "all":
        raise ValueError(
            "You must specify at least 4 characters when "
            "resetting multiple keys, use the special keyword "
            '"all" to reset all the options to their default value'
        )

    for k in keys:
        _set_option(k, _registered_options[k].defval, silent=silent)


def get_default_val(pat: str):
    key = _get_single_key(pat, silent=True)
    return _get_registered_option(key).defval


class DictWrapper:
    """provide attribute-style access to a nested dict"""

    def __init__(self, d: dict[str, Any], prefix: str = ""):
        object.__setattr__(self, "d", d)
        object.__setattr__(self, "prefix", prefix)

    def __setattr__(self, key: str, val: Any) -> None:
        prefix = object.__getattribute__(self, "prefix")
        if prefix:
            prefix += "."
        prefix += key
        # you can't set new keys
        # can you can't overwrite subtrees
        if key in self.d and not isinstance(self.d[key], dict):
            _set_option(prefix, val)
        else:
            raise OptionError("You can only set the value of existing options")

    def __getattr__(self, key: str):
        prefix = object.__getattribute__(self, "prefix")
        if prefix:
            prefix += "."
        prefix += key
        try:
            v = object.__getattribute__(self, "d")[key]
        except KeyError as err:
            raise OptionError("No such option") from err
        if isinstance(v, dict):
            return DictWrapper(v, prefix)
        else:
            return _get_option(prefix)

    def __dir__(self) -> Iterable[str]:
        return list(self.d.keys())


# For user convenience, we'd like to have the available options described
# in the docstring. For dev convenience we'd like to generate the docstrings
# dynamically instead of maintaining them by hand. To this, we use the
# class below which wraps functions inside a callable, and converts
# __doc__ into a property function. The doctsrings below are templates
# using the py2.6+ advanced formatting syntax to plug in a concise list
# of options, and option descriptions.


class CallableDynamicDoc:
    def __init__(self, func, doc_tmpl):
        self.__doc_tmpl__ = doc_tmpl
        self.__func__ = func

    def __call__(self, *args, **kwds):
        return self.__func__(*args, **kwds)

    @property
    def __doc__(self):
        opts_desc = _describe_option("all", _print_desc=False)
        opts_list = pp_options_list(list(_registered_options.keys()))
        return self.__doc_tmpl__.format(opts_desc=opts_desc, opts_list=opts_list)


_get_option_tmpl = """
get_option(pat)

Retrieves the value of the specified option.

Available options:

{opts_list}

Parameters
----------
pat : str
    Regexp which should match a single option.
    Note: partial matches are supported for convenience, but unless you use the
    full option name (e.g. x.y.z.option_name), your code may break in future
    versions if new options with similar names are introduced.

Returns
-------
result : the value of the option

Raises
------
OptionError : if no such option exists

Notes
-----
The available options with its descriptions:

{opts_desc}
"""

_set_option_tmpl = """
set_option(pat, value)

Sets the value of the specified option.

Available options:

{opts_list}

Parameters
----------
pat : str
    Regexp which should match a single option.
    Note: partial matches are supported for convenience, but unless you use the
    full option name (e.g. x.y.z.option_name), your code may break in future
    versions if new options with similar names are introduced.
value : object
    New value of option.

Returns
-------
None

Raises
------
OptionError if no such option exists

Notes
-----
The available options with its descriptions:

{opts_desc}
"""

_describe_option_tmpl = """
describe_option(pat, _print_desc=False)

Prints the description for one or more registered options.

Call with not arguments to get a listing for all registered options.

Available options:

{opts_list}

Parameters
----------
pat : str
    Regexp pattern. All matching keys will have their description displayed.
_print_desc : bool, default True
    If True (default) the description(s) will be printed to stdout.
    Otherwise, the description(s) will be returned as a unicode string
    (for testing).

Returns
-------
None by default, the description(s) as a unicode string if _print_desc
is False

Notes
-----
The available options with its descriptions:

{opts_desc}
"""

_reset_option_tmpl = """
reset_option(pat)

Reset one or more options to their default value.

Pass "all" as argument to reset all options.

Available options:

{opts_list}

Parameters
----------
pat : str/regex
    If specified only options matching `prefix*` will be reset.
    Note: partial matches are supported for convenience, but unless you
    use the full option name (e.g. x.y.z.option_name), your code may break
    in future versions if new options with similar names are introduced.

Returns
-------
None

Notes
-----
The available options with its descriptions:

{opts_desc}
"""

# bind the functions with their docstrings into a Callable
# and use that as the functions exposed in pd.api
get_option = CallableDynamicDoc(_get_option, _get_option_tmpl)
set_option = CallableDynamicDoc(_set_option, _set_option_tmpl)
reset_option = CallableDynamicDoc(_reset_option, _reset_option_tmpl)
describe_option = CallableDynamicDoc(_describe_option, _describe_option_tmpl)
options = DictWrapper(_global_config)

#
# Functions for use by pandas developers, in addition to User - api


class option_context(ContextDecorator):
    """
    Context manager to temporarily set options in the `with` statement context.

    You need to invoke as ``option_context(pat, val, [(pat, val), ...])``.

    Examples
    --------
    >>> with option_context('display.max_rows', 10, 'display.max_columns', 5):
    ...     ...
    """

    def __init__(self, *args):
        if len(args) % 2 != 0 or len(args) < 2:
            raise ValueError(
                "Need to invoke as option_context(pat, val, [(pat, val), ...])."
            )

        self.ops = list(zip(args[::2], args[1::2]))

    def __enter__(self):
        self.undo = [(pat, _get_option(pat, silent=True)) for pat, val in self.ops]

        for pat, val in self.ops:
            _set_option(pat, val, silent=True)

    def __exit__(self, *args):
        if self.undo:
            for pat, val in self.undo:
                _set_option(pat, val, silent=True)


def register_option(
    key: str,
    defval: object,
    doc: str = "",
    validator: Callable[[Any], Any] | None = None,
    cb: Callable[[str], Any] | None = None,
) -> None:
    """
    Register an option in the package-wide pandas config object

    Parameters
    ----------
    key : str
        Fully-qualified key, e.g. "x.y.option - z".
    defval : object
        Default value of the option.
    doc : str
        Description of the option.
    validator : Callable, optional
        Function of a single argument, should raise `ValueError` if
        called with a value which is not a legal value for the option.
    cb
        a function of a single argument "key", which is called
        immediately after an option value is set/reset. key is
        the full name of the option.

    Raises
    ------
    ValueError if `validator` is specified and `defval` is not a valid value.

    """
    import keyword
    import tokenize

    key = key.lower()

    if key in _registered_options:
        raise OptionError(f"Option '{key}' has already been registered")
    if key in _reserved_keys:
        raise OptionError(f"Option '{key}' is a reserved key")

    # the default value should be legal
    if validator:
        validator(defval)

    # walk the nested dict, creating dicts as needed along the path
    path = key.split(".")

    for k in path:
        if not re.match("^" + tokenize.Name + "$", k):
            raise ValueError(f"{k} is not a valid identifier")
        if keyword.iskeyword(k):
            raise ValueError(f"{k} is a python keyword")

    cursor = _global_config
    msg = "Path prefix to option '{option}' is already an option"

    for i, p in enumerate(path[:-1]):
        if not isinstance(cursor, dict):
            raise OptionError(msg.format(option=".".join(path[:i])))
        if p not in cursor:
            cursor[p] = {}
        cursor = cursor[p]

    if not isinstance(cursor, dict):
        raise OptionError(msg.format(option=".".join(path[:-1])))

    cursor[path[-1]] = defval  # initialize

    # save the option metadata
    _registered_options[key] = RegisteredOption(
        key=key, defval=defval, doc=doc, validator=validator, cb=cb
    )


def deprecate_option(
    key: str, msg: str | None = None, rkey: str | None = None, removal_ver=None
) -> None:
    """
    Mark option `key` as deprecated, if code attempts to access this option,
    a warning will be produced, using `msg` if given, or a default message
    if not.
    if `rkey` is given, any access to the key will be re-routed to `rkey`.

    Neither the existence of `key` nor that if `rkey` is checked. If they
    do not exist, any subsequence access will fail as usual, after the
    deprecation warning is given.

    Parameters
    ----------
    key : str
        Name of the option to be deprecated.
        must be a fully-qualified option name (e.g "x.y.z.rkey").
    msg : str, optional
        Warning message to output when the key is referenced.
        if no message is given a default message will be emitted.
    rkey : str, optional
        Name of an option to reroute access to.
        If specified, any referenced `key` will be
        re-routed to `rkey` including set/get/reset.
        rkey must be a fully-qualified option name (e.g "x.y.z.rkey").
        used by the default message if no `msg` is specified.
    removal_ver : optional
        Specifies the version in which this option will
        be removed. used by the default message if no `msg` is specified.

    Raises
    ------
    OptionError
        If the specified key has already been deprecated.
    """
    key = key.lower()

    if key in _deprecated_options:
        raise OptionError(f"Option '{key}' has already been defined as deprecated.")

    _deprecated_options[key] = DeprecatedOption(key, msg, rkey, removal_ver)


#
# functions internal to the module


def _select_options(pat: str) -> list[str]:
    """
    returns a list of keys matching `pat`

    if pat=="all", returns all registered options
    """
    # short-circuit for exact key
    if pat in _registered_options:
        return [pat]

    # else look through all of them
    keys = sorted(_registered_options.keys())
    if pat == "all":  # reserved key
        return keys

    return [k for k in keys if re.search(pat, k, re.I)]


def _get_root(key: str) -> tuple[dict[str, Any], str]:
    path = key.split(".")
    cursor = _global_config
    for p in path[:-1]:
        cursor = cursor[p]
    return cursor, path[-1]


def _is_deprecated(key: str) -> bool:
    """Returns True if the given option has been deprecated"""
    key = key.lower()
    return key in _deprecated_options


def _get_deprecated_option(key: str):
    """
    Retrieves the metadata for a deprecated option, if `key` is deprecated.

    Returns
    -------
    DeprecatedOption (namedtuple) if key is deprecated, None otherwise
    """
    try:
        d = _deprecated_options[key]
    except KeyError:
        return None
    else:
        return d


def _get_registered_option(key: str):
    """
    Retrieves the option metadata if `key` is a registered option.

    Returns
    -------
    RegisteredOption (namedtuple) if key is deprecated, None otherwise
    """
    return _registered_options.get(key)


def _translate_key(key: str) -> str:
    """
    if key id deprecated and a replacement key defined, will return the
    replacement key, otherwise returns `key` as - is
    """
    d = _get_deprecated_option(key)
    if d:
        return d.rkey or key
    else:
        return key


def _warn_if_deprecated(key: str) -> bool:
    """
    Checks if `key` is a deprecated option and if so, prints a warning.

    Returns
    -------
    bool - True if `key` is deprecated, False otherwise.
    """
    d = _get_deprecated_option(key)
    if d:
        if d.msg:
            print(d.msg)
            warnings.warn(d.msg, FutureWarning)
        else:
            msg = f"'{key}' is deprecated"
            if d.removal_ver:
                msg += f" and will be removed in {d.removal_ver}"
            if d.rkey:
                msg += f", please use '{d.rkey}' instead."
            else:
                msg += ", please refrain from using it."

            warnings.warn(msg, FutureWarning)
        return True
    return False


def _build_option_description(k: str) -> str:
    """Builds a formatted description of a registered option and prints it"""
    o = _get_registered_option(k)
    d = _get_deprecated_option(k)

    s = f"{k} "

    if o.doc:
        s += "\n".join(o.doc.strip().split("\n"))
    else:
        s += "No description available."

    if o:
        s += f"\n    [default: {o.defval}] [currently: {_get_option(k, True)}]"

    if d:
        rkey = d.rkey or ""
        s += "\n    (Deprecated"
        s += f", use `{rkey}` instead."
        s += ")"

    return s


def pp_options_list(keys: Iterable[str], width=80, _print: bool = False):
    """Builds a concise listing of available options, grouped by prefix"""
    from itertools import groupby
    from textwrap import wrap

    def pp(name: str, ks: Iterable[str]) -> list[str]:
        pfx = "- " + name + ".[" if name else ""
        ls = wrap(
            ", ".join(ks),
            width,
            initial_indent=pfx,
            subsequent_indent="  ",
            break_long_words=False,
        )
        if ls and ls[-1] and name:
            ls[-1] = ls[-1] + "]"
        return ls

    ls: list[str] = []
    singles = [x for x in sorted(keys) if x.find(".") < 0]
    if singles:
        ls += pp("", singles)
    keys = [x for x in keys if x.find(".") >= 0]

    for k, g in groupby(sorted(keys), lambda x: x[: x.rfind(".")]):
        ks = [x[len(k) + 1 :] for x in list(g)]
        ls += pp(k, ks)
    s = "\n".join(ls)
    if _print:
        print(s)
    else:
        return s


#
# helpers


@contextmanager
def config_prefix(prefix):
    """
    contextmanager for multiple invocations of API with a common prefix

    supported API functions: (register / get / set )__option

    Warning: This is not thread - safe, and won't work properly if you import
    the API functions into your module using the "from x import y" construct.

    Example
    -------
    import pandas._config.config as cf
    with cf.config_prefix("display.font"):
        cf.register_option("color", "red")
        cf.register_option("size", " 5 pt")
        cf.set_option(size, " 6 pt")
        cf.get_option(size)
        ...

    etc'

    will register options "display.font.color", "display.font.size", set the
    value of "display.font.size"... and so on.
    """
    # Note: reset_option relies on set_option, and on key directly
    # it does not fit in to this monkey-patching scheme

    global register_option, get_option, set_option, reset_option

    def wrap(func: F) -> F:
        def inner(key: str, *args, **kwds):
            pkey = f"{prefix}.{key}"
            return func(pkey, *args, **kwds)

        return cast(F, inner)

    _register_option = register_option
    _get_option = get_option
    _set_option = set_option
    set_option = wrap(set_option)
    get_option = wrap(get_option)
    register_option = wrap(register_option)
    yield None
    set_option = _set_option
    get_option = _get_option
    register_option = _register_option


# These factories and methods are handy for use as the validator
# arg in register_option


def is_type_factory(_type: type[Any]) -> Callable[[Any], None]:
    """

    Parameters
    ----------
    `_type` - a type to be compared against (e.g. type(x) == `_type`)

    Returns
    -------
    validator - a function of a single argument x , which raises
                ValueError if type(x) is not equal to `_type`

    """

    def inner(x) -> None:
        if type(x) != _type:
            raise ValueError(f"Value must have type '{_type}'")

    return inner


def is_instance_factory(_type) -> Callable[[Any], None]:
    """

    Parameters
    ----------
    `_type` - the type to be checked against

    Returns
    -------
    validator - a function of a single argument x , which raises
                ValueError if x is not an instance of `_type`

    """
    if isinstance(_type, (tuple, list)):
        _type = tuple(_type)
        type_repr = "|".join(map(str, _type))
    else:
        type_repr = f"'{_type}'"

    def inner(x) -> None:
        if not isinstance(x, _type):
            raise ValueError(f"Value must be an instance of {type_repr}")

    return inner


def is_one_of_factory(legal_values) -> Callable[[Any], None]:

    callables = [c for c in legal_values if callable(c)]
    legal_values = [c for c in legal_values if not callable(c)]

    def inner(x) -> None:
        if x not in legal_values:

            if not any(c(x) for c in callables):
                uvals = [str(lval) for lval in legal_values]
                pp_values = "|".join(uvals)
                msg = f"Value must be one of {pp_values}"
                if len(callables):
                    msg += " or a callable"
                raise ValueError(msg)

    return inner


def is_nonnegative_int(value: int | None) -> None:
    """
    Verify that value is None or a positive int.

    Parameters
    ----------
    value : None or int
            The `value` to be checked.

    Raises
    ------
    ValueError
        When the value is not None or is a negative integer
    """
    if value is None:
        return

    elif isinstance(value, int):
        if value >= 0:
            return

    msg = "Value must be a nonnegative integer or None"
    raise ValueError(msg)


# common type validators, for convenience
# usage: register_option(... , validator = is_int)
is_int = is_type_factory(int)
is_bool = is_type_factory(bool)
is_float = is_type_factory(float)
is_str = is_type_factory(str)
is_text = is_instance_factory((str, bytes))


def is_callable(obj) -> bool:
    """

    Parameters
    ----------
    `obj` - the object to be checked

    Returns
    -------
    validator - returns True if object is callable
        raises ValueError otherwise.

    """
    if not callable(obj):
        raise ValueError("Value must be a callable")
    return True
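A short usage sketch for the options API defined above; "demo.answer" is a hypothetical option registered purely for illustration:

import pandas._config.config as cf

cf.register_option("demo.answer", 42, "An illustrative option.", validator=cf.is_int)
print(cf.get_option("demo.answer"))        # 42

with cf.option_context("demo.answer", 7):  # temporary override, restored on exit
    print(cf.get_option("demo.answer"))    # 7

print(cf.get_option("demo.answer"))        # 42 again
cf.reset_option("demo.answer")             # back to the registered default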
.venv/lib/python3.7/site-packages/pandas/_config/dates.py (new file, 23 lines)
@@ -0,0 +1,23 @@
"""
config for datetime formatting
"""
from pandas._config import config as cf

pc_date_dayfirst_doc = """
: boolean
    When True, prints and parses dates with the day first, eg 20/01/2005
"""

pc_date_yearfirst_doc = """
: boolean
    When True, prints and parses dates with the year first, eg 2005/01/20
"""

with cf.config_prefix("display"):
    # Needed upstream of `_libs` because these are used in tslibs.parsing
    cf.register_option(
        "date_dayfirst", False, pc_date_dayfirst_doc, validator=cf.is_bool
    )
    cf.register_option(
        "date_yearfirst", False, pc_date_yearfirst_doc, validator=cf.is_bool
    )
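The two options registered above surface through the public pandas API; a quick check, assuming pandas is importable:

import pandas as pd

pd.set_option("display.date_dayfirst", True)
print(pd.get_option("display.date_dayfirst"))  # True
pd.reset_option("display.date_dayfirst")       # restore the default (False)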
.venv/lib/python3.7/site-packages/pandas/_config/display.py (new file, 62 lines)
@@ -0,0 +1,62 @@
"""
Unopinionated display configuration.
"""

from __future__ import annotations

import locale
import sys

from pandas._config import config as cf

# -----------------------------------------------------------------------------
# Global formatting options
_initial_defencoding: str | None = None


def detect_console_encoding() -> str:
    """
    Try to find the most capable encoding supported by the console.
    slightly modified from the way IPython handles the same issue.
    """
    global _initial_defencoding

    encoding = None
    try:
        encoding = sys.stdout.encoding or sys.stdin.encoding
    except (AttributeError, OSError):
        pass

    # try again for something better
    if not encoding or "ascii" in encoding.lower():
        try:
            encoding = locale.getpreferredencoding()
        except locale.Error:
            # can be raised by locale.setlocale(), which is
            # called by getpreferredencoding
            # (on some systems, see stdlib locale docs)
            pass

    # when all else fails. this will usually be "ascii"
    if not encoding or "ascii" in encoding.lower():
        encoding = sys.getdefaultencoding()

    # GH#3360, save the reported defencoding at import time
    # MPL backends may change it. Make available for debugging.
    if not _initial_defencoding:
        _initial_defencoding = sys.getdefaultencoding()

    return encoding


pc_encoding_doc = """
: str/unicode
    Defaults to the detected encoding of the console.
    Specifies the encoding to be used for strings returned by to_string,
    these are generally strings meant to be displayed on the console.
"""

with cf.config_prefix("display"):
    cf.register_option(
        "encoding", detect_console_encoding(), pc_encoding_doc, validator=cf.is_text
    )
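The fallback chain in detect_console_encoding (stdout/stdin encoding, then the locale's preferred encoding, then sys.getdefaultencoding()) can be exercised directly; a usage sketch, assuming pandas is importable:

from pandas._config.display import detect_console_encoding
import pandas as pd

print(detect_console_encoding())           # e.g. "utf-8" on most modern terminals
print(pd.get_option("display.encoding"))   # value captured when pandas was imported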
.venv/lib/python3.7/site-packages/pandas/_config/localization.py (new file, 166 lines)
@@ -0,0 +1,166 @@
"""
Helpers for configuring locale settings.

Name `localization` is chosen to avoid overlap with builtin `locale` module.
"""
from contextlib import contextmanager
import locale
import re
import subprocess

from pandas._config.config import options


@contextmanager
def set_locale(new_locale, lc_var: int = locale.LC_ALL):
    """
    Context manager for temporarily setting a locale.

    Parameters
    ----------
    new_locale : str or tuple
        A string of the form <language_country>.<encoding>. For example to set
        the current locale to US English with a UTF8 encoding, you would pass
        "en_US.UTF-8".
    lc_var : int, default `locale.LC_ALL`
        The category of the locale being set.

    Notes
    -----
    This is useful when you want to run a particular block of code under a
    particular locale, without globally setting the locale. This probably isn't
    thread-safe.
    """
    current_locale = locale.getlocale()

    try:
        locale.setlocale(lc_var, new_locale)
        normalized_locale = locale.getlocale()
        if all(x is not None for x in normalized_locale):
            yield ".".join(normalized_locale)
        else:
            yield new_locale
    finally:
        locale.setlocale(lc_var, current_locale)


def can_set_locale(lc: str, lc_var: int = locale.LC_ALL) -> bool:
    """
    Check to see if we can set a locale, and subsequently get the locale,
    without raising an Exception.

    Parameters
    ----------
    lc : str
        The locale to attempt to set.
    lc_var : int, default `locale.LC_ALL`
        The category of the locale being set.

    Returns
    -------
    bool
        Whether the passed locale can be set
    """
    try:
        with set_locale(lc, lc_var=lc_var):
            pass
    except (ValueError, locale.Error):
        # horrible name for a Exception subclass
        return False
    else:
        return True


def _valid_locales(locales, normalize):
    """
    Return a list of normalized locales that do not throw an ``Exception``
    when set.

    Parameters
    ----------
    locales : str
        A string where each locale is separated by a newline.
    normalize : bool
        Whether to call ``locale.normalize`` on each locale.

    Returns
    -------
    valid_locales : list
        A list of valid locales.
    """
    return [
        loc
        for loc in (
            locale.normalize(loc.strip()) if normalize else loc.strip()
            for loc in locales
        )
        if can_set_locale(loc)
    ]


def _default_locale_getter():
    return subprocess.check_output(["locale -a"], shell=True)


def get_locales(prefix=None, normalize=True, locale_getter=_default_locale_getter):
    """
    Get all the locales that are available on the system.

    Parameters
    ----------
    prefix : str
        If not ``None`` then return only those locales with the prefix
        provided. For example to get all English language locales (those that
        start with ``"en"``), pass ``prefix="en"``.
    normalize : bool
        Call ``locale.normalize`` on the resulting list of available locales.
        If ``True``, only locales that can be set without throwing an
        ``Exception`` are returned.
    locale_getter : callable
        The function to use to retrieve the current locales. This should return
        a string with each locale separated by a newline character.

    Returns
    -------
    locales : list of strings
        A list of locale strings that can be set with ``locale.setlocale()``.
        For example::

            locale.setlocale(locale.LC_ALL, locale_string)

        On error will return None (no locale available, e.g. Windows)

    """
    try:
        raw_locales = locale_getter()
    except subprocess.CalledProcessError:
        # Raised on (some? all?) Windows platforms because Note: "locale -a"
        # is not defined
        return None

    try:
        # raw_locales is "\n" separated list of locales
        # it may contain non-decodable parts, so split
        # extract what we can and then rejoin.
        raw_locales = raw_locales.split(b"\n")
        out_locales = []
        for x in raw_locales:
            try:
                out_locales.append(str(x, encoding=options.display.encoding))
            except UnicodeError:
                # 'locale -a' is used to populated 'raw_locales' and on
                # Redhat 7 Linux (and maybe others) prints locale names
                # using windows-1252 encoding. Bug only triggered by
                # a few special characters and when there is an
                # extensive list of installed locales.
                out_locales.append(str(x, encoding="windows-1252"))

    except TypeError:
        pass

    if prefix is None:
        return _valid_locales(out_locales, normalize)

    pattern = re.compile(f"{prefix}.*")
    found = pattern.findall("\n".join(out_locales))
    return _valid_locales(found, normalize)
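A usage sketch for the locale helpers above, assuming a POSIX system where `locale -a` exists (on Windows, get_locales() returns None as documented):

from pandas._config.localization import can_set_locale, get_locales, set_locale

locales = get_locales(prefix="en")          # e.g. ["en_AG", "en_US.utf8", ...] or None
if locales and can_set_locale(locales[0]):
    with set_locale(locales[0]) as loc:
        print(f"temporarily running under {loc}")
# the original locale is restored when the context manager exits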
.venv/lib/python3.7/site-packages/pandas/_libs/__init__.py (new file, 22 lines)
@@ -0,0 +1,22 @@
__all__ = [
    "NaT",
    "NaTType",
    "OutOfBoundsDatetime",
    "Period",
    "Timedelta",
    "Timestamp",
    "iNaT",
    "Interval",
]


from pandas._libs.interval import Interval
from pandas._libs.tslibs import (
    NaT,
    NaTType,
    OutOfBoundsDatetime,
    Period,
    Timedelta,
    Timestamp,
    iNaT,
)
(2 binary files not shown)
.venv/lib/python3.7/site-packages/pandas/_libs/algos.pxd (new file, 4 lines)
@@ -0,0 +1,4 @@
from pandas._libs.util cimport numeric


cdef numeric kth_smallest_c(numeric* arr, Py_ssize_t k, Py_ssize_t n) nogil
.venv/lib/python3.7/site-packages/pandas/_libs/algos.pyi (new file, 388 lines)
@@ -0,0 +1,388 @@
# Note: this covers algos.pyx and algos_common_helper but NOT algos_take_helper
from typing import Any

import numpy as np

class Infinity:
    """
    Provide a positive Infinity comparison method for ranking.
    """

    def __eq__(self, other) -> bool: ...
    def __ne__(self, other) -> bool: ...
    def __lt__(self, other) -> bool: ...
    def __le__(self, other) -> bool: ...
    def __gt__(self, other) -> bool: ...
    def __ge__(self, other) -> bool: ...

class NegInfinity:
    """
    Provide a negative Infinity comparison method for ranking.
    """

    def __eq__(self, other) -> bool: ...
    def __ne__(self, other) -> bool: ...
    def __lt__(self, other) -> bool: ...
    def __le__(self, other) -> bool: ...
    def __gt__(self, other) -> bool: ...
    def __ge__(self, other) -> bool: ...

def unique_deltas(
    arr: np.ndarray,  # const int64_t[:]
) -> np.ndarray: ...  # np.ndarray[np.int64, ndim=1]
def is_lexsorted(list_of_arrays: list[np.ndarray]) -> bool: ...
def groupsort_indexer(
    index: np.ndarray,  # const int64_t[:]
    ngroups: int,
) -> tuple[
    np.ndarray,  # ndarray[int64_t, ndim=1]
    np.ndarray,  # ndarray[int64_t, ndim=1]
]: ...
def kth_smallest(
    a: np.ndarray,  # numeric[:]
    k: int,
) -> Any: ...  # numeric

# ----------------------------------------------------------------------
# Pairwise correlation/covariance

def nancorr(
    mat: np.ndarray,  # const float64_t[:, :]
    cov: bool = False,
    minp=None,
) -> np.ndarray: ...  # ndarray[float64_t, ndim=2]
def nancorr_spearman(
    mat: np.ndarray,  # ndarray[float64_t, ndim=2]
    minp: int = 1,
) -> np.ndarray: ...  # ndarray[float64_t, ndim=2]
def nancorr_kendall(
    mat: np.ndarray,  # ndarray[float64_t, ndim=2]
    minp: int = 1,
) -> np.ndarray: ...  # ndarray[float64_t, ndim=2]

# ----------------------------------------------------------------------

# ctypedef fused algos_t:
#    float64_t
#    float32_t
#    object
#    int64_t
#    int32_t
#    int16_t
#    int8_t
#    uint64_t
#    uint32_t
#    uint16_t
#    uint8_t

def validate_limit(nobs: int | None, limit=None) -> int: ...
def pad(
    old: np.ndarray,  # ndarray[algos_t]
    new: np.ndarray,  # ndarray[algos_t]
    limit=None,
) -> np.ndarray: ...  # np.ndarray[np.intp, ndim=1]
def pad_inplace(
    values: np.ndarray,  # algos_t[:]
    mask: np.ndarray,  # uint8_t[:]
    limit=None,
) -> None: ...
def pad_2d_inplace(
    values: np.ndarray,  # algos_t[:, :]
    mask: np.ndarray,  # const uint8_t[:, :]
    limit=None,
) -> None: ...
def backfill(
    old: np.ndarray,  # ndarray[algos_t]
    new: np.ndarray,  # ndarray[algos_t]
    limit=None,
) -> np.ndarray: ...  # np.ndarray[np.intp, ndim=1]
def backfill_inplace(
    values: np.ndarray,  # algos_t[:]
    mask: np.ndarray,  # uint8_t[:]
    limit=None,
) -> None: ...
def backfill_2d_inplace(
    values: np.ndarray,  # algos_t[:, :]
    mask: np.ndarray,  # const uint8_t[:, :]
    limit=None,
) -> None: ...
def is_monotonic(
    arr: np.ndarray,  # ndarray[algos_t, ndim=1]
    timelike: bool,
) -> tuple[bool, bool, bool]: ...

# ----------------------------------------------------------------------
# rank_1d, rank_2d
# ----------------------------------------------------------------------

# ctypedef fused rank_t:
#    object
#    float64_t
#    uint64_t
#    int64_t

def rank_1d(
    values: np.ndarray,  # ndarray[rank_t, ndim=1]
    labels: np.ndarray,  # const int64_t[:]
    is_datetimelike: bool = ...,
    ties_method=...,
    ascending: bool = ...,
    pct: bool = ...,
    na_option=...,
) -> np.ndarray: ...  # np.ndarray[float64_t, ndim=1]
def rank_2d(
    in_arr: np.ndarray,  # ndarray[rank_t, ndim=2]
    axis: int = ...,
    is_datetimelike: bool = ...,
    ties_method=...,
    ascending: bool = ...,
    na_option=...,
    pct: bool = ...,
) -> np.ndarray: ...  # np.ndarray[float64_t, ndim=1]
def diff_2d(
    arr: np.ndarray,  # ndarray[diff_t, ndim=2]
    out: np.ndarray,  # ndarray[out_t, ndim=2]
    periods: int,
    axis: int,
    datetimelike: bool = ...,
) -> None: ...
def ensure_platform_int(arr: object) -> np.ndarray: ...
def ensure_object(arr: object) -> np.ndarray: ...
def ensure_float64(arr: object, copy=True) -> np.ndarray: ...
def ensure_float32(arr: object, copy=True) -> np.ndarray: ...
def ensure_int8(arr: object, copy=True) -> np.ndarray: ...
def ensure_int16(arr: object, copy=True) -> np.ndarray: ...
def ensure_int32(arr: object, copy=True) -> np.ndarray: ...
def ensure_int64(arr: object, copy=True) -> np.ndarray: ...
def ensure_uint8(arr: object, copy=True) -> np.ndarray: ...
def ensure_uint16(arr: object, copy=True) -> np.ndarray: ...
def ensure_uint32(arr: object, copy=True) -> np.ndarray: ...
def ensure_uint64(arr: object, copy=True) -> np.ndarray: ...
def take_1d_int8_int8(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int8_int32(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int8_int64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int8_float64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int16_int16(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int16_int32(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int16_int64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int16_float64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int32_int32(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int32_int64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int32_float64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int64_int64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_int64_float64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_float32_float32(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_float32_float64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_float64_float64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_object_object(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_bool_bool(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_1d_bool_object(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int8_int8(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int8_int32(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int8_int64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int8_float64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int16_int16(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int16_int32(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int16_int64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int16_float64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int32_int32(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int32_int64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int32_float64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int64_int64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_int64_float64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_float32_float32(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_float32_float64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_float64_float64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_object_object(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_bool_bool(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis0_bool_object(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int8_int8(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int8_int32(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int8_int64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int8_float64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int16_int16(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int16_int32(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int16_int64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int16_float64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int32_int32(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int32_int64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int32_float64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int64_int64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_int64_float64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_float32_float32(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_float32_float64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_float64_float64(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_object_object(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_bool_bool(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_axis1_bool_object(
    values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
def take_2d_multi_int8_int8(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
def take_2d_multi_int8_int32(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
def take_2d_multi_int8_int64(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
def take_2d_multi_int8_float64(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
def take_2d_multi_int16_int16(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
def take_2d_multi_int16_int32(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
def take_2d_multi_int16_int64(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
def take_2d_multi_int16_float64(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
def take_2d_multi_int32_int32(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
def take_2d_multi_int32_int64(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
def take_2d_multi_int32_float64(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
def take_2d_multi_int64_float64(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
def take_2d_multi_float32_float32(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
def take_2d_multi_float32_float64(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
def take_2d_multi_float64_float64(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
def take_2d_multi_object_object(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
def take_2d_multi_bool_bool(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
def take_2d_multi_bool_object(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
def take_2d_multi_int64_int64(
|
||||
values: np.ndarray, indexer, out: np.ndarray, fill_value=...
|
||||
) -> None: ...
|
||||
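Every stub above follows the same contract: the caller preallocates `out`, and the kernel treats an indexer entry of -1 as "missing", writing `fill_value` rather than indexing out of bounds. A minimal pure-NumPy sketch of that contract (an illustration, not the generated Cython):

    import numpy as np

    def take_1d_sketch(values, indexer, out, fill_value=np.nan):
        # mirrors the take_1d_* kernels: -1 in the indexer marks a missing row
        for i, idx in enumerate(indexer):
            out[i] = fill_value if idx == -1 else values[idx]

    values = np.array([10.0, 20.0, 30.0])
    out = np.empty(4)
    take_1d_sketch(values, np.array([2, 0, -1, 1]), out)
    # out is now [30., 10., nan, 20.]

The dtype pairs in the names (e.g. int16_float64) exist because filling with NaN forces an integer source into a float destination.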
1524
.venv/lib/python3.7/site-packages/pandas/_libs/algos.pyx
Normal file
File diff suppressed because it is too large
@@ -0,0 +1,72 @@
"""
Template for each `dtype` helper function using 1-d template

WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""

# ----------------------------------------------------------------------
# ensure_dtype
# ----------------------------------------------------------------------

cdef int PLATFORM_INT = (<ndarray>np.arange(0, dtype=np.intp)).descr.type_num


def ensure_platform_int(object arr):
    # GH3033, GH1392
    # platform int is the size of the int pointer, e.g. np.intp
    if util.is_array(arr):
        if (<ndarray>arr).descr.type_num == PLATFORM_INT:
            return arr
        else:
            # equiv: arr.astype(np.intp)
            return cnp.PyArray_Cast(<ndarray>arr, PLATFORM_INT)
    else:
        return np.array(arr, dtype=np.intp)


def ensure_object(object arr):
    if util.is_array(arr):
        if (<ndarray>arr).descr.type_num == NPY_OBJECT:
            return arr
        else:
            # equiv: arr.astype(object)
            return cnp.PyArray_Cast(<ndarray>arr, NPY_OBJECT)
    else:
        return np.array(arr, dtype=np.object_)

{{py:

# name, c_type, dtype
dtypes = [('float64', 'FLOAT64', 'float64'),
          ('float32', 'FLOAT32', 'float32'),
          ('int8', 'INT8', 'int8'),
          ('int16', 'INT16', 'int16'),
          ('int32', 'INT32', 'int32'),
          ('int64', 'INT64', 'int64'),
          ('uint8', 'UINT8', 'uint8'),
          ('uint16', 'UINT16', 'uint16'),
          ('uint32', 'UINT32', 'uint32'),
          ('uint64', 'UINT64', 'uint64'),
          # ('platform_int', 'INT', 'int_'),
          # ('object', 'OBJECT', 'object_'),
          ]

def get_dispatch(dtypes):

    for name, c_type, dtype in dtypes:
        yield name, c_type, dtype
}}

{{for name, c_type, dtype in get_dispatch(dtypes)}}


def ensure_{{name}}(object arr, copy=True):
    if util.is_array(arr):
        if (<ndarray>arr).descr.type_num == NPY_{{c_type}}:
            return arr
        else:
            return arr.astype(np.{{dtype}}, copy=copy)
    else:
        return np.array(arr, dtype=np.{{dtype}})

{{endfor}}
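The ensure_* helpers above return the input unchanged when it already has the target type, so they are cheap to call defensively. The observable behaviour, sketched with public NumPy calls (the Cython version avoids Python-level dispatch):

    import numpy as np

    def ensure_platform_int_sketch(arr):
        # no-op when already np.intp; otherwise cast (equiv: arr.astype(np.intp))
        if isinstance(arr, np.ndarray):
            return arr if arr.dtype == np.intp else arr.astype(np.intp)
        return np.array(arr, dtype=np.intp)

    assert ensure_platform_int_sketch(np.arange(3, dtype=np.int32)).dtype == np.intp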
@@ -0,0 +1,251 @@
"""
Template for each `dtype` helper function for take

WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""

# ----------------------------------------------------------------------
# take_1d, take_2d
# ----------------------------------------------------------------------


{{py:

# c_type_in, c_type_out
dtypes = [
    ('uint8_t', 'uint8_t'),
    ('uint8_t', 'object'),
    ('int8_t', 'int8_t'),
    ('int8_t', 'int32_t'),
    ('int8_t', 'int64_t'),
    ('int8_t', 'float64_t'),
    ('int16_t', 'int16_t'),
    ('int16_t', 'int32_t'),
    ('int16_t', 'int64_t'),
    ('int16_t', 'float64_t'),
    ('int32_t', 'int32_t'),
    ('int32_t', 'int64_t'),
    ('int32_t', 'float64_t'),
    ('int64_t', 'int64_t'),
    ('int64_t', 'float64_t'),
    ('float32_t', 'float32_t'),
    ('float32_t', 'float64_t'),
    ('float64_t', 'float64_t'),
    ('object', 'object'),
]


def get_dispatch(dtypes):

    for (c_type_in, c_type_out) in dtypes:

        def get_name(dtype_name):
            if dtype_name == "object":
                return "object"
            if dtype_name == "uint8_t":
                return "bool"
            return dtype_name[:-2]

        name = get_name(c_type_in)
        dest = get_name(c_type_out)

        args = dict(name=name, dest=dest, c_type_in=c_type_in,
                    c_type_out=c_type_out)

        yield (name, dest, c_type_in, c_type_out)

}}


{{for name, dest, c_type_in, c_type_out in get_dispatch(dtypes)}}


@cython.wraparound(False)
@cython.boundscheck(False)
{{if c_type_in != "object"}}
def take_1d_{{name}}_{{dest}}(const {{c_type_in}}[:] values,
{{else}}
def take_1d_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=1] values,
{{endif}}
                              const intp_t[:] indexer,
                              {{c_type_out}}[:] out,
                              fill_value=np.nan):

    cdef:
        Py_ssize_t i, n, idx
        {{c_type_out}} fv

    n = indexer.shape[0]

    fv = fill_value

    {{if c_type_out != "object"}}
    with nogil:
    {{else}}
    if True:
    {{endif}}
        for i in range(n):
            idx = indexer[i]
            if idx == -1:
                out[i] = fv
            else:
                {{if c_type_in == "uint8_t" and c_type_out == "object"}}
                out[i] = True if values[idx] > 0 else False
                {{else}}
                out[i] = values[idx]
                {{endif}}


@cython.wraparound(False)
@cython.boundscheck(False)
{{if c_type_in != "object"}}
def take_2d_axis0_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values,
{{else}}
def take_2d_axis0_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
{{endif}}
                                    ndarray[intp_t] indexer,
                                    {{c_type_out}}[:, :] out,
                                    fill_value=np.nan):
    cdef:
        Py_ssize_t i, j, k, n, idx
        {{c_type_out}} fv

    n = len(indexer)
    k = values.shape[1]

    fv = fill_value

    IF {{True if c_type_in == c_type_out != "object" else False}}:
        cdef:
            const {{c_type_out}} *v
            {{c_type_out}} *o

        # GH#3130
        if (values.strides[1] == out.strides[1] and
                values.strides[1] == sizeof({{c_type_out}}) and
                sizeof({{c_type_out}}) * n >= 256):

            for i in range(n):
                idx = indexer[i]
                if idx == -1:
                    for j in range(k):
                        out[i, j] = fv
                else:
                    v = &values[idx, 0]
                    o = &out[i, 0]
                    memmove(o, v, <size_t>(sizeof({{c_type_out}}) * k))
            return

    for i in range(n):
        idx = indexer[i]
        if idx == -1:
            for j in range(k):
                out[i, j] = fv
        else:
            for j in range(k):
                {{if c_type_in == "uint8_t" and c_type_out == "object"}}
                out[i, j] = True if values[idx, j] > 0 else False
                {{else}}
                out[i, j] = values[idx, j]
                {{endif}}


@cython.wraparound(False)
@cython.boundscheck(False)
{{if c_type_in != "object"}}
def take_2d_axis1_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values,
{{else}}
def take_2d_axis1_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
{{endif}}
                                    ndarray[intp_t] indexer,
                                    {{c_type_out}}[:, :] out,
                                    fill_value=np.nan):

    cdef:
        Py_ssize_t i, j, k, n, idx
        {{c_type_out}} fv

    n = len(values)
    k = len(indexer)

    if n == 0 or k == 0:
        return

    fv = fill_value

    for i in range(n):
        for j in range(k):
            idx = indexer[j]
            if idx == -1:
                out[i, j] = fv
            else:
                {{if c_type_in == "uint8_t" and c_type_out == "object"}}
                out[i, j] = True if values[i, idx] > 0 else False
                {{else}}
                out[i, j] = values[i, idx]
                {{endif}}


@cython.wraparound(False)
@cython.boundscheck(False)
def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
                                    indexer,
                                    ndarray[{{c_type_out}}, ndim=2] out,
                                    fill_value=np.nan):
    cdef:
        Py_ssize_t i, j, k, n, idx
        ndarray[intp_t] idx0 = indexer[0]
        ndarray[intp_t] idx1 = indexer[1]
        {{c_type_out}} fv

    n = len(idx0)
    k = len(idx1)

    fv = fill_value
    for i in range(n):
        idx = idx0[i]
        if idx == -1:
            for j in range(k):
                out[i, j] = fv
        else:
            for j in range(k):
                if idx1[j] == -1:
                    out[i, j] = fv
                else:
                    {{if c_type_in == "uint8_t" and c_type_out == "object"}}
                    out[i, j] = True if values[idx, idx1[j]] > 0 else False
                    {{else}}
                    out[i, j] = values[idx, idx1[j]]
                    {{endif}}

{{endfor}}

# ----------------------------------------------------------------------
# take_2d internal function
# ----------------------------------------------------------------------

ctypedef fused take_t:
    float64_t
    uint64_t
    int64_t
    object


cdef _take_2d(ndarray[take_t, ndim=2] values, ndarray[intp_t, ndim=2] idx):
    cdef:
        Py_ssize_t i, j, N, K
        ndarray[intp_t, ndim=2, cast=True] indexer = idx
        ndarray[take_t, ndim=2] result

    N, K = (<object>values).shape

    if take_t is object:
        # evaluated at compile-time
        result = values.copy()
    else:
        result = np.empty_like(values)

    for i in range(N):
        for j in range(K):
            result[i, j] = values[i, indexer[i, j]]
    return result
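Note the fast path in take_2d_axis0 above: when source and destination share a dtype and rows are contiguous and at least 256 bytes, whole rows are copied with a single memmove instead of an element loop. A NumPy sketch of the same row-wise semantics (vectorized assignment plays the role of memmove here):

    import numpy as np

    def take_2d_axis0_sketch(values, indexer, out, fill_value=np.nan):
        mask = indexer == -1
        out[~mask] = values[indexer[~mask]]  # whole-row copies for valid indices
        out[mask] = fill_value               # rows indexed by -1 get the fill value

    values = np.arange(6, dtype=np.float64).reshape(3, 2)
    out = np.empty((3, 2))
    take_2d_axis0_sketch(values, np.array([1, -1, 0]), out)
    # rows of out: values[1], all-NaN, values[0]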
Binary file not shown.
11
.venv/lib/python3.7/site-packages/pandas/_libs/arrays.pxd
Normal file
@@ -0,0 +1,11 @@
from numpy cimport ndarray


cdef class NDArrayBacked:
    cdef:
        readonly ndarray _ndarray
        readonly object _dtype

    cpdef NDArrayBacked _from_backing_data(self, ndarray values)
    cpdef __setstate__(self, state)
34
.venv/lib/python3.7/site-packages/pandas/_libs/arrays.pyi
Normal file
@@ -0,0 +1,34 @@
from typing import Sequence

import numpy as np

from pandas._typing import (
    DtypeObj,
    Shape,
)

class NDArrayBacked:
    _dtype: DtypeObj
    _ndarray: np.ndarray
    def __init__(self, values: np.ndarray, dtype: DtypeObj): ...
    @classmethod
    def _simple_new(cls, values: np.ndarray, dtype: DtypeObj): ...
    def _from_backing_data(self, values: np.ndarray): ...
    def __setstate__(self, state): ...
    def __len__(self) -> int: ...
    @property
    def shape(self) -> Shape: ...
    @property
    def ndim(self) -> int: ...
    @property
    def size(self) -> int: ...
    @property
    def nbytes(self) -> int: ...
    def copy(self): ...
    def delete(self, loc, axis=0): ...
    def swapaxes(self, axis1, axis2): ...
    def repeat(self, repeats: int | Sequence[int], axis: int | None = ...): ...
    def reshape(self, *args, **kwargs): ...
    def ravel(self, order="C"): ...
    @property
    def T(self): ...
167
.venv/lib/python3.7/site-packages/pandas/_libs/arrays.pyx
Normal file
@@ -0,0 +1,167 @@
"""
Cython implementations for internal ExtensionArrays.
"""
cimport cython

import numpy as np

cimport numpy as cnp
from numpy cimport ndarray

cnp.import_array()


@cython.freelist(16)
cdef class NDArrayBacked:
    """
    Implementing these methods in cython improves performance quite a bit.

    import pandas as pd

    from pandas._libs.arrays import NDArrayBacked as cls

    dti = pd.date_range("2016-01-01", periods=3)
    dta = dti._data
    arr = dta._ndarray

    obj = cls._simple_new(arr, arr.dtype)

    # for foo in [arr, dta, obj]: ...

    %timeit foo.copy()
    299 ns ± 30 ns per loop     # <-- arr underlying ndarray (for reference)
    530 ns ± 9.24 ns per loop   # <-- dta with cython NDArrayBacked
    1.66 µs ± 46.3 ns per loop  # <-- dta without cython NDArrayBacked
    328 ns ± 5.29 ns per loop   # <-- obj with NDArrayBacked.__cinit__
    371 ns ± 6.97 ns per loop   # <-- obj with NDArrayBacked._simple_new

    %timeit foo.T
    125 ns ± 6.27 ns per loop   # <-- arr underlying ndarray (for reference)
    226 ns ± 7.66 ns per loop   # <-- dta with cython NDArrayBacked
    911 ns ± 16.6 ns per loop   # <-- dta without cython NDArrayBacked
    215 ns ± 4.54 ns per loop   # <-- obj with NDArrayBacked._simple_new

    """
    # TODO: implement take in terms of cnp.PyArray_TakeFrom
    # TODO: implement concat_same_type in terms of cnp.PyArray_Concatenate

    # cdef:
    #    readonly ndarray _ndarray
    #    readonly object _dtype

    def __init__(self, ndarray values, object dtype):
        self._ndarray = values
        self._dtype = dtype

    @classmethod
    def _simple_new(cls, ndarray values, object dtype):
        cdef:
            NDArrayBacked obj
        obj = NDArrayBacked.__new__(cls)
        obj._ndarray = values
        obj._dtype = dtype
        return obj

    cpdef NDArrayBacked _from_backing_data(self, ndarray values):
        """
        Construct a new ExtensionArray `new_array` with `arr` as its _ndarray.

        This should round-trip:
            self == self._from_backing_data(self._ndarray)
        """
        # TODO: re-reuse simple_new if/when it can be cpdef
        cdef:
            NDArrayBacked obj
        obj = NDArrayBacked.__new__(type(self))
        obj._ndarray = values
        obj._dtype = self._dtype
        return obj

    cpdef __setstate__(self, state):
        if isinstance(state, dict):
            if "_data" in state:
                data = state.pop("_data")
            elif "_ndarray" in state:
                data = state.pop("_ndarray")
            else:
                raise ValueError
            self._ndarray = data
            self._dtype = state.pop("_dtype")

            for key, val in state.items():
                setattr(self, key, val)
        elif isinstance(state, tuple):
            if len(state) != 3:
                if len(state) == 1 and isinstance(state[0], dict):
                    self.__setstate__(state[0])
                    return
                raise NotImplementedError(state)

            data, dtype = state[:2]
            if isinstance(dtype, np.ndarray):
                dtype, data = data, dtype
            self._ndarray = data
            self._dtype = dtype

            if isinstance(state[2], dict):
                for key, val in state[2].items():
                    setattr(self, key, val)
            else:
                raise NotImplementedError(state)
        else:
            raise NotImplementedError(state)

    def __len__(self) -> int:
        return len(self._ndarray)

    @property
    def shape(self):
        # object cast bc _ndarray.shape is npy_intp*
        return (<object>(self._ndarray)).shape

    @property
    def ndim(self) -> int:
        return self._ndarray.ndim

    @property
    def size(self) -> int:
        return self._ndarray.size

    @property
    def nbytes(self) -> int:
        return self._ndarray.nbytes

    def copy(self):
        # NPY_ANYORDER -> same order as self._ndarray
        res_values = cnp.PyArray_NewCopy(self._ndarray, cnp.NPY_ANYORDER)
        return self._from_backing_data(res_values)

    def delete(self, loc, axis=0):
        res_values = np.delete(self._ndarray, loc, axis=axis)
        return self._from_backing_data(res_values)

    def swapaxes(self, axis1, axis2):
        res_values = cnp.PyArray_SwapAxes(self._ndarray, axis1, axis2)
        return self._from_backing_data(res_values)

    # TODO: pass NPY_MAXDIMS equiv to axis=None?
    def repeat(self, repeats, axis: int = 0):
        if axis is None:
            axis = 0
        res_values = cnp.PyArray_Repeat(self._ndarray, repeats, <int>axis)
        return self._from_backing_data(res_values)

    def reshape(self, *args, **kwargs):
        res_values = self._ndarray.reshape(*args, **kwargs)
        return self._from_backing_data(res_values)

    def ravel(self, order="C"):
        # cnp.PyArray_OrderConverter(PyObject* obj, NPY_ORDER* order)
        # res_values = cnp.PyArray_Ravel(self._ndarray, order)
        res_values = self._ndarray.ravel(order)
        return self._from_backing_data(res_values)

    @property
    def T(self):
        res_values = self._ndarray.T
        return self._from_backing_data(res_values)
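The docstring's setup can be reproduced to see the wrapping behaviour; note that pandas._libs.arrays is private API, so the import path and attributes below are version-dependent:

    import pandas as pd
    from pandas._libs.arrays import NDArrayBacked

    dti = pd.date_range("2016-01-01", periods=3)
    dta = dti._data                 # DatetimeArray, an NDArrayBacked subclass
    # wrap a new backing array in the same array type, keeping the dtype
    rev = dta._from_backing_data(dta._ndarray[::-1])
    assert type(rev) is type(dta) and rev.dtype == dta.dtype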
Binary file not shown.
151
.venv/lib/python3.7/site-packages/pandas/_libs/groupby.pyi
Normal file
@@ -0,0 +1,151 @@
from typing import Literal

import numpy as np

def group_median_float64(
    out: np.ndarray,  # ndarray[float64_t, ndim=2]
    counts: np.ndarray,  # ndarray[int64_t]
    values: np.ndarray,  # ndarray[float64_t, ndim=2]
    labels: np.ndarray,  # ndarray[int64_t]
    min_count: int = ...,  # Py_ssize_t
) -> None: ...
def group_cumprod_float64(
    out: np.ndarray,  # float64_t[:, ::1]
    values: np.ndarray,  # const float64_t[:, :]
    labels: np.ndarray,  # const int64_t[:]
    ngroups: int,
    is_datetimelike: bool,
    skipna: bool = ...,
) -> None: ...
def group_cumsum(
    out: np.ndarray,  # numeric[:, ::1]
    values: np.ndarray,  # ndarray[numeric, ndim=2]
    labels: np.ndarray,  # const int64_t[:]
    ngroups: int,
    is_datetimelike: bool,
    skipna: bool = ...,
) -> None: ...
def group_shift_indexer(
    out: np.ndarray,  # int64_t[::1]
    labels: np.ndarray,  # const int64_t[:]
    ngroups: int,
    periods: int,
) -> None: ...
def group_fillna_indexer(
    out: np.ndarray,  # ndarray[int64_t]
    labels: np.ndarray,  # ndarray[int64_t]
    mask: np.ndarray,  # ndarray[uint8_t]
    direction: Literal["ffill", "bfill"],
    limit: int,  # int64_t
    dropna: bool,
) -> None: ...
def group_any_all(
    out: np.ndarray,  # uint8_t[::1]
    values: np.ndarray,  # const uint8_t[::1]
    labels: np.ndarray,  # const int64_t[:]
    mask: np.ndarray,  # const uint8_t[::1]
    val_test: Literal["any", "all"],
    skipna: bool,
) -> None: ...
def group_add(
    out: np.ndarray,  # complexfloating_t[:, ::1]
    counts: np.ndarray,  # int64_t[::1]
    values: np.ndarray,  # ndarray[complexfloating_t, ndim=2]
    labels: np.ndarray,  # const intp_t[:]
    min_count: int = ...,
    datetimelike: bool = ...,
) -> None: ...
def group_prod(
    out: np.ndarray,  # floating[:, ::1]
    counts: np.ndarray,  # int64_t[::1]
    values: np.ndarray,  # ndarray[floating, ndim=2]
    labels: np.ndarray,  # const intp_t[:]
    min_count: int = ...,
) -> None: ...
def group_var(
    out: np.ndarray,  # floating[:, ::1]
    counts: np.ndarray,  # int64_t[::1]
    values: np.ndarray,  # ndarray[floating, ndim=2]
    labels: np.ndarray,  # const intp_t[:]
    min_count: int = ...,  # Py_ssize_t
    ddof: int = ...,  # int64_t
) -> None: ...
def group_mean(
    out: np.ndarray,  # floating[:, ::1]
    counts: np.ndarray,  # int64_t[::1]
    values: np.ndarray,  # ndarray[floating, ndim=2]
    labels: np.ndarray,  # const intp_t[:]
    min_count: int = ...,  # Py_ssize_t
    is_datetimelike: bool = ...,  # bint
    mask: np.ndarray | None = ...,
    result_mask: np.ndarray | None = ...,
) -> None: ...
def group_ohlc(
    out: np.ndarray,  # floating[:, ::1]
    counts: np.ndarray,  # int64_t[::1]
    values: np.ndarray,  # ndarray[floating, ndim=2]
    labels: np.ndarray,  # const intp_t[:]
    min_count: int = ...,
) -> None: ...
def group_quantile(
    out: np.ndarray,  # ndarray[float64_t]
    values: np.ndarray,  # ndarray[numeric, ndim=1]
    labels: np.ndarray,  # ndarray[int64_t]
    mask: np.ndarray,  # ndarray[uint8_t]
    q: float,  # float64_t
    interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"],
) -> None: ...
def group_last(
    out: np.ndarray,  # rank_t[:, ::1]
    counts: np.ndarray,  # int64_t[::1]
    values: np.ndarray,  # ndarray[rank_t, ndim=2]
    labels: np.ndarray,  # const int64_t[:]
    min_count: int = ...,  # Py_ssize_t
) -> None: ...
def group_nth(
    out: np.ndarray,  # rank_t[:, ::1]
    counts: np.ndarray,  # int64_t[::1]
    values: np.ndarray,  # ndarray[rank_t, ndim=2]
    labels: np.ndarray,  # const int64_t[:]
    min_count: int = ...,  # int64_t
    rank: int = ...,  # int64_t
) -> None: ...
def group_rank(
    out: np.ndarray,  # float64_t[:, ::1]
    values: np.ndarray,  # ndarray[rank_t, ndim=2]
    labels: np.ndarray,  # const int64_t[:]
    ngroups: int,
    is_datetimelike: bool,
    ties_method: Literal["average", "min", "max", "first", "dense"] = ...,
    ascending: bool = ...,
    pct: bool = ...,
    na_option: Literal["keep", "top", "bottom"] = ...,
) -> None: ...
def group_max(
    out: np.ndarray,  # groupby_t[:, ::1]
    counts: np.ndarray,  # int64_t[::1]
    values: np.ndarray,  # ndarray[groupby_t, ndim=2]
    labels: np.ndarray,  # const int64_t[:]
    min_count: int = ...,
) -> None: ...
def group_min(
    out: np.ndarray,  # groupby_t[:, ::1]
    counts: np.ndarray,  # int64_t[::1]
    values: np.ndarray,  # ndarray[groupby_t, ndim=2]
    labels: np.ndarray,  # const int64_t[:]
    min_count: int = ...,
) -> None: ...
def group_cummin(
    out: np.ndarray,  # groupby_t[:, ::1]
    values: np.ndarray,  # ndarray[groupby_t, ndim=2]
    labels: np.ndarray,  # const int64_t[:]
    ngroups: int,
    is_datetimelike: bool,
) -> None: ...
def group_cummax(
    out: np.ndarray,  # groupby_t[:, ::1]
    values: np.ndarray,  # ndarray[groupby_t, ndim=2]
    labels: np.ndarray,  # const int64_t[:]
    ngroups: int,
    is_datetimelike: bool,
) -> None: ...
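All of these kernels share a calling convention: `labels` assigns each input row to a group, `out` is preallocated, and rows labelled -1 belong to no group. A pure-NumPy reference model of what group_cumsum computes for a single column (a semantic sketch, not the Cython implementation, which also handles 2-d values, NaN and datetimes):

    import numpy as np

    def group_cumsum_sketch(values, labels, ngroups):
        out = np.empty(len(values))
        accum = np.zeros(ngroups)   # running per-group sums
        for i, lab in enumerate(labels):
            if lab < 0:             # -1 label: row belongs to no group
                out[i] = np.nan
                continue
            accum[lab] += values[i]
            out[i] = accum[lab]
        return out

    group_cumsum_sketch(np.array([1.0, 2.0, 3.0, 4.0]),
                        np.array([0, 1, 0, 1]), ngroups=2)
    # -> [1., 2., 4., 6.]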
1519
.venv/lib/python3.7/site-packages/pandas/_libs/groupby.pyx
Normal file
File diff suppressed because it is too large
Binary file not shown.
@@ -0,0 +1,7 @@
import numpy as np

def hash_object_array(
    arr: np.ndarray,  # np.ndarray[object]
    key: str,
    encoding: str = ...,
) -> np.ndarray: ...  # np.ndarray[np.uint64]
206
.venv/lib/python3.7/site-packages/pandas/_libs/hashing.pyx
Normal file
@@ -0,0 +1,206 @@
# Translated from the reference implementation
# at https://github.com/veorq/SipHash

import cython

from libc.stdlib cimport (
    free,
    malloc,
)

import numpy as np

from numpy cimport (
    import_array,
    ndarray,
    uint8_t,
    uint32_t,
    uint64_t,
)

import_array()

from pandas._libs.util cimport is_nan

DEF cROUNDS = 2
DEF dROUNDS = 4


@cython.boundscheck(False)
def hash_object_array(
    ndarray[object] arr, str key, str encoding="utf8"
) -> np.ndarray[np.uint64]:
    """
    Parameters
    ----------
    arr : 1-d object ndarray of objects
    key : hash key, must be 16 byte len encoded
    encoding : encoding for key & arr, default to 'utf8'

    Returns
    -------
    1-d uint64 ndarray of hashes.

    Raises
    ------
    TypeError
        If the array contains mixed types.

    Notes
    -----
    Allowed values must be strings, or nulls;
    mixed array types will raise TypeError.
    """
    cdef:
        Py_ssize_t i, l, n
        uint64_t[:] result
        bytes data, k
        uint8_t *kb
        uint64_t *lens
        char **vecs
        char *cdata
        object val
        list datas = []

    k = <bytes>key.encode(encoding)
    kb = <uint8_t *>k
    if len(k) != 16:
        raise ValueError(
            f"key should be a 16-byte string encoded, got {k} (len {len(k)})"
        )

    n = len(arr)

    # create an array of bytes
    vecs = <char **>malloc(n * sizeof(char *))
    lens = <uint64_t*>malloc(n * sizeof(uint64_t))

    for i in range(n):
        val = arr[i]
        if isinstance(val, bytes):
            data = <bytes>val
        elif isinstance(val, str):
            data = <bytes>val.encode(encoding)
        elif val is None or is_nan(val):
            # null, stringify and encode
            data = <bytes>str(val).encode(encoding)

        elif isinstance(val, tuple):
            # GH#28969 we could have a tuple, but need to ensure that
            # the tuple entries are themselves hashable before converting
            # to str
            hash(val)
            data = <bytes>str(val).encode(encoding)
        else:
            raise TypeError(
                f"{val} of type {type(val)} is not a valid type for hashing, "
                "must be string or null"
            )

        l = len(data)
        lens[i] = l
        cdata = data

        # keep the references alive through the end of the
        # function
        datas.append(data)
        vecs[i] = cdata

    result = np.empty(n, dtype=np.uint64)
    with nogil:
        for i in range(n):
            result[i] = low_level_siphash(<uint8_t *>vecs[i], lens[i], kb)

    free(vecs)
    free(lens)
    return result.base  # .base to retrieve underlying np.ndarray


cdef inline uint64_t _rotl(uint64_t x, uint64_t b) nogil:
    return (x << b) | (x >> (64 - b))


cdef inline void u32to8_le(uint8_t* p, uint32_t v) nogil:
    p[0] = <uint8_t>(v)
    p[1] = <uint8_t>(v >> 8)
    p[2] = <uint8_t>(v >> 16)
    p[3] = <uint8_t>(v >> 24)


cdef inline uint64_t u8to64_le(uint8_t* p) nogil:
    return (<uint64_t>p[0] |
            <uint64_t>p[1] << 8 |
            <uint64_t>p[2] << 16 |
            <uint64_t>p[3] << 24 |
            <uint64_t>p[4] << 32 |
            <uint64_t>p[5] << 40 |
            <uint64_t>p[6] << 48 |
            <uint64_t>p[7] << 56)


cdef inline void _sipround(uint64_t* v0, uint64_t* v1,
                           uint64_t* v2, uint64_t* v3) nogil:
    v0[0] += v1[0]
    v1[0] = _rotl(v1[0], 13)
    v1[0] ^= v0[0]
    v0[0] = _rotl(v0[0], 32)
    v2[0] += v3[0]
    v3[0] = _rotl(v3[0], 16)
    v3[0] ^= v2[0]
    v0[0] += v3[0]
    v3[0] = _rotl(v3[0], 21)
    v3[0] ^= v0[0]
    v2[0] += v1[0]
    v1[0] = _rotl(v1[0], 17)
    v1[0] ^= v2[0]
    v2[0] = _rotl(v2[0], 32)


@cython.cdivision(True)
cdef uint64_t low_level_siphash(uint8_t* data, size_t datalen,
                                uint8_t* key) nogil:
    cdef uint64_t v0 = 0x736f6d6570736575ULL
    cdef uint64_t v1 = 0x646f72616e646f6dULL
    cdef uint64_t v2 = 0x6c7967656e657261ULL
    cdef uint64_t v3 = 0x7465646279746573ULL
    cdef uint64_t b
    cdef uint64_t k0 = u8to64_le(key)
    cdef uint64_t k1 = u8to64_le(key + 8)
    cdef uint64_t m
    cdef int i
    cdef uint8_t* end = data + datalen - (datalen % sizeof(uint64_t))
    cdef int left = datalen & 7
    cdef int left_byte

    b = (<uint64_t>datalen) << 56
    v3 ^= k1
    v2 ^= k0
    v1 ^= k1
    v0 ^= k0

    while (data != end):
        m = u8to64_le(data)
        v3 ^= m
        for i in range(cROUNDS):
            _sipround(&v0, &v1, &v2, &v3)
        v0 ^= m

        data += sizeof(uint64_t)

    for i in range(left-1, -1, -1):
        b |= (<uint64_t>data[i]) << (i * 8)

    v3 ^= b

    for i in range(cROUNDS):
        _sipround(&v0, &v1, &v2, &v3)

    v0 ^= b
    v2 ^= 0xff

    for i in range(dROUNDS):
        _sipround(&v0, &v1, &v2, &v3)

    b = v0 ^ v1 ^ v2 ^ v3

    return b
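hash_object_array enforces the 16-byte key requirement from the docstring at runtime; pandas' public entry points are pandas.util.hash_array and hash_pandas_object, but the low-level function can be called directly (private import path, may change between versions):

    import numpy as np
    from pandas._libs.hashing import hash_object_array

    arr = np.array(["a", "b", None], dtype=object)       # strings and nulls only
    hashes = hash_object_array(arr, "0123456789abcdef")  # key encodes to 16 bytes
    assert hashes.dtype == np.uint64 and hashes.shape == arr.shape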
Binary file not shown.
141
.venv/lib/python3.7/site-packages/pandas/_libs/hashtable.pxd
Normal file
@@ -0,0 +1,141 @@
from numpy cimport (
    intp_t,
    ndarray,
)

from pandas._libs.khash cimport (
    complex64_t,
    complex128_t,
    float32_t,
    float64_t,
    int8_t,
    int16_t,
    int32_t,
    int64_t,
    kh_complex64_t,
    kh_complex128_t,
    kh_float32_t,
    kh_float64_t,
    kh_int8_t,
    kh_int16_t,
    kh_int32_t,
    kh_int64_t,
    kh_pymap_t,
    kh_str_t,
    kh_uint8_t,
    kh_uint16_t,
    kh_uint32_t,
    kh_uint64_t,
    khcomplex64_t,
    khcomplex128_t,
    uint8_t,
    uint16_t,
    uint32_t,
    uint64_t,
)

# prototypes for sharing

cdef class HashTable:
    pass

cdef class UInt64HashTable(HashTable):
    cdef kh_uint64_t *table

    cpdef get_item(self, uint64_t val)
    cpdef set_item(self, uint64_t key, Py_ssize_t val)

cdef class Int64HashTable(HashTable):
    cdef kh_int64_t *table

    cpdef get_item(self, int64_t val)
    cpdef set_item(self, int64_t key, Py_ssize_t val)

cdef class UInt32HashTable(HashTable):
    cdef kh_uint32_t *table

    cpdef get_item(self, uint32_t val)
    cpdef set_item(self, uint32_t key, Py_ssize_t val)

cdef class Int32HashTable(HashTable):
    cdef kh_int32_t *table

    cpdef get_item(self, int32_t val)
    cpdef set_item(self, int32_t key, Py_ssize_t val)

cdef class UInt16HashTable(HashTable):
    cdef kh_uint16_t *table

    cpdef get_item(self, uint16_t val)
    cpdef set_item(self, uint16_t key, Py_ssize_t val)

cdef class Int16HashTable(HashTable):
    cdef kh_int16_t *table

    cpdef get_item(self, int16_t val)
    cpdef set_item(self, int16_t key, Py_ssize_t val)

cdef class UInt8HashTable(HashTable):
    cdef kh_uint8_t *table

    cpdef get_item(self, uint8_t val)
    cpdef set_item(self, uint8_t key, Py_ssize_t val)

cdef class Int8HashTable(HashTable):
    cdef kh_int8_t *table

    cpdef get_item(self, int8_t val)
    cpdef set_item(self, int8_t key, Py_ssize_t val)

cdef class Float64HashTable(HashTable):
    cdef kh_float64_t *table

    cpdef get_item(self, float64_t val)
    cpdef set_item(self, float64_t key, Py_ssize_t val)

cdef class Float32HashTable(HashTable):
    cdef kh_float32_t *table

    cpdef get_item(self, float32_t val)
    cpdef set_item(self, float32_t key, Py_ssize_t val)

cdef class Complex64HashTable(HashTable):
    cdef kh_complex64_t *table

    cpdef get_item(self, complex64_t val)
    cpdef set_item(self, complex64_t key, Py_ssize_t val)

cdef class Complex128HashTable(HashTable):
    cdef kh_complex128_t *table

    cpdef get_item(self, complex128_t val)
    cpdef set_item(self, complex128_t key, Py_ssize_t val)

cdef class PyObjectHashTable(HashTable):
    cdef kh_pymap_t *table

    cpdef get_item(self, object val)
    cpdef set_item(self, object key, Py_ssize_t val)


cdef class StringHashTable(HashTable):
    cdef kh_str_t *table

    cpdef get_item(self, str val)
    cpdef set_item(self, str key, Py_ssize_t val)

cdef struct Int64VectorData:
    int64_t *data
    Py_ssize_t n, m

cdef class Vector:
    cdef bint external_view_exists

cdef class Int64Vector(Vector):
    cdef Int64VectorData *data
    cdef ndarray ao

    cdef resize(self)
    cpdef ndarray to_array(self)
    cdef inline void append(self, int64_t x)
    cdef extend(self, int64_t[:] x)
232
.venv/lib/python3.7/site-packages/pandas/_libs/hashtable.pyi
Normal file
@@ -0,0 +1,232 @@
from typing import (
    Any,
    Hashable,
    Literal,
)

import numpy as np

def unique_label_indices(
    labels: np.ndarray,  # const int64_t[:]
) -> np.ndarray: ...

class Factorizer:
    count: int
    def __init__(self, size_hint: int): ...
    def get_count(self) -> int: ...

class ObjectFactorizer(Factorizer):
    table: PyObjectHashTable
    uniques: ObjectVector
    def factorize(
        self,
        values: np.ndarray,  # ndarray[object]
        sort: bool = ...,
        na_sentinel=...,
        na_value=...,
    ) -> np.ndarray: ...  # np.ndarray[intp]

class Int64Factorizer(Factorizer):
    table: Int64HashTable
    uniques: Int64Vector
    def factorize(
        self,
        values: np.ndarray,  # const int64_t[:]
        sort: bool = ...,
        na_sentinel=...,
        na_value=...,
    ) -> np.ndarray: ...  # np.ndarray[intp]

class Int64Vector:
    def __init__(self): ...
    def __len__(self) -> int: ...
    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.int64]

class Int32Vector:
    def __init__(self): ...
    def __len__(self) -> int: ...
    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.int32]

class Int16Vector:
    def __init__(self): ...
    def __len__(self) -> int: ...
    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.int16]

class Int8Vector:
    def __init__(self): ...
    def __len__(self) -> int: ...
    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.int8]

class UInt64Vector:
    def __init__(self): ...
    def __len__(self) -> int: ...
    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.uint64]

class UInt32Vector:
    def __init__(self): ...
    def __len__(self) -> int: ...
    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.uint32]

class UInt16Vector:
    def __init__(self): ...
    def __len__(self) -> int: ...
    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.uint16]

class UInt8Vector:
    def __init__(self): ...
    def __len__(self) -> int: ...
    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.uint8]

class Float64Vector:
    def __init__(self): ...
    def __len__(self) -> int: ...
    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.float64]

class Float32Vector:
    def __init__(self): ...
    def __len__(self) -> int: ...
    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.float32]

class Complex128Vector:
    def __init__(self): ...
    def __len__(self) -> int: ...
    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.complex128]

class Complex64Vector:
    def __init__(self): ...
    def __len__(self) -> int: ...
    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.complex64]

class StringVector:
    def __init__(self): ...
    def __len__(self) -> int: ...
    def to_array(self) -> np.ndarray: ...  # np.ndarray[object]

class ObjectVector:
    def __init__(self): ...
    def __len__(self) -> int: ...
    def to_array(self) -> np.ndarray: ...  # np.ndarray[object]

class HashTable:
    # NB: The base HashTable class does _not_ actually have these methods;
    # we are putting them here for the sake of mypy to avoid
    # reproducing them in each subclass below.
    def __init__(self, size_hint: int = ...): ...
    def __len__(self) -> int: ...
    def __contains__(self, key: Hashable) -> bool: ...
    def sizeof(self, deep: bool = ...) -> int: ...
    def get_state(self) -> dict[str, int]: ...
    # TODO: `item` type is subclass-specific
    def get_item(self, item): ...  # TODO: return type?
    def set_item(self, item) -> None: ...
    # FIXME: we don't actually have this for StringHashTable or ObjectHashTable?
    def map(
        self,
        keys: np.ndarray,  # np.ndarray[subclass-specific]
        values: np.ndarray,  # const int64_t[:]
    ) -> None: ...
    def map_locations(
        self,
        values: np.ndarray,  # np.ndarray[subclass-specific]
    ) -> None: ...
    def lookup(
        self,
        values: np.ndarray,  # np.ndarray[subclass-specific]
    ) -> np.ndarray: ...  # np.ndarray[np.intp]
    def get_labels(
        self,
        values: np.ndarray,  # np.ndarray[subclass-specific]
        uniques,  # SubclassTypeVector
        count_prior: int = ...,
        na_sentinel: int = ...,
        na_value: object = ...,
    ) -> np.ndarray: ...  # np.ndarray[intp_t]
    def unique(
        self,
        values: np.ndarray,  # np.ndarray[subclass-specific]
        return_inverse: bool = ...,
    ) -> tuple[
        np.ndarray,  # np.ndarray[subclass-specific]
        np.ndarray,  # np.ndarray[np.intp],
    ] | np.ndarray: ...  # np.ndarray[subclass-specific]
    def _unique(
        self,
        values: np.ndarray,  # np.ndarray[subclass-specific]
        uniques,  # FooVector
        count_prior: int = ...,
        na_sentinel: int = ...,
        na_value: object = ...,
        ignore_na: bool = ...,
        return_inverse: bool = ...,
    ) -> tuple[
        np.ndarray,  # np.ndarray[subclass-specific]
        np.ndarray,  # np.ndarray[np.intp],
    ] | np.ndarray: ...  # np.ndarray[subclass-specific]
    def factorize(
        self,
        values: np.ndarray,  # np.ndarray[subclass-specific]
        na_sentinel: int = ...,
        na_value: object = ...,
        mask=...,
    ) -> tuple[
        np.ndarray,  # np.ndarray[subclass-specific]
        np.ndarray,  # np.ndarray[np.intp],
    ]: ...

class Complex128HashTable(HashTable): ...
class Complex64HashTable(HashTable): ...
class Float64HashTable(HashTable): ...
class Float32HashTable(HashTable): ...

class Int64HashTable(HashTable):
    # Only Int64HashTable has get_labels_groupby
    def get_labels_groupby(
        self,
        values: np.ndarray,  # const int64_t[:]
    ) -> tuple[
        np.ndarray,  # np.ndarray[np.intp]
        np.ndarray,  # np.ndarray[np.int64]
    ]: ...

class Int32HashTable(HashTable): ...
class Int16HashTable(HashTable): ...
class Int8HashTable(HashTable): ...
class UInt64HashTable(HashTable): ...
class UInt32HashTable(HashTable): ...
class UInt16HashTable(HashTable): ...
class UInt8HashTable(HashTable): ...
class StringHashTable(HashTable): ...
class PyObjectHashTable(HashTable): ...

def duplicated_int64(
    values: np.ndarray,  # const int64_t[:] values
    keep: Literal["last", "first", False] = ...,
) -> np.ndarray: ...  # np.ndarray[bool]

# TODO: Is it actually bool or is it uint8?

def mode_int64(
    values: np.ndarray,  # const int64_t[:] values
    dropna: bool,
) -> np.ndarray: ...  # np.ndarray[np.int64]
def value_count_int64(
    values: np.ndarray,  # const int64_t[:]
    dropna: bool,
) -> tuple[np.ndarray, np.ndarray,]: ...  # np.ndarray[np.int64]  # np.ndarray[np.int64]
def duplicated(
    values: np.ndarray,
    keep: Literal["last", "first", False] = ...,
) -> np.ndarray: ...  # np.ndarray[bool]
def mode(values: np.ndarray, dropna: bool) -> np.ndarray: ...
def value_count(
    values: np.ndarray,
    dropna: bool,
) -> tuple[np.ndarray, np.ndarray,]: ...  # np.ndarray[np.int64]

# arr and values should have same dtype
def ismember(
    arr: np.ndarray,
    values: np.ndarray,
) -> np.ndarray: ...  # np.ndarray[bool]
def object_hash(obj) -> int: ...
def objects_are_equal(a, b) -> bool: ...
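A small sketch of the table protocol declared above, using the private module directly (signatures per the .pxd; the -1 convention for lookup is an assumption based on the intp sentinel used throughout these files):

    import numpy as np
    from pandas._libs.hashtable import Int64HashTable

    table = Int64HashTable()
    table.set_item(42, 0)                 # map key 42 -> position 0
    assert table.get_item(42) == 0
    locs = table.lookup(np.array([42, 7], dtype=np.int64))
    # locs == [0, -1]: -1 flags a key that is absent from the table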
203
.venv/lib/python3.7/site-packages/pandas/_libs/hashtable.pyx
Normal file
@@ -0,0 +1,203 @@
cimport cython
from cpython.mem cimport (
    PyMem_Free,
    PyMem_Malloc,
)
from cpython.ref cimport (
    Py_INCREF,
    PyObject,
)
from libc.stdlib cimport (
    free,
    malloc,
)

import numpy as np

cimport numpy as cnp
from numpy cimport (
    float64_t,
    ndarray,
    uint8_t,
    uint32_t,
)
from numpy.math cimport NAN

cnp.import_array()


from pandas._libs cimport util
from pandas._libs.khash cimport (
    KHASH_TRACE_DOMAIN,
    are_equivalent_float32_t,
    are_equivalent_float64_t,
    are_equivalent_khcomplex64_t,
    are_equivalent_khcomplex128_t,
    kh_needed_n_buckets,
    kh_python_hash_equal,
    kh_python_hash_func,
    kh_str_t,
    khcomplex64_t,
    khcomplex128_t,
    khiter_t,
)
from pandas._libs.missing cimport checknull


def get_hashtable_trace_domain():
    return KHASH_TRACE_DOMAIN


def object_hash(obj):
    return kh_python_hash_func(obj)


def objects_are_equal(a, b):
    return kh_python_hash_equal(a, b)


cdef int64_t NPY_NAT = util.get_nat()
SIZE_HINT_LIMIT = (1 << 20) + 7


cdef Py_ssize_t _INIT_VEC_CAP = 128

include "hashtable_class_helper.pxi"
include "hashtable_func_helper.pxi"

cdef class Factorizer:
    cdef readonly:
        Py_ssize_t count

    def __cinit__(self, size_hint: int):
        self.count = 0

    def get_count(self) -> int:
        return self.count


cdef class ObjectFactorizer(Factorizer):
    cdef public:
        PyObjectHashTable table
        ObjectVector uniques

    def __cinit__(self, size_hint: int):
        self.table = PyObjectHashTable(size_hint)
        self.uniques = ObjectVector()

    def factorize(
        self, ndarray[object] values, sort=False, na_sentinel=-1, na_value=None
    ) -> np.ndarray:
        """

        Returns
        -------
        np.ndarray[np.intp]

        Examples
        --------
        Factorize values with nans replaced by na_sentinel

        >>> factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20)
        array([ 0, 1, 20])
        """
        cdef:
            ndarray[intp_t] labels

        if self.uniques.external_view_exists:
            uniques = ObjectVector()
            uniques.extend(self.uniques.to_array())
            self.uniques = uniques
        labels = self.table.get_labels(values, self.uniques,
                                       self.count, na_sentinel, na_value)
        mask = (labels == na_sentinel)
        # sort on
        if sort:
            sorter = self.uniques.to_array().argsort()
            reverse_indexer = np.empty(len(sorter), dtype=np.intp)
            reverse_indexer.put(sorter, np.arange(len(sorter)))
            labels = reverse_indexer.take(labels, mode='clip')
            labels[mask] = na_sentinel
        self.count = len(self.uniques)
        return labels


cdef class Int64Factorizer(Factorizer):
    cdef public:
        Int64HashTable table
        Int64Vector uniques

    def __cinit__(self, size_hint: int):
        self.table = Int64HashTable(size_hint)
        self.uniques = Int64Vector()

    def factorize(self, const int64_t[:] values, sort=False,
                  na_sentinel=-1, na_value=None) -> np.ndarray:
        """
        Returns
        -------
        ndarray[intp_t]

        Examples
        --------
        Factorize values with nans replaced by na_sentinel

        >>> factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20)
        array([ 0, 1, 20])
        """
        cdef:
            ndarray[intp_t] labels

        if self.uniques.external_view_exists:
            uniques = Int64Vector()
            uniques.extend(self.uniques.to_array())
            self.uniques = uniques
        labels = self.table.get_labels(values, self.uniques,
                                       self.count, na_sentinel,
                                       na_value=na_value)

        # sort on
        if sort:
            sorter = self.uniques.to_array().argsort()
            reverse_indexer = np.empty(len(sorter), dtype=np.intp)
            reverse_indexer.put(sorter, np.arange(len(sorter)))

            labels = reverse_indexer.take(labels)

        self.count = len(self.uniques)
        return labels


@cython.wraparound(False)
@cython.boundscheck(False)
def unique_label_indices(const int64_t[:] labels) -> ndarray:
    """
    Indices of the first occurrences of the unique labels
    *excluding* -1. equivalent to:
        np.unique(labels, return_index=True)[1]
    """
    cdef:
        int ret = 0
        Py_ssize_t i, n = len(labels)
        kh_int64_t *table = kh_init_int64()
        Int64Vector idx = Int64Vector()
        ndarray[int64_t, ndim=1] arr
        Int64VectorData *ud = idx.data

    kh_resize_int64(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT))

    with nogil:
        for i in range(n):
            kh_put_int64(table, labels[i], &ret)
            if ret != 0:
                if needs_resize(ud):
                    with gil:
                        idx.resize()
                append_data_int64(ud, i)

    kh_destroy_int64(table)

    arr = idx.to_array()
    arr = arr[np.asarray(labels)[arr].argsort()]

    return arr[1:] if arr.size != 0 and labels[arr[0]] == -1 else arr
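The ObjectFactorizer docstring example runs almost verbatim: first-seen uniques get consecutive codes and nulls are rewritten to na_sentinel (private API, sketch only):

    import numpy as np
    from pandas._libs.hashtable import ObjectFactorizer

    f = ObjectFactorizer(size_hint=4)
    labels = f.factorize(np.array([1, 2, np.nan], dtype="O"), na_sentinel=20)
    # labels -> [0, 1, 20]; f.uniques.to_array() -> [1, 2]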
File diff suppressed because it is too large
@@ -0,0 +1,472 @@
|
||||
"""
|
||||
Template for each `dtype` helper function for hashtable
|
||||
|
||||
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
|
||||
"""
|
||||
|
||||
{{py:
|
||||
|
||||
# name, dtype, ttype, c_type, to_c_type
|
||||
dtypes = [('Complex128', 'complex128', 'complex128',
|
||||
'khcomplex128_t', 'to_khcomplex128_t'),
|
||||
('Complex64', 'complex64', 'complex64',
|
||||
'khcomplex64_t', 'to_khcomplex64_t'),
|
||||
('Float64', 'float64', 'float64', 'float64_t', ''),
|
||||
('Float32', 'float32', 'float32', 'float32_t', ''),
|
||||
('UInt64', 'uint64', 'uint64', 'uint64_t', ''),
|
||||
('UInt32', 'uint32', 'uint32', 'uint32_t', ''),
|
||||
('UInt16', 'uint16', 'uint16', 'uint16_t', ''),
|
||||
('UInt8', 'uint8', 'uint8', 'uint8_t', ''),
|
||||
('Object', 'object', 'pymap', 'object', ''),
|
||||
('Int64', 'int64', 'int64', 'int64_t', ''),
|
||||
('Int32', 'int32', 'int32', 'int32_t', ''),
|
||||
('Int16', 'int16', 'int16', 'int16_t', ''),
|
||||
('Int8', 'int8', 'int8', 'int8_t', '')]
|
||||
|
||||
}}
|
||||
|
||||
{{for name, dtype, ttype, c_type, to_c_type in dtypes}}
|
||||
|
||||
|
||||
@cython.wraparound(False)
|
||||
@cython.boundscheck(False)
|
||||
{{if dtype == 'object'}}
|
||||
cdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna, navalue=np.NaN):
|
||||
{{else}}
|
||||
cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
|
||||
{{endif}}
|
||||
cdef:
|
||||
Py_ssize_t i = 0
|
||||
Py_ssize_t n = len(values)
|
||||
kh_{{ttype}}_t *table
|
||||
|
||||
# Don't use Py_ssize_t, since table.n_buckets is unsigned
|
||||
khiter_t k
|
||||
bint is_null
|
||||
|
||||
{{c_type}} val
|
||||
|
||||
int ret = 0
|
||||
|
||||
# we track the order in which keys are first seen (GH39009),
|
||||
# khash-map isn't insertion-ordered, thus:
|
||||
# table maps keys to counts
|
||||
# result_keys remembers the original order of keys
|
||||
|
||||
result_keys = {{name}}Vector()
|
||||
table = kh_init_{{ttype}}()
|
||||
|
||||
{{if dtype == 'object'}}
|
||||
kh_resize_{{ttype}}(table, n // 10)
|
||||
|
||||
for i in range(n):
|
||||
val = values[i]
|
||||
is_null = checknull(val)
|
||||
if not is_null or not dropna:
|
||||
# all nas become the same representative:
|
||||
if is_null:
|
||||
val = navalue
|
||||
k = kh_get_{{ttype}}(table, <PyObject*>val)
|
||||
if k != table.n_buckets:
|
||||
table.vals[k] += 1
|
||||
else:
|
||||
k = kh_put_{{ttype}}(table, <PyObject*>val, &ret)
|
||||
table.vals[k] = 1
|
||||
result_keys.append(val)
|
||||
{{else}}
|
||||
kh_resize_{{ttype}}(table, n)
|
||||
|
||||
for i in range(n):
|
||||
val = {{to_c_type}}(values[i])
|
||||
|
||||
if not is_nan_{{c_type}}(val) or not dropna:
|
||||
k = kh_get_{{ttype}}(table, val)
|
||||
if k != table.n_buckets:
|
||||
table.vals[k] += 1
|
||||
else:
|
||||
k = kh_put_{{ttype}}(table, val, &ret)
|
||||
table.vals[k] = 1
|
||||
result_keys.append(val)
|
||||
{{endif}}
|
||||
|
||||
# collect counts in the order corresponding to result_keys:
|
||||
cdef int64_t[:] result_counts = np.empty(table.size, dtype=np.int64)
|
||||
for i in range(table.size):
|
||||
{{if dtype == 'object'}}
|
||||
k = kh_get_{{ttype}}(table, result_keys.data[i])
|
||||
{{else}}
|
||||
k = kh_get_{{ttype}}(table, result_keys.data.data[i])
|
||||
{{endif}}
|
||||
result_counts[i] = table.vals[k]
|
||||
|
||||
kh_destroy_{{ttype}}(table)
|
||||
|
||||
return result_keys.to_array(), result_counts.base
|
||||
|
||||
|
||||
@cython.wraparound(False)
@cython.boundscheck(False)
{{if dtype == 'object'}}
cdef duplicated_{{dtype}}(ndarray[{{dtype}}] values, object keep='first'):
{{else}}
cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first'):
{{endif}}
    cdef:
        int ret = 0
        {{if dtype != 'object'}}
        {{c_type}} value
        {{endif}}
        Py_ssize_t i, n = len(values)
        khiter_t k
        kh_{{ttype}}_t *table = kh_init_{{ttype}}()
        ndarray[uint8_t, ndim=1, cast=True] out = np.empty(n, dtype='bool')

    kh_resize_{{ttype}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT))

    if keep not in ('last', 'first', False):
        raise ValueError('keep must be either "first", "last" or False')

    if keep == 'last':
        {{if dtype == 'object'}}
        for i in range(n - 1, -1, -1):
            # equivalent: range(n)[::-1], which cython doesn't like in nogil
            kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
            out[i] = ret == 0
        {{else}}
        with nogil:
            for i in range(n - 1, -1, -1):
                # equivalent: range(n)[::-1], which cython doesn't like in nogil
                value = {{to_c_type}}(values[i])
                kh_put_{{ttype}}(table, value, &ret)
                out[i] = ret == 0
        {{endif}}
    elif keep == 'first':
        {{if dtype == 'object'}}
        for i in range(n):
            kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
            out[i] = ret == 0
        {{else}}
        with nogil:
            for i in range(n):
                value = {{to_c_type}}(values[i])
                kh_put_{{ttype}}(table, value, &ret)
                out[i] = ret == 0
        {{endif}}
    else:
        {{if dtype == 'object'}}
        for i in range(n):
            value = values[i]
            k = kh_get_{{ttype}}(table, <PyObject*>value)
            if k != table.n_buckets:
                out[table.vals[k]] = 1
                out[i] = 1
            else:
                k = kh_put_{{ttype}}(table, <PyObject*>value, &ret)
                table.vals[k] = i
                out[i] = 0
        {{else}}
        with nogil:
            for i in range(n):
                value = {{to_c_type}}(values[i])
                k = kh_get_{{ttype}}(table, value)
                if k != table.n_buckets:
                    out[table.vals[k]] = 1
                    out[i] = 1
                else:
                    k = kh_put_{{ttype}}(table, value, &ret)
                    table.vals[k] = i
                    out[i] = 0
        {{endif}}
    kh_destroy_{{ttype}}(table)
    return out
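

# Editor's note (illustrative sketch): for the expanded int64 variant and
# values [1, 2, 1, 1]:
#   keep='first' marks every occurrence after the first -> [F, F, T, T]
#   keep='last'  marks every occurrence before the last -> [T, F, T, F]
#   keep=False   marks every member of a duplicated group -> [T, F, T, T]
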
# ----------------------------------------------------------------------
# Membership
# ----------------------------------------------------------------------


@cython.wraparound(False)
@cython.boundscheck(False)
{{if dtype == 'object'}}
cdef ismember_{{dtype}}(ndarray[{{c_type}}] arr, ndarray[{{c_type}}] values):
{{else}}
cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values):
{{endif}}
    """
    Return a boolean array denoting, for each element of `arr`, whether it
    is contained in `values` (an element-by-element membership test).

    Parameters
    ----------
    arr : {{dtype}} ndarray
    values : {{dtype}} ndarray

    Returns
    -------
    boolean ndarray of len(arr)
    """
    cdef:
        Py_ssize_t i, n
        khiter_t k
        int ret = 0
        ndarray[uint8_t] result
        {{c_type}} val
        kh_{{ttype}}_t *table = kh_init_{{ttype}}()

    # construct the table
    n = len(values)
    kh_resize_{{ttype}}(table, n)

    {{if dtype == 'object'}}
    for i in range(n):
        kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
    {{else}}
    with nogil:
        for i in range(n):
            val = {{to_c_type}}(values[i])
            kh_put_{{ttype}}(table, val, &ret)
    {{endif}}

    # test membership
    n = len(arr)
    result = np.empty(n, dtype=np.uint8)

    {{if dtype == 'object'}}
    for i in range(n):
        val = arr[i]
        k = kh_get_{{ttype}}(table, <PyObject*>val)
        result[i] = (k != table.n_buckets)
    {{else}}
    with nogil:
        for i in range(n):
            val = {{to_c_type}}(arr[i])
            k = kh_get_{{ttype}}(table, val)
            result[i] = (k != table.n_buckets)
    {{endif}}

    kh_destroy_{{ttype}}(table)
    return result.view(np.bool_)
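

# Editor's note (illustrative sketch): the expanded int64 variant builds a
# hash table from `values` and probes each element of `arr`, e.g.
#     ismember_int64(np.array([1, 5, 2]), np.array([2, 1]))
#     -> array([ True, False,  True])
# This is O(len(values) + len(arr)) rather than O(len(values) * len(arr)).
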

# ----------------------------------------------------------------------
# Mode Computations
# ----------------------------------------------------------------------


@cython.wraparound(False)
@cython.boundscheck(False)
{{if dtype == 'object'}}
cdef mode_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
{{else}}
cdef mode_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
{{endif}}
    cdef:
        {{if dtype == 'object'}}
        ndarray[{{dtype}}] keys
        ndarray[{{dtype}}] modes
        {{else}}
        {{dtype}}_t[:] keys
        ndarray[{{dtype}}_t] modes
        {{endif}}
        int64_t[:] counts
        int64_t count, max_count = -1
        Py_ssize_t k, j = 0

    keys, counts = value_count_{{dtype}}(values, dropna)

    {{if dtype == 'object'}}
    modes = np.empty(len(keys), dtype=np.object_)
    {{else}}
    modes = np.empty(len(keys), dtype=np.{{dtype}})
    {{endif}}

    {{if dtype != 'object'}}
    with nogil:
        for k in range(len(keys)):
            count = counts[k]
            if count == max_count:
                j += 1
            elif count > max_count:
                max_count = count
                j = 0
            else:
                continue

            modes[j] = keys[k]
    {{else}}
    for k in range(len(keys)):
        count = counts[k]
        if count == max_count:
            j += 1
        elif count > max_count:
            max_count = count
            j = 0
        else:
            continue

        modes[j] = keys[k]
    {{endif}}

    return modes[:j + 1]
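

# Editor's note (illustrative sketch): mode piggybacks on value_count and
# keeps every key whose count equals the running maximum, so multimodal
# input returns all modes; e.g. for int64 values [1, 1, 2, 2, 3] the
# expanded function returns array([1, 2]).
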

{{endfor}}


ctypedef fused htfunc_t:
    complex128_t
    complex64_t
    float64_t
    float32_t
    uint64_t
    uint32_t
    uint16_t
    uint8_t
    int64_t
    int32_t
    int16_t
    int8_t
    object
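

# Editor's note: `htfunc_t` is a Cython fused type, so the if/elif chains in
# the cpdef dispatchers below are resolved per specialization at compile
# time; each generated variant keeps only its own branch, and there is no
# runtime type switch on the hot path.
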
cpdef value_count(ndarray[htfunc_t] values, bint dropna):
    if htfunc_t is object:
        return value_count_object(values, dropna)

    elif htfunc_t is int8_t:
        return value_count_int8(values, dropna)
    elif htfunc_t is int16_t:
        return value_count_int16(values, dropna)
    elif htfunc_t is int32_t:
        return value_count_int32(values, dropna)
    elif htfunc_t is int64_t:
        return value_count_int64(values, dropna)

    elif htfunc_t is uint8_t:
        return value_count_uint8(values, dropna)
    elif htfunc_t is uint16_t:
        return value_count_uint16(values, dropna)
    elif htfunc_t is uint32_t:
        return value_count_uint32(values, dropna)
    elif htfunc_t is uint64_t:
        return value_count_uint64(values, dropna)

    elif htfunc_t is float64_t:
        return value_count_float64(values, dropna)
    elif htfunc_t is float32_t:
        return value_count_float32(values, dropna)

    elif htfunc_t is complex128_t:
        return value_count_complex128(values, dropna)
    elif htfunc_t is complex64_t:
        return value_count_complex64(values, dropna)

    else:
        raise TypeError(values.dtype)


cpdef duplicated(ndarray[htfunc_t] values, object keep="first"):
    if htfunc_t is object:
        return duplicated_object(values, keep)

    elif htfunc_t is int8_t:
        return duplicated_int8(values, keep)
    elif htfunc_t is int16_t:
        return duplicated_int16(values, keep)
    elif htfunc_t is int32_t:
        return duplicated_int32(values, keep)
    elif htfunc_t is int64_t:
        return duplicated_int64(values, keep)

    elif htfunc_t is uint8_t:
        return duplicated_uint8(values, keep)
    elif htfunc_t is uint16_t:
        return duplicated_uint16(values, keep)
    elif htfunc_t is uint32_t:
        return duplicated_uint32(values, keep)
    elif htfunc_t is uint64_t:
        return duplicated_uint64(values, keep)

    elif htfunc_t is float64_t:
        return duplicated_float64(values, keep)
    elif htfunc_t is float32_t:
        return duplicated_float32(values, keep)

    elif htfunc_t is complex128_t:
        return duplicated_complex128(values, keep)
    elif htfunc_t is complex64_t:
        return duplicated_complex64(values, keep)

    else:
        raise TypeError(values.dtype)


cpdef ismember(ndarray[htfunc_t] arr, ndarray[htfunc_t] values):
    if htfunc_t is object:
        return ismember_object(arr, values)

    elif htfunc_t is int8_t:
        return ismember_int8(arr, values)
    elif htfunc_t is int16_t:
        return ismember_int16(arr, values)
    elif htfunc_t is int32_t:
        return ismember_int32(arr, values)
    elif htfunc_t is int64_t:
        return ismember_int64(arr, values)

    elif htfunc_t is uint8_t:
        return ismember_uint8(arr, values)
    elif htfunc_t is uint16_t:
        return ismember_uint16(arr, values)
    elif htfunc_t is uint32_t:
        return ismember_uint32(arr, values)
    elif htfunc_t is uint64_t:
        return ismember_uint64(arr, values)

    elif htfunc_t is float64_t:
        return ismember_float64(arr, values)
    elif htfunc_t is float32_t:
        return ismember_float32(arr, values)

    elif htfunc_t is complex128_t:
        return ismember_complex128(arr, values)
    elif htfunc_t is complex64_t:
        return ismember_complex64(arr, values)

    else:
        raise TypeError(values.dtype)


cpdef mode(ndarray[htfunc_t] values, bint dropna):
    if htfunc_t is object:
        return mode_object(values, dropna)

    elif htfunc_t is int8_t:
        return mode_int8(values, dropna)
    elif htfunc_t is int16_t:
        return mode_int16(values, dropna)
    elif htfunc_t is int32_t:
        return mode_int32(values, dropna)
    elif htfunc_t is int64_t:
        return mode_int64(values, dropna)

    elif htfunc_t is uint8_t:
        return mode_uint8(values, dropna)
    elif htfunc_t is uint16_t:
        return mode_uint16(values, dropna)
    elif htfunc_t is uint32_t:
        return mode_uint32(values, dropna)
    elif htfunc_t is uint64_t:
        return mode_uint64(values, dropna)

    elif htfunc_t is float64_t:
        return mode_float64(values, dropna)
    elif htfunc_t is float32_t:
        return mode_float32(values, dropna)

    elif htfunc_t is complex128_t:
        return mode_complex128(values, dropna)
    elif htfunc_t is complex64_t:
        return mode_complex64(values, dropna)

    else:
        raise TypeError(values.dtype)
Binary file not shown.
70
.venv/lib/python3.7/site-packages/pandas/_libs/index.pyi
Normal file
@@ -0,0 +1,70 @@
import numpy as np

class IndexEngine:
    over_size_threshold: bool
    def __init__(self, vgetter, n: int): ...
    def __contains__(self, val: object) -> bool: ...
    # -> int | slice | np.ndarray[bool]
    def get_loc(self, val: object) -> int | slice | np.ndarray: ...
    def sizeof(self, deep: bool = False) -> int: ...
    def __sizeof__(self) -> int: ...
    @property
    def is_unique(self) -> bool: ...
    @property
    def is_monotonic_increasing(self) -> bool: ...
    @property
    def is_monotonic_decreasing(self) -> bool: ...
    def get_backfill_indexer(
        self, other: np.ndarray, limit: int | None = ...
    ) -> np.ndarray: ...
    def get_pad_indexer(
        self, other: np.ndarray, limit: int | None = ...
    ) -> np.ndarray: ...
    @property
    def is_mapping_populated(self) -> bool: ...
    def clear_mapping(self): ...
    def get_indexer(self, values: np.ndarray) -> np.ndarray: ...  # np.ndarray[np.intp]
    def get_indexer_non_unique(
        self,
        targets: np.ndarray,
    ) -> tuple[
        np.ndarray,  # np.ndarray[np.intp]
        np.ndarray,  # np.ndarray[np.intp]
    ]: ...

class Float64Engine(IndexEngine): ...
class Float32Engine(IndexEngine): ...
class Int64Engine(IndexEngine): ...
class Int32Engine(IndexEngine): ...
class Int16Engine(IndexEngine): ...
class Int8Engine(IndexEngine): ...
class UInt64Engine(IndexEngine): ...
class UInt32Engine(IndexEngine): ...
class UInt16Engine(IndexEngine): ...
class UInt8Engine(IndexEngine): ...
class ObjectEngine(IndexEngine): ...
class DatetimeEngine(Int64Engine): ...
class TimedeltaEngine(DatetimeEngine): ...
class PeriodEngine(Int64Engine): ...

class BaseMultiIndexCodesEngine:
    levels: list[np.ndarray]
    offsets: np.ndarray  # ndarray[uint64_t, ndim=1]
    def __init__(
        self,
        levels: list[np.ndarray],  # all entries hashable
        labels: list[np.ndarray],  # all entries integer-dtyped
        offsets: np.ndarray,  # np.ndarray[np.uint64, ndim=1]
    ): ...
    def get_indexer(
        self,
        target: np.ndarray,  # np.ndarray[object]
    ) -> np.ndarray: ...  # np.ndarray[np.intp]
    def _extract_level_codes(self, target: object): ...
    def get_indexer_with_fill(
        self,
        target: np.ndarray,  # np.ndarray[object] of tuples
        values: np.ndarray,  # np.ndarray[object] of tuples
        method: str,
        limit: int | None,
    ) -> np.ndarray: ...  # np.ndarray[np.int64]
747
.venv/lib/python3.7/site-packages/pandas/_libs/index.pyx
Normal file
@@ -0,0 +1,747 @@
import warnings

cimport cython

import numpy as np

cimport numpy as cnp
from numpy cimport (
    float32_t,
    float64_t,
    int8_t,
    int16_t,
    int32_t,
    int64_t,
    intp_t,
    ndarray,
    uint8_t,
    uint16_t,
    uint32_t,
    uint64_t,
)

cnp.import_array()


from pandas._libs cimport util
from pandas._libs.hashtable cimport HashTable
from pandas._libs.tslibs.nattype cimport c_NaT as NaT
from pandas._libs.tslibs.period cimport is_period_object
from pandas._libs.tslibs.timedeltas cimport _Timedelta
from pandas._libs.tslibs.timestamps cimport _Timestamp

from pandas._libs import (
    algos,
    hashtable as _hash,
)
from pandas._libs.missing import checknull


cdef inline bint is_definitely_invalid_key(object val):
    try:
        hash(val)
    except TypeError:
        return True
    return False


# Don't populate hash tables in monotonic indexes larger than this
_SIZE_CUTOFF = 1_000_000


@cython.freelist(32)
cdef class IndexEngine:

    cdef readonly:
        object vgetter
        HashTable mapping
        bint over_size_threshold

    cdef:
        bint unique, monotonic_inc, monotonic_dec
        bint need_monotonic_check, need_unique_check

    def __init__(self, vgetter, n):
        self.vgetter = vgetter

        self.over_size_threshold = n >= _SIZE_CUTOFF
        self.clear_mapping()

    def __contains__(self, val: object) -> bool:
        # We assume before we get here:
        #  - val is hashable
        self._ensure_mapping_populated()
        return val in self.mapping

    cpdef get_loc(self, object val):
        # -> Py_ssize_t | slice | ndarray[bool]
        cdef:
            Py_ssize_t loc

        if is_definitely_invalid_key(val):
            raise TypeError(f"'{val}' is an invalid key")

        if self.over_size_threshold and self.is_monotonic_increasing:
            if not self.is_unique:
                return self._get_loc_duplicates(val)
            values = self._get_index_values()

            self._check_type(val)
            try:
                loc = _bin_search(values, val)  # .searchsorted(val, side='left')
            except TypeError:
                # GH#35788 e.g. val=None with float64 values
                raise KeyError(val)
            if loc >= len(values):
                raise KeyError(val)
            if values[loc] != val:
                raise KeyError(val)
            return loc

        self._ensure_mapping_populated()
        if not self.unique:
            return self._get_loc_duplicates(val)

        self._check_type(val)

        try:
            return self.mapping.get_item(val)
        except (TypeError, ValueError, OverflowError):
            # GH#41775 OverflowError e.g. if we are uint64 and val is -1
            raise KeyError(val)

    cdef inline _get_loc_duplicates(self, object val):
        # -> Py_ssize_t | slice | ndarray[bool]
        cdef:
            Py_ssize_t diff

        if self.is_monotonic_increasing:
            values = self._get_index_values()
            try:
                left = values.searchsorted(val, side='left')
                right = values.searchsorted(val, side='right')
            except TypeError:
                # e.g. GH#29189 get_loc(None) with a Float64Index
                raise KeyError(val)

            diff = right - left
            if diff == 0:
                raise KeyError(val)
            elif diff == 1:
                return left
            else:
                return slice(left, right)

        return self._maybe_get_bool_indexer(val)

    cdef _maybe_get_bool_indexer(self, object val):
        # Returns ndarray[bool] or int
        cdef:
            ndarray[uint8_t, ndim=1, cast=True] indexer

        indexer = self._get_index_values() == val
        return self._unpack_bool_indexer(indexer, val)

    cdef _unpack_bool_indexer(self,
                              ndarray[uint8_t, ndim=1, cast=True] indexer,
                              object val):
        # Returns ndarray[bool] or int
        cdef:
            ndarray[intp_t, ndim=1] found
            int count

        found = np.where(indexer)[0]
        count = len(found)

        if count > 1:
            return indexer
        if count == 1:
            return int(found[0])

        raise KeyError(val)

    def sizeof(self, deep: bool = False) -> int:
        """ return the sizeof our mapping """
        if not self.is_mapping_populated:
            return 0
        return self.mapping.sizeof(deep=deep)

    def __sizeof__(self) -> int:
        return self.sizeof()

    @property
    def is_unique(self) -> bool:
        if self.need_unique_check:
            self._do_unique_check()

        return self.unique == 1

    cdef inline _do_unique_check(self):

        # this is de facto the same as ensuring the mapping is populated
        self._ensure_mapping_populated()

    @property
    def is_monotonic_increasing(self) -> bool:
        if self.need_monotonic_check:
            self._do_monotonic_check()

        return self.monotonic_inc == 1

    @property
    def is_monotonic_decreasing(self) -> bool:
        if self.need_monotonic_check:
            self._do_monotonic_check()

        return self.monotonic_dec == 1

    cdef inline _do_monotonic_check(self):
        cdef:
            bint is_unique
        try:
            values = self._get_index_values()
            self.monotonic_inc, self.monotonic_dec, is_unique = \
                self._call_monotonic(values)
        except TypeError:
            self.monotonic_inc = 0
            self.monotonic_dec = 0
            is_unique = 0

        self.need_monotonic_check = 0

        # we can only be sure of uniqueness if is_unique=1
        if is_unique:
            self.unique = 1
            self.need_unique_check = 0

    cdef _get_index_values(self):
        return self.vgetter()

    cdef _call_monotonic(self, values):
        return algos.is_monotonic(values, timelike=False)

    def get_backfill_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
        return algos.backfill(self._get_index_values(), other, limit=limit)

    def get_pad_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
        return algos.pad(self._get_index_values(), other, limit=limit)

    cdef _make_hash_table(self, Py_ssize_t n):
        raise NotImplementedError

    cdef _check_type(self, object val):
        hash(val)

    @property
    def is_mapping_populated(self) -> bool:
        return self.mapping is not None

    cdef inline _ensure_mapping_populated(self):
        # this populates the mapping
        # if it's not already populated;
        # also satisfies the need_unique_check

        if not self.is_mapping_populated:

            values = self._get_index_values()
            self.mapping = self._make_hash_table(len(values))
            self._call_map_locations(values)

            if len(self.mapping) == len(values):
                self.unique = 1
                self.need_unique_check = 0

    cdef void _call_map_locations(self, ndarray values):
        self.mapping.map_locations(values)

    def clear_mapping(self):
        self.mapping = None
        self.need_monotonic_check = 1
        self.need_unique_check = 1

        self.unique = 0
        self.monotonic_inc = 0
        self.monotonic_dec = 0

    def get_indexer(self, ndarray values) -> np.ndarray:
        self._ensure_mapping_populated()
        return self.mapping.lookup(values)

    def get_indexer_non_unique(self, ndarray targets):
        """
        Return an indexer suitable for taking from a non-unique index;
        return the labels in the same order as the target, plus a missing
        indexer into the targets (which corresponds to the -1 indices in
        the results).

        Returns
        -------
        indexer : np.ndarray[np.intp]
        missing : np.ndarray[np.intp]
        """
        cdef:
            ndarray values, x
            ndarray[intp_t] result, missing
            set stargets, remaining_stargets
            dict d = {}
            object val
            int count = 0, count_missing = 0
            Py_ssize_t i, j, n, n_t, n_alloc

        self._ensure_mapping_populated()
        values = np.array(self._get_index_values(), copy=False)
        stargets = set(targets)
        n = len(values)
        n_t = len(targets)
        if n > 10_000:
            n_alloc = 10_000
        else:
            n_alloc = n

        result = np.empty(n_alloc, dtype=np.intp)
        missing = np.empty(n_t, dtype=np.intp)

        # map each starget to its position in the index
        if stargets and len(stargets) < 5 and self.is_monotonic_increasing:
            # if there are few enough stargets and the index is monotonically
            # increasing, then use binary search for each starget
            remaining_stargets = set()
            for starget in stargets:
                try:
                    start = values.searchsorted(starget, side='left')
                    end = values.searchsorted(starget, side='right')
                except TypeError:  # e.g. if we tried to search for string in int array
                    remaining_stargets.add(starget)
                else:
                    if start != end:
                        d[starget] = list(range(start, end))

            stargets = remaining_stargets

        if stargets:
            # otherwise, map by iterating through all items in the index
            for i in range(n):
                val = values[i]
                if val in stargets:
                    if val not in d:
                        d[val] = []
                    d[val].append(i)

        for i in range(n_t):
            val = targets[i]

            # found
            if val in d:
                for j in d[val]:

                    # realloc if needed
                    if count >= n_alloc:
                        n_alloc += 10_000
                        result = np.resize(result, n_alloc)

                    result[count] = j
                    count += 1

            # value not found
            else:

                if count >= n_alloc:
                    n_alloc += 10_000
                    result = np.resize(result, n_alloc)
                result[count] = -1
                count += 1
                missing[count_missing] = i
                count_missing += 1

        return result[0:count], missing[0:count_missing]


cdef Py_ssize_t _bin_search(ndarray values, object val) except -1:
    cdef:
        Py_ssize_t mid = 0, lo = 0, hi = len(values) - 1
        object pval

    if hi == 0 or (hi > 0 and val > values[hi]):
        return len(values)

    while lo < hi:
        mid = (lo + hi) // 2
        pval = values[mid]
        if val < pval:
            hi = mid
        elif val > pval:
            lo = mid + 1
        else:
            while mid > 0 and val == values[mid - 1]:
                mid -= 1
            return mid

    if val <= values[mid]:
        return mid
    else:
        return mid + 1
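

# Editor's note (illustrative): _bin_search returns the leftmost insertion
# point, matching ndarray.searchsorted(val, side='left'); e.g. for values
# [1, 3, 3, 5]: val=3 -> 1 (first match), val=4 -> 3, val=6 -> 4 (past the
# end, which get_loc then reports as a KeyError).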


cdef class ObjectEngine(IndexEngine):
    """
    Index Engine for use with object-dtype Index, namely the base class Index.
    """
    cdef _make_hash_table(self, Py_ssize_t n):
        return _hash.PyObjectHashTable(n)


cdef class DatetimeEngine(Int64Engine):

    cdef str _get_box_dtype(self):
        return 'M8[ns]'

    cdef int64_t _unbox_scalar(self, scalar) except? -1:
        # NB: caller is responsible for ensuring tzawareness compat
        #  before we get here
        if not (isinstance(scalar, _Timestamp) or scalar is NaT):
            raise TypeError(scalar)
        return scalar.value

    def __contains__(self, val: object) -> bool:
        # We assume before we get here:
        #  - val is hashable
        cdef:
            int64_t loc, conv

        conv = self._unbox_scalar(val)
        if self.over_size_threshold and self.is_monotonic_increasing:
            if not self.is_unique:
                return self._get_loc_duplicates(conv)
            values = self._get_index_values()
            loc = values.searchsorted(conv, side='left')
            return values[loc] == conv

        self._ensure_mapping_populated()
        return conv in self.mapping

    cdef _get_index_values(self):
        return self.vgetter().view('i8')

    cdef _call_monotonic(self, values):
        return algos.is_monotonic(values, timelike=True)

    cpdef get_loc(self, object val):
        # NB: the caller is responsible for ensuring that we are called
        #  with either a Timestamp or NaT (Timedelta or NaT for TimedeltaEngine)

        cdef:
            int64_t loc
        if is_definitely_invalid_key(val):
            raise TypeError(f"'{val}' is an invalid key")

        try:
            conv = self._unbox_scalar(val)
        except TypeError:
            raise KeyError(val)

        # Welcome to the spaghetti factory
        if self.over_size_threshold and self.is_monotonic_increasing:
            if not self.is_unique:
                return self._get_loc_duplicates(conv)
            values = self._get_index_values()

            loc = values.searchsorted(conv, side='left')

            if loc == len(values) or values[loc] != conv:
                raise KeyError(val)
            return loc

        self._ensure_mapping_populated()
        if not self.unique:
            return self._get_loc_duplicates(conv)

        try:
            return self.mapping.get_item(conv)
        except KeyError:
            raise KeyError(val)

    def get_indexer_non_unique(self, ndarray targets):
        # we may get datetime64[ns] or timedelta64[ns], cast these to int64
        return super().get_indexer_non_unique(targets.view("i8"))

    def get_indexer(self, ndarray values) -> np.ndarray:
        self._ensure_mapping_populated()
        if values.dtype != self._get_box_dtype():
            return np.repeat(-1, len(values)).astype(np.intp)
        values = np.asarray(values).view('i8')
        return self.mapping.lookup(values)

    def get_pad_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
        if other.dtype != self._get_box_dtype():
            return np.repeat(-1, len(other)).astype(np.intp)
        other = np.asarray(other).view('i8')
        return algos.pad(self._get_index_values(), other, limit=limit)

    def get_backfill_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
        if other.dtype != self._get_box_dtype():
            return np.repeat(-1, len(other)).astype(np.intp)
        other = np.asarray(other).view('i8')
        return algos.backfill(self._get_index_values(), other, limit=limit)


cdef class TimedeltaEngine(DatetimeEngine):

    cdef str _get_box_dtype(self):
        return 'm8[ns]'

    cdef int64_t _unbox_scalar(self, scalar) except? -1:
        if not (isinstance(scalar, _Timedelta) or scalar is NaT):
            raise TypeError(scalar)
        return scalar.value


cdef class PeriodEngine(Int64Engine):

    cdef int64_t _unbox_scalar(self, scalar) except? -1:
        if scalar is NaT:
            return scalar.value
        if is_period_object(scalar):
            # NB: we assume that we have the correct freq here.
            return scalar.ordinal
        raise TypeError(scalar)

    cpdef get_loc(self, object val):
        # NB: the caller is responsible for ensuring that we are called
        #  with either a Period or NaT
        cdef:
            int64_t conv

        try:
            conv = self._unbox_scalar(val)
        except TypeError:
            raise KeyError(val)

        return Int64Engine.get_loc(self, conv)

    cdef _get_index_values(self):
        return super(PeriodEngine, self).vgetter().view("i8")

    cdef _call_monotonic(self, values):
        return algos.is_monotonic(values, timelike=True)


cdef class BaseMultiIndexCodesEngine:
    """
    Base class for MultiIndexUIntEngine and MultiIndexPyIntEngine, which
    represent each label in a MultiIndex as an integer, by juxtaposing the bits
    encoding each level, with appropriate offsets.

    For instance: if 3 levels have respectively 3, 6 and 1 possible values,
    then their labels can be represented using respectively 2, 3 and 1 bits,
    as follows:
     _ _ _ _____ _ __ __ __
    |0|0|0| ... |0| 0|a1|a0| -> offset 0 (first level)
     — — — ————— — —— —— ——
    |0|0|0| ... |0|b2|b1|b0| -> offset 2 (bits required for first level)
     — — — ————— — —— —— ——
    |0|0|0| ... |0| 0| 0|c0| -> offset 5 (bits required for first two levels)
     ‾ ‾ ‾ ‾‾‾‾‾ ‾ ‾‾ ‾‾ ‾‾
    and the resulting unsigned integer representation will be:
     _ _ _ _____ _ __ __ __ __ __ __
    |0|0|0| ... |0|c0|b2|b1|b0|a1|a0|
     ‾ ‾ ‾ ‾‾‾‾‾ ‾ ‾‾ ‾‾ ‾‾ ‾‾ ‾‾ ‾‾

    Offsets are calculated at initialization, labels are transformed by method
    _codes_to_ints.

    Keys are located by first locating each component against the respective
    level, then locating (the integer representation of) codes.
    """
    def __init__(self, object levels, object labels,
                 ndarray[uint64_t, ndim=1] offsets):
        """
        Parameters
        ----------
        levels : list-like of numpy arrays
            Levels of the MultiIndex.
        labels : list-like of numpy arrays of integer dtype
            Labels of the MultiIndex.
        offsets : numpy array of uint64 dtype
            Pre-calculated offsets, one for each level of the index.
        """
        self.levels = levels
        self.offsets = offsets

        # Transform labels in a single array, and add 1 so that we are working
        # with positive integers (-1 for NaN becomes 0):
        codes = (np.array(labels, dtype='int64').T + 1).astype('uint64',
                                                               copy=False)

        # Map each codes combination in the index to an integer unambiguously
        # (no collisions possible), based on the "offsets", which describe the
        # number of bits to switch labels for each level:
        lab_ints = self._codes_to_ints(codes)

        # Initialize underlying index (e.g. libindex.UInt64Engine) with
        # integers representing labels: we will use its get_loc and get_indexer
        self._base.__init__(self, lambda: lab_ints, len(lab_ints))

    def _codes_to_ints(self, ndarray[uint64_t] codes) -> np.ndarray:
        raise NotImplementedError("Implemented by subclass")
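
    # Editor's note (worked example of the packing described in the class
    # docstring above): with level sizes 3, 6 and 1, the per-level label
    # counts including the NaN sentinel are 4, 7 and 2, needing 2, 3 and 1
    # bits, so offsets = [0, 2, 5] and a shifted codes row (a, b, c) packs to
    #     a << 0 | b << 2 | c << 5
    # e.g. (2, 5, 1) -> 2 + 20 + 32 = 54, with no collisions possible.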

    def _extract_level_codes(self, ndarray[object] target) -> np.ndarray:
        """
        Map the requested list of (tuple) keys to their integer representations
        for searching in the underlying integer index.

        Parameters
        ----------
        target : ndarray[object]
            Each key is a tuple, with a label for each level of the index.

        Returns
        -------
        int_keys : 1-dimensional array of dtype uint64 or object
            Integers representing one combination each
        """
        level_codes = [lev.get_indexer(codes) + 1 for lev, codes
                       in zip(self.levels, zip(*target))]
        return self._codes_to_ints(np.array(level_codes, dtype='uint64').T)

    def get_indexer(self, ndarray[object] target) -> np.ndarray:
        """
        Returns an array giving the positions of each value of `target` in
        `self.values`, where -1 represents a value in `target` which does not
        appear in `self.values`

        Parameters
        ----------
        target : ndarray[object]
            Each key is a tuple, with a label for each level of the index

        Returns
        -------
        np.ndarray[intp_t, ndim=1] of the indexer of `target` into
        `self.values`
        """
        lab_ints = self._extract_level_codes(target)
        return self._base.get_indexer(self, lab_ints)
    def get_indexer_with_fill(self, ndarray target, ndarray values,
                              str method, object limit) -> np.ndarray:
        """
        Returns an array giving the positions of each value of `target` in
        `values`, where -1 represents a value in `target` which does not
        appear in `values`

        If `method` is "backfill" then the position for a value in `target`
        which does not appear in `values` is that of the next greater value
        in `values` (if one exists), and -1 if there is no such value.

        Similarly, if the method is "pad" then the position for a value in
        `target` which does not appear in `values` is that of the next smaller
        value in `values` (if one exists), and -1 if there is no such value.

        Parameters
        ----------
        target : ndarray[object] of tuples
            need not be sorted, but all must have the same length, which must
            be the same as the length of all tuples in `values`
        values : ndarray[object] of tuples
            must be sorted and all have the same length.  Should be the set of
            the MultiIndex's values.
        method : string
            "backfill" or "pad"
        limit : int or None
            if provided, limit the number of fills to this value

        Returns
        -------
        np.ndarray[intp_t, ndim=1] of the indexer of `target` into `values`,
        filled with the `method` (and optionally `limit`) specified
        """
        assert method in ("backfill", "pad")
        cdef:
            int64_t i, j, next_code
            int64_t num_values, num_target_values
            ndarray[int64_t, ndim=1] target_order
            ndarray[object, ndim=1] target_values
            ndarray[int64_t, ndim=1] new_codes, new_target_codes
            ndarray[intp_t, ndim=1] sorted_indexer

        target_order = np.argsort(target).astype('int64')
        target_values = target[target_order]
        num_values, num_target_values = len(values), len(target_values)
        new_codes, new_target_codes = (
            np.empty((num_values,)).astype('int64'),
            np.empty((num_target_values,)).astype('int64'),
        )

        # `values` and `target_values` are both sorted, so we walk through them
        # and memoize the (ordered) set of indices in the (implicit) merged-and
        # sorted list of the two which belong to each of them
        # the effect of this is to create a factorization for the (sorted)
        # merger of the index values, where `new_codes` and `new_target_codes`
        # are the subset of the factors which appear in `values` and `target`,
        # respectively
        i, j, next_code = 0, 0, 0
        while i < num_values and j < num_target_values:
            val, target_val = values[i], target_values[j]
            if val <= target_val:
                new_codes[i] = next_code
                i += 1
            if target_val <= val:
                new_target_codes[j] = next_code
                j += 1
            next_code += 1

        # at this point, at least one should have reached the end
        # the remaining values of the other should be added to the end
        assert i == num_values or j == num_target_values
        while i < num_values:
            new_codes[i] = next_code
            i += 1
            next_code += 1
        while j < num_target_values:
            new_target_codes[j] = next_code
            j += 1
            next_code += 1

        # get the indexer, and undo the sorting of `target.values`
        algo = algos.backfill if method == "backfill" else algos.pad
        sorted_indexer = algo(new_codes, new_target_codes, limit=limit)
        return sorted_indexer[np.argsort(target_order)]
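
    # Editor's note (illustrative): the merge above factorizes both sorted
    # tuple sequences onto one shared code axis so the generic pad/backfill
    # kernels can be reused. E.g. values = [(1,), (3,)] and target = [(2,)]
    # produce new_codes = [0, 2] and new_target_codes = [1]; "pad" then maps
    # (2,) to position 0 (the next smaller value, (1,)) and "backfill" to
    # position 1 ((3,)).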

    def get_loc(self, object key):
        if is_definitely_invalid_key(key):
            raise TypeError(f"'{key}' is an invalid key")
        if not isinstance(key, tuple):
            raise KeyError(key)
        try:
            indices = [0 if checknull(v) else lev.get_loc(v) + 1
                       for lev, v in zip(self.levels, key)]
        except KeyError:
            raise KeyError(key)

        # Transform indices into single integer:
        lab_int = self._codes_to_ints(np.array(indices, dtype='uint64'))

        return self._base.get_loc(self, lab_int)

    def get_indexer_non_unique(self, ndarray[object] target):

        lab_ints = self._extract_level_codes(target)
        indexer = self._base.get_indexer_non_unique(self, lab_ints)

        return indexer

    def __contains__(self, val: object) -> bool:
        # We assume before we get here:
        #  - val is hashable
        # Default __contains__ looks in the underlying mapping, which in this
        #  case only contains integer representations.
        try:
            self.get_loc(val)
            return True
        except (KeyError, TypeError, ValueError):
            return False


# Generated from template.
include "index_class_helper.pxi"
@@ -0,0 +1,80 @@
"""
Template for functions of IndexEngine subclasses.

WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""

# ----------------------------------------------------------------------
# IndexEngine Subclass Methods
# ----------------------------------------------------------------------

{{py:

# name, dtype
dtypes = [('Float64', 'float64'),
          ('Float32', 'float32'),
          ('Int64', 'int64'),
          ('Int32', 'int32'),
          ('Int16', 'int16'),
          ('Int8', 'int8'),
          ('UInt64', 'uint64'),
          ('UInt32', 'uint32'),
          ('UInt16', 'uint16'),
          ('UInt8', 'uint8'),
          ]
}}

{{for name, dtype in dtypes}}


cdef class {{name}}Engine(IndexEngine):
    # constructor-caller is responsible for ensuring that vgetter()
    #  returns an ndarray with dtype {{dtype}}_t

    cdef _make_hash_table(self, Py_ssize_t n):
        return _hash.{{name}}HashTable(n)

    cdef _check_type(self, object val):
        {{if name not in {'Float64', 'Float32'} }}
        if not util.is_integer_object(val):
            raise KeyError(val)
        {{else}}
        if util.is_bool_object(val):
            # avoid casting to True -> 1.0
            raise KeyError(val)
        {{endif}}

    cdef void _call_map_locations(self, ndarray[{{dtype}}_t] values):
        self.mapping.map_locations(values)

    cdef _maybe_get_bool_indexer(self, object val):
        # Returns ndarray[bool] or int
        cdef:
            ndarray[uint8_t, ndim=1, cast=True] indexer
            ndarray[intp_t, ndim=1] found
            ndarray[{{dtype}}_t, ndim=1] values
            int count = 0

        self._check_type(val)

        values = self._get_index_values()
        try:
            with warnings.catch_warnings():
                # e.g. if values is float64 and `val` is a str, suppress warning
                warnings.filterwarnings("ignore", category=FutureWarning)
                {{if name in {'Float64', 'Float32'} }}
                if util.is_nan(val):
                    indexer = np.isnan(values)
                else:
                    indexer = values == val
                {{else}}
                indexer = values == val
                {{endif}}
        except TypeError:
            # if the equality above returns a bool, cython will raise TypeError
            # when trying to cast it to ndarray
            raise KeyError(val)

        return self._unpack_bool_indexer(indexer, val)

{{endfor}}
Binary file not shown.
25
.venv/lib/python3.7/site-packages/pandas/_libs/indexing.pyx
Normal file
@@ -0,0 +1,25 @@
cdef class NDFrameIndexerBase:
    """
    A base class for _NDFrameIndexer for fast instantiation and attribute access.
    """
    cdef public:
        str name
        object obj, _ndim

    def __init__(self, name: str, obj):
        self.obj = obj
        self.name = name
        self._ndim = None

    @property
    def ndim(self) -> int:
        # Delay `ndim` instantiation until required as reading it
        # from `obj` isn't entirely cheap.
        ndim = self._ndim
        if ndim is None:
            ndim = self._ndim = self.obj.ndim
            if ndim > 2:
                raise ValueError(
                    "NDFrameIndexer does not support NDFrame objects with ndim > 2"
                )
        return ndim
Binary file not shown.
71
.venv/lib/python3.7/site-packages/pandas/_libs/internals.pyi
Normal file
@@ -0,0 +1,71 @@
from typing import (
    Iterator,
    Sequence,
    overload,
)

import numpy as np

from pandas._typing import (
    ArrayLike,
    T,
)

from pandas import Index
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.internals.blocks import Block as B

def slice_len(slc: slice, objlen: int = ...) -> int: ...
def get_blkno_indexers(
    blknos: np.ndarray,  # int64_t[:]
    group: bool = ...,
) -> list[tuple[int, slice | np.ndarray]]: ...
def get_blkno_placements(
    blknos: np.ndarray,
    group: bool = ...,
) -> Iterator[tuple[int, BlockPlacement]]: ...

class BlockPlacement:
    def __init__(self, val: int | slice | np.ndarray): ...
    @property
    def indexer(self) -> np.ndarray | slice: ...
    @property
    def as_array(self) -> np.ndarray: ...
    @property
    def is_slice_like(self) -> bool: ...
    @overload
    def __getitem__(self, loc: slice | Sequence[int]) -> BlockPlacement: ...
    @overload
    def __getitem__(self, loc: int) -> int: ...
    def __iter__(self) -> Iterator[int]: ...
    def __len__(self) -> int: ...
    def delete(self, loc) -> BlockPlacement: ...
    def append(self, others: list[BlockPlacement]) -> BlockPlacement: ...

class SharedBlock:
    _mgr_locs: BlockPlacement
    ndim: int
    values: ArrayLike
    def __init__(self, values: ArrayLike, placement: BlockPlacement, ndim: int): ...

class NumpyBlock(SharedBlock):
    values: np.ndarray
    def getitem_block_index(self: T, slicer: slice) -> T: ...

class NDArrayBackedBlock(SharedBlock):
    values: NDArrayBackedExtensionArray
    def getitem_block_index(self: T, slicer: slice) -> T: ...

class Block(SharedBlock): ...

class BlockManager:
    blocks: tuple[B, ...]
    axes: list[Index]
    _known_consolidated: bool
    _is_consolidated: bool
    _blknos: np.ndarray
    _blklocs: np.ndarray
    def __init__(
        self, blocks: tuple[B, ...], axes: list[Index], verify_integrity=True
    ): ...
    def get_slice(self: T, slobj: slice, axis: int = ...) -> T: ...
670
.venv/lib/python3.7/site-packages/pandas/_libs/internals.pyx
Normal file
@@ -0,0 +1,670 @@
from collections import defaultdict

import cython
from cython import Py_ssize_t

from cpython.slice cimport PySlice_GetIndicesEx


cdef extern from "Python.h":
    Py_ssize_t PY_SSIZE_T_MAX

import numpy as np

cimport numpy as cnp
from numpy cimport (
    NPY_INTP,
    int64_t,
    intp_t,
    ndarray,
)

cnp.import_array()

from pandas._libs.algos import ensure_int64

from pandas._libs.arrays cimport NDArrayBacked
from pandas._libs.util cimport is_integer_object


@cython.final
@cython.freelist(32)
cdef class BlockPlacement:
    # __slots__ = '_as_slice', '_as_array', '_len'
    cdef:
        slice _as_slice
        ndarray _as_array  # Note: this still allows `None`; will be intp_t
        bint _has_slice, _has_array, _is_known_slice_like

    def __cinit__(self, val):
        cdef:
            slice slc

        self._as_slice = None
        self._as_array = None
        self._has_slice = False
        self._has_array = False

        if is_integer_object(val):
            slc = slice(val, val + 1, 1)
            self._as_slice = slc
            self._has_slice = True
        elif isinstance(val, slice):
            slc = slice_canonize(val)

            if slc.start != slc.stop:
                self._as_slice = slc
                self._has_slice = True
            else:
                arr = np.empty(0, dtype=np.intp)
                self._as_array = arr
                self._has_array = True
        else:
            # Cython memoryview interface requires ndarray to be writeable.
            arr = np.require(val, dtype=np.intp, requirements='W')
            assert arr.ndim == 1, arr.shape
            self._as_array = arr
            self._has_array = True

    def __str__(self) -> str:
        cdef:
            slice s = self._ensure_has_slice()

        if s is not None:
            v = self._as_slice
        else:
            v = self._as_array

        return f"{type(self).__name__}({v})"

    def __repr__(self) -> str:
        return str(self)

    def __len__(self) -> int:
        cdef:
            slice s = self._ensure_has_slice()

        if s is not None:
            return slice_len(s)
        else:
            return len(self._as_array)

    def __iter__(self):
        cdef:
            slice s = self._ensure_has_slice()
            Py_ssize_t start, stop, step, _

        if s is not None:
            start, stop, step, _ = slice_get_indices_ex(s)
            return iter(range(start, stop, step))
        else:
            return iter(self._as_array)

    @property
    def as_slice(self) -> slice:
        cdef:
            slice s = self._ensure_has_slice()

        if s is not None:
            return s
        else:
            raise TypeError("Not slice-like")

    @property
    def indexer(self):
        cdef:
            slice s = self._ensure_has_slice()

        if s is not None:
            return s
        else:
            return self._as_array

    @property
    def as_array(self) -> np.ndarray:
        cdef:
            Py_ssize_t start, stop, end, _

        if not self._has_array:
            start, stop, step, _ = slice_get_indices_ex(self._as_slice)
            # NOTE: this is the C-optimized equivalent of
            #  `np.arange(start, stop, step, dtype=np.intp)`
            self._as_array = cnp.PyArray_Arange(start, stop, step, NPY_INTP)
            self._has_array = True

        return self._as_array

    @property
    def is_slice_like(self) -> bool:
        cdef:
            slice s = self._ensure_has_slice()

        return s is not None

    def __getitem__(self, loc):
        cdef:
            slice s = self._ensure_has_slice()

        if s is not None:
            val = slice_getitem(s, loc)
        else:
            val = self._as_array[loc]

        if not isinstance(val, slice) and val.ndim == 0:
            return val

        return BlockPlacement(val)

    def delete(self, loc) -> BlockPlacement:
        return BlockPlacement(np.delete(self.as_array, loc, axis=0))

    def append(self, others) -> BlockPlacement:
        if not len(others):
            return self

        return BlockPlacement(
            np.concatenate([self.as_array] + [o.as_array for o in others])
        )

    cdef BlockPlacement iadd(self, other):
        cdef:
            slice s = self._ensure_has_slice()
            Py_ssize_t other_int, start, stop, step, l

        if is_integer_object(other) and s is not None:
            other_int = <Py_ssize_t>other

            if other_int == 0:
                # BlockPlacement is treated as immutable
                return self

            start, stop, step, l = slice_get_indices_ex(s)
            start += other_int
            stop += other_int

            if (step > 0 and start < 0) or (step < 0 and stop < step):
                raise ValueError("iadd causes length change")

            if stop < 0:
                val = slice(start, None, step)
            else:
                val = slice(start, stop, step)

            return BlockPlacement(val)
        else:
            newarr = self.as_array + other
            if (newarr < 0).any():
                raise ValueError("iadd causes length change")

            val = newarr
            return BlockPlacement(val)

    def add(self, other) -> BlockPlacement:
        # We can get here with int or ndarray
        return self.iadd(other)

    cdef slice _ensure_has_slice(self):
        if not self._has_slice:
            self._as_slice = indexer_as_slice(self._as_array)
            self._has_slice = True

        return self._as_slice


cdef slice slice_canonize(slice s):
    """
    Convert slice to canonical bounded form.
    """
    cdef:
        Py_ssize_t start = 0, stop = 0, step = 1

    if s.step is None:
        step = 1
    else:
        step = <Py_ssize_t>s.step
        if step == 0:
            raise ValueError("slice step cannot be zero")

    if step > 0:
        if s.stop is None:
            raise ValueError("unbounded slice")

        stop = <Py_ssize_t>s.stop
        if s.start is None:
            start = 0
        else:
            start = <Py_ssize_t>s.start
            if start > stop:
                start = stop
    elif step < 0:
        if s.start is None:
            raise ValueError("unbounded slice")

        start = <Py_ssize_t>s.start
        if s.stop is None:
            stop = -1
        else:
            stop = <Py_ssize_t>s.stop
            if stop > start:
                stop = start

    if start < 0 or (stop < 0 and s.stop is not None and step > 0):
        raise ValueError("unbounded slice")

    if stop < 0:
        return slice(start, None, step)
    else:
        return slice(start, stop, step)
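

# Editor's note (illustrative): slice_canonize clamps a slice to an
# equivalent bounded form, e.g. slice(5, 2) -> slice(2, 2, 1) (empty) and
# slice(None, 3) -> slice(0, 3, 1); slices whose extent would depend on the
# container length, such as slice(None, None), raise
# ValueError("unbounded slice").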


cpdef Py_ssize_t slice_len(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX) except -1:
    """
    Get length of a bounded slice.

    The slice must not have any "open" bounds that would create dependency on
    container size, i.e.:
    - if ``s.step is None or s.step > 0``, ``s.stop`` is not ``None``
    - if ``s.step < 0``, ``s.start`` is not ``None``

    Otherwise, the result is unreliable.
    """
    cdef:
        Py_ssize_t start, stop, step, length

    if slc is None:
        raise TypeError("slc must be slice")

    PySlice_GetIndicesEx(slc, objlen, &start, &stop, &step, &length)

    return length


cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX):
    """
    Get (start, stop, step, length) tuple for a slice.

    If `objlen` is not specified, slice must be bounded, otherwise the result
    will be wrong.
    """
    cdef:
        Py_ssize_t start, stop, step, length

    if slc is None:
        raise TypeError("slc should be a slice")

    PySlice_GetIndicesEx(slc, objlen, &start, &stop, &step, &length)

    return start, stop, step, length


cdef slice_getitem(slice slc, ind):
    cdef:
        Py_ssize_t s_start, s_stop, s_step, s_len
        Py_ssize_t ind_start, ind_stop, ind_step, ind_len

    s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc)

    if isinstance(ind, slice):
        ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind, s_len)

        if ind_step > 0 and ind_len == s_len:
            # short-cut for no-op slice
            if ind_len == s_len:
                return slc

        if ind_step < 0:
            s_start = s_stop - s_step
            ind_step = -ind_step

        s_step *= ind_step
        s_stop = s_start + ind_stop * s_step
        s_start = s_start + ind_start * s_step

        if s_step < 0 and s_stop < 0:
            return slice(s_start, None, s_step)
        else:
            return slice(s_start, s_stop, s_step)

    else:
        # NOTE:
        # this is the C-optimized equivalent of
        # `np.arange(s_start, s_stop, s_step, dtype=np.intp)[ind]`
        return cnp.PyArray_Arange(s_start, s_stop, s_step, NPY_INTP)[ind]


@cython.boundscheck(False)
@cython.wraparound(False)
cdef slice indexer_as_slice(intp_t[:] vals):
    cdef:
        Py_ssize_t i, n, start, stop
        int64_t d

    if vals is None:
        raise TypeError("vals must be ndarray")

    n = vals.shape[0]

    if n == 0 or vals[0] < 0:
        return None

    if n == 1:
        return slice(vals[0], vals[0] + 1, 1)

    if vals[1] < 0:
        return None

    # n > 2
    d = vals[1] - vals[0]

    if d == 0:
        return None

    for i in range(2, n):
        if vals[i] < 0 or vals[i] - vals[i - 1] != d:
            return None

    start = vals[0]
    stop = start + n * d
    if stop < 0 and d < 0:
        return slice(start, None, d)
    else:
        return slice(start, stop, d)
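

# Editor's note (illustrative): indexer_as_slice detects evenly spaced,
# non-negative indexers, e.g. [2, 4, 6] -> slice(2, 8, 2) and
# [5, 3, 1] -> slice(5, None, -2); anything irregular (or containing a
# negative position) returns None, so the caller keeps the array form.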


@cython.boundscheck(False)
@cython.wraparound(False)
def get_blkno_indexers(
    int64_t[:] blknos, bint group=True
) -> list[tuple[int, slice | np.ndarray]]:
    """
    Enumerate contiguous runs of integers in ndarray.

    Iterate over elements of `blknos` yielding ``(blkno, slice(start, stop))``
    pairs for each contiguous run found.

    If `group` is True and there is more than one run for a certain blkno,
    yield a ``(blkno, array)`` pair instead, with an array containing the
    positions of all elements equal to blkno.

    Returns
    -------
    list[tuple[int, slice | np.ndarray]]
    """
    # There's blkno in this function's name because it's used in block &
    # blockno handling.
    cdef:
        int64_t cur_blkno
        Py_ssize_t i, start, stop, n, diff, tot_len
        object blkno
        object group_dict = defaultdict(list)

    n = blknos.shape[0]
    result = list()
    start = 0
    cur_blkno = blknos[start]

    if n == 0:
        pass
    elif group is False:
        for i in range(1, n):
            if blknos[i] != cur_blkno:
                result.append((cur_blkno, slice(start, i)))

                start = i
                cur_blkno = blknos[i]

        result.append((cur_blkno, slice(start, n)))
    else:
        for i in range(1, n):
            if blknos[i] != cur_blkno:
                group_dict[cur_blkno].append((start, i))

                start = i
                cur_blkno = blknos[i]

        group_dict[cur_blkno].append((start, n))

        for blkno, slices in group_dict.items():
            if len(slices) == 1:
                result.append((blkno, slice(slices[0][0], slices[0][1])))
            else:
                tot_len = sum(stop - start for start, stop in slices)
                arr = np.empty(tot_len, dtype=np.int64)

                i = 0
                for start, stop in slices:
                    for diff in range(start, stop):
                        arr[i] = diff
                        i += 1

                result.append((blkno, arr))

    return result
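

# Editor's note (illustrative): for blknos = [0, 0, 1, 1, 0] with group=True
# the result is [(0, array([0, 1, 4])), (1, slice(2, 4))] -- the two runs of
# blkno 0 are merged into one positional array, while blkno 1 has a single
# contiguous run and stays a slice.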


def get_blkno_placements(blknos, group: bool = True):
    """
    Parameters
    ----------
    blknos : np.ndarray[int64]
    group : bool, default True

    Returns
    -------
    iterator
        yield (blkno, BlockPlacement)
    """
    blknos = ensure_int64(blknos)

    for blkno, indexer in get_blkno_indexers(blknos, group):
        yield blkno, BlockPlacement(indexer)
|
||||
@cython.freelist(64)
|
||||
cdef class SharedBlock:
|
||||
"""
|
||||
Defining __init__ in a cython class significantly improves performance.
|
||||
"""
|
||||
cdef:
|
||||
public BlockPlacement _mgr_locs
|
||||
readonly int ndim
|
||||
|
||||
def __cinit__(self, values, placement: BlockPlacement, ndim: int):
|
||||
"""
|
||||
Parameters
|
||||
----------
|
||||
values : np.ndarray or ExtensionArray
|
||||
We assume maybe_coerce_values has already been called.
|
||||
placement : BlockPlacement
|
||||
ndim : int
|
||||
1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame
|
||||
"""
|
||||
self._mgr_locs = placement
|
||||
self.ndim = ndim
|
||||
|
||||
cpdef __reduce__(self):
|
||||
# We have to do some gymnastics b/c "ndim" is keyword-only
|
||||
from functools import partial
|
||||
|
||||
from pandas.core.internals.blocks import new_block
|
||||
|
||||
args = (self.values, self.mgr_locs.indexer)
|
||||
func = partial(new_block, ndim=self.ndim)
|
||||
return func, args
|
||||
|
||||
cpdef __setstate__(self, state):
|
||||
from pandas.core.construction import extract_array
|
||||
|
||||
self.mgr_locs = BlockPlacement(state[0])
|
||||
self.values = extract_array(state[1], extract_numpy=True)
|
||||
if len(state) > 2:
|
||||
# we stored ndim
|
||||
self.ndim = state[2]
|
||||
else:
|
||||
# older pickle
|
||||
from pandas.core.internals.api import maybe_infer_ndim
|
||||
|
||||
ndim = maybe_infer_ndim(self.values, self.mgr_locs)
|
||||
self.ndim = ndim
|
||||
|
||||
|
||||
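# Sketch of the pickle round-trip implied by __reduce__ above (illustrative,
# not part of the upstream file): pickle stores a partial of new_block plus
# (values, mgr_locs.indexer), so unpickling simply calls
#
#   func, args = blk.__reduce__()
#   restored = func(*args)          # new_block(values, indexer, ndim=ndim)
#
# which rebuilds an equivalent block without __init__ needing keyword "ndim".
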
cdef class NumpyBlock(SharedBlock):
    cdef:
        public ndarray values

    def __cinit__(self, ndarray values, BlockPlacement placement, int ndim):
        # set values here; the (implicit) call to SharedBlock.__cinit__ will
        # set placement and ndim
        self.values = values

    # @final  # not useful in cython, but we _would_ annotate with @final
    cpdef NumpyBlock getitem_block_index(self, slice slicer):
        """
        Perform __getitem__-like specialized to slicing along index.

        Assumes self.ndim == 2
        """
        new_values = self.values[..., slicer]
        return type(self)(new_values, self._mgr_locs, ndim=self.ndim)

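# Illustrative shape note (not part of the upstream file): block values are
# laid out as (n_block_columns, n_index_rows), so slicing "along index" is a
# slice on the last axis, e.g.
#
#   values.shape == (2, 5)
#   values[..., slice(0, 3)].shape == (2, 3)   # same columns, fewer rows
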
cdef class NDArrayBackedBlock(SharedBlock):
    """
    Block backed by NDArrayBackedExtensionArray
    """
    cdef public:
        NDArrayBacked values

    def __cinit__(self, NDArrayBacked values, BlockPlacement placement, int ndim):
        # set values here; the (implicit) call to SharedBlock.__cinit__ will
        # set placement and ndim
        self.values = values

    # @final  # not useful in cython, but we _would_ annotate with @final
    cpdef NDArrayBackedBlock getitem_block_index(self, slice slicer):
        """
        Perform __getitem__-like specialized to slicing along index.

        Assumes self.ndim == 2
        """
        new_values = self.values[..., slicer]
        return type(self)(new_values, self._mgr_locs, ndim=self.ndim)

cdef class Block(SharedBlock):
    cdef:
        public object values

    def __cinit__(self, object values, BlockPlacement placement, int ndim):
        # set values here; the (implicit) call to SharedBlock.__cinit__ will
        # set placement and ndim
        self.values = values


@cython.freelist(64)
cdef class BlockManager:
    cdef:
        public tuple blocks
        public list axes
        public bint _known_consolidated, _is_consolidated
        public ndarray _blknos, _blklocs

    def __cinit__(self, blocks=None, axes=None, verify_integrity=True):
        # None as defaults for unpickling GH#42345
        if blocks is None:
            # This adds 1-2 microseconds to DataFrame(np.array([]))
            return

        if isinstance(blocks, list):
            # Backward compat for e.g. pyarrow
            blocks = tuple(blocks)

        self.blocks = blocks
        self.axes = axes.copy()  # copy to make sure we are not remotely-mutable

        # Populate _known_consolidated, blknos, and blklocs lazily
        self._known_consolidated = False
        self._is_consolidated = False
        # error: Incompatible types in assignment (expression has type "None",
        # variable has type "ndarray")
        self._blknos = None  # type: ignore[assignment]
        # error: Incompatible types in assignment (expression has type "None",
        # variable has type "ndarray")
        self._blklocs = None  # type: ignore[assignment]

    # -------------------------------------------------------------------
    # Pickle

    cpdef __reduce__(self):
        if len(self.axes) == 1:
            # SingleBlockManager, __init__ expects Block, axis
            args = (self.blocks[0], self.axes[0])
        else:
            args = (self.blocks, self.axes)
        return type(self), args

    cpdef __setstate__(self, state):
        from pandas.core.construction import extract_array
        from pandas.core.internals.blocks import (
            ensure_block_shape,
            new_block,
        )
        from pandas.core.internals.managers import ensure_index

        if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]:
            state = state[3]["0.14.1"]
            axes = [ensure_index(ax) for ax in state["axes"]]
            ndim = len(axes)

            for blk in state["blocks"]:
                vals = blk["values"]
                # older versions may hold e.g. DatetimeIndex instead of DTA
                vals = extract_array(vals, extract_numpy=True)
                blk["values"] = ensure_block_shape(vals, ndim=ndim)

            nbs = [
                new_block(blk["values"], blk["mgr_locs"], ndim=ndim)
                for blk in state["blocks"]
            ]
            blocks = tuple(nbs)
            self.blocks = blocks
            self.axes = axes

        else:
            raise NotImplementedError("pre-0.14.1 pickles are no longer supported")

        self._post_setstate()

    def _post_setstate(self) -> None:
        self._is_consolidated = False
        self._known_consolidated = False
        self._rebuild_blknos_and_blklocs()

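    # Shape of the legacy pickle payload consumed above (derived from the
    # code, shown here only for orientation):
    #
    #   state = (..., ..., ..., {"0.14.1": {"axes": [Index, ...],
    #                                       "blocks": [{"values": ...,
    #                                                   "mgr_locs": ...}, ...]}})
    #
    # i.e. everything needed to rebuild the blocks lives under the "0.14.1" key.
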
    # -------------------------------------------------------------------
    # Indexing

    cdef BlockManager _get_index_slice(self, slobj):
        cdef:
            SharedBlock blk, nb

        nbs = []
        for blk in self.blocks:
            nb = blk.getitem_block_index(slobj)
            nbs.append(nb)

        new_axes = [self.axes[0], self.axes[1]._getitem_slice(slobj)]
        return type(self)(tuple(nbs), new_axes, verify_integrity=False)

    def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager:

        if axis == 0:
            new_blocks = self._slice_take_blocks_ax0(slobj)
        elif axis == 1:
            return self._get_index_slice(slobj)
        else:
            raise IndexError("Requested axis not found in manager")

        new_axes = list(self.axes)
        new_axes[axis] = new_axes[axis]._getitem_slice(slobj)

        return type(self)(tuple(new_blocks), new_axes, verify_integrity=False)
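# Axis convention for get_slice above (noted for orientation; the manager is
# internal API): for a 2-D manager, axes[0] holds the column labels and
# axes[1] the row index, so
#
#   mgr.get_slice(slice(0, 2), axis=0)   # first two columns
#   mgr.get_slice(slice(0, 2), axis=1)   # first two rows (fast block path)
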
Binary file not shown.
557
.venv/lib/python3.7/site-packages/pandas/_libs/interval.pyx
Normal file
@@ -0,0 +1,557 @@
import numbers
from operator import (
    le,
    lt,
)

from cpython.datetime cimport (
    PyDateTime_IMPORT,
    PyDelta_Check,
)

PyDateTime_IMPORT

from cpython.object cimport (
    Py_EQ,
    Py_GE,
    Py_GT,
    Py_LE,
    Py_LT,
    Py_NE,
    PyObject_RichCompare,
)

import cython
from cython import Py_ssize_t
import numpy as np

cimport numpy as cnp
from numpy cimport (
    NPY_QUICKSORT,
    PyArray_ArgSort,
    PyArray_Take,
    float32_t,
    float64_t,
    int32_t,
    int64_t,
    ndarray,
    uint64_t,
)

cnp.import_array()


from pandas._libs cimport util
from pandas._libs.hashtable cimport Int64Vector
from pandas._libs.tslibs.timedeltas cimport _Timedelta
from pandas._libs.tslibs.timestamps cimport _Timestamp
from pandas._libs.tslibs.timezones cimport tz_compare
from pandas._libs.tslibs.util cimport (
    is_float_object,
    is_integer_object,
    is_timedelta64_object,
)

VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither'])


cdef class IntervalMixin:

    @property
    def closed_left(self):
        """
        Check if the interval is closed on the left side.

        For the meaning of `closed` and `open` see :class:`~pandas.Interval`.

        Returns
        -------
        bool
            True if the Interval is closed on the left-side.
        """
        return self.closed in ('left', 'both')

    @property
    def closed_right(self):
        """
        Check if the interval is closed on the right side.

        For the meaning of `closed` and `open` see :class:`~pandas.Interval`.

        Returns
        -------
        bool
            True if the Interval is closed on the right-side.
        """
        return self.closed in ('right', 'both')

    @property
    def open_left(self):
        """
        Check if the interval is open on the left side.

        For the meaning of `closed` and `open` see :class:`~pandas.Interval`.

        Returns
        -------
        bool
            True if the Interval is open on the left-side.
        """
        return not self.closed_left

    @property
    def open_right(self):
        """
        Check if the interval is open on the right side.

        For the meaning of `closed` and `open` see :class:`~pandas.Interval`.

        Returns
        -------
        bool
            True if the Interval is open on the right-side.
        """
        return not self.closed_right

    @property
    def mid(self):
        """
        Return the midpoint of the Interval.
        """
        try:
            return 0.5 * (self.left + self.right)
        except TypeError:
            # datetime safe version
            return self.left + 0.5 * self.length

    @property
    def length(self):
        """
        Return the length of the Interval.
        """
        return self.right - self.left

    @property
    def is_empty(self):
        """
        Indicates if an interval is empty, meaning it contains no points.

        .. versionadded:: 0.25.0

        Returns
        -------
        bool or ndarray
            A boolean indicating if a scalar :class:`Interval` is empty, or a
            boolean ``ndarray`` positionally indicating if an ``Interval`` in
            an :class:`~arrays.IntervalArray` or :class:`IntervalIndex` is
            empty.

        Examples
        --------
        An :class:`Interval` that contains points is not empty:

        >>> pd.Interval(0, 1, closed='right').is_empty
        False

        An ``Interval`` that does not contain any points is empty:

        >>> pd.Interval(0, 0, closed='right').is_empty
        True
        >>> pd.Interval(0, 0, closed='left').is_empty
        True
        >>> pd.Interval(0, 0, closed='neither').is_empty
        True

        An ``Interval`` that contains a single point is not empty:

        >>> pd.Interval(0, 0, closed='both').is_empty
        False

        An :class:`~arrays.IntervalArray` or :class:`IntervalIndex` returns a
        boolean ``ndarray`` positionally indicating if an ``Interval`` is
        empty:

        >>> ivs = [pd.Interval(0, 0, closed='neither'),
        ...        pd.Interval(1, 2, closed='neither')]
        >>> pd.arrays.IntervalArray(ivs).is_empty
        array([ True, False])

        Missing values are not considered empty:

        >>> ivs = [pd.Interval(0, 0, closed='neither'), np.nan]
        >>> pd.IntervalIndex(ivs).is_empty
        array([ True, False])
        """
        return (self.right == self.left) & (self.closed != 'both')

    def _check_closed_matches(self, other, name='other'):
        """
        Check if the closed attribute of `other` matches.

        Note that 'left' and 'right' are considered different from 'both'.

        Parameters
        ----------
        other : Interval, IntervalIndex, IntervalArray
        name : str
            Name to use for 'other' in the error message.

        Raises
        ------
        ValueError
            When `other` is not closed exactly the same as self.
        """
        if self.closed != other.closed:
            raise ValueError(f"'{name}.closed' is {repr(other.closed)}, "
                             f"expected {repr(self.closed)}.")


cdef bint _interval_like(other):
    return (hasattr(other, 'left')
            and hasattr(other, 'right')
            and hasattr(other, 'closed'))


cdef class Interval(IntervalMixin):
    """
    Immutable object implementing an Interval, a bounded slice-like interval.

    Parameters
    ----------
    left : orderable scalar
        Left bound for the interval.
    right : orderable scalar
        Right bound for the interval.
    closed : {'right', 'left', 'both', 'neither'}, default 'right'
        Whether the interval is closed on the left-side, right-side, both or
        neither. See the Notes for more detailed explanation.

    See Also
    --------
    IntervalIndex : An Index of Interval objects that are all closed on the
        same side.
    cut : Convert continuous data into discrete bins (Categorical
        of Interval objects).
    qcut : Convert continuous data into bins (Categorical of Interval objects)
        based on quantiles.
    Period : Represents a period of time.

    Notes
    -----
    The parameters `left` and `right` must be of the same type, you must be
    able to compare them and they must satisfy ``left <= right``.

    A closed interval (in mathematics denoted by square brackets) contains
    its endpoints, i.e. the closed interval ``[0, 5]`` is characterized by the
    conditions ``0 <= x <= 5``. This is what ``closed='both'`` stands for.
    An open interval (in mathematics denoted by parentheses) does not contain
    its endpoints, i.e. the open interval ``(0, 5)`` is characterized by the
    conditions ``0 < x < 5``. This is what ``closed='neither'`` stands for.
    Intervals can also be half-open or half-closed, i.e. ``[0, 5)`` is
    described by ``0 <= x < 5`` (``closed='left'``) and ``(0, 5]`` is
    described by ``0 < x <= 5`` (``closed='right'``).

    Examples
    --------
    It is possible to build Intervals of different types, like numeric ones:

    >>> iv = pd.Interval(left=0, right=5)
    >>> iv
    Interval(0, 5, closed='right')

    You can check if an element belongs to it:

    >>> 2.5 in iv
    True

    You can test the bounds (``closed='right'``, so ``0 < x <= 5``):

    >>> 0 in iv
    False
    >>> 5 in iv
    True
    >>> 0.0001 in iv
    True

    Calculate its length:

    >>> iv.length
    5

    You can operate with `+` and `*` over an Interval and the operation
    is applied to each of its bounds, so the result depends on the type
    of the bound elements:

    >>> shifted_iv = iv + 3
    >>> shifted_iv
    Interval(3, 8, closed='right')
    >>> extended_iv = iv * 10.0
    >>> extended_iv
    Interval(0.0, 50.0, closed='right')

    To create a time interval you can use Timestamps as the bounds:

    >>> year_2017 = pd.Interval(pd.Timestamp('2017-01-01 00:00:00'),
    ...                         pd.Timestamp('2018-01-01 00:00:00'),
    ...                         closed='left')
    >>> pd.Timestamp('2017-01-01 00:00') in year_2017
    True
    >>> year_2017.length
    Timedelta('365 days 00:00:00')
    """
    _typ = "interval"
    __array_priority__ = 1000

    cdef readonly object left
    """
    Left bound for the interval.
    """

    cdef readonly object right
    """
    Right bound for the interval.
    """

    cdef readonly str closed
    """
    Whether the interval is closed on the left-side, right-side, both or
    neither.
    """

    def __init__(self, left, right, str closed='right'):
        # note: it is faster to just do these checks than to use a special
        # constructor (__cinit__/__new__) to avoid them

        self._validate_endpoint(left)
        self._validate_endpoint(right)

        if closed not in VALID_CLOSED:
            raise ValueError(f"invalid option for 'closed': {closed}")
        if not left <= right:
            raise ValueError("left side of interval must be <= right side")
        if (isinstance(left, _Timestamp) and
                not tz_compare(left.tzinfo, right.tzinfo)):
            # GH 18538
            raise ValueError("left and right must have the same time zone, got "
                             f"{repr(left.tzinfo)} and {repr(right.tzinfo)}")
        self.left = left
        self.right = right
        self.closed = closed

    def _validate_endpoint(self, endpoint):
        # GH 23013
        if not (is_integer_object(endpoint) or is_float_object(endpoint) or
                isinstance(endpoint, (_Timestamp, _Timedelta))):
            raise ValueError("Only numeric, Timestamp and Timedelta endpoints "
                             "are allowed when constructing an Interval.")

    def __hash__(self):
        return hash((self.left, self.right, self.closed))

    def __contains__(self, key) -> bool:
        if _interval_like(key):
            raise TypeError("__contains__ not defined for two intervals")
        return ((self.left < key if self.open_left else self.left <= key) and
                (key < self.right if self.open_right else key <= self.right))
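
    # Boundary behaviour sketch for __contains__ (illustrative; complements
    # the class docstring, which only shows closed='right'):
    #
    #   >>> 0 in Interval(0, 1, closed='left')
    #   True
    #   >>> 1 in Interval(0, 1, closed='left')
    #   False
    #   >>> 0 in Interval(0, 1, closed='neither')
    #   False
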
    def __richcmp__(self, other, op: int):
        if isinstance(other, Interval):
            self_tuple = (self.left, self.right, self.closed)
            other_tuple = (other.left, other.right, other.closed)
            return PyObject_RichCompare(self_tuple, other_tuple, op)
        elif util.is_array(other):
            return np.array(
                [PyObject_RichCompare(self, x, op) for x in other],
                dtype=bool,
            )

        return NotImplemented

    def __reduce__(self):
        args = (self.left, self.right, self.closed)
        return (type(self), args)

    def _repr_base(self):
        left = self.left
        right = self.right

        # TODO: need more general formatting methodology here
        if isinstance(left, _Timestamp) and isinstance(right, _Timestamp):
            left = left._short_repr
            right = right._short_repr

        return left, right

    def __repr__(self) -> str:

        left, right = self._repr_base()
        name = type(self).__name__
        repr_str = f'{name}({repr(left)}, {repr(right)}, closed={repr(self.closed)})'
        return repr_str

    def __str__(self) -> str:

        left, right = self._repr_base()
        start_symbol = '[' if self.closed_left else '('
        end_symbol = ']' if self.closed_right else ')'
        return f'{start_symbol}{left}, {right}{end_symbol}'

    def __add__(self, y):
        if (
            isinstance(y, numbers.Number)
            or PyDelta_Check(y)
            or is_timedelta64_object(y)
        ):
            return Interval(self.left + y, self.right + y, closed=self.closed)
        elif (
            isinstance(y, Interval)
            and (
                isinstance(self, numbers.Number)
                or PyDelta_Check(self)
                or is_timedelta64_object(self)
            )
        ):
            return Interval(y.left + self, y.right + self, closed=y.closed)
        return NotImplemented

    def __sub__(self, y):
        if (
            isinstance(y, numbers.Number)
            or PyDelta_Check(y)
            or is_timedelta64_object(y)
        ):
            return Interval(self.left - y, self.right - y, closed=self.closed)
        return NotImplemented

    def __mul__(self, y):
        if isinstance(y, numbers.Number):
            return Interval(self.left * y, self.right * y, closed=self.closed)
        elif isinstance(y, Interval) and isinstance(self, numbers.Number):
            return Interval(y.left * self, y.right * self, closed=y.closed)
        return NotImplemented

    def __truediv__(self, y):
        if isinstance(y, numbers.Number):
            return Interval(self.left / y, self.right / y, closed=self.closed)
        return NotImplemented

    def __floordiv__(self, y):
        if isinstance(y, numbers.Number):
            return Interval(
                self.left // y, self.right // y, closed=self.closed)
        return NotImplemented

    def overlaps(self, other):
        """
        Check whether two Interval objects overlap.

        Two intervals overlap if they share a common point, including closed
        endpoints. Intervals that only have an open endpoint in common do not
        overlap.

        Parameters
        ----------
        other : Interval
            Interval to check against for an overlap.

        Returns
        -------
        bool
            True if the two intervals overlap.

        See Also
        --------
        IntervalArray.overlaps : The corresponding method for IntervalArray.
        IntervalIndex.overlaps : The corresponding method for IntervalIndex.

        Examples
        --------
        >>> i1 = pd.Interval(0, 2)
        >>> i2 = pd.Interval(1, 3)
        >>> i1.overlaps(i2)
        True
        >>> i3 = pd.Interval(4, 5)
        >>> i1.overlaps(i3)
        False

        Intervals that share closed endpoints overlap:

        >>> i4 = pd.Interval(0, 1, closed='both')
        >>> i5 = pd.Interval(1, 2, closed='both')
        >>> i4.overlaps(i5)
        True

        Intervals that only have an open endpoint in common do not overlap:

        >>> i6 = pd.Interval(1, 2, closed='neither')
        >>> i4.overlaps(i6)
        False
        """
        if not isinstance(other, Interval):
            raise TypeError("`other` must be an Interval, "
                            f"got {type(other).__name__}")

        # equality is okay if both endpoints are closed (overlap at a point)
        op1 = le if (self.closed_left and other.closed_right) else lt
        op2 = le if (other.closed_left and self.closed_right) else lt

        # overlaps is equivalent to the negation of the two intervals being
        # disjoint:
        #   disjoint = (A.left > B.right) or (B.left > A.right)
        # (simplifying the negation allows this to be done in fewer operations)
        return op1(self.left, other.right) and op2(other.left, self.right)

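# Worked check of the disjoint-negation trick in Interval.overlaps above
# (illustrative): (0, 1] and [1, 2) share the point 1, and indeed
#
#   op1 = lt  (self.closed_left is False)               -> 0 < 2   -> True
#   op2 = le  (other.closed_left and self.closed_right) -> 1 <= 1  -> True
#
# so Interval(0, 1).overlaps(Interval(1, 2, closed='left')) returns True.
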
@cython.wraparound(False)
@cython.boundscheck(False)
def intervals_to_interval_bounds(ndarray intervals, bint validate_closed=True):
    """
    Parameters
    ----------
    intervals : ndarray
        Object array of Intervals / nulls.

    validate_closed : bool, default True
        Boolean indicating if all intervals must be closed on the same side.
        Mismatching closed will raise if True, else return None for closed.

    Returns
    -------
    left : ndarray
    right : ndarray
    closed : str or None
    """
    cdef:
        object closed = None, interval
        Py_ssize_t i, n = len(intervals)
        ndarray left, right
        bint seen_closed = False

    left = np.empty(n, dtype=intervals.dtype)
    right = np.empty(n, dtype=intervals.dtype)

    for i in range(n):
        interval = intervals[i]
        if interval is None or util.is_nan(interval):
            left[i] = np.nan
            right[i] = np.nan
            continue

        if not isinstance(interval, Interval):
            raise TypeError(f"type {type(interval)} with value "
                            f"{interval} is not an interval")

        left[i] = interval.left
        right[i] = interval.right
        if not seen_closed:
            seen_closed = True
            closed = interval.closed
        elif closed != interval.closed:
            closed = None
            if validate_closed:
                raise ValueError("intervals must all be closed on the same side")

    return left, right, closed

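# Round-trip sketch (illustrative): for an object array of Intervals that all
# share the same `closed`, the function splits bounds back out, e.g.
#
#   >>> arr = np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object)
#   >>> left, right, closed = intervals_to_interval_bounds(arr)
#   >>> list(left), list(right), closed
#   ([0, 1], [1, 2], 'right')
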
include "intervaltree.pxi"
@@ -0,0 +1,427 @@
"""
Template for intervaltree

WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""

from pandas._libs.algos import is_monotonic

ctypedef fused int_scalar_t:
    int64_t
    float64_t

ctypedef fused uint_scalar_t:
    uint64_t
    float64_t

ctypedef fused scalar_t:
    int_scalar_t
    uint_scalar_t

# ----------------------------------------------------------------------
# IntervalTree
# ----------------------------------------------------------------------

cdef class IntervalTree(IntervalMixin):
    """A centered interval tree

    Based on the algorithm described on Wikipedia:
    https://en.wikipedia.org/wiki/Interval_tree

    we are emulating the IndexEngine interface
    """
    cdef readonly:
        ndarray left, right
        IntervalNode root
        object dtype
        str closed
        object _is_overlapping, _left_sorter, _right_sorter
        Py_ssize_t _na_count

    def __init__(self, left, right, closed='right', leaf_size=100):
        """
        Parameters
        ----------
        left, right : np.ndarray[ndim=1]
            Left and right bounds for each interval. Assumed to contain no
            NaNs.
        closed : {'left', 'right', 'both', 'neither'}, optional
            Whether the intervals are closed on the left-side, right-side, both
            or neither. Defaults to 'right'.
        leaf_size : int, optional
            Parameter that controls when the tree switches from creating nodes
            to brute-force search. Tune this parameter to optimize query
            performance.
        """
        if closed not in ['left', 'right', 'both', 'neither']:
            raise ValueError("invalid option for 'closed': %s" % closed)

        left = np.asarray(left)
        right = np.asarray(right)
        self.dtype = np.result_type(left, right)
        self.left = np.asarray(left, dtype=self.dtype)
        self.right = np.asarray(right, dtype=self.dtype)

        indices = np.arange(len(left), dtype='int64')

        self.closed = closed

        # GH 23352: ensure no nan in nodes
        mask = ~np.isnan(self.left)
        self._na_count = len(mask) - mask.sum()
        self.left = self.left[mask]
        self.right = self.right[mask]
        indices = indices[mask]

        node_cls = NODE_CLASSES[str(self.dtype), closed]
        self.root = node_cls(self.left, self.right, indices, leaf_size)

    @property
    def left_sorter(self) -> np.ndarray:
        """How to sort the left labels; this is used for binary search
        """
        if self._left_sorter is None:
            self._left_sorter = np.argsort(self.left)
        return self._left_sorter

    @property
    def right_sorter(self) -> np.ndarray:
        """How to sort the right labels
        """
        if self._right_sorter is None:
            self._right_sorter = np.argsort(self.right)
        return self._right_sorter

    @property
    def is_overlapping(self) -> bool:
        """
        Determine if the IntervalTree contains overlapping intervals.
        Cached as self._is_overlapping.
        """
        if self._is_overlapping is not None:
            return self._is_overlapping

        # <= when both sides closed since endpoints can overlap
        op = le if self.closed == 'both' else lt

        # overlap if start of current interval < end of previous interval
        # (current and previous in terms of sorted order by left/start side)
        current = self.left[self.left_sorter[1:]]
        previous = self.right[self.left_sorter[:-1]]
        self._is_overlapping = bool(op(current, previous).any())

        return self._is_overlapping

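    # Worked example of the overlap check above (illustrative): sorting by
    # left bound, each interval only needs comparing with its neighbour.
    # For intervals (0, 1], (0.5, 2] sorted by left, previous.right = 1 and
    # current.left = 0.5, so 0.5 < 1 and is_overlapping is True; for
    # (0, 1], (1, 2] with closed='right', 1 < 1 is False -> not overlapping.
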
    @property
    def is_monotonic_increasing(self) -> bool:
        """
        Return True if the IntervalTree is monotonic increasing (only equal or
        increasing values), else False
        """
        if self._na_count > 0:
            return False
        values = [self.right, self.left]

        sort_order = np.lexsort(values)
        return is_monotonic(sort_order, False)[0]

    def get_indexer(self, scalar_t[:] target) -> np.ndarray:
        """Return the positions corresponding to unique intervals that overlap
        with the given array of scalar targets.
        """

        # TODO: write get_indexer_intervals
        cdef:
            Py_ssize_t old_len
            Py_ssize_t i
            Int64Vector result

        result = Int64Vector()
        old_len = 0
        for i in range(len(target)):
            try:
                self.root.query(result, target[i])
            except OverflowError:
                # overflow -> no match, which is already handled below
                pass

            if result.data.n == old_len:
                result.append(-1)
            elif result.data.n > old_len + 1:
                raise KeyError(
                    'indexer does not intersect a unique set of intervals')
            old_len = result.data.n
        return result.to_array().astype('intp')

    def get_indexer_non_unique(self, scalar_t[:] target):
        """Return the positions corresponding to intervals that overlap with
        the given array of scalar targets. Non-unique positions are repeated.
        """
        cdef:
            Py_ssize_t old_len
            Py_ssize_t i
            Int64Vector result, missing

        result = Int64Vector()
        missing = Int64Vector()
        old_len = 0
        for i in range(len(target)):
            try:
                self.root.query(result, target[i])
            except OverflowError:
                # overflow -> no match, which is already handled below
                pass

            if result.data.n == old_len:
                result.append(-1)
                missing.append(i)
            old_len = result.data.n
        return (result.to_array().astype('intp'),
                missing.to_array().astype('intp'))

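    # Usage sketch (illustrative; IntervalTree is internal API):
    #
    #   >>> tree = IntervalTree(np.array([0, 2]), np.array([1, 3]))
    #   >>> tree.get_indexer(np.array([0.5, 2.5]))     # hits (0, 1], (2, 3]
    #   array([0, 1])
    #   >>> tree.get_indexer_non_unique(np.array([5.0]))
    #   (array([-1]), array([0]))
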
    def __repr__(self) -> str:
        return ('<IntervalTree[{dtype},{closed}]: '
                '{n_elements} elements>'.format(
                    dtype=self.dtype, closed=self.closed,
                    n_elements=self.root.n_elements))

    # compat with IndexEngine interface
    def clear_mapping(self) -> None:
        pass


cdef take(ndarray source, ndarray indices):
    """Take the given positions from a 1D ndarray
    """
    return PyArray_Take(source, indices, 0)


cdef sort_values_and_indices(all_values, all_indices, subset):
    indices = take(all_indices, subset)
    values = take(all_values, subset)
    sorter = PyArray_ArgSort(values, 0, NPY_QUICKSORT)
    sorted_values = take(values, sorter)
    sorted_indices = take(indices, sorter)
    return sorted_values, sorted_indices


# ----------------------------------------------------------------------
# Nodes
# ----------------------------------------------------------------------

@cython.internal
cdef class IntervalNode:
    cdef readonly:
        int64_t n_elements, n_center, leaf_size
        bint is_leaf_node

    def __repr__(self) -> str:
        if self.is_leaf_node:
            return (
                f"<{type(self).__name__}: {self.n_elements} elements (terminal)>"
            )
        else:
            n_left = self.left_node.n_elements
            n_right = self.right_node.n_elements
            n_center = self.n_elements - n_left - n_right
            return (
                f"<{type(self).__name__}: "
                f"pivot {self.pivot}, {self.n_elements} elements "
                f"({n_left} left, {n_right} right, {n_center} overlapping)>"
            )

    def counts(self):
        """
        Inspect counts on this node
        useful for debugging purposes
        """
        if self.is_leaf_node:
            return self.n_elements
        else:
            m = len(self.center_left_values)
            l = self.left_node.counts()
            r = self.right_node.counts()
            return (m, (l, r))


# we need specialized nodes and leaves to optimize for different dtype and
# closed values

{{py:

nodes = []
for dtype in ['float64', 'int64', 'uint64']:
    for closed, cmp_left, cmp_right in [
        ('left', '<=', '<'),
        ('right', '<', '<='),
        ('both', '<=', '<='),
        ('neither', '<', '<')]:
        cmp_left_converse = '<' if cmp_left == '<=' else '<='
        cmp_right_converse = '<' if cmp_right == '<=' else '<='
        if dtype.startswith('int'):
            fused_prefix = 'int_'
        elif dtype.startswith('uint'):
            fused_prefix = 'uint_'
        elif dtype.startswith('float'):
            fused_prefix = ''
        nodes.append((dtype, dtype.title(),
                      closed, closed.title(),
                      cmp_left,
                      cmp_right,
                      cmp_left_converse,
                      cmp_right_converse,
                      fused_prefix))

}}

NODE_CLASSES = {}

{{for dtype, dtype_title, closed, closed_title, cmp_left, cmp_right,
      cmp_left_converse, cmp_right_converse, fused_prefix in nodes}}


@cython.internal
cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode(IntervalNode):
    """Non-terminal node for an IntervalTree

    Categorizes intervals by those that fall to the left, those that fall to
    the right, and those that overlap with the pivot.
    """
    cdef readonly:
        {{dtype_title}}Closed{{closed_title}}IntervalNode left_node, right_node
        {{dtype}}_t[:] center_left_values, center_right_values, left, right
        int64_t[:] center_left_indices, center_right_indices, indices
        {{dtype}}_t min_left, max_right
        {{dtype}}_t pivot

    def __init__(self,
                 ndarray[{{dtype}}_t, ndim=1] left,
                 ndarray[{{dtype}}_t, ndim=1] right,
                 ndarray[int64_t, ndim=1] indices,
                 int64_t leaf_size):

        self.n_elements = len(left)
        self.leaf_size = leaf_size

        # min_left and max_right are used to speed-up query by skipping
        # query on sub-nodes. If this node has size 0, query is cheap,
        # so these values don't matter.
        if left.size > 0:
            self.min_left = left.min()
            self.max_right = right.max()
        else:
            self.min_left = 0
            self.max_right = 0

        if self.n_elements <= leaf_size:
            # make this a terminal (leaf) node
            self.is_leaf_node = True
            self.left = left
            self.right = right
            self.indices = indices
            self.n_center = 0
        else:
            # calculate a pivot so we can create child nodes
            self.is_leaf_node = False
            self.pivot = np.median(left / 2 + right / 2)
            left_set, right_set, center_set = self.classify_intervals(
                left, right)

            self.left_node = self.new_child_node(left, right,
                                                 indices, left_set)
            self.right_node = self.new_child_node(left, right,
                                                  indices, right_set)

            self.center_left_values, self.center_left_indices = \
                sort_values_and_indices(left, indices, center_set)
            self.center_right_values, self.center_right_indices = \
                sort_values_and_indices(right, indices, center_set)
            self.n_center = len(self.center_left_indices)

    @cython.wraparound(False)
    @cython.boundscheck(False)
    cdef classify_intervals(self, {{dtype}}_t[:] left, {{dtype}}_t[:] right):
        """Classify the given intervals based upon whether they fall to the
        left, right, or overlap with this node's pivot.
        """
        cdef:
            Int64Vector left_ind, right_ind, overlapping_ind
            Py_ssize_t i

        left_ind = Int64Vector()
        right_ind = Int64Vector()
        overlapping_ind = Int64Vector()

        for i in range(self.n_elements):
            if right[i] {{cmp_right_converse}} self.pivot:
                left_ind.append(i)
            elif self.pivot {{cmp_left_converse}} left[i]:
                right_ind.append(i)
            else:
                overlapping_ind.append(i)

        return (left_ind.to_array(),
                right_ind.to_array(),
                overlapping_ind.to_array())

    cdef new_child_node(self,
                        ndarray[{{dtype}}_t, ndim=1] left,
                        ndarray[{{dtype}}_t, ndim=1] right,
                        ndarray[int64_t, ndim=1] indices,
                        ndarray[int64_t, ndim=1] subset):
        """Create a new child node.
        """
        left = take(left, subset)
        right = take(right, subset)
        indices = take(indices, subset)
        return {{dtype_title}}Closed{{closed_title}}IntervalNode(
            left, right, indices, self.leaf_size)

    @cython.wraparound(False)
    @cython.boundscheck(False)
    @cython.initializedcheck(False)
    cpdef query(self, Int64Vector result, {{fused_prefix}}scalar_t point):
        """Recursively query this node and its sub-nodes for intervals that
        overlap with the query point.
        """
        cdef:
            int64_t[:] indices
            {{dtype}}_t[:] values
            Py_ssize_t i

        if self.is_leaf_node:
            # Once we get down to a certain size, it doesn't make sense to
            # continue the binary tree structure. Instead, we use linear
            # search.
            for i in range(self.n_elements):
                if self.left[i] {{cmp_left}} point {{cmp_right}} self.right[i]:
                    result.append(self.indices[i])
        else:
            # There are child nodes. Based on comparing our query to the pivot,
            # look at the center values, then go to the relevant child.
            if point < self.pivot:
                values = self.center_left_values
                indices = self.center_left_indices
                for i in range(self.n_center):
                    if not values[i] {{cmp_left}} point:
                        break
                    result.append(indices[i])
                if point {{cmp_right}} self.left_node.max_right:
                    self.left_node.query(result, point)
            elif point > self.pivot:
                values = self.center_right_values
                indices = self.center_right_indices
                for i in range(self.n_center - 1, -1, -1):
                    if not point {{cmp_right}} values[i]:
                        break
                    result.append(indices[i])
                if self.right_node.min_left {{cmp_left}} point:
                    self.right_node.query(result, point)
            else:
                result.extend(self.center_left_indices)


NODE_CLASSES['{{dtype}}',
             '{{closed}}'] = {{dtype_title}}Closed{{closed_title}}IntervalNode

{{endfor}}
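# Expansion note (derived from the template above): the tempita loop emits one
# node class per (dtype, closed) pair, so e.g.
#
#   NODE_CLASSES['int64', 'left']    is Int64ClosedLeftIntervalNode
#   NODE_CLASSES['float64', 'both']  is Float64ClosedBothIntervalNode
#
# and IntervalTree.__init__ picks the right class via str(self.dtype), closed.
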
Binary file not shown.
91
.venv/lib/python3.7/site-packages/pandas/_libs/join.pyi
Normal file
@@ -0,0 +1,91 @@
import numpy as np

def inner_join(
    left: np.ndarray,  # const intp_t[:]
    right: np.ndarray,  # const intp_t[:]
    max_groups: int,
) -> tuple[np.ndarray, np.ndarray,]: ...  # np.ndarray[np.intp]  # np.ndarray[np.intp]
def left_outer_join(
    left: np.ndarray,  # const intp_t[:]
    right: np.ndarray,  # const intp_t[:]
    max_groups: int,
    sort: bool = True,
) -> tuple[np.ndarray, np.ndarray,]: ...  # np.ndarray[np.intp]  # np.ndarray[np.intp]
def full_outer_join(
    left: np.ndarray,  # const intp_t[:]
    right: np.ndarray,  # const intp_t[:]
    max_groups: int,
) -> tuple[np.ndarray, np.ndarray,]: ...  # np.ndarray[np.intp]  # np.ndarray[np.intp]
def ffill_indexer(
    indexer: np.ndarray,  # const intp_t[:]
) -> np.ndarray: ...  # np.ndarray[np.intp]
def left_join_indexer_unique(
    left: np.ndarray,  # ndarray[join_t]
    right: np.ndarray,  # ndarray[join_t]
) -> np.ndarray: ...  # np.ndarray[np.intp]
def left_join_indexer(
    left: np.ndarray,  # ndarray[join_t]
    right: np.ndarray,  # ndarray[join_t]
) -> tuple[
    np.ndarray,  # np.ndarray[join_t]
    np.ndarray,  # np.ndarray[np.intp]
    np.ndarray,  # np.ndarray[np.intp]
]: ...
def inner_join_indexer(
    left: np.ndarray,  # ndarray[join_t]
    right: np.ndarray,  # ndarray[join_t]
) -> tuple[
    np.ndarray,  # np.ndarray[join_t]
    np.ndarray,  # np.ndarray[np.intp]
    np.ndarray,  # np.ndarray[np.intp]
]: ...
def outer_join_indexer(
    left: np.ndarray,  # ndarray[join_t]
    right: np.ndarray,  # ndarray[join_t]
) -> tuple[
    np.ndarray,  # np.ndarray[join_t]
    np.ndarray,  # np.ndarray[np.intp]
    np.ndarray,  # np.ndarray[np.intp]
]: ...
def asof_join_backward_on_X_by_Y(
    left_values: np.ndarray,  # asof_t[:]
    right_values: np.ndarray,  # asof_t[:]
    left_by_values: np.ndarray,  # by_t[:]
    right_by_values: np.ndarray,  # by_t[:]
    allow_exact_matches: bool = True,
    tolerance=None,
) -> tuple[np.ndarray, np.ndarray,]: ...  # np.ndarray[np.intp]  # np.ndarray[np.intp]
def asof_join_forward_on_X_by_Y(
    left_values: np.ndarray,  # asof_t[:]
    right_values: np.ndarray,  # asof_t[:]
    left_by_values: np.ndarray,  # by_t[:]
    right_by_values: np.ndarray,  # by_t[:]
    allow_exact_matches: bool = True,
    tolerance=None,
) -> tuple[np.ndarray, np.ndarray,]: ...  # np.ndarray[np.intp]  # np.ndarray[np.intp]
def asof_join_nearest_on_X_by_Y(
    left_values: np.ndarray,  # asof_t[:]
    right_values: np.ndarray,  # asof_t[:]
    left_by_values: np.ndarray,  # by_t[:]
    right_by_values: np.ndarray,  # by_t[:]
    allow_exact_matches: bool = True,
    tolerance=None,
) -> tuple[np.ndarray, np.ndarray,]: ...  # np.ndarray[np.intp]  # np.ndarray[np.intp]
def asof_join_backward(
    left_values: np.ndarray,  # asof_t[:]
    right_values: np.ndarray,  # asof_t[:]
    allow_exact_matches: bool = True,
    tolerance=None,
) -> tuple[np.ndarray, np.ndarray,]: ...  # np.ndarray[np.intp]  # np.ndarray[np.intp]
def asof_join_forward(
    left_values: np.ndarray,  # asof_t[:]
    right_values: np.ndarray,  # asof_t[:]
    allow_exact_matches: bool = True,
    tolerance=None,
) -> tuple[np.ndarray, np.ndarray,]: ...  # np.ndarray[np.intp]  # np.ndarray[np.intp]
def asof_join_nearest(
    left_values: np.ndarray,  # asof_t[:]
    right_values: np.ndarray,  # asof_t[:]
    allow_exact_matches: bool = True,
    tolerance=None,
) -> tuple[np.ndarray, np.ndarray,]: ...  # np.ndarray[np.intp]  # np.ndarray[np.intp]
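# Usage sketch for the label-based joins above (illustrative; inputs are
# factorized group ids, not raw values):
#
#   >>> left = np.array([0, 1, 1], dtype=np.intp)   # group id per left row
#   >>> right = np.array([1, 2], dtype=np.intp)     # group id per right row
#   >>> from pandas._libs.join import inner_join
#   >>> inner_join(left, right, max_groups=3)       # pairs rows sharing id 1
#   (array([1, 2]), array([0, 0]))
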
1004
.venv/lib/python3.7/site-packages/pandas/_libs/join.pyx
Normal file
File diff suppressed because it is too large
Binary file not shown.
129
.venv/lib/python3.7/site-packages/pandas/_libs/khash.pxd
Normal file
@@ -0,0 +1,129 @@
from cpython.object cimport PyObject
from numpy cimport (
    complex64_t,
    complex128_t,
    float32_t,
    float64_t,
    int8_t,
    int16_t,
    int32_t,
    int64_t,
    uint8_t,
    uint16_t,
    uint32_t,
    uint64_t,
)


cdef extern from "khash_python.h":
    const int KHASH_TRACE_DOMAIN

    ctypedef uint32_t khuint_t
    ctypedef khuint_t khiter_t

    ctypedef struct khcomplex128_t:
        double real
        double imag

    bint are_equivalent_khcomplex128_t \
        "kh_complex_hash_equal" (khcomplex128_t a, khcomplex128_t b) nogil

    ctypedef struct khcomplex64_t:
        float real
        float imag

    bint are_equivalent_khcomplex64_t \
        "kh_complex_hash_equal" (khcomplex64_t a, khcomplex64_t b) nogil

    bint are_equivalent_float64_t \
        "kh_floats_hash_equal" (float64_t a, float64_t b) nogil

    bint are_equivalent_float32_t \
        "kh_floats_hash_equal" (float32_t a, float32_t b) nogil

    uint32_t kh_python_hash_func(object key)
    bint kh_python_hash_equal(object a, object b)

    ctypedef struct kh_pymap_t:
        khuint_t n_buckets, size, n_occupied, upper_bound
        uint32_t *flags
        PyObject **keys
        size_t *vals

    kh_pymap_t* kh_init_pymap()
    void kh_destroy_pymap(kh_pymap_t*)
    void kh_clear_pymap(kh_pymap_t*)
    khuint_t kh_get_pymap(kh_pymap_t*, PyObject*)
    void kh_resize_pymap(kh_pymap_t*, khuint_t)
    khuint_t kh_put_pymap(kh_pymap_t*, PyObject*, int*)
    void kh_del_pymap(kh_pymap_t*, khuint_t)

    bint kh_exist_pymap(kh_pymap_t*, khiter_t)

    ctypedef struct kh_pyset_t:
        khuint_t n_buckets, size, n_occupied, upper_bound
        uint32_t *flags
        PyObject **keys
        size_t *vals

    kh_pyset_t* kh_init_pyset()
    void kh_destroy_pyset(kh_pyset_t*)
    void kh_clear_pyset(kh_pyset_t*)
    khuint_t kh_get_pyset(kh_pyset_t*, PyObject*)
    void kh_resize_pyset(kh_pyset_t*, khuint_t)
    khuint_t kh_put_pyset(kh_pyset_t*, PyObject*, int*)
    void kh_del_pyset(kh_pyset_t*, khuint_t)

    bint kh_exist_pyset(kh_pyset_t*, khiter_t)

    ctypedef char* kh_cstr_t

    ctypedef struct kh_str_t:
        khuint_t n_buckets, size, n_occupied, upper_bound
        uint32_t *flags
        kh_cstr_t *keys
        size_t *vals

    kh_str_t* kh_init_str() nogil
    void kh_destroy_str(kh_str_t*) nogil
    void kh_clear_str(kh_str_t*) nogil
    khuint_t kh_get_str(kh_str_t*, kh_cstr_t) nogil
    void kh_resize_str(kh_str_t*, khuint_t) nogil
    khuint_t kh_put_str(kh_str_t*, kh_cstr_t, int*) nogil
    void kh_del_str(kh_str_t*, khuint_t) nogil

    bint kh_exist_str(kh_str_t*, khiter_t) nogil

    ctypedef struct kh_str_starts_t:
        kh_str_t *table
        int starts[256]

    kh_str_starts_t* kh_init_str_starts() nogil
    khuint_t kh_put_str_starts_item(kh_str_starts_t* table, char* key,
                                    int* ret) nogil
    khuint_t kh_get_str_starts_item(kh_str_starts_t* table, char* key) nogil
    void kh_destroy_str_starts(kh_str_starts_t*) nogil
    void kh_resize_str_starts(kh_str_starts_t*, khuint_t) nogil

    # sweep factorize

    ctypedef struct kh_strbox_t:
        khuint_t n_buckets, size, n_occupied, upper_bound
        uint32_t *flags
        kh_cstr_t *keys
        PyObject **vals

    kh_strbox_t* kh_init_strbox() nogil
    void kh_destroy_strbox(kh_strbox_t*) nogil
    void kh_clear_strbox(kh_strbox_t*) nogil
    khuint_t kh_get_strbox(kh_strbox_t*, kh_cstr_t) nogil
    void kh_resize_strbox(kh_strbox_t*, khuint_t) nogil
    khuint_t kh_put_strbox(kh_strbox_t*, kh_cstr_t, int*) nogil
    void kh_del_strbox(kh_strbox_t*, khuint_t) nogil

    bint kh_exist_strbox(kh_strbox_t*, khiter_t) nogil

    khuint_t kh_needed_n_buckets(khuint_t element_n) nogil


include "khash_for_primitive_helper.pxi"
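# Usage sketch of the khash API from Cython (illustrative; the int64 variants
# come from the included primitive helper):
#
#   cdef kh_int64_t *table = kh_init_int64()
#   cdef int ret = 0
#   cdef khuint_t k = kh_put_int64(table, 42, &ret)  # insert key 42
#   table.vals[k] = 7                                # attach a value
#   k = kh_get_int64(table, 42)                      # lookup
#   if k != table.n_buckets:                         # n_buckets == "not found"
#       value = table.vals[k]
#   kh_destroy_int64(table)
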
@@ -0,0 +1,44 @@
"""
Template for wrapping khash-tables for each primitive `dtype`

WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""

{{py:

# name, c_type
primitive_types = [('int64', 'int64_t'),
                   ('uint64', 'uint64_t'),
                   ('float64', 'float64_t'),
                   ('int32', 'int32_t'),
                   ('uint32', 'uint32_t'),
                   ('float32', 'float32_t'),
                   ('int16', 'int16_t'),
                   ('uint16', 'uint16_t'),
                   ('int8', 'int8_t'),
                   ('uint8', 'uint8_t'),
                   ('complex64', 'khcomplex64_t'),
                   ('complex128', 'khcomplex128_t'),
                   ]
}}

{{for name, c_type in primitive_types}}

cdef extern from "khash_python.h":
    ctypedef struct kh_{{name}}_t:
        khuint_t n_buckets, size, n_occupied, upper_bound
        uint32_t *flags
        {{c_type}} *keys
        size_t *vals

    kh_{{name}}_t* kh_init_{{name}}() nogil
    void kh_destroy_{{name}}(kh_{{name}}_t*) nogil
    void kh_clear_{{name}}(kh_{{name}}_t*) nogil
    khuint_t kh_get_{{name}}(kh_{{name}}_t*, {{c_type}}) nogil
    void kh_resize_{{name}}(kh_{{name}}_t*, khuint_t) nogil
    khuint_t kh_put_{{name}}(kh_{{name}}_t*, {{c_type}}, int*) nogil
    void kh_del_{{name}}(kh_{{name}}_t*, khuint_t) nogil

    bint kh_exist_{{name}}(kh_{{name}}_t*, khiter_t) nogil

{{endfor}}
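# Expansion note (derived from the template above): for name='int64',
# c_type='int64_t' the loop body generates exactly
#
#   cdef extern from "khash_python.h":
#       ctypedef struct kh_int64_t: ...
#       kh_int64_t* kh_init_int64() nogil
#       khuint_t kh_get_int64(kh_int64_t*, int64_t) nogil
#       ...
#
# i.e. one complete extern block per primitive dtype.
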
Binary file not shown.
1
.venv/lib/python3.7/site-packages/pandas/_libs/lib.pxd
Normal file
@@ -0,0 +1 @@
cdef bint c_is_list_like(object, bint) except -1
229
.venv/lib/python3.7/site-packages/pandas/_libs/lib.pyi
Normal file
@@ -0,0 +1,229 @@
# TODO(npdtypes): Many types specified here can be made more specific/accurate;
|
||||
# the more specific versions are specified in comments
|
||||
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
Generator,
|
||||
Literal,
|
||||
overload,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._typing import (
|
||||
ArrayLike,
|
||||
DtypeObj,
|
||||
)
|
||||
|
||||
# placeholder until we can specify np.ndarray[object, ndim=2]
|
||||
ndarray_obj_2d = np.ndarray
|
||||
|
||||
from enum import Enum
|
||||
|
||||
class NoDefault(Enum): ...
|
||||
|
||||
no_default: NoDefault
|
||||
|
||||
i8max: int
|
||||
u8max: int
|
||||
|
||||
def item_from_zerodim(val: object) -> object: ...
|
||||
def infer_dtype(value: object, skipna: bool = True) -> str: ...
|
||||
def is_iterator(obj: object) -> bool: ...
|
||||
def is_scalar(val: object) -> bool: ...
|
||||
def is_list_like(obj: object, allow_sets: bool = True) -> bool: ...
|
||||
def is_period(val: object) -> bool: ...
|
||||
def is_interval(val: object) -> bool: ...
|
||||
def is_decimal(val: object) -> bool: ...
|
||||
def is_complex(val: object) -> bool: ...
|
||||
def is_bool(val: object) -> bool: ...
|
||||
def is_integer(val: object) -> bool: ...
|
||||
def is_float(val: object) -> bool: ...
|
||||
def is_interval_array(values: np.ndarray) -> bool: ...
|
||||
def is_datetime64_array(values: np.ndarray) -> bool: ...
|
||||
def is_timedelta_or_timedelta64_array(values: np.ndarray) -> bool: ...
|
||||
def is_datetime_with_singletz_array(values: np.ndarray) -> bool: ...
|
||||
def is_time_array(values: np.ndarray, skipna: bool = False): ...
|
||||
def is_date_array(values: np.ndarray, skipna: bool = False): ...
|
||||
def is_datetime_array(values: np.ndarray, skipna: bool = False): ...
|
||||
def is_string_array(values: np.ndarray, skipna: bool = False): ...
|
||||
def is_float_array(values: np.ndarray, skipna: bool = False): ...
|
||||
def is_integer_array(values: np.ndarray, skipna: bool = False): ...
|
||||
def is_bool_array(values: np.ndarray, skipna: bool = False): ...
|
||||
def fast_multiget(mapping: dict, keys: np.ndarray, default=np.nan) -> np.ndarray: ...
|
||||
def fast_unique_multiple_list_gen(gen: Generator, sort: bool = True) -> list: ...
|
||||
def fast_unique_multiple_list(lists: list, sort: bool = True) -> list: ...
|
||||
def fast_unique_multiple(arrays: list, sort: bool = True) -> list: ...
|
||||
def map_infer(
|
||||
arr: np.ndarray,
|
||||
f: Callable[[Any], Any],
|
||||
convert: bool = True,
|
||||
ignore_na: bool = False,
|
||||
) -> np.ndarray: ...
|
||||
@overload # both convert_datetime and convert_to_nullable_integer False -> np.ndarray
|
||||
def maybe_convert_objects(
|
||||
objects: np.ndarray, # np.ndarray[object]
|
||||
*,
|
||||
try_float: bool = ...,
|
||||
safe: bool = ...,
|
||||
convert_datetime: Literal[False] = ...,
|
||||
convert_timedelta: bool = ...,
|
||||
convert_period: Literal[False] = ...,
|
||||
convert_interval: Literal[False] = ...,
|
||||
convert_to_nullable_integer: Literal[False] = ...,
|
||||
dtype_if_all_nat: DtypeObj | None = ...,
|
||||
) -> np.ndarray: ...
|
||||
@overload
|
||||
def maybe_convert_objects(
|
||||
objects: np.ndarray, # np.ndarray[object]
|
||||
*,
|
||||
try_float: bool = ...,
|
||||
safe: bool = ...,
|
||||
convert_datetime: bool = ...,
|
||||
convert_timedelta: bool = ...,
|
||||
convert_period: bool = ...,
|
||||
convert_interval: bool = ...,
|
||||
convert_to_nullable_integer: Literal[True] = ...,
|
||||
dtype_if_all_nat: DtypeObj | None = ...,
|
||||
) -> ArrayLike: ...
|
||||
@overload
|
||||
def maybe_convert_objects(
|
||||
objects: np.ndarray, # np.ndarray[object]
|
||||
*,
|
||||
try_float: bool = ...,
|
||||
safe: bool = ...,
|
||||
convert_datetime: Literal[True] = ...,
|
||||
convert_timedelta: bool = ...,
|
||||
convert_period: bool = ...,
|
||||
convert_interval: bool = ...,
|
||||
convert_to_nullable_integer: bool = ...,
|
||||
dtype_if_all_nat: DtypeObj | None = ...,
|
||||
) -> ArrayLike: ...
|
||||
@overload
|
||||
def maybe_convert_objects(
|
||||
objects: np.ndarray, # np.ndarray[object]
|
||||
*,
|
||||
try_float: bool = ...,
|
||||
safe: bool = ...,
|
||||
convert_datetime: bool = ...,
|
||||
convert_timedelta: bool = ...,
|
||||
convert_period: Literal[True] = ...,
|
||||
convert_interval: bool = ...,
|
||||
convert_to_nullable_integer: bool = ...,
|
||||
dtype_if_all_nat: DtypeObj | None = ...,
|
||||
) -> ArrayLike: ...
|
||||
@overload
|
||||
def maybe_convert_objects(
|
||||
objects: np.ndarray, # np.ndarray[object]
|
||||
*,
|
||||
try_float: bool = ...,
|
||||
safe: bool = ...,
|
||||
convert_datetime: bool = ...,
|
||||
convert_timedelta: bool = ...,
|
||||
convert_period: bool = ...,
|
||||
convert_interval: bool = ...,
|
||||
convert_to_nullable_integer: bool = ...,
|
||||
dtype_if_all_nat: DtypeObj | None = ...,
|
||||
) -> ArrayLike: ...
|
||||
@overload
|
||||
def maybe_convert_numeric(
|
||||
values: np.ndarray, # np.ndarray[object]
|
||||
na_values: set,
|
||||
convert_empty: bool = True,
|
||||
coerce_numeric: bool = False,
|
||||
convert_to_masked_nullable: Literal[False] = ...,
|
||||
) -> tuple[np.ndarray, None]: ...
|
||||
@overload
|
||||
def maybe_convert_numeric(
    values: np.ndarray,  # np.ndarray[object]
    na_values: set,
    convert_empty: bool = True,
    coerce_numeric: bool = False,
    *,
    convert_to_masked_nullable: Literal[True],
) -> tuple[np.ndarray, np.ndarray]: ...

# TODO: restrict `arr`?
def ensure_string_array(
    arr,
    na_value: object = np.nan,
    convert_na_value: bool = True,
    copy: bool = True,
    skipna: bool = True,
) -> np.ndarray: ...  # np.ndarray[object]
def infer_datetimelike_array(
    arr: np.ndarray,  # np.ndarray[object]
) -> tuple[str, bool]: ...
def astype_intsafe(
    arr: np.ndarray,  # np.ndarray[object]
    new_dtype: np.dtype,
) -> np.ndarray: ...
def fast_zip(ndarrays: list) -> np.ndarray: ...  # np.ndarray[object]

# TODO: can we be more specific about rows?
def to_object_array_tuples(rows: object) -> ndarray_obj_2d: ...
def tuples_to_object_array(
    tuples: np.ndarray,  # np.ndarray[object]
) -> ndarray_obj_2d: ...

# TODO: can we be more specific about rows?
def to_object_array(rows: object, min_width: int = 0) -> ndarray_obj_2d: ...
def dicts_to_array(dicts: list, columns: list) -> ndarray_obj_2d: ...
def maybe_booleans_to_slice(
    mask: np.ndarray,  # ndarray[uint8_t]
) -> slice | np.ndarray: ...  # np.ndarray[np.uint8]
def maybe_indices_to_slice(
    indices: np.ndarray,  # np.ndarray[np.intp]
    max_len: int,
) -> slice | np.ndarray: ...  # np.ndarray[np.uint8]
def is_all_arraylike(obj: list) -> bool: ...

# -----------------------------------------------------------------
# Functions which in reality take memoryviews

def memory_usage_of_objects(arr: np.ndarray) -> int: ...  # object[:]  # np.int64
def map_infer_mask(
    arr: np.ndarray,
    f: Callable[[Any], Any],
    mask: np.ndarray,  # const uint8_t[:]
    convert: bool = ...,
    na_value: Any = ...,
    dtype: np.dtype = ...,
) -> np.ndarray: ...
def indices_fast(
    index: np.ndarray,  # ndarray[intp_t]
    labels: np.ndarray,  # const int64_t[:]
    keys: list,
    sorted_labels: list[np.ndarray],  # list[ndarray[np.int64]]
) -> dict: ...
def generate_slices(
    labels: np.ndarray, ngroups: int  # const intp_t[:]
) -> tuple[np.ndarray, np.ndarray]: ...  # np.ndarray[np.int64]  # np.ndarray[np.int64]
def count_level_2d(
    mask: np.ndarray,  # ndarray[uint8_t, ndim=2, cast=True]
    labels: np.ndarray,  # const intp_t[:]
    max_bin: int,
    axis: int,
) -> np.ndarray: ...  # np.ndarray[np.int64, ndim=2]
def get_level_sorter(
    label: np.ndarray,  # const int64_t[:]
    starts: np.ndarray,  # const intp_t[:]
) -> np.ndarray: ...  # np.ndarray[np.intp, ndim=1]
def generate_bins_dt64(
    values: np.ndarray,  # np.ndarray[np.int64]
    binner: np.ndarray,  # const int64_t[:]
    closed: object = "left",
    hasnans: bool = False,
) -> np.ndarray: ...  # np.ndarray[np.int64, ndim=1]
def array_equivalent_object(
    left: np.ndarray,  # object[:]
    right: np.ndarray,  # object[:]
) -> bool: ...
def has_infs_f8(arr: np.ndarray) -> bool: ...  # const float64_t[:]
def has_infs_f4(arr: np.ndarray) -> bool: ...  # const float32_t[:]
def get_reverse_indexer(
    indexer: np.ndarray,  # const intp_t[:]
    length: int,
) -> np.ndarray: ...  # np.ndarray[np.intp]
def is_bool_list(obj: list) -> bool: ...
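These stubs only declare signatures; the behavior lives in the compiled lib.pyx. As a quick illustration of one of them, a hedged sketch of maybe_indices_to_slice (internal API; the exact slice form assumes a standard pandas build):

import numpy as np
from pandas._libs import lib

# A monotonic, evenly spaced indexer can be collapsed to a slice...
lib.maybe_indices_to_slice(np.arange(3, 8, dtype=np.intp), 10)      # e.g. slice(3, 8, 1)
# ...while an irregular indexer is handed back as an ndarray.
lib.maybe_indices_to_slice(np.array([0, 2, 5], dtype=np.intp), 10)  # array([0, 2, 5])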
3039 .venv/lib/python3.7/site-packages/pandas/_libs/lib.pyx Normal file
File diff suppressed because it is too large
Binary file not shown.
20 .venv/lib/python3.7/site-packages/pandas/_libs/missing.pxd Normal file
@@ -0,0 +1,20 @@
from numpy cimport (
    ndarray,
    uint8_t,
)


cpdef bint is_matching_na(object left, object right, bint nan_matches_none=*)

cpdef bint checknull(object val)
cpdef bint checknull_old(object val)
cpdef ndarray[uint8_t] isnaobj(ndarray arr)

cdef bint is_null_datetime64(v)
cdef bint is_null_timedelta64(v)
cdef bint checknull_with_nat_and_na(object obj)

cdef class C_NAType:
    pass

cdef C_NAType C_NA
571 .venv/lib/python3.7/site-packages/pandas/_libs/missing.pyx Normal file
@@ -0,0 +1,571 @@
from decimal import Decimal
import numbers
from sys import maxsize

import cython
from cython import Py_ssize_t
import numpy as np

cimport numpy as cnp
from numpy cimport (
    float64_t,
    int64_t,
    ndarray,
    uint8_t,
)

cnp.import_array()

from pandas._libs cimport util
from pandas._libs.tslibs.nattype cimport (
    c_NaT as NaT,
    checknull_with_nat,
    is_null_datetimelike,
)
from pandas._libs.tslibs.np_datetime cimport (
    get_datetime64_value,
    get_timedelta64_value,
)

from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op

cdef:
    float64_t INF = <float64_t>np.inf
    float64_t NEGINF = -INF

    int64_t NPY_NAT = util.get_nat()

    bint is_32bit = maxsize <= 2 ** 32

    type cDecimal = Decimal  # for faster isinstance checks


cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False):
    """
    Check if two scalars are both NA of matching types.

    Parameters
    ----------
    left : Any
    right : Any
    nan_matches_none : bool, default False
        For backwards compatibility, consider NaN as matching None.

    Returns
    -------
    bool
    """
    if left is None:
        if nan_matches_none and util.is_nan(right):
            return True
        return right is None
    elif left is C_NA:
        return right is C_NA
    elif left is NaT:
        return right is NaT
    elif util.is_float_object(left):
        if nan_matches_none and right is None and util.is_nan(left):
            return True
        return (
            util.is_nan(left)
            and util.is_float_object(right)
            and util.is_nan(right)
        )
    elif util.is_complex_object(left):
        return (
            util.is_nan(left)
            and util.is_complex_object(right)
            and util.is_nan(right)
        )
    elif util.is_datetime64_object(left):
        return (
            get_datetime64_value(left) == NPY_NAT
            and util.is_datetime64_object(right)
            and get_datetime64_value(right) == NPY_NAT
        )
    elif util.is_timedelta64_object(left):
        return (
            get_timedelta64_value(left) == NPY_NAT
            and util.is_timedelta64_object(right)
            and get_timedelta64_value(right) == NPY_NAT
        )
    elif is_decimal_na(left):
        return is_decimal_na(right)
    return False

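Once compiled, is_matching_na gives pandas a strict notion of "same kind of missing". A usage sketch (internal module, so the import path is an implementation detail, not public API):

import numpy as np
from pandas._libs.missing import is_matching_na  # internal API

is_matching_na(np.nan, np.nan)                               # True: both float NaN
is_matching_na(np.nan, None)                                 # False: different NA kinds
is_matching_na(np.nan, None, nan_matches_none=True)          # True: legacy behavior
is_matching_na(np.datetime64("NaT"), np.timedelta64("NaT"))  # False: dtypes differ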
cpdef bint checknull(object val):
    """
    Return a boolean describing whether the input is NA-like, defined here
    as any of:
     - None
     - nan
     - NaT
     - np.datetime64 representation of NaT
     - np.timedelta64 representation of NaT
     - NA
     - Decimal("NaN")

    Parameters
    ----------
    val : object

    Returns
    -------
    bool

    Notes
    -----
    The difference between `checknull` and `checknull_old` is that `checknull`
    does *not* consider INF or NEGINF to be NA.
    """
    return (
        val is C_NA
        or is_null_datetimelike(val, inat_is_null=False)
        or is_decimal_na(val)
    )


cdef inline bint is_decimal_na(object val):
    """
    Is this a decimal.Decimal object with value Decimal("NaN")?
    """
    return isinstance(val, cDecimal) and val != val


cpdef bint checknull_old(object val):
    """
    Return a boolean describing whether the input is NA-like, defined here
    as any of:
     - None
     - nan
     - INF
     - NEGINF
     - NaT
     - np.datetime64 representation of NaT
     - np.timedelta64 representation of NaT
     - NA
     - Decimal("NaN")

    Parameters
    ----------
    val : object

    Returns
    -------
    result : bool

    Notes
    -----
    The difference between `checknull` and `checknull_old` is that `checknull`
    does *not* consider INF or NEGINF to be NA.
    """
    if checknull(val):
        return True
    elif util.is_float_object(val) or util.is_complex_object(val):
        return val == INF or val == NEGINF
    return False

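A short sketch of the behavioral difference spelled out in the Notes sections above (internal import path, shown for illustration):

import numpy as np
from pandas._libs.missing import checknull, checknull_old  # internal API

checknull(np.nan)      # True
checknull(np.inf)      # False: INF is a real value here
checknull_old(np.inf)  # True: the legacy variant also treats INF/NEGINF as NA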
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef ndarray[uint8_t] isnaobj(ndarray arr):
    """
    Return boolean mask denoting which elements of a 1-D array are na-like,
    according to the criteria defined in `checknull`:
     - None
     - nan
     - NaT
     - np.datetime64 representation of NaT
     - np.timedelta64 representation of NaT
     - NA
     - Decimal("NaN")

    Parameters
    ----------
    arr : ndarray

    Returns
    -------
    result : ndarray (dtype=np.bool_)
    """
    cdef:
        Py_ssize_t i, n
        object val
        ndarray[uint8_t] result

    assert arr.ndim == 1, "'arr' must be 1-D."

    n = len(arr)
    result = np.empty(n, dtype=np.uint8)
    for i in range(n):
        val = arr[i]
        result[i] = checknull(val)
    return result.view(np.bool_)


@cython.wraparound(False)
@cython.boundscheck(False)
def isnaobj_old(arr: ndarray) -> ndarray:
    """
    Return boolean mask denoting which elements of a 1-D array are na-like,
    defined as being any of:
     - None
     - nan
     - INF
     - NEGINF
     - NaT
     - NA
     - Decimal("NaN")

    Parameters
    ----------
    arr : ndarray

    Returns
    -------
    result : ndarray (dtype=np.bool_)
    """
    cdef:
        Py_ssize_t i, n
        object val
        ndarray[uint8_t] result

    assert arr.ndim == 1, "'arr' must be 1-D."

    n = len(arr)
    result = np.zeros(n, dtype=np.uint8)
    for i in range(n):
        val = arr[i]
        result[i] = (
            checknull(val)
            or util.is_float_object(val) and (val == INF or val == NEGINF)
        )
    return result.view(np.bool_)


@cython.wraparound(False)
@cython.boundscheck(False)
def isnaobj2d(arr: ndarray) -> ndarray:
    """
    Return boolean mask denoting which elements of a 2-D array are na-like,
    according to the criteria defined in `checknull`:
     - None
     - nan
     - NaT
     - np.datetime64 representation of NaT
     - np.timedelta64 representation of NaT
     - NA
     - Decimal("NaN")

    Parameters
    ----------
    arr : ndarray

    Returns
    -------
    result : ndarray (dtype=np.bool_)

    Notes
    -----
    The difference between `isnaobj2d` and `isnaobj2d_old` is that `isnaobj2d`
    does *not* consider INF or NEGINF to be NA.
    """
    cdef:
        Py_ssize_t i, j, n, m
        object val
        ndarray[uint8_t, ndim=2] result

    assert arr.ndim == 2, "'arr' must be 2-D."

    n, m = (<object>arr).shape
    result = np.zeros((n, m), dtype=np.uint8)
    for i in range(n):
        for j in range(m):
            val = arr[i, j]
            if checknull(val):
                result[i, j] = 1
    return result.view(np.bool_)


@cython.wraparound(False)
@cython.boundscheck(False)
def isnaobj2d_old(arr: ndarray) -> ndarray:
    """
    Return boolean mask denoting which elements of a 2-D array are na-like,
    according to the criteria defined in `checknull_old`:
     - None
     - nan
     - INF
     - NEGINF
     - NaT
     - np.datetime64 representation of NaT
     - np.timedelta64 representation of NaT
     - NA
     - Decimal("NaN")

    Parameters
    ----------
    arr : ndarray

    Returns
    -------
    ndarray (dtype=np.bool_)

    Notes
    -----
    The difference between `isnaobj2d` and `isnaobj2d_old` is that `isnaobj2d`
    does *not* consider INF or NEGINF to be NA.
    """
    cdef:
        Py_ssize_t i, j, n, m
        object val
        ndarray[uint8_t, ndim=2] result

    assert arr.ndim == 2, "'arr' must be 2-D."

    n, m = (<object>arr).shape
    result = np.zeros((n, m), dtype=np.uint8)
    for i in range(n):
        for j in range(m):
            val = arr[i, j]
            if checknull_old(val):
                result[i, j] = 1
    return result.view(np.bool_)

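These masks are what the public pd.isna ultimately returns for object-dtype arrays; a sketch of the observable behavior:

import numpy as np
import pandas as pd

arr = np.array([1.0, None, np.nan, pd.NaT, pd.NA], dtype=object)
pd.isna(arr)  # array([False,  True,  True,  True,  True])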
def isposinf_scalar(val: object) -> bool:
    return util.is_float_object(val) and val == INF


def isneginf_scalar(val: object) -> bool:
    return util.is_float_object(val) and val == NEGINF


cdef inline bint is_null_datetime64(v):
    # determine if we have a null for a datetime (or integer versions),
    # excluding np.timedelta64('nat')
    if checknull_with_nat(v):
        return True
    elif util.is_datetime64_object(v):
        return get_datetime64_value(v) == NPY_NAT
    return False


cdef inline bint is_null_timedelta64(v):
    # determine if we have a null for a timedelta (or integer versions),
    # excluding np.datetime64('nat')
    if checknull_with_nat(v):
        return True
    elif util.is_timedelta64_object(v):
        return get_timedelta64_value(v) == NPY_NAT
    return False


cdef bint checknull_with_nat_and_na(object obj):
    # See GH#32214
    return checknull_with_nat(obj) or obj is C_NA


# -----------------------------------------------------------------------------
# Implementation of NA singleton


def _create_binary_propagating_op(name, is_divmod=False):

    def method(self, other):
        if (other is C_NA or isinstance(other, str)
                or isinstance(other, (numbers.Number, np.bool_))
                or isinstance(other, np.ndarray) and not other.shape):
            # Need the other.shape clause to handle NumPy scalars,
            # since we do a setitem on `out` below, which
            # won't work for NumPy scalars.
            if is_divmod:
                return NA, NA
            else:
                return NA

        elif isinstance(other, np.ndarray):
            out = np.empty(other.shape, dtype=object)
            out[:] = NA

            if is_divmod:
                return out, out.copy()
            else:
                return out

        return NotImplemented

    method.__name__ = name
    return method


def _create_unary_propagating_op(name: str):
    def method(self):
        return NA

    method.__name__ = name
    return method


cdef class C_NAType:
    pass

class NAType(C_NAType):
    """
    NA ("not available") missing value indicator.

    .. warning::

       Experimental: the behaviour of NA can still change without warning.

    .. versionadded:: 1.0.0

    The NA singleton is a missing value indicator defined by pandas. It is
    used in certain new extension dtypes (currently the "string" dtype).
    """

    _instance = None

    def __new__(cls, *args, **kwargs):
        if NAType._instance is None:
            NAType._instance = C_NAType.__new__(cls, *args, **kwargs)
        return NAType._instance

    def __repr__(self) -> str:
        return "<NA>"

    def __format__(self, format_spec) -> str:
        try:
            return self.__repr__().__format__(format_spec)
        except ValueError:
            return self.__repr__()

    def __bool__(self):
        raise TypeError("boolean value of NA is ambiguous")

    def __hash__(self):
        # GH 30013: Ensure hash is large enough to avoid hash collisions with integers
        exponent = 31 if is_32bit else 61
        return 2 ** exponent - 1

    def __reduce__(self):
        return "NA"

    # Binary arithmetic and comparison ops -> propagate

    __add__ = _create_binary_propagating_op("__add__")
    __radd__ = _create_binary_propagating_op("__radd__")
    __sub__ = _create_binary_propagating_op("__sub__")
    __rsub__ = _create_binary_propagating_op("__rsub__")
    __mul__ = _create_binary_propagating_op("__mul__")
    __rmul__ = _create_binary_propagating_op("__rmul__")
    __matmul__ = _create_binary_propagating_op("__matmul__")
    __rmatmul__ = _create_binary_propagating_op("__rmatmul__")
    __truediv__ = _create_binary_propagating_op("__truediv__")
    __rtruediv__ = _create_binary_propagating_op("__rtruediv__")
    __floordiv__ = _create_binary_propagating_op("__floordiv__")
    __rfloordiv__ = _create_binary_propagating_op("__rfloordiv__")
    __mod__ = _create_binary_propagating_op("__mod__")
    __rmod__ = _create_binary_propagating_op("__rmod__")
    __divmod__ = _create_binary_propagating_op("__divmod__", is_divmod=True)
    __rdivmod__ = _create_binary_propagating_op("__rdivmod__", is_divmod=True)
    # __lshift__ and __rshift__ are not implemented

    __eq__ = _create_binary_propagating_op("__eq__")
    __ne__ = _create_binary_propagating_op("__ne__")
    __le__ = _create_binary_propagating_op("__le__")
    __lt__ = _create_binary_propagating_op("__lt__")
    __gt__ = _create_binary_propagating_op("__gt__")
    __ge__ = _create_binary_propagating_op("__ge__")

    # Unary ops

    __neg__ = _create_unary_propagating_op("__neg__")
    __pos__ = _create_unary_propagating_op("__pos__")
    __abs__ = _create_unary_propagating_op("__abs__")
    __invert__ = _create_unary_propagating_op("__invert__")

    # pow has special-cased behavior: NA ** 0 and 1 ** NA are defined
    def __pow__(self, other):
        if other is C_NA:
            return NA
        elif isinstance(other, (numbers.Number, np.bool_)):
            if other == 0:
                # returning positive is correct for +/- 0.
                return type(other)(1)
            else:
                return NA
        elif isinstance(other, np.ndarray):
            return np.where(other == 0, other.dtype.type(1), NA)

        return NotImplemented

    def __rpow__(self, other):
        if other is C_NA:
            return NA
        elif isinstance(other, (numbers.Number, np.bool_)):
            if other == 1:
                return other
            else:
                return NA
        elif isinstance(other, np.ndarray):
            return np.where(other == 1, other, NA)
        return NotImplemented
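    # Illustration of the two special cases (pd.NA is this singleton):
    #   pd.NA ** 0  -> 1     anything to the zeroth power is 1
    #   1 ** pd.NA  -> 1     one to any power is 1
    #   pd.NA ** 2  -> <NA>  otherwise NA propagates as usual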
    # Logical ops using Kleene logic

    def __and__(self, other):
        if other is False:
            return False
        elif other is True or other is C_NA:
            return NA
        return NotImplemented

    __rand__ = __and__

    def __or__(self, other):
        if other is True:
            return True
        elif other is False or other is C_NA:
            return NA
        return NotImplemented

    __ror__ = __or__

    def __xor__(self, other):
        if other is False or other is True or other is C_NA:
            return NA
        return NotImplemented

    __rxor__ = __xor__
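    # Kleene truth-table sketch: the unknown operand only matters when it
    # could change the answer.
    #   pd.NA & False -> False    pd.NA & True  -> <NA>
    #   pd.NA | True  -> True     pd.NA | False -> <NA>
    #   pd.NA ^ True  -> <NA>     XOR always depends on both operands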
    __array_priority__ = 1000
    _HANDLED_TYPES = (np.ndarray, numbers.Number, str, np.bool_)

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        types = self._HANDLED_TYPES + (NAType,)
        for x in inputs:
            if not isinstance(x, types):
                return NotImplemented

        if method != "__call__":
            raise ValueError(f"ufunc method '{method}' not supported for NA")
        result = maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is NotImplemented:
            # For a NumPy ufunc that's not a binop, like np.logaddexp
            index = [i for i, x in enumerate(inputs) if x is NA][0]
            result = np.broadcast_arrays(*inputs)[index]
            if result.ndim == 0:
                result = result.item()
            if ufunc.nout > 1:
                result = (NA,) * ufunc.nout

        return result


C_NA = NAType()  # C-visible
NA = C_NA        # Python-visible
Binary file not shown.
47 .venv/lib/python3.7/site-packages/pandas/_libs/ops.pyi Normal file
@@ -0,0 +1,47 @@
from typing import (
    Any,
    Callable,
    Literal,
    overload,
)

import numpy as np

_BinOp = Callable[[Any, Any], Any]
_BoolOp = Callable[[Any, Any], bool]

def scalar_compare(
    values: np.ndarray,  # object[:]
    val: object,
    op: _BoolOp,  # {operator.eq, operator.ne, ...}
) -> np.ndarray: ...  # np.ndarray[bool]
def vec_compare(
    left: np.ndarray,  # np.ndarray[object]
    right: np.ndarray,  # np.ndarray[object]
    op: _BoolOp,  # {operator.eq, operator.ne, ...}
) -> np.ndarray: ...  # np.ndarray[bool]
def scalar_binop(
    values: np.ndarray,  # object[:]
    val: object,
    op: _BinOp,  # binary operator
) -> np.ndarray: ...
def vec_binop(
    left: np.ndarray,  # object[:]
    right: np.ndarray,  # object[:]
    op: _BinOp,  # binary operator
) -> np.ndarray: ...
@overload
def maybe_convert_bool(
    arr: np.ndarray,  # np.ndarray[object]
    true_values=...,
    false_values=...,
    convert_to_masked_nullable: Literal[False] = ...,
) -> tuple[np.ndarray, None]: ...
@overload
def maybe_convert_bool(
    arr: np.ndarray,  # np.ndarray[object]
    true_values=...,
    false_values=...,
    *,
    convert_to_masked_nullable: Literal[True],
) -> tuple[np.ndarray, np.ndarray]: ...
310 .venv/lib/python3.7/site-packages/pandas/_libs/ops.pyx Normal file
@@ -0,0 +1,310 @@
import operator

from cpython.object cimport (
    Py_EQ,
    Py_GE,
    Py_GT,
    Py_LE,
    Py_LT,
    Py_NE,
    PyObject_RichCompareBool,
)

import cython
from cython import Py_ssize_t
import numpy as np

from numpy cimport (
    import_array,
    ndarray,
    uint8_t,
)

import_array()


from pandas._libs.missing cimport checknull
from pandas._libs.util cimport is_nan


@cython.wraparound(False)
@cython.boundscheck(False)
def scalar_compare(object[:] values, object val, object op) -> ndarray:
    """
    Compare each element of `values` array with the scalar `val`, with
    the comparison operation described by `op`.

    Parameters
    ----------
    values : ndarray[object]
    val : object
    op : {operator.eq, operator.ne,
          operator.le, operator.lt,
          operator.ge, operator.gt}

    Returns
    -------
    result : ndarray[bool]
    """
    cdef:
        Py_ssize_t i, n = len(values)
        ndarray[uint8_t, cast=True] result
        bint isnull_val
        int flag
        object x

    if op is operator.lt:
        flag = Py_LT
    elif op is operator.le:
        flag = Py_LE
    elif op is operator.gt:
        flag = Py_GT
    elif op is operator.ge:
        flag = Py_GE
    elif op is operator.eq:
        flag = Py_EQ
    elif op is operator.ne:
        flag = Py_NE
    else:
        raise ValueError('Unrecognized operator')

    result = np.empty(n, dtype=bool).view(np.uint8)
    isnull_val = checknull(val)

    if flag == Py_NE:
        for i in range(n):
            x = values[i]
            if checknull(x):
                result[i] = True
            elif isnull_val:
                result[i] = True
            else:
                try:
                    result[i] = PyObject_RichCompareBool(x, val, flag)
                except TypeError:
                    result[i] = True
    elif flag == Py_EQ:
        for i in range(n):
            x = values[i]
            if checknull(x):
                result[i] = False
            elif isnull_val:
                result[i] = False
            else:
                try:
                    result[i] = PyObject_RichCompareBool(x, val, flag)
                except TypeError:
                    result[i] = False

    else:
        for i in range(n):
            x = values[i]
            if checknull(x):
                result[i] = False
            elif isnull_val:
                result[i] = False
            else:
                result[i] = PyObject_RichCompareBool(x, val, flag)

    return result.view(bool)

@cython.wraparound(False)
@cython.boundscheck(False)
def vec_compare(ndarray[object] left, ndarray[object] right, object op) -> ndarray:
    """
    Compare the elements of `left` with the elements of `right` pointwise,
    with the comparison operation described by `op`.

    Parameters
    ----------
    left : ndarray[object]
    right : ndarray[object]
    op : {operator.eq, operator.ne,
          operator.le, operator.lt,
          operator.ge, operator.gt}

    Returns
    -------
    result : ndarray[bool]
    """
    cdef:
        Py_ssize_t i, n = len(left)
        ndarray[uint8_t, cast=True] result
        int flag

    if n != <Py_ssize_t>len(right):
        raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}')

    if op is operator.lt:
        flag = Py_LT
    elif op is operator.le:
        flag = Py_LE
    elif op is operator.gt:
        flag = Py_GT
    elif op is operator.ge:
        flag = Py_GE
    elif op is operator.eq:
        flag = Py_EQ
    elif op is operator.ne:
        flag = Py_NE
    else:
        raise ValueError('Unrecognized operator')

    result = np.empty(n, dtype=bool).view(np.uint8)

    if flag == Py_NE:
        for i in range(n):
            x = left[i]
            y = right[i]

            if checknull(x) or checknull(y):
                result[i] = True
            else:
                result[i] = PyObject_RichCompareBool(x, y, flag)
    else:
        for i in range(n):
            x = left[i]
            y = right[i]

            if checknull(x) or checknull(y):
                result[i] = False
            else:
                result[i] = PyObject_RichCompareBool(x, y, flag)

    return result.view(bool)

@cython.wraparound(False)
@cython.boundscheck(False)
def scalar_binop(object[:] values, object val, object op) -> ndarray:
    """
    Apply the given binary operator `op` between each element of the array
    `values` and the scalar `val`.

    Parameters
    ----------
    values : ndarray[object]
    val : object
    op : binary operator

    Returns
    -------
    result : ndarray[object]
    """
    cdef:
        Py_ssize_t i, n = len(values)
        object[:] result
        object x

    result = np.empty(n, dtype=object)
    if val is None or is_nan(val):
        result[:] = val
        return result.base  # `.base` to access underlying np.ndarray

    for i in range(n):
        x = values[i]
        if x is None or is_nan(x):
            result[i] = x
        else:
            result[i] = op(x, val)

    return maybe_convert_bool(result.base)[0]

@cython.wraparound(False)
@cython.boundscheck(False)
def vec_binop(object[:] left, object[:] right, object op) -> ndarray:
    """
    Apply the given binary operator `op` pointwise to the elements of
    arrays `left` and `right`.

    Parameters
    ----------
    left : ndarray[object]
    right : ndarray[object]
    op : binary operator

    Returns
    -------
    result : ndarray[object]
    """
    cdef:
        Py_ssize_t i, n = len(left)
        object[:] result

    if n != <Py_ssize_t>len(right):
        raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}')

    result = np.empty(n, dtype=object)

    for i in range(n):
        x = left[i]
        y = right[i]
        try:
            result[i] = op(x, y)
        except TypeError:
            if x is None or is_nan(x):
                result[i] = x
            elif y is None or is_nan(y):
                result[i] = y
            else:
                raise

    return maybe_convert_bool(result.base)[0]  # `.base` to access np.ndarray

def maybe_convert_bool(ndarray[object] arr,
                       true_values=None,
                       false_values=None,
                       convert_to_masked_nullable=False
                       ) -> tuple[np.ndarray, np.ndarray | None]:
    cdef:
        Py_ssize_t i, n
        ndarray[uint8_t] result
        ndarray[uint8_t] mask
        object val
        set true_vals, false_vals
        bint has_na = False

    n = len(arr)
    result = np.empty(n, dtype=np.uint8)
    mask = np.zeros(n, dtype=np.uint8)
    # the defaults
    true_vals = {'True', 'TRUE', 'true'}
    false_vals = {'False', 'FALSE', 'false'}

    if true_values is not None:
        true_vals = true_vals | set(true_values)

    if false_values is not None:
        false_vals = false_vals | set(false_values)

    for i in range(n):
        val = arr[i]

        if isinstance(val, bool):
            if val is True:
                result[i] = 1
            else:
                result[i] = 0
        elif val in true_vals:
            result[i] = 1
        elif val in false_vals:
            result[i] = 0
        elif is_nan(val):
            mask[i] = 1
            result[i] = 0  # Value here doesn't matter, will be replaced w/ nan
            has_na = True
        else:
            return (arr, None)

    if has_na:
        if convert_to_masked_nullable:
            return (result.view(np.bool_), mask.view(np.bool_))
        else:
            arr = result.view(np.bool_).astype(object)
            np.putmask(arr, mask, np.nan)
            return (arr, None)
    else:
        return (result.view(np.bool_), None)
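A sketch of maybe_convert_bool at work (internal import path; outputs are illustrative):

import numpy as np
from pandas._libs.ops import maybe_convert_bool  # internal API

maybe_convert_bool(np.array(["True", "false", True], dtype=object))
# (array([ True, False,  True]), None)

# Any unrecognized value aborts the conversion; the input comes back unchanged:
maybe_convert_bool(np.array(["True", "maybe"], dtype=object))
# (array(['True', 'maybe'], dtype=object), None)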
Binary file not shown.
@@ -0,0 +1,5 @@
import numpy as np

def maybe_dispatch_ufunc_to_dunder_op(
    self, ufunc: np.ufunc, method: str, *inputs, **kwargs
): ...
@@ -0,0 +1,94 @@
DISPATCHED_UFUNCS = {
    "add",
    "sub",
    "mul",
    "pow",
    "mod",
    "floordiv",
    "truediv",
    "divmod",
    "eq",
    "ne",
    "lt",
    "gt",
    "le",
    "ge",
    "remainder",
    "matmul",
    "or",
    "xor",
    "and",
}
UFUNC_ALIASES = {
    "subtract": "sub",
    "multiply": "mul",
    "floor_divide": "floordiv",
    "true_divide": "truediv",
    "power": "pow",
    "remainder": "mod",
    "divide": "truediv",
    "equal": "eq",
    "not_equal": "ne",
    "less": "lt",
    "less_equal": "le",
    "greater": "gt",
    "greater_equal": "ge",
    "bitwise_or": "or",
    "bitwise_and": "and",
    "bitwise_xor": "xor",
}

# For op(., Array) -> Array.__r{op}__
REVERSED_NAMES = {
    "lt": "__gt__",
    "le": "__ge__",
    "gt": "__lt__",
    "ge": "__le__",
    "eq": "__eq__",
    "ne": "__ne__",
}

def maybe_dispatch_ufunc_to_dunder_op(
    object self, object ufunc, str method, *inputs, **kwargs
):
    """
    Dispatch a ufunc to the equivalent dunder method.

    Parameters
    ----------
    self : ArrayLike
        The array whose dunder method we dispatch to
    ufunc : Callable
        A NumPy ufunc
    method : {'reduce', 'accumulate', 'reduceat', 'outer', 'at', '__call__'}
    inputs : ArrayLike
        The input arrays.
    kwargs : Any
        The additional keyword arguments, e.g. ``out``.

    Returns
    -------
    result : Any
        The result of applying the ufunc
    """
    # DISPATCHED_UFUNCS holds the ufuncs we dispatch to the dunder op on

    op_name = ufunc.__name__
    op_name = UFUNC_ALIASES.get(op_name, op_name)

    def not_implemented(*args, **kwargs):
        return NotImplemented

    if (method == "__call__"
            and op_name in DISPATCHED_UFUNCS
            and kwargs.get("out") is None):
        if isinstance(inputs[0], type(self)):
            name = f"__{op_name}__"
            return getattr(self, name, not_implemented)(inputs[1])
        else:
            name = REVERSED_NAMES.get(op_name, f"__r{op_name}__")
            result = getattr(self, name, not_implemented)(inputs[0])
            return result
    else:
        return NotImplemented
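A minimal sketch of the dispatch path using a hypothetical Toy class (internal import path; only __add__ is wired up here):

import numpy as np
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op  # internal API

class Toy:
    def __init__(self, x):
        self.x = x

    def __add__(self, other):
        return Toy(self.x + other)

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        # Delegate e.g. np.add(toy, 2) to the matching dunder method.
        return maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs)

np.add(Toy(1), 2).x  # 3: "add" is in DISPATCHED_UFUNCS, so Toy.__add__ runs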
Binary file not shown.
71 .venv/lib/python3.7/site-packages/pandas/_libs/parsers.pyi Normal file
@@ -0,0 +1,71 @@
from typing import (
    Hashable,
    Literal,
)

import numpy as np

from pandas._typing import (
    ArrayLike,
    Dtype,
)

STR_NA_VALUES: set[str]

def sanitize_objects(
    values: np.ndarray,  # ndarray[object]
    na_values: set,
    convert_empty: bool = ...,
) -> int: ...

class TextReader:
    unnamed_cols: set[str]
    table_width: int  # int64_t
    leading_cols: int  # int64_t
    header: list[list[int]]  # non-negative integers
    def __init__(
        self,
        source,
        delimiter: bytes | str = ...,  # single-character only
        header=...,
        header_start: int = ...,  # int64_t
        header_end: int = ...,  # uint64_t
        index_col=...,
        names=...,
        tokenize_chunksize: int = ...,  # int64_t
        delim_whitespace: bool = ...,
        converters=...,
        skipinitialspace: bool = ...,
        escapechar: bytes | str | None = ...,  # single-character only
        doublequote: bool = ...,
        quotechar: str | bytes | None = ...,  # at most 1 character
        quoting: int = ...,
        lineterminator: bytes | str | None = ...,  # at most 1 character
        comment=...,
        decimal: bytes | str = ...,  # single-character only
        thousands: bytes | str | None = ...,  # single-character only
        dtype: Dtype | dict[Hashable, Dtype] = ...,
        usecols=...,
        error_bad_lines: bool = ...,
        warn_bad_lines: bool = ...,
        na_filter: bool = ...,
        na_values=...,
        na_fvalues=...,
        keep_default_na: bool = ...,
        true_values=...,
        false_values=...,
        allow_leading_cols: bool = ...,
        skiprows=...,
        skipfooter: int = ...,  # int64_t
        verbose: bool = ...,
        mangle_dupe_cols: bool = ...,
        float_precision: Literal["round_trip", "legacy", "high"] | None = ...,
        skip_blank_lines: bool = ...,
        encoding_errors: bytes | str = ...,
    ): ...
    def set_error_bad_lines(self, status: int) -> None: ...
    def set_noconvert(self, i: int) -> None: ...
    def remove_noconvert(self, i: int) -> None: ...
    def close(self) -> None: ...
    def read(self, rows: int | None = ...) -> dict[int, ArrayLike]: ...
    def read_low_memory(self, rows: int | None) -> list[dict[int, ArrayLike]]: ...
2026 .venv/lib/python3.7/site-packages/pandas/_libs/parsers.pyx Normal file
File diff suppressed because it is too large
Binary file not shown.
@@ -0,0 +1,70 @@
from cython import Py_ssize_t

from cpython.dict cimport (
    PyDict_Contains,
    PyDict_GetItem,
    PyDict_SetItem,
)


cdef class CachedProperty:

    cdef readonly:
        object func, name, __doc__

    def __init__(self, func):
        self.func = func
        self.name = func.__name__
        self.__doc__ = getattr(func, '__doc__', None)

    def __get__(self, obj, typ):
        if obj is None:
            # accessed on the class, not the instance
            return self

        # Get the cache or set a default one if needed
        cache = getattr(obj, '_cache', None)
        if cache is None:
            try:
                cache = obj._cache = {}
            except AttributeError:
                return self

        if PyDict_Contains(cache, self.name):
            # not necessary to Py_INCREF
            val = <object>PyDict_GetItem(cache, self.name)
        else:
            val = self.func(obj)
            PyDict_SetItem(cache, self.name, val)
        return val

    def __set__(self, obj, value):
        raise AttributeError("Can't set attribute")


cache_readonly = CachedProperty


cdef class AxisProperty:

    cdef readonly:
        Py_ssize_t axis
        object __doc__

    def __init__(self, axis=0, doc=""):
        self.axis = axis
        self.__doc__ = doc

    def __get__(self, obj, type):
        cdef:
            list axes

        if obj is None:
            # Only instances have _mgr, not classes
            return self
        else:
            axes = obj._mgr.axes
        return axes[self.axis]

    def __set__(self, obj, value):
        obj._set_axis(self.axis, value)
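A sketch of the descriptor in use via its cache_readonly alias (internal API):

from pandas._libs.properties import cache_readonly  # internal API

class Widget:
    def __init__(self):
        self._cache = {}  # CachedProperty stores computed values here

    @cache_readonly
    def area(self):
        print("computing once")
        return 42

w = Widget()
w.area  # prints "computing once", returns 42
w.area  # returns 42 straight from w._cache, no recomputation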
Binary file not shown.
500 .venv/lib/python3.7/site-packages/pandas/_libs/reduction.pyx Normal file
@@ -0,0 +1,500 @@

from libc.stdlib cimport (
    free,
    malloc,
)

import numpy as np

cimport numpy as cnp
from numpy cimport (
    int64_t,
    intp_t,
    ndarray,
)

cnp.import_array()

from pandas._libs.util cimport (
    is_array,
    set_array_not_contiguous,
)

from pandas._libs.lib import is_scalar


cdef cnp.dtype _dtype_obj = np.dtype("object")


cpdef check_result_array(object obj, object dtype):
    # Our operation is supposed to be an aggregation/reduction. If
    # it returns an ndarray, this likely means an invalid operation has
    # been passed. See test_apply_without_aggregation, test_agg_must_agg
    if is_array(obj):
        if dtype != _dtype_obj:
            # If it is object dtype, the function can be a reduction/aggregation
            # and still return an ndarray e.g. test_agg_over_numpy_arrays
            raise ValueError("Must produce aggregated value")

cdef class _BaseGrouper:
    cdef _check_dummy(self, object dummy):
        # both values and index must be an ndarray!

        values = dummy.values
        # GH 23683: datetimetz types are equivalent to datetime types here
        if (dummy.dtype != self.arr.dtype
                and values.dtype != self.arr.dtype):
            raise ValueError('Dummy array must be same dtype')
        if is_array(values) and not values.flags.contiguous:
            # e.g. Categorical has no `flags` attribute
            values = values.copy()
        index = dummy.index.values
        if not index.flags.contiguous:
            index = index.copy()

        return values, index

    cdef _init_dummy_series_and_index(self, Slider islider, Slider vslider):
        """
        Create Series and Index objects that we will alter in-place while iterating.
        """
        cached_index = self.ityp(islider.buf, dtype=self.idtype)
        cached_series = self.typ(
            vslider.buf, dtype=vslider.buf.dtype, index=cached_index, name=self.name
        )
        return cached_index, cached_series

    cdef inline _update_cached_objs(self, object cached_series, object cached_index,
                                    Slider islider, Slider vslider):
        # See the comment in indexes/base.py about _index_data.
        # We need this for EA-backed indexes that have a reference
        # to a 1-d ndarray like datetime / timedelta / period.
        cached_index._engine.clear_mapping()
        cached_index._cache.clear()  # e.g. inferred_freq must go
        cached_series._mgr.set_values(vslider.buf)

    cdef inline object _apply_to_group(self,
                                       object cached_series, object cached_index,
                                       bint initialized):
        """
        Call self.f on our new group, then update to the next group.
        """
        cdef:
            object res

        # NB: we assume that _update_cached_objs has already cleared
        # the cache and engine mapping
        res = self.f(cached_series)
        res = extract_result(res)
        if not initialized:
            # On the first pass, we check the output shape to see
            # if this looks like a reduction.
            initialized = True
            check_result_array(res, cached_series.dtype)

        return res, initialized

cdef class SeriesBinGrouper(_BaseGrouper):
    """
    Performs grouping operation according to bin edges, rather than labels
    """
    cdef:
        Py_ssize_t nresults, ngroups

    cdef public:
        ndarray bins  # ndarray[int64_t]
        ndarray arr, index, dummy_arr, dummy_index
        object values, f, typ, ityp, name, idtype

    def __init__(self, object series, object f, ndarray[int64_t] bins):

        assert len(bins) > 0  # otherwise we get IndexError in get_result

        self.bins = bins
        self.f = f

        values = series.values
        if is_array(values) and not values.flags.c_contiguous:
            # e.g. Categorical has no `flags` attribute
            values = values.copy('C')
        self.arr = values
        self.typ = series._constructor
        self.ityp = series.index._constructor
        self.idtype = series.index.dtype
        self.index = series.index.values
        self.name = series.name

        dummy = series.iloc[:0]
        self.dummy_arr, self.dummy_index = self._check_dummy(dummy)

        # kludge for #1688
        if len(bins) > 0 and bins[-1] == len(series):
            self.ngroups = len(bins)
        else:
            # TODO: not reached except in test_series_bin_grouper directly
            # constructing SeriesBinGrouper; can we rule this case out?
            self.ngroups = len(bins) + 1

    def get_result(self):
        cdef:
            ndarray arr, result
            ndarray[int64_t] counts
            Py_ssize_t i, n, group_size, start, end
            object res
            bint initialized = 0
            Slider vslider, islider
            object cached_series = None, cached_index = None

        counts = np.zeros(self.ngroups, dtype=np.int64)

        if self.ngroups > 0:
            counts[0] = self.bins[0]
            for i in range(1, self.ngroups):
                if i == self.ngroups - 1:
                    counts[i] = len(self.arr) - self.bins[i - 1]
                else:
                    counts[i] = self.bins[i] - self.bins[i - 1]

        group_size = 0
        n = len(self.arr)

        vslider = Slider(self.arr, self.dummy_arr)
        islider = Slider(self.index, self.dummy_index)

        result = np.empty(self.ngroups, dtype='O')

        cached_index, cached_series = self._init_dummy_series_and_index(
            islider, vslider
        )

        start = 0
        try:
            for i in range(self.ngroups):
                group_size = counts[i]
                end = start + group_size

                islider.move(start, end)
                vslider.move(start, end)

                self._update_cached_objs(
                    cached_series, cached_index, islider, vslider)

                res, initialized = self._apply_to_group(cached_series, cached_index,
                                                        initialized)
                start += group_size

                result[i] = res

        finally:
            # so we don't free the wrong memory
            islider.reset()
            vslider.reset()

        return result, counts

cdef class SeriesGrouper(_BaseGrouper):
    """
    Performs generic grouping operation while avoiding ndarray construction
    overhead
    """
    cdef:
        Py_ssize_t nresults, ngroups

    cdef public:
        ndarray arr, index, dummy_arr, dummy_index
        object f, labels, values, typ, ityp, name, idtype

    def __init__(self, object series, object f, ndarray[intp_t] labels,
                 Py_ssize_t ngroups):

        if len(series) == 0:
            # get_result would never assign `result`
            raise ValueError("SeriesGrouper requires non-empty `series`")

        self.labels = labels
        self.f = f

        values = series.values
        if is_array(values) and not values.flags.c_contiguous:
            # e.g. Categorical has no `flags` attribute
            values = values.copy('C')
        self.arr = values
        self.typ = series._constructor
        self.ityp = series.index._constructor
        self.idtype = series.index.dtype
        self.index = series.index.values
        self.name = series.name

        dummy = series.iloc[:0]
        self.dummy_arr, self.dummy_index = self._check_dummy(dummy)
        self.ngroups = ngroups

    def get_result(self):
        cdef:
            # Define result to avoid UnboundLocalError
            ndarray arr, result = None
            ndarray[intp_t] labels
            ndarray[int64_t] counts
            Py_ssize_t i, n, group_size, lab, start, end
            object res
            bint initialized = 0
            Slider vslider, islider
            object cached_series = None, cached_index = None

        labels = self.labels
        counts = np.zeros(self.ngroups, dtype=np.int64)
        group_size = 0
        n = len(self.arr)

        vslider = Slider(self.arr, self.dummy_arr)
        islider = Slider(self.index, self.dummy_index)

        result = np.empty(self.ngroups, dtype='O')

        cached_index, cached_series = self._init_dummy_series_and_index(
            islider, vslider
        )

        start = 0
        try:
            for i in range(n):
                group_size += 1

                lab = labels[i]

                if i == n - 1 or lab != labels[i + 1]:
                    if lab == -1:
                        start += group_size
                        group_size = 0
                        continue

                    end = start + group_size
                    islider.move(start, end)
                    vslider.move(start, end)

                    self._update_cached_objs(
                        cached_series, cached_index, islider, vslider)

                    res, initialized = self._apply_to_group(cached_series, cached_index,
                                                            initialized)

                    start += group_size

                    result[lab] = res
                    counts[lab] = group_size
                    group_size = 0

        finally:
            # so we don't free the wrong memory
            islider.reset()
            vslider.reset()

        # We check for empty series in the constructor, so should always
        # have result initialized by this point.
        assert initialized, "`result` has not been initialized."

        return result, counts

cpdef inline extract_result(object res):
    """
    Extract the result object; it might be a 0-dim ndarray,
    a len-1 ndarray, or a scalar.
    """
    if hasattr(res, "_values"):
        # Preserve EA
        res = res._values
        if res.ndim == 1 and len(res) == 1:
            # see test_agg_lambda_with_timezone, test_resampler_grouper.py::test_apply
            res = res[0]
    if is_array(res):
        if res.ndim == 1 and len(res) == 1:
            # see test_resampler_grouper.py::test_apply
            res = res[0]
    return res

cdef class Slider:
    """
    Only handles contiguous data for now
    """
    cdef:
        ndarray values, buf
        Py_ssize_t stride
        char *orig_data

    def __init__(self, ndarray values, ndarray buf):
        assert values.ndim == 1
        assert values.dtype == buf.dtype

        if not values.flags.contiguous:
            values = values.copy()

        self.values = values
        self.buf = buf

        self.stride = values.strides[0]
        self.orig_data = self.buf.data

        self.buf.data = self.values.data
        self.buf.strides[0] = self.stride

    cdef move(self, int start, int end):
        """
        For slicing
        """
        self.buf.data = self.values.data + self.stride * start
        self.buf.shape[0] = end - start

    cdef reset(self):
        self.buf.data = self.orig_data
        self.buf.shape[0] = 0

def apply_frame_axis0(object frame, object f, object names,
                      const int64_t[:] starts, const int64_t[:] ends):
    cdef:
        BlockSlider slider
        Py_ssize_t i, n = len(starts)
        list results
        object piece
        dict item_cache

    # We have already checked that we don't have a MultiIndex before calling
    assert frame.index.nlevels == 1

    results = []

    slider = BlockSlider(frame)

    mutated = False
    item_cache = slider.dummy._item_cache
    try:
        for i in range(n):
            slider.move(starts[i], ends[i])

            item_cache.clear()  # ugh
            chunk = slider.dummy
            object.__setattr__(chunk, 'name', names[i])

            piece = f(chunk)

            # Need to infer if low level index slider will cause segfaults
            require_slow_apply = i == 0 and piece is chunk
            try:
                if piece.index is not chunk.index:
                    mutated = True
            except AttributeError:
                # `piece` might not have an index, could be e.g. an int
                pass

            if not is_scalar(piece):
                # Need to copy data to avoid appending references
                try:
                    piece = piece.copy(deep="all")
                except (TypeError, AttributeError):
                    pass

            results.append(piece)

            # If the data was modified inplace we need to
            # take the slow path to not risk segfaults
            # we have already computed the first piece
            if require_slow_apply:
                break
    finally:
        slider.reset()

    return results, mutated

cdef class BlockSlider:
    """
    Only capable of sliding on axis=0
    """
    cdef:
        object frame, dummy, index, block
        list blocks, blk_values
        ndarray orig_blklocs, orig_blknos
        ndarray values
        Slider idx_slider
        char **base_ptrs
        int nblocks
        Py_ssize_t i

    def __init__(self, object frame):
        self.frame = frame
        self.dummy = frame[:0]
        self.index = self.dummy.index

        # GH#35417 attributes we need to restore at each step in case
        # the function modified them.
        mgr = self.dummy._mgr
        self.orig_blklocs = mgr.blklocs
        self.orig_blknos = mgr.blknos
        self.blocks = [x for x in self.dummy._mgr.blocks]

        self.blk_values = [block.values for block in self.dummy._mgr.blocks]

        for values in self.blk_values:
            set_array_not_contiguous(values)

        self.nblocks = len(self.blk_values)
        # See the comment in indexes/base.py about _index_data.
        # We need this for EA-backed indexes that have a reference to a 1-d
        # ndarray like datetime / timedelta / period.
        self.idx_slider = Slider(
            self.frame.index._index_data, self.dummy.index._index_data)

        self.base_ptrs = <char**>malloc(sizeof(char*) * self.nblocks)
        for i, block in enumerate(self.blk_values):
            self.base_ptrs[i] = (<ndarray>block).data

    def __dealloc__(self):
        free(self.base_ptrs)

    cdef move(self, int start, int end):
        cdef:
            ndarray arr
            Py_ssize_t i

        self._restore_blocks()

        # move blocks
        for i in range(self.nblocks):
            arr = self.blk_values[i]

            # axis=1 is the frame's axis=0
            arr.data = self.base_ptrs[i] + arr.strides[1] * start
            arr.shape[1] = end - start

        # move and set the index
        self.idx_slider.move(start, end)

        object.__setattr__(self.index, '_index_data', self.idx_slider.buf)
        self.index._engine.clear_mapping()
        self.index._cache.clear()  # e.g. inferred_freq must go

    cdef reset(self):
        cdef:
            ndarray arr
            Py_ssize_t i

        self._restore_blocks()

        for i in range(self.nblocks):
            arr = self.blk_values[i]

            # axis=1 is the frame's axis=0
            arr.data = self.base_ptrs[i]
            arr.shape[1] = 0

    cdef _restore_blocks(self):
        """
        Ensure that we have the original blocks, blknos, and blklocs.
        """
        mgr = self.dummy._mgr
        mgr.blocks = tuple(self.blocks)
        mgr._blklocs = self.orig_blklocs
        mgr._blknos = self.orig_blknos
Binary file not shown.
14 .venv/lib/python3.7/site-packages/pandas/_libs/reshape.pyi Normal file
@@ -0,0 +1,14 @@
import numpy as np

def unstack(
    values: np.ndarray,  # reshape_t[:, :]
    mask: np.ndarray,  # const uint8_t[:]
    stride: int,
    length: int,
    width: int,
    new_values: np.ndarray,  # reshape_t[:, :]
    new_mask: np.ndarray,  # uint8_t[:, :]
) -> None: ...
def explode(
    values: np.ndarray,  # np.ndarray[object]
) -> tuple[np.ndarray, np.ndarray]: ...  # np.ndarray[object]  # np.ndarray[np.int64]
159 .venv/lib/python3.7/site-packages/pandas/_libs/reshape.pyx Normal file
@@ -0,0 +1,159 @@
import cython
from cython import Py_ssize_t

from numpy cimport (
    float32_t,
    float64_t,
    int8_t,
    int16_t,
    int32_t,
    int64_t,
    ndarray,
    uint8_t,
    uint16_t,
    uint32_t,
    uint64_t,
)

import numpy as np

cimport numpy as cnp

cnp.import_array()

from pandas._libs.lib cimport c_is_list_like

ctypedef fused reshape_t:
    uint8_t
    uint16_t
    uint32_t
    uint64_t
    int8_t
    int16_t
    int32_t
    int64_t
    float32_t
    float64_t
    object


@cython.wraparound(False)
@cython.boundscheck(False)
def unstack(reshape_t[:, :] values, const uint8_t[:] mask,
            Py_ssize_t stride, Py_ssize_t length, Py_ssize_t width,
            reshape_t[:, :] new_values, uint8_t[:, :] new_mask) -> None:
    """
    Transform long values to wide new_values.

    Parameters
    ----------
    values : typed ndarray
    mask : np.ndarray[bool]
    stride : int
    length : int
    width : int
    new_values : np.ndarray[bool]
        result array
    new_mask : np.ndarray[bool]
        result mask
    """
    cdef:
        Py_ssize_t i, j, w, nulls, s, offset

    if reshape_t is not object:
        # evaluated at compile-time
        with nogil:
            for i in range(stride):

                nulls = 0
                for j in range(length):

                    for w in range(width):

                        offset = j * width + w

                        if mask[offset]:
                            s = i * width + w
                            new_values[j, s] = values[offset - nulls, i]
                            new_mask[j, s] = 1
                        else:
                            nulls += 1

    else:
        # object-dtype, identical to above but we cannot use nogil
        for i in range(stride):

            nulls = 0
            for j in range(length):

                for w in range(width):

                    offset = j * width + w

                    if mask[offset]:
                        s = i * width + w
                        new_values[j, s] = values[offset - nulls, i]
                        new_mask[j, s] = 1
                    else:
                        nulls += 1

@cython.wraparound(False)
@cython.boundscheck(False)
def explode(ndarray[object] values):
    """
    Transform array list-likes to long form.
    Preserve non-list entries.

    Parameters
    ----------
    values : object ndarray

    Returns
    -------
    ndarray[object]
        result
    ndarray[int64_t]
        counts
    """
    cdef:
        Py_ssize_t i, j, count, n
        object v
        ndarray[object] result
        ndarray[int64_t] counts

    # find the resulting len
    n = len(values)
    counts = np.zeros(n, dtype='int64')
    for i in range(n):
        v = values[i]

        if c_is_list_like(v, True):
            if len(v):
                counts[i] += len(v)
            else:
                # empty list-like, use a nan marker
                counts[i] += 1
        else:
            counts[i] += 1

    result = np.empty(counts.sum(), dtype='object')
    count = 0
    for i in range(n):
        v = values[i]

        if c_is_list_like(v, True):
            if len(v):
                v = list(v)
                for j in range(len(v)):
                    result[count] = v[j]
                    count += 1
            else:
                # empty list-like, use a nan marker
                result[count] = np.nan
                count += 1
        else:
            # replace with the existing scalar
            result[count] = v
            count += 1
    return result, counts
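The public Series.explode is a thin wrapper over this routine; a sketch of the observable behavior:

import pandas as pd

s = pd.Series([[1, 2], "x", []])
s.explode()
# 0      1
# 0      2
# 1      x
# 2    NaN
# dtype: object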
Binary file not shown.
805 .venv/lib/python3.7/site-packages/pandas/_libs/sparse.pyx Normal file
@@ -0,0 +1,805 @@
import cython
import numpy as np

cimport numpy as cnp
from numpy cimport (
    float32_t,
    float64_t,
    int8_t,
    int16_t,
    int32_t,
    int64_t,
    ndarray,
    uint8_t,
)

cnp.import_array()


# -----------------------------------------------------------------------------
# Preamble stuff

cdef float64_t NaN = <float64_t>np.NaN
cdef float64_t INF = <float64_t>np.inf

# -----------------------------------------------------------------------------


cdef class SparseIndex:
    """
    Abstract superclass for sparse index types.
    """

    def __init__(self):
        raise NotImplementedError

cdef class IntIndex(SparseIndex):
|
||||
"""
|
||||
Object for holding exact integer sparse indexing information
|
||||
|
||||
Parameters
|
||||
----------
|
||||
length : integer
|
||||
indices : array-like
|
||||
Contains integers corresponding to the indices.
|
||||
check_integrity : bool, default=True
|
||||
Check integrity of the input.
|
||||
"""
|
||||
|
||||
cdef readonly:
|
||||
Py_ssize_t length, npoints
|
||||
ndarray indices
|
||||
|
||||
def __init__(self, Py_ssize_t length, indices, bint check_integrity=True):
|
||||
self.length = length
|
||||
self.indices = np.ascontiguousarray(indices, dtype=np.int32)
|
||||
self.npoints = len(self.indices)
|
||||
|
||||
if check_integrity:
|
||||
self.check_integrity()
|
||||
|
||||
def __reduce__(self):
|
||||
args = (self.length, self.indices)
|
||||
return IntIndex, args
|
||||
|
||||
def __repr__(self) -> str:
|
||||
output = 'IntIndex\n'
|
||||
output += f'Indices: {repr(self.indices)}\n'
|
||||
return output
|
||||
|
||||
@property
|
||||
def nbytes(self) -> int:
|
||||
return self.indices.nbytes
|
||||
|
||||
def check_integrity(self):
|
||||
"""
|
||||
Checks the following:
|
||||
|
||||
- Indices are strictly ascending
|
||||
- Number of indices is at most self.length
|
||||
- Indices are at least 0 and at most the total length less one
|
||||
|
||||
A ValueError is raised if any of these conditions is violated.
|
||||
"""
|
||||
|
||||
if self.npoints > self.length:
|
||||
raise ValueError(
|
||||
f"Too many indices. Expected {self.length} but found {self.npoints}"
|
||||
)
|
||||
|
||||
# Indices are vacuously ordered and non-negative
|
||||
# if the sequence of indices is empty.
|
||||
if self.npoints == 0:
|
||||
return
|
||||
|
||||
if self.indices.min() < 0:
|
||||
raise ValueError("No index can be less than zero")
|
||||
|
||||
if self.indices.max() >= self.length:
|
||||
raise ValueError("All indices must be less than the length")
|
||||
|
||||
monotonic = np.all(self.indices[:-1] < self.indices[1:])
|
||||
if not monotonic:
|
||||
raise ValueError("Indices must be strictly increasing")
|
||||
|
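An illustration of the three integrity rules above (this is pandas-private API, so behaviour may vary between versions):

    from pandas._libs.sparse import IntIndex

    idx = IntIndex(10, [2, 5, 7])   # strictly increasing, all within [0, 10)
    idx.npoints                     # 3
    idx.ngaps                       # 7

    # each of these raises ValueError in check_integrity:
    # IntIndex(10, [5, 2])    -> "Indices must be strictly increasing"
    # IntIndex(10, [2, 11])   -> "All indices must be less than the length"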
||||
def equals(self, other: object) -> bool:
|
||||
if not isinstance(other, IntIndex):
|
||||
return False
|
||||
|
||||
if self is other:
|
||||
return True
|
||||
|
||||
same_length = self.length == other.length
|
||||
same_indices = np.array_equal(self.indices, other.indices)
|
||||
return same_length and same_indices
|
||||
|
||||
@property
|
||||
def ngaps(self) -> int:
|
||||
return self.length - self.npoints
|
||||
|
||||
def to_int_index(self):
|
||||
return self
|
||||
|
||||
def to_block_index(self):
|
||||
locs, lens = get_blocks(self.indices)
|
||||
return BlockIndex(self.length, locs, lens)
|
||||
|
||||
cpdef IntIndex intersect(self, SparseIndex y_):
|
||||
cdef:
|
||||
Py_ssize_t out_length, xi, yi = 0, result_indexer = 0
|
||||
int32_t xind
|
||||
ndarray[int32_t, ndim=1] xindices, yindices, new_indices
|
||||
IntIndex y
|
||||
|
||||
# if y_ is already an IntIndex, to_int_index() returns it unchanged
|
||||
y = y_.to_int_index()
|
||||
|
||||
if self.length != y.length:
|
||||
raise Exception('Indices must reference same underlying length')
|
||||
|
||||
xindices = self.indices
|
||||
yindices = y.indices
|
||||
new_indices = np.empty(min(
|
||||
len(xindices), len(yindices)), dtype=np.int32)
|
||||
|
||||
for xi in range(self.npoints):
|
||||
xind = xindices[xi]
|
||||
|
||||
while yi < y.npoints and yindices[yi] < xind:
|
||||
yi += 1
|
||||
|
||||
if yi >= y.npoints:
|
||||
break
|
||||
|
||||
# TODO: would a two-pass algorithm be faster?
|
||||
if yindices[yi] == xind:
|
||||
new_indices[result_indexer] = xind
|
||||
result_indexer += 1
|
||||
|
||||
new_indices = new_indices[:result_indexer]
|
||||
return IntIndex(self.length, new_indices)
|
||||
|
||||
cpdef IntIndex make_union(self, SparseIndex y_):
|
||||
|
||||
cdef:
|
||||
ndarray[int32_t, ndim=1] new_indices
|
||||
IntIndex y
|
||||
|
||||
# if y_ is already an IntIndex, to_int_index() returns it unchanged
|
||||
y = y_.to_int_index()
|
||||
|
||||
if self.length != y.length:
|
||||
raise ValueError('Indices must reference same underlying length')
|
||||
|
||||
new_indices = np.union1d(self.indices, y.indices)
|
||||
return IntIndex(self.length, new_indices)
|
||||
|
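Both set operations require the two indices to describe underlying vectors of the same length; a small example of their behaviour:

    from pandas._libs.sparse import IntIndex

    a = IntIndex(10, [1, 3, 5, 8])
    b = IntIndex(10, [3, 4, 5, 9])

    a.intersect(b).indices    # array([3, 5], dtype=int32)
    a.make_union(b).indices   # array([1, 3, 4, 5, 8, 9], dtype=int32)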
||||
@cython.wraparound(False)
|
||||
cpdef int32_t lookup(self, Py_ssize_t index):
|
||||
"""
|
||||
Return the internal location if value exists on given index.
|
||||
Return -1 otherwise.
|
||||
"""
|
||||
cdef:
|
||||
int32_t res
|
||||
ndarray[int32_t, ndim=1] inds
|
||||
|
||||
inds = self.indices
|
||||
if self.npoints == 0:
|
||||
return -1
|
||||
elif index < 0 or self.length <= index:
|
||||
return -1
|
||||
|
||||
res = inds.searchsorted(index)
|
||||
if res == self.npoints:
|
||||
return -1
|
||||
elif inds[res] == index:
|
||||
return res
|
||||
else:
|
||||
return -1
|
||||
|
||||
@cython.wraparound(False)
|
||||
cpdef ndarray[int32_t] lookup_array(self, ndarray[int32_t, ndim=1] indexer):
|
||||
"""
|
||||
Vectorized lookup, returns ndarray[int32_t]
|
||||
"""
|
||||
cdef:
|
||||
Py_ssize_t n, i, ind_val
|
||||
ndarray[int32_t, ndim=1] inds
|
||||
ndarray[uint8_t, ndim=1, cast=True] mask
|
||||
ndarray[int32_t, ndim=1] masked
|
||||
ndarray[int32_t, ndim=1] res
|
||||
ndarray[int32_t, ndim=1] results
|
||||
|
||||
n = len(indexer)
|
||||
results = np.empty(n, dtype=np.int32)
|
||||
results[:] = -1
|
||||
|
||||
if self.npoints == 0:
|
||||
return results
|
||||
|
||||
inds = self.indices
|
||||
mask = (inds[0] <= indexer) & (indexer <= inds[len(inds) - 1])
|
||||
|
||||
masked = indexer[mask]
|
||||
res = inds.searchsorted(masked).astype(np.int32)
|
||||
|
||||
res[inds[res] != masked] = -1
|
||||
results[mask] = res
|
||||
return results
|
||||
|
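lookup translates an absolute position into its slot within the stored (dense) values, returning -1 for positions that are not stored; lookup_array does the same for a whole int32 indexer via a single searchsorted call:

    import numpy as np
    from pandas._libs.sparse import IntIndex

    idx = IntIndex(10, [2, 5, 7])
    idx.lookup(5)    # 1   (5 is the second stored point)
    idx.lookup(4)    # -1  (4 is not stored)
    idx.lookup_array(np.array([2, 4, 7], dtype=np.int32))
    # array([ 0, -1,  2], dtype=int32)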
||||
cpdef ndarray reindex(self, ndarray[float64_t, ndim=1] values,
|
||||
float64_t fill_value, SparseIndex other_):
|
||||
cdef:
|
||||
Py_ssize_t i = 0, j = 0
|
||||
IntIndex other
|
||||
ndarray[float64_t, ndim=1] result
|
||||
ndarray[int32_t, ndim=1] sinds, oinds
|
||||
|
||||
other = other_.to_int_index()
|
||||
|
||||
oinds = other.indices
|
||||
sinds = self.indices
|
||||
|
||||
result = np.empty(other.npoints, dtype=np.float64)
|
||||
result[:] = fill_value
|
||||
|
||||
for i in range(other.npoints):
|
||||
while oinds[i] > sinds[j] and j < self.npoints:
|
||||
j += 1
|
||||
|
||||
if j == self.npoints:
|
||||
break
|
||||
|
||||
if oinds[i] < sinds[j]:
|
||||
continue
|
||||
elif oinds[i] == sinds[j]:
|
||||
result[i] = values[j]
|
||||
j += 1
|
||||
|
||||
return result
|
||||
|
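reindex realigns a dense buffer of this index's values onto another index's points, writing fill_value wherever the other index has a point this one lacks:

    import numpy as np
    from pandas._libs.sparse import IntIndex

    src = IntIndex(10, [2, 5, 7])
    dst = IntIndex(10, [2, 6, 7])
    vals = np.array([10.0, 20.0, 30.0])

    src.reindex(vals, -1.0, dst)
    # array([10., -1., 30.])  -- position 6 is absent from src, so it takes the fill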
||||
cpdef put(self, ndarray[float64_t, ndim=1] values,
|
||||
ndarray[int32_t, ndim=1] indices, object to_put):
|
||||
pass
|
||||
|
||||
cpdef take(self, ndarray[float64_t, ndim=1] values,
|
||||
ndarray[int32_t, ndim=1] indices):
|
||||
pass
|
||||
|
||||
|
||||
cpdef get_blocks(ndarray[int32_t, ndim=1] indices):
|
||||
cdef:
|
||||
Py_ssize_t init_len, i, npoints, result_indexer = 0
|
||||
int32_t block, length = 1, cur, prev
|
||||
ndarray[int32_t, ndim=1] locs, lens
|
||||
|
||||
npoints = len(indices)
|
||||
|
||||
# just handle the special empty case separately
|
||||
if npoints == 0:
|
||||
return np.array([], dtype=np.int32), np.array([], dtype=np.int32)
|
||||
|
||||
# block size can't be longer than npoints
|
||||
locs = np.empty(npoints, dtype=np.int32)
|
||||
lens = np.empty(npoints, dtype=np.int32)
|
||||
|
||||
# TODO: two-pass algorithm faster?
|
||||
prev = block = indices[0]
|
||||
for i in range(1, npoints):
|
||||
cur = indices[i]
|
||||
if cur - prev > 1:
|
||||
# new block
|
||||
locs[result_indexer] = block
|
||||
lens[result_indexer] = length
|
||||
block = cur
|
||||
length = 1
|
||||
result_indexer += 1
|
||||
else:
|
||||
# same block, increment length
|
||||
length += 1
|
||||
|
||||
prev = cur
|
||||
|
||||
locs[result_indexer] = block
|
||||
lens[result_indexer] = length
|
||||
result_indexer += 1
|
||||
locs = locs[:result_indexer]
|
||||
lens = lens[:result_indexer]
|
||||
return locs, lens
|
||||
|
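get_blocks is a run-length encoding of a sorted integer index: consecutive runs become (location, length) pairs, which is exactly the IntIndex-to-BlockIndex conversion used by to_block_index above:

    import numpy as np
    from pandas._libs.sparse import get_blocks

    locs, lens = get_blocks(np.array([0, 1, 2, 7, 8], dtype=np.int32))
    # locs -> array([0, 7], dtype=int32)
    # lens -> array([3, 2], dtype=int32)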
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# BlockIndex
|
||||
|
||||
cdef class BlockIndex(SparseIndex):
|
||||
"""
|
||||
Object for holding block-based sparse indexing information
|
||||
|
||||
Parameters
|
||||
----------
|
||||
"""
|
||||
cdef readonly:
|
||||
int32_t nblocks, npoints, length
|
||||
ndarray blocs, blengths
|
||||
|
||||
cdef:
|
||||
object __weakref__ # need to be picklable
|
||||
int32_t *locbuf
|
||||
int32_t *lenbuf
|
||||
|
||||
def __init__(self, length, blocs, blengths):
|
||||
|
||||
self.blocs = np.ascontiguousarray(blocs, dtype=np.int32)
|
||||
self.blengths = np.ascontiguousarray(blengths, dtype=np.int32)
|
||||
|
||||
# keep raw int32_t buffers for fast C-level access
|
||||
self.locbuf = <int32_t*>self.blocs.data
|
||||
self.lenbuf = <int32_t*>self.blengths.data
|
||||
|
||||
self.length = length
|
||||
self.nblocks = np.int32(len(self.blocs))
|
||||
self.npoints = self.blengths.sum()
|
||||
|
||||
# self.block_start = blocs
|
||||
# self.block_end = blocs + blengths
|
||||
|
||||
self.check_integrity()
|
||||
|
||||
def __reduce__(self):
|
||||
args = (self.length, self.blocs, self.blengths)
|
||||
return BlockIndex, args
|
||||
|
||||
def __repr__(self) -> str:
|
||||
output = 'BlockIndex\n'
|
||||
output += f'Block locations: {repr(self.blocs)}\n'
|
||||
output += f'Block lengths: {repr(self.blengths)}'
|
||||
|
||||
return output
|
||||
|
||||
@property
|
||||
def nbytes(self) -> int:
|
||||
return self.blocs.nbytes + self.blengths.nbytes
|
||||
|
||||
@property
|
||||
def ngaps(self) -> int:
|
||||
return self.length - self.npoints
|
||||
|
||||
cpdef check_integrity(self):
|
||||
"""
|
||||
Check:
|
||||
- Locations are in ascending order
|
||||
- No overlapping blocks
|
||||
- Blocks do not start after the end of the index, nor extend beyond the end
|
||||
"""
|
||||
cdef:
|
||||
Py_ssize_t i
|
||||
ndarray[int32_t, ndim=1] blocs, blengths
|
||||
|
||||
blocs = self.blocs
|
||||
blengths = self.blengths
|
||||
|
||||
if len(blocs) != len(blengths):
|
||||
raise ValueError('block bound arrays must be same length')
|
||||
|
||||
for i in range(self.nblocks):
|
||||
if i > 0:
|
||||
if blocs[i] <= blocs[i - 1]:
|
||||
raise ValueError('Locations not in ascending order')
|
||||
|
||||
if i < self.nblocks - 1:
|
||||
if blocs[i] + blengths[i] > blocs[i + 1]:
|
||||
raise ValueError(f'Block {i} overlaps')
|
||||
else:
|
||||
if blocs[i] + blengths[i] > self.length:
|
||||
raise ValueError(f'Block {i} extends beyond end')
|
||||
|
||||
# no zero-length blocks
|
||||
if blengths[i] == 0:
|
||||
raise ValueError(f'Zero-length block {i}')
|
||||
|
||||
def equals(self, other: object) -> bool:
|
||||
if not isinstance(other, BlockIndex):
|
||||
return False
|
||||
|
||||
if self is other:
|
||||
return True
|
||||
|
||||
same_length = self.length == other.length
|
||||
same_blocks = (np.array_equal(self.blocs, other.blocs) and
|
||||
np.array_equal(self.blengths, other.blengths))
|
||||
return same_length and same_blocks
|
||||
|
||||
def to_block_index(self):
|
||||
return self
|
||||
|
||||
def to_int_index(self):
|
||||
cdef:
|
||||
int32_t i = 0, j, b
|
||||
int32_t offset
|
||||
ndarray[int32_t, ndim=1] indices
|
||||
|
||||
indices = np.empty(self.npoints, dtype=np.int32)
|
||||
|
||||
for b in range(self.nblocks):
|
||||
offset = self.locbuf[b]
|
||||
|
||||
for j in range(self.lenbuf[b]):
|
||||
indices[i] = offset + j
|
||||
i += 1
|
||||
|
||||
return IntIndex(self.length, indices)
|
||||
|
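A BlockIndex carries the same information as an IntIndex in run-length form, and the two conversions round-trip:

    from pandas._libs.sparse import BlockIndex

    bidx = BlockIndex(10, [0, 7], [3, 2])   # points 0, 1, 2 and 7, 8
    iidx = bidx.to_int_index()
    iidx.indices                            # array([0, 1, 2, 7, 8], dtype=int32)
    iidx.to_block_index().equals(bidx)      # True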
||||
cpdef BlockIndex intersect(self, SparseIndex other):
|
||||
"""
|
||||
Intersect two BlockIndex objects
|
||||
|
||||
Returns
|
||||
-------
|
||||
BlockIndex
|
||||
"""
|
||||
cdef:
|
||||
BlockIndex y
|
||||
ndarray[int32_t, ndim=1] xloc, xlen, yloc, ylen, out_bloc, out_blen
|
||||
Py_ssize_t xi = 0, yi = 0, max_len, result_indexer = 0
|
||||
int32_t cur_loc, cur_length, diff
|
||||
|
||||
y = other.to_block_index()
|
||||
|
||||
if self.length != y.length:
|
||||
raise Exception('Indices must reference same underlying length')
|
||||
|
||||
xloc = self.blocs
|
||||
xlen = self.blengths
|
||||
yloc = y.blocs
|
||||
ylen = y.blengths
|
||||
|
||||
# block may be split, but can't exceed original len / 2 + 1
|
||||
max_len = min(self.length, y.length) // 2 + 1
|
||||
out_bloc = np.empty(max_len, dtype=np.int32)
|
||||
out_blen = np.empty(max_len, dtype=np.int32)
|
||||
|
||||
while True:
|
||||
# we are done (or possibly never began)
|
||||
if xi >= self.nblocks or yi >= y.nblocks:
|
||||
break
|
||||
|
||||
# completely symmetric...would like to avoid code dup but oh well
|
||||
if xloc[xi] >= yloc[yi]:
|
||||
cur_loc = xloc[xi]
|
||||
diff = xloc[xi] - yloc[yi]
|
||||
|
||||
if ylen[yi] <= diff:
|
||||
# have to skip this block
|
||||
yi += 1
|
||||
continue
|
||||
|
||||
if ylen[yi] - diff < xlen[xi]:
|
||||
# take end of y block, move onward
|
||||
cur_length = ylen[yi] - diff
|
||||
yi += 1
|
||||
else:
|
||||
# take end of x block
|
||||
cur_length = xlen[xi]
|
||||
xi += 1
|
||||
|
||||
else: # xloc[xi] < yloc[yi]
|
||||
cur_loc = yloc[yi]
|
||||
diff = yloc[yi] - xloc[xi]
|
||||
|
||||
if xlen[xi] <= diff:
|
||||
# have to skip this block
|
||||
xi += 1
|
||||
continue
|
||||
|
||||
if xlen[xi] - diff < ylen[yi]:
|
||||
# take end of x block, move onward
|
||||
cur_length = xlen[xi] - diff
|
||||
xi += 1
|
||||
else:
|
||||
# take end of y block
|
||||
cur_length = ylen[yi]
|
||||
yi += 1
|
||||
|
||||
out_bloc[result_indexer] = cur_loc
|
||||
out_blen[result_indexer] = cur_length
|
||||
result_indexer += 1
|
||||
|
||||
out_bloc = out_bloc[:result_indexer]
|
||||
out_blen = out_blen[:result_indexer]
|
||||
|
||||
return BlockIndex(self.length, out_bloc, out_blen)
|
||||
|
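As the comment above notes, intersection can split blocks; a single long block intersected with two short ones yields two output blocks:

    from pandas._libs.sparse import BlockIndex

    x = BlockIndex(10, [0], [6])          # covers positions 0..5
    y = BlockIndex(10, [1, 4], [2, 3])    # covers 1..2 and 4..6
    z = x.intersect(y)
    z.blocs, z.blengths                   # (array([1, 4], dtype=int32),
                                          #  array([2, 2], dtype=int32))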
||||
cpdef BlockIndex make_union(self, SparseIndex y):
|
||||
"""
|
||||
Combine two BlockIndex objects, accepting indices contained in either one
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y : SparseIndex
|
||||
|
||||
Notes
|
||||
-----
|
||||
union is a protected keyword in Cython, hence make_union
|
||||
|
||||
Returns
|
||||
-------
|
||||
BlockIndex
|
||||
"""
|
||||
return BlockUnion(self, y.to_block_index()).result
|
||||
|
||||
cpdef Py_ssize_t lookup(self, Py_ssize_t index):
|
||||
"""
|
||||
Return the internal location if value exists on given index.
|
||||
Return -1 otherwise.
|
||||
"""
|
||||
cdef:
|
||||
Py_ssize_t i, cum_len
|
||||
ndarray[int32_t, ndim=1] locs, lens
|
||||
|
||||
locs = self.blocs
|
||||
lens = self.blengths
|
||||
|
||||
if self.nblocks == 0:
|
||||
return -1
|
||||
elif index < locs[0]:
|
||||
return -1
|
||||
|
||||
cum_len = 0
|
||||
for i in range(self.nblocks):
|
||||
if index >= locs[i] and index < locs[i] + lens[i]:
|
||||
return cum_len + index - locs[i]
|
||||
cum_len += lens[i]
|
||||
|
||||
return -1
|
||||
|
||||
@cython.wraparound(False)
|
||||
cpdef ndarray[int32_t] lookup_array(self, ndarray[int32_t, ndim=1] indexer):
|
||||
"""
|
||||
Vectorized lookup, returns ndarray[int32_t]
|
||||
"""
|
||||
cdef:
|
||||
Py_ssize_t n, i, j, ind_val
|
||||
ndarray[int32_t, ndim=1] locs, lens
|
||||
ndarray[int32_t, ndim=1] results
|
||||
|
||||
locs = self.blocs
|
||||
lens = self.blengths
|
||||
|
||||
n = len(indexer)
|
||||
results = np.empty(n, dtype=np.int32)
|
||||
results[:] = -1
|
||||
|
||||
if self.npoints == 0:
|
||||
return results
|
||||
|
||||
for i in range(n):
|
||||
ind_val = indexer[i]
|
||||
if not (ind_val < 0 or self.length <= ind_val):
|
||||
cum_len = 0
|
||||
for j in range(self.nblocks):
|
||||
if ind_val >= locs[j] and ind_val < locs[j] + lens[j]:
|
||||
results[i] = cum_len + ind_val - locs[j]
|
||||
cum_len += lens[j]
|
||||
return results
|
||||
|
||||
cpdef ndarray reindex(self, ndarray[float64_t, ndim=1] values,
|
||||
float64_t fill_value, SparseIndex other_):
|
||||
cdef:
|
||||
Py_ssize_t i = 0, j = 0, ocur, ocurlen
|
||||
BlockIndex other
|
||||
ndarray[float64_t, ndim=1] result
|
||||
ndarray[int32_t, ndim=1] slocs, slens, olocs, olens
|
||||
|
||||
other = other_.to_block_index()
|
||||
|
||||
olocs = other.blocs
|
||||
olens = other.blengths
|
||||
slocs = self.blocs
|
||||
slens = self.blengths
|
||||
|
||||
result = np.empty(other.npoints, dtype=np.float64)
|
||||
|
||||
for i in range(other.nblocks):
|
||||
ocur = olocs[i]
|
||||
ocurlen = olens[i]
|
||||
|
||||
while slocs[j] + slens[j] < ocur:
|
||||
j += 1
|
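# NOTE: the method body ends here in this vendored version -- `result` is
# allocated above but never populated or returned, so this reindex appears
# to be an unfinished (and unused) upstream stub.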
||||
|
||||
cpdef put(self, ndarray[float64_t, ndim=1] values,
|
||||
ndarray[int32_t, ndim=1] indices, object to_put):
|
||||
pass
|
||||
|
||||
cpdef take(self, ndarray[float64_t, ndim=1] values,
|
||||
ndarray[int32_t, ndim=1] indices):
|
||||
pass
|
||||
|
||||
|
||||
@cython.internal
|
||||
cdef class BlockMerge:
|
||||
"""
|
||||
Object-oriented approach makes sharing state between recursive functions a
|
||||
lot easier and reduces code duplication
|
||||
"""
|
||||
cdef:
|
||||
BlockIndex x, y, result
|
||||
ndarray xstart, xlen, xend, ystart, ylen, yend
|
||||
int32_t xi, yi # block indices
|
||||
|
||||
def __init__(self, BlockIndex x, BlockIndex y):
|
||||
self.x = x
|
||||
self.y = y
|
||||
|
||||
if x.length != y.length:
|
||||
raise Exception('Indices must reference same underlying length')
|
||||
|
||||
self.xstart = self.x.blocs
|
||||
self.ystart = self.y.blocs
|
||||
|
||||
self.xend = self.x.blocs + self.x.blengths
|
||||
self.yend = self.y.blocs + self.y.blengths
|
||||
|
||||
# self.xlen = self.x.blengths
|
||||
# self.ylen = self.y.blengths
|
||||
|
||||
self.xi = 0
|
||||
self.yi = 0
|
||||
|
||||
self.result = self._make_merged_blocks()
|
||||
|
||||
cdef _make_merged_blocks(self):
|
||||
raise NotImplementedError
|
||||
|
||||
cdef _set_current_indices(self, int32_t xi, int32_t yi, bint mode):
|
||||
if mode == 0:
|
||||
self.xi = xi
|
||||
self.yi = yi
|
||||
else:
|
||||
self.xi = yi
|
||||
self.yi = xi
|
||||
|
||||
|
||||
@cython.internal
|
||||
cdef class BlockUnion(BlockMerge):
|
||||
"""
|
||||
Object-oriented approach makes sharing state between recursive functions a
|
||||
lot easier and reduces code duplication
|
||||
"""
|
||||
|
||||
cdef _make_merged_blocks(self):
|
||||
cdef:
|
||||
ndarray[int32_t, ndim=1] xstart, xend, ystart
|
||||
ndarray[int32_t, ndim=1] yend, out_bloc, out_blen
|
||||
int32_t nstart, nend, diff
|
||||
Py_ssize_t max_len, result_indexer = 0
|
||||
|
||||
xstart = self.xstart
|
||||
xend = self.xend
|
||||
ystart = self.ystart
|
||||
yend = self.yend
|
||||
|
||||
max_len = min(self.x.length, self.y.length) // 2 + 1
|
||||
out_bloc = np.empty(max_len, dtype=np.int32)
|
||||
out_blen = np.empty(max_len, dtype=np.int32)
|
||||
|
||||
while True:
|
||||
# we are done (or possibly never began)
|
||||
if self.xi >= self.x.nblocks and self.yi >= self.y.nblocks:
|
||||
break
|
||||
elif self.yi >= self.y.nblocks:
|
||||
# through with y, just pass through x blocks
|
||||
nstart = xstart[self.xi]
|
||||
nend = xend[self.xi]
|
||||
self.xi += 1
|
||||
elif self.xi >= self.x.nblocks:
|
||||
# through with x, just pass through y blocks
|
||||
nstart = ystart[self.yi]
|
||||
nend = yend[self.yi]
|
||||
self.yi += 1
|
||||
else:
|
||||
# find end of new block
|
||||
if xstart[self.xi] < ystart[self.yi]:
|
||||
nstart = xstart[self.xi]
|
||||
nend = self._find_next_block_end(0)
|
||||
else:
|
||||
nstart = ystart[self.yi]
|
||||
nend = self._find_next_block_end(1)
|
||||
|
||||
out_bloc[result_indexer] = nstart
|
||||
out_blen[result_indexer] = nend - nstart
|
||||
result_indexer += 1
|
||||
|
||||
out_bloc = out_bloc[:result_indexer]
|
||||
out_blen = out_blen[:result_indexer]
|
||||
|
||||
return BlockIndex(self.x.length, out_bloc, out_blen)
|
||||
|
||||
cdef int32_t _find_next_block_end(self, bint mode) except -1:
|
||||
"""
|
||||
Wow, this got complicated in a hurry
|
||||
|
||||
mode 0: block started in index x
|
||||
mode 1: block started in index y
|
||||
"""
|
||||
cdef:
|
||||
ndarray[int32_t, ndim=1] xstart, xend, ystart, yend
|
||||
int32_t xi, yi, xnblocks, ynblocks, nend
|
||||
|
||||
if mode != 0 and mode != 1:
|
||||
raise Exception('Mode must be 0 or 1')
|
||||
|
||||
# so symmetric code will work
|
||||
if mode == 0:
|
||||
xstart = self.xstart
|
||||
xend = self.xend
|
||||
xi = self.xi
|
||||
|
||||
ystart = self.ystart
|
||||
yend = self.yend
|
||||
yi = self.yi
|
||||
ynblocks = self.y.nblocks
|
||||
else:
|
||||
xstart = self.ystart
|
||||
xend = self.yend
|
||||
xi = self.yi
|
||||
|
||||
ystart = self.xstart
|
||||
yend = self.xend
|
||||
yi = self.xi
|
||||
ynblocks = self.x.nblocks
|
||||
|
||||
nend = xend[xi]
|
||||
|
||||
# done with y?
|
||||
if yi == ynblocks:
|
||||
self._set_current_indices(xi + 1, yi, mode)
|
||||
return nend
|
||||
elif nend < ystart[yi]:
|
||||
# block ends before y block
|
||||
self._set_current_indices(xi + 1, yi, mode)
|
||||
return nend
|
||||
else:
|
||||
while yi < ynblocks and nend > yend[yi]:
|
||||
yi += 1
|
||||
|
||||
self._set_current_indices(xi + 1, yi, mode)
|
||||
|
||||
if yi == ynblocks:
|
||||
return nend
|
||||
|
||||
if nend < ystart[yi]:
|
||||
# we're done, return the block end
|
||||
return nend
|
||||
else:
|
||||
# merge blocks, continue searching
|
||||
# this also catches the case where the blocks touch or overlap,
# so the merged block keeps growing on the other index
|
||||
return self._find_next_block_end(1 - mode)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Sparse arithmetic
|
||||
|
||||
include "sparse_op_helper.pxi"
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# SparseArray mask create operations
|
||||
|
||||
def make_mask_object_ndarray(ndarray[object, ndim=1] arr, object fill_value):
|
||||
cdef:
|
||||
object value
|
||||
Py_ssize_t i
|
||||
Py_ssize_t new_length = len(arr)
|
||||
ndarray[int8_t, ndim=1] mask
|
||||
|
||||
mask = np.ones(new_length, dtype=np.int8)
|
||||
|
||||
for i in range(new_length):
|
||||
value = arr[i]
|
||||
if value == fill_value and type(value) == type(fill_value):
|
||||
mask[i] = 0
|
||||
|
||||
return mask.view(dtype=bool)
|
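Note the strict type(value) == type(fill_value) check: values that merely compare equal to the fill (e.g. 1.0 against an integer fill of 1) are kept. The returned boolean mask is True where a value must be stored explicitly:

    import numpy as np
    from pandas._libs.sparse import make_mask_object_ndarray

    arr = np.array([0, 1.0, 1, 2], dtype=object)
    make_mask_object_ndarray(arr, 1)
    # array([ True,  True, False,  True])
    # -- 1.0 == 1 but is a float, so only the int 1 is treated as fill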
||||
@@ -0,0 +1,309 @@
|
||||
"""
|
||||
Template for each `dtype` helper function for sparse ops
|
||||
|
||||
WARNING: DO NOT edit the .pxi FILE directly; it is generated from the .pxi.in
|
||||
"""
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Sparse op
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
ctypedef fused sparse_t:
|
||||
float64_t
|
||||
int64_t
|
||||
|
||||
|
||||
cdef inline float64_t __div__(sparse_t a, sparse_t b):
|
||||
if b == 0:
|
||||
if a > 0:
|
||||
return INF
|
||||
elif a < 0:
|
||||
return -INF
|
||||
else:
|
||||
return NaN
|
||||
else:
|
||||
return float(a) / b
|
||||
|
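This reproduces NumPy-style division semantics for the scalar fill values: a nonzero numerator over zero gives a signed infinity, and 0/0 gives NaN. In plain Python terms (the name sparse_div is illustrative):

    import numpy as np

    def sparse_div(a, b):
        # mirrors __div__ above for scalar operands
        if b == 0:
            return np.inf if a > 0 else (-np.inf if a < 0 else np.nan)
        return a / b

    sparse_div(1, 0)    # inf
    sparse_div(-1, 0)   # -inf
    sparse_div(0, 0)    # nan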
||||
|
||||
cdef inline float64_t __truediv__(sparse_t a, sparse_t b):
|
||||
return __div__(a, b)
|
||||
|
||||
|
||||
cdef inline sparse_t __mod__(sparse_t a, sparse_t b):
|
||||
if b == 0:
|
||||
if sparse_t is float64_t:
|
||||
return NaN
|
||||
else:
|
||||
return 0
|
||||
else:
|
||||
return a % b
|
||||
|
||||
|
||||
cdef inline sparse_t __floordiv__(sparse_t a, sparse_t b):
|
||||
if b == 0:
|
||||
if sparse_t is float64_t:
|
||||
return NaN
|
||||
else:
|
||||
return 0
|
||||
else:
|
||||
return a // b
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# sparse array op
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
{{py:
|
||||
|
||||
# dtype, arith_comp_group, logical_group
|
||||
dtypes = [('float64', True, False),
|
||||
('int64', True, True),
|
||||
('uint8', False, True)]
|
||||
# do not generate arithmetic / comparison template for uint8,
|
||||
# it should be done in fused types
|
||||
|
||||
def get_op(tup):
|
||||
assert isinstance(tup, tuple)
|
||||
assert len(tup) == 4
|
||||
|
||||
opname, lval, rval, dtype = tup
|
||||
|
||||
ops_dict = {'add': '{0} + {1}',
|
||||
'sub': '{0} - {1}',
|
||||
'mul': '{0} * {1}',
|
||||
'div': '__div__({0}, {1})',
|
||||
'mod': '__mod__({0}, {1})',
|
||||
'truediv': '__truediv__({0}, {1})',
|
||||
'floordiv': '__floordiv__({0}, {1})',
|
||||
'pow': '{0} ** {1}',
|
||||
'eq': '{0} == {1}',
|
||||
'ne': '{0} != {1}',
|
||||
'lt': '{0} < {1}',
|
||||
'gt': '{0} > {1}',
|
||||
'le': '{0} <= {1}',
|
||||
'ge': '{0} >= {1}',
|
||||
|
||||
'and': '{0} & {1}', # logical op
|
||||
'or': '{0} | {1}',
|
||||
'xor': '{0} ^ {1}'}
|
||||
|
||||
return ops_dict[opname].format(lval, rval)
|
||||
|
||||
|
||||
def get_dispatch(dtypes):
|
||||
|
||||
ops_list = ['add', 'sub', 'mul', 'div', 'mod', 'truediv',
|
||||
'floordiv', 'pow',
|
||||
'eq', 'ne', 'lt', 'gt', 'le', 'ge',
|
||||
'and', 'or', 'xor']
|
||||
|
||||
for opname in ops_list:
|
||||
for dtype, arith_comp_group, logical_group in dtypes:
|
||||
|
||||
if opname in ('div', 'truediv'):
|
||||
rdtype = 'float64'
|
||||
elif opname in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'):
|
||||
# comparison op
|
||||
rdtype = 'uint8'
|
||||
elif opname in ('and', 'or', 'xor'):
|
||||
# logical op
|
||||
rdtype = 'uint8'
|
||||
else:
|
||||
rdtype = dtype
|
||||
|
||||
if opname in ('and', 'or', 'xor'):
|
||||
if logical_group:
|
||||
yield opname, dtype, rdtype
|
||||
else:
|
||||
if arith_comp_group:
|
||||
yield opname, dtype, rdtype
|
||||
|
||||
}}
|
||||
|
||||
|
||||
{{for opname, dtype, rdtype in get_dispatch(dtypes)}}
|
||||
|
||||
|
||||
@cython.wraparound(False)
|
||||
@cython.boundscheck(False)
|
||||
cdef inline tuple block_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_,
|
||||
BlockIndex xindex,
|
||||
{{dtype}}_t xfill,
|
||||
{{dtype}}_t[:] y_,
|
||||
BlockIndex yindex,
|
||||
{{dtype}}_t yfill):
|
||||
'''
|
||||
Binary operator on BlockIndex objects with fill values
|
||||
'''
|
||||
|
||||
cdef:
|
||||
BlockIndex out_index
|
||||
Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices
|
||||
int32_t xbp = 0, ybp = 0 # block positions
|
||||
int32_t xloc, yloc
|
||||
Py_ssize_t xblock = 0, yblock = 0 # block numbers
|
||||
|
||||
{{dtype}}_t[:] x, y
|
||||
ndarray[{{rdtype}}_t, ndim=1] out
|
||||
|
||||
# to suppress Cython warning
|
||||
x = x_
|
||||
y = y_
|
||||
|
||||
out_index = xindex.make_union(yindex)
|
||||
out = np.empty(out_index.npoints, dtype=np.{{rdtype}})
|
||||
|
||||
# Wow, what a hack job. Need to do something about this
|
||||
|
||||
# walk the two SparseVectors, adding matched locations...
|
||||
for out_i in range(out_index.npoints):
|
||||
if yblock == yindex.nblocks:
|
||||
# use y fill value
|
||||
out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}}
|
||||
xi += 1
|
||||
|
||||
# advance x location
|
||||
xbp += 1
|
||||
if xbp == xindex.lenbuf[xblock]:
|
||||
xblock += 1
|
||||
xbp = 0
|
||||
continue
|
||||
|
||||
if xblock == xindex.nblocks:
|
||||
# use x fill value
|
||||
out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}}
|
||||
yi += 1
|
||||
|
||||
# advance y location
|
||||
ybp += 1
|
||||
if ybp == yindex.lenbuf[yblock]:
|
||||
yblock += 1
|
||||
ybp = 0
|
||||
continue
|
||||
|
||||
yloc = yindex.locbuf[yblock] + ybp
|
||||
xloc = xindex.locbuf[xblock] + xbp
|
||||
|
||||
# each index in the out_index had to come from either x, y, or both
|
||||
if xloc == yloc:
|
||||
out[out_i] = {{(opname, 'x[xi]', 'y[yi]', dtype) | get_op}}
|
||||
xi += 1
|
||||
yi += 1
|
||||
|
||||
# advance both locations
|
||||
xbp += 1
|
||||
if xbp == xindex.lenbuf[xblock]:
|
||||
xblock += 1
|
||||
xbp = 0
|
||||
|
||||
ybp += 1
|
||||
if ybp == yindex.lenbuf[yblock]:
|
||||
yblock += 1
|
||||
ybp = 0
|
||||
|
||||
elif xloc < yloc:
|
||||
# use y fill value
|
||||
out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}}
|
||||
xi += 1
|
||||
|
||||
# advance x location
|
||||
xbp += 1
|
||||
if xbp == xindex.lenbuf[xblock]:
|
||||
xblock += 1
|
||||
xbp = 0
|
||||
else:
|
||||
# use x fill value
|
||||
out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}}
|
||||
yi += 1
|
||||
|
||||
# advance y location
|
||||
ybp += 1
|
||||
if ybp == yindex.lenbuf[yblock]:
|
||||
yblock += 1
|
||||
ybp = 0
|
||||
|
||||
return out, out_index, {{(opname, 'xfill', 'yfill', dtype) | get_op}}
|
||||
|
||||
|
||||
@cython.wraparound(False)
|
||||
@cython.boundscheck(False)
|
||||
cdef inline tuple int_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_,
|
||||
IntIndex xindex,
|
||||
{{dtype}}_t xfill,
|
||||
{{dtype}}_t[:] y_,
|
||||
IntIndex yindex,
|
||||
{{dtype}}_t yfill):
|
||||
cdef:
|
||||
IntIndex out_index
|
||||
Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices
|
||||
int32_t xloc, yloc
|
||||
int32_t[:] xindices, yindices, out_indices
|
||||
{{dtype}}_t[:] x, y
|
||||
ndarray[{{rdtype}}_t, ndim=1] out
|
||||
|
||||
# suppress Cython compiler warnings due to inlining
|
||||
x = x_
|
||||
y = y_
|
||||
|
||||
# need to do this first to know size of result array
|
||||
out_index = xindex.make_union(yindex)
|
||||
out = np.empty(out_index.npoints, dtype=np.{{rdtype}})
|
||||
|
||||
xindices = xindex.indices
|
||||
yindices = yindex.indices
|
||||
out_indices = out_index.indices
|
||||
|
||||
# walk the two SparseVectors, adding matched locations...
|
||||
for out_i in range(out_index.npoints):
|
||||
if xi == xindex.npoints:
|
||||
# use x fill value
|
||||
out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}}
|
||||
yi += 1
|
||||
continue
|
||||
|
||||
if yi == yindex.npoints:
|
||||
# use y fill value
|
||||
out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}}
|
||||
xi += 1
|
||||
continue
|
||||
|
||||
xloc = xindices[xi]
|
||||
yloc = yindices[yi]
|
||||
|
||||
# each index in the out_index had to come from either x, y, or both
|
||||
if xloc == yloc:
|
||||
out[out_i] = {{(opname, 'x[xi]', 'y[yi]', dtype) | get_op}}
|
||||
xi += 1
|
||||
yi += 1
|
||||
elif xloc < yloc:
|
||||
# use y fill value
|
||||
out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}}
|
||||
xi += 1
|
||||
else:
|
||||
# use x fill value
|
||||
out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}}
|
||||
yi += 1
|
||||
|
||||
return out, out_index, {{(opname, 'xfill', 'yfill', dtype) | get_op}}
|
||||
|
||||
|
||||
cpdef sparse_{{opname}}_{{dtype}}({{dtype}}_t[:] x,
|
||||
SparseIndex xindex, {{dtype}}_t xfill,
|
||||
{{dtype}}_t[:] y,
|
||||
SparseIndex yindex, {{dtype}}_t yfill):
|
||||
|
||||
if isinstance(xindex, BlockIndex):
|
||||
return block_op_{{opname}}_{{dtype}}(x, xindex.to_block_index(), xfill,
|
||||
y, yindex.to_block_index(), yfill)
|
||||
elif isinstance(xindex, IntIndex):
|
||||
return int_op_{{opname}}_{{dtype}}(x, xindex.to_int_index(), xfill,
|
||||
y, yindex.to_int_index(), yfill)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
cpdef sparse_fill_{{opname}}_{{dtype}}({{dtype}}_t xfill,
|
||||
{{dtype}}_t yfill):
|
||||
return {{(opname, 'xfill', 'yfill', dtype) | get_op}}
|
||||
|
||||
{{endfor}}
|
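The {{for}} loop stamps out one block_op_*, int_op_*, sparse_* and sparse_fill_* function per (opname, dtype) pair. For example, the generated sparse_add_float64 dispatches on the index type and returns the dense output values, the union index, and the combined fill value:

    import numpy as np
    from pandas._libs.sparse import IntIndex, sparse_add_float64

    x = np.array([1.0, 2.0])
    y = np.array([10.0, 20.0])
    xi = IntIndex(5, [0, 2])
    yi = IntIndex(5, [2, 3])

    out, out_index, out_fill = sparse_add_float64(x, xi, 0.0, y, yi, 0.0)
    # out      -> array([ 1., 12., 20.])   (union points 0, 2, 3)
    # out_fill -> 0.0                      (xfill + yfill)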
||||
@@ -0,0 +1,48 @@
|
||||
#ifndef _PANDAS_MATH_H_
|
||||
#define _PANDAS_MATH_H_
|
||||
|
||||
// MSVC 2017 has a bug where `x == x` can be true for NaNs.
|
||||
// MSC_VER from https://stackoverflow.com/a/70630/1889400
|
||||
// Place upper bound on this check once a fixed MSVC is released.
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1800)
|
||||
#include <cmath>
|
||||
// In older versions of Visual Studio, std::signbit was not defined.
|
||||
// This defines it using _copysign
|
||||
namespace std {
|
||||
__inline int isnan(double x) { return _isnan(x); }
|
||||
__inline int signbit(double num) { return _copysign(1.0, num) < 0; }
|
||||
__inline int notnan(double x) { return !isnan(x); }
|
||||
}
|
||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1900)
|
||||
#include <cmath>
|
||||
namespace std {
|
||||
__inline int isnan(double x) { return _isnan(x); }
|
||||
__inline int notnan(double x) { return !isnan(x); }
|
||||
}
|
||||
#elif defined(_MSC_VER)
|
||||
#include <cmath>
|
||||
namespace std {
|
||||
__inline int isnan(double x) { return _isnan(x); }
|
||||
__inline int notnan(double x) { return x == x; }
|
||||
}
|
||||
#elif defined(__MVS__)
|
||||
#include <cmath>
|
||||
|
||||
#define _signbit signbit
|
||||
#undef signbit
|
||||
#undef isnan
|
||||
|
||||
namespace std {
|
||||
__inline int notnan(double x) { return x == x; }
|
||||
__inline int signbit(double num) { return _signbit(num); }
|
||||
__inline int isnan(double x) { return isnan(x); }
|
||||
}
|
||||
#else
|
||||
#include <cmath>
|
||||
|
||||
namespace std {
|
||||
__inline int notnan(double x) { return x == x; }
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@@ -0,0 +1,305 @@
|
||||
// ISO C9x compliant inttypes.h for Microsoft Visual Studio
|
||||
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
|
||||
//
|
||||
// Copyright (c) 2006 Alexander Chemeris
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. The name of the author may be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _MSC_VER // [
|
||||
#error "Use this header only with Microsoft Visual C++ compilers!"
|
||||
#endif // _MSC_VER ]
|
||||
|
||||
#ifndef _MSC_INTTYPES_H_ // [
|
||||
#define _MSC_INTTYPES_H_
|
||||
|
||||
#if _MSC_VER > 1000
|
||||
#pragma once
|
||||
#endif
|
||||
|
||||
#include "ms_stdint.h"
|
||||
|
||||
// 7.8 Format conversion of integer types
|
||||
|
||||
typedef struct {
|
||||
intmax_t quot;
|
||||
intmax_t rem;
|
||||
} imaxdiv_t;
|
||||
|
||||
// 7.8.1 Macros for format specifiers
|
||||
|
||||
#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198
|
||||
|
||||
// The fprintf macros for signed integers are:
|
||||
#define PRId8 "d"
|
||||
#define PRIi8 "i"
|
||||
#define PRIdLEAST8 "d"
|
||||
#define PRIiLEAST8 "i"
|
||||
#define PRIdFAST8 "d"
|
||||
#define PRIiFAST8 "i"
|
||||
|
||||
#define PRId16 "hd"
|
||||
#define PRIi16 "hi"
|
||||
#define PRIdLEAST16 "hd"
|
||||
#define PRIiLEAST16 "hi"
|
||||
#define PRIdFAST16 "hd"
|
||||
#define PRIiFAST16 "hi"
|
||||
|
||||
#define PRId32 "I32d"
|
||||
#define PRIi32 "I32i"
|
||||
#define PRIdLEAST32 "I32d"
|
||||
#define PRIiLEAST32 "I32i"
|
||||
#define PRIdFAST32 "I32d"
|
||||
#define PRIiFAST32 "I32i"
|
||||
|
||||
#define PRId64 "I64d"
|
||||
#define PRIi64 "I64i"
|
||||
#define PRIdLEAST64 "I64d"
|
||||
#define PRIiLEAST64 "I64i"
|
||||
#define PRIdFAST64 "I64d"
|
||||
#define PRIiFAST64 "I64i"
|
||||
|
||||
#define PRIdMAX "I64d"
|
||||
#define PRIiMAX "I64i"
|
||||
|
||||
#define PRIdPTR "Id"
|
||||
#define PRIiPTR "Ii"
|
||||
|
||||
// The fprintf macros for unsigned integers are:
|
||||
#define PRIo8 "o"
|
||||
#define PRIu8 "u"
|
||||
#define PRIx8 "x"
|
||||
#define PRIX8 "X"
|
||||
#define PRIoLEAST8 "o"
|
||||
#define PRIuLEAST8 "u"
|
||||
#define PRIxLEAST8 "x"
|
||||
#define PRIXLEAST8 "X"
|
||||
#define PRIoFAST8 "o"
|
||||
#define PRIuFAST8 "u"
|
||||
#define PRIxFAST8 "x"
|
||||
#define PRIXFAST8 "X"
|
||||
|
||||
#define PRIo16 "ho"
|
||||
#define PRIu16 "hu"
|
||||
#define PRIx16 "hx"
|
||||
#define PRIX16 "hX"
|
||||
#define PRIoLEAST16 "ho"
|
||||
#define PRIuLEAST16 "hu"
|
||||
#define PRIxLEAST16 "hx"
|
||||
#define PRIXLEAST16 "hX"
|
||||
#define PRIoFAST16 "ho"
|
||||
#define PRIuFAST16 "hu"
|
||||
#define PRIxFAST16 "hx"
|
||||
#define PRIXFAST16 "hX"
|
||||
|
||||
#define PRIo32 "I32o"
|
||||
#define PRIu32 "I32u"
|
||||
#define PRIx32 "I32x"
|
||||
#define PRIX32 "I32X"
|
||||
#define PRIoLEAST32 "I32o"
|
||||
#define PRIuLEAST32 "I32u"
|
||||
#define PRIxLEAST32 "I32x"
|
||||
#define PRIXLEAST32 "I32X"
|
||||
#define PRIoFAST32 "I32o"
|
||||
#define PRIuFAST32 "I32u"
|
||||
#define PRIxFAST32 "I32x"
|
||||
#define PRIXFAST32 "I32X"
|
||||
|
||||
#define PRIo64 "I64o"
|
||||
#define PRIu64 "I64u"
|
||||
#define PRIx64 "I64x"
|
||||
#define PRIX64 "I64X"
|
||||
#define PRIoLEAST64 "I64o"
|
||||
#define PRIuLEAST64 "I64u"
|
||||
#define PRIxLEAST64 "I64x"
|
||||
#define PRIXLEAST64 "I64X"
|
||||
#define PRIoFAST64 "I64o"
|
||||
#define PRIuFAST64 "I64u"
|
||||
#define PRIxFAST64 "I64x"
|
||||
#define PRIXFAST64 "I64X"
|
||||
|
||||
#define PRIoMAX "I64o"
|
||||
#define PRIuMAX "I64u"
|
||||
#define PRIxMAX "I64x"
|
||||
#define PRIXMAX "I64X"
|
||||
|
||||
#define PRIoPTR "Io"
|
||||
#define PRIuPTR "Iu"
|
||||
#define PRIxPTR "Ix"
|
||||
#define PRIXPTR "IX"
|
||||
|
||||
// The fscanf macros for signed integers are:
|
||||
#define SCNd8 "d"
|
||||
#define SCNi8 "i"
|
||||
#define SCNdLEAST8 "d"
|
||||
#define SCNiLEAST8 "i"
|
||||
#define SCNdFAST8 "d"
|
||||
#define SCNiFAST8 "i"
|
||||
|
||||
#define SCNd16 "hd"
|
||||
#define SCNi16 "hi"
|
||||
#define SCNdLEAST16 "hd"
|
||||
#define SCNiLEAST16 "hi"
|
||||
#define SCNdFAST16 "hd"
|
||||
#define SCNiFAST16 "hi"
|
||||
|
||||
#define SCNd32 "ld"
|
||||
#define SCNi32 "li"
|
||||
#define SCNdLEAST32 "ld"
|
||||
#define SCNiLEAST32 "li"
|
||||
#define SCNdFAST32 "ld"
|
||||
#define SCNiFAST32 "li"
|
||||
|
||||
#define SCNd64 "I64d"
|
||||
#define SCNi64 "I64i"
|
||||
#define SCNdLEAST64 "I64d"
|
||||
#define SCNiLEAST64 "I64i"
|
||||
#define SCNdFAST64 "I64d"
|
||||
#define SCNiFAST64 "I64i"
|
||||
|
||||
#define SCNdMAX "I64d"
|
||||
#define SCNiMAX "I64i"
|
||||
|
||||
#ifdef _WIN64 // [
|
||||
# define SCNdPTR "I64d"
|
||||
# define SCNiPTR "I64i"
|
||||
#else // _WIN64 ][
|
||||
# define SCNdPTR "ld"
|
||||
# define SCNiPTR "li"
|
||||
#endif // _WIN64 ]
|
||||
|
||||
// The fscanf macros for unsigned integers are:
|
||||
#define SCNo8 "o"
|
||||
#define SCNu8 "u"
|
||||
#define SCNx8 "x"
|
||||
#define SCNX8 "X"
|
||||
#define SCNoLEAST8 "o"
|
||||
#define SCNuLEAST8 "u"
|
||||
#define SCNxLEAST8 "x"
|
||||
#define SCNXLEAST8 "X"
|
||||
#define SCNoFAST8 "o"
|
||||
#define SCNuFAST8 "u"
|
||||
#define SCNxFAST8 "x"
|
||||
#define SCNXFAST8 "X"
|
||||
|
||||
#define SCNo16 "ho"
|
||||
#define SCNu16 "hu"
|
||||
#define SCNx16 "hx"
|
||||
#define SCNX16 "hX"
|
||||
#define SCNoLEAST16 "ho"
|
||||
#define SCNuLEAST16 "hu"
|
||||
#define SCNxLEAST16 "hx"
|
||||
#define SCNXLEAST16 "hX"
|
||||
#define SCNoFAST16 "ho"
|
||||
#define SCNuFAST16 "hu"
|
||||
#define SCNxFAST16 "hx"
|
||||
#define SCNXFAST16 "hX"
|
||||
|
||||
#define SCNo32 "lo"
|
||||
#define SCNu32 "lu"
|
||||
#define SCNx32 "lx"
|
||||
#define SCNX32 "lX"
|
||||
#define SCNoLEAST32 "lo"
|
||||
#define SCNuLEAST32 "lu"
|
||||
#define SCNxLEAST32 "lx"
|
||||
#define SCNXLEAST32 "lX"
|
||||
#define SCNoFAST32 "lo"
|
||||
#define SCNuFAST32 "lu"
|
||||
#define SCNxFAST32 "lx"
|
||||
#define SCNXFAST32 "lX"
|
||||
|
||||
#define SCNo64 "I64o"
|
||||
#define SCNu64 "I64u"
|
||||
#define SCNx64 "I64x"
|
||||
#define SCNX64 "I64X"
|
||||
#define SCNoLEAST64 "I64o"
|
||||
#define SCNuLEAST64 "I64u"
|
||||
#define SCNxLEAST64 "I64x"
|
||||
#define SCNXLEAST64 "I64X"
|
||||
#define SCNoFAST64 "I64o"
|
||||
#define SCNuFAST64 "I64u"
|
||||
#define SCNxFAST64 "I64x"
|
||||
#define SCNXFAST64 "I64X"
|
||||
|
||||
#define SCNoMAX "I64o"
|
||||
#define SCNuMAX "I64u"
|
||||
#define SCNxMAX "I64x"
|
||||
#define SCNXMAX "I64X"
|
||||
|
||||
#ifdef _WIN64 // [
|
||||
# define SCNoPTR "I64o"
|
||||
# define SCNuPTR "I64u"
|
||||
# define SCNxPTR "I64x"
|
||||
# define SCNXPTR "I64X"
|
||||
#else // _WIN64 ][
|
||||
# define SCNoPTR "lo"
|
||||
# define SCNuPTR "lu"
|
||||
# define SCNxPTR "lx"
|
||||
# define SCNXPTR "lX"
|
||||
#endif // _WIN64 ]
|
||||
|
||||
#endif // __STDC_FORMAT_MACROS ]
|
||||
|
||||
// 7.8.2 Functions for greatest-width integer types
|
||||
|
||||
// 7.8.2.1 The imaxabs function
|
||||
#define imaxabs _abs64
|
||||
|
||||
// 7.8.2.2 The imaxdiv function
|
||||
|
||||
// This is a modified version of the div() function from Microsoft's div.c, found
|
||||
// in %MSVC.NET%\crt\src\div.c
|
||||
#ifdef STATIC_IMAXDIV // [
|
||||
static
|
||||
#else // STATIC_IMAXDIV ][
|
||||
_inline
|
||||
#endif // STATIC_IMAXDIV ]
|
||||
imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
|
||||
{
|
||||
imaxdiv_t result;
|
||||
|
||||
result.quot = numer / denom;
|
||||
result.rem = numer % denom;
|
||||
|
||||
if (numer < 0 && result.rem > 0) {
|
||||
// did division wrong; must fix up
|
||||
++result.quot;
|
||||
result.rem -= denom;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
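The fix-up handles runtimes whose division of a negative numerator rounded toward negative infinity; the result is normalized to truncation toward zero with a remainder carrying the numerator's sign. A quick Python check of the target semantics (note Python's own divmod floors instead):

    def imaxdiv(numer, denom):
        # C99 semantics: quotient truncates toward zero,
        # remainder takes the sign of the numerator
        quot = abs(numer) // abs(denom)
        if (numer < 0) != (denom < 0):
            quot = -quot
        return quot, numer - quot * denom

    imaxdiv(-7, 2)   # (-3, -1)
    divmod(-7, 2)    # (-4,  1)  -- Python floors, C truncates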
||||
|
||||
// 7.8.2.3 The strtoimax and strtoumax functions
|
||||
#define strtoimax _strtoi64
|
||||
#define strtoumax _strtoui64
|
||||
|
||||
// 7.8.2.4 The wcstoimax and wcstoumax functions
|
||||
#define wcstoimax _wcstoi64
|
||||
#define wcstoumax _wcstoui64
|
||||
|
||||
|
||||
#endif // _MSC_INTTYPES_H_ ]
|
||||
@@ -0,0 +1,247 @@
|
||||
// ISO C9x compliant stdint.h for Microsoft Visual Studio
|
||||
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
|
||||
//
|
||||
// Copyright (c) 2006-2008 Alexander Chemeris
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. The name of the author may be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _MSC_VER // [
|
||||
#error "Use this header only with Microsoft Visual C++ compilers!"
|
||||
#endif // _MSC_VER ]
|
||||
|
||||
#ifndef _MSC_STDINT_H_ // [
|
||||
#define _MSC_STDINT_H_
|
||||
|
||||
#if _MSC_VER > 1000
|
||||
#pragma once
|
||||
#endif
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
|
||||
// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
|
||||
// or the compiler gives many errors like this:
|
||||
// error C2733: second C linkage of overloaded function 'wmemchr' not allowed
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
# include <wchar.h>
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
// Define _W64 macros to mark types changing their size, like intptr_t.
|
||||
#ifndef _W64
|
||||
# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
|
||||
# define _W64 __w64
|
||||
# else
|
||||
# define _W64
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
// 7.18.1 Integer types
|
||||
|
||||
// 7.18.1.1 Exact-width integer types
|
||||
|
||||
// Visual Studio 6 and Embedded Visual C++ 4 don't
|
||||
// realize that, e.g. char has the same size as __int8
|
||||
// so we give up on __intX for them.
|
||||
#if (_MSC_VER < 1300)
|
||||
typedef signed char int8_t;
|
||||
typedef signed short int16_t;
|
||||
typedef signed int int32_t;
|
||||
typedef unsigned char uint8_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef unsigned int uint32_t;
|
||||
#else
|
||||
typedef signed __int8 int8_t;
|
||||
typedef signed __int16 int16_t;
|
||||
typedef signed __int32 int32_t;
|
||||
typedef unsigned __int8 uint8_t;
|
||||
typedef unsigned __int16 uint16_t;
|
||||
typedef unsigned __int32 uint32_t;
|
||||
#endif
|
||||
typedef signed __int64 int64_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
|
||||
|
||||
// 7.18.1.2 Minimum-width integer types
|
||||
typedef int8_t int_least8_t;
|
||||
typedef int16_t int_least16_t;
|
||||
typedef int32_t int_least32_t;
|
||||
typedef int64_t int_least64_t;
|
||||
typedef uint8_t uint_least8_t;
|
||||
typedef uint16_t uint_least16_t;
|
||||
typedef uint32_t uint_least32_t;
|
||||
typedef uint64_t uint_least64_t;
|
||||
|
||||
// 7.18.1.3 Fastest minimum-width integer types
|
||||
typedef int8_t int_fast8_t;
|
||||
typedef int16_t int_fast16_t;
|
||||
typedef int32_t int_fast32_t;
|
||||
typedef int64_t int_fast64_t;
|
||||
typedef uint8_t uint_fast8_t;
|
||||
typedef uint16_t uint_fast16_t;
|
||||
typedef uint32_t uint_fast32_t;
|
||||
typedef uint64_t uint_fast64_t;
|
||||
|
||||
// 7.18.1.4 Integer types capable of holding object pointers
|
||||
#ifdef _WIN64 // [
|
||||
typedef signed __int64 intptr_t;
|
||||
typedef unsigned __int64 uintptr_t;
|
||||
#else // _WIN64 ][
|
||||
typedef _W64 signed int intptr_t;
|
||||
typedef _W64 unsigned int uintptr_t;
|
||||
#endif // _WIN64 ]
|
||||
|
||||
// 7.18.1.5 Greatest-width integer types
|
||||
typedef int64_t intmax_t;
|
||||
typedef uint64_t uintmax_t;
|
||||
|
||||
|
||||
// 7.18.2 Limits of specified-width integer types
|
||||
|
||||
#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259
|
||||
|
||||
// 7.18.2.1 Limits of exact-width integer types
|
||||
#define INT8_MIN ((int8_t)_I8_MIN)
|
||||
#define INT8_MAX _I8_MAX
|
||||
#define INT16_MIN ((int16_t)_I16_MIN)
|
||||
#define INT16_MAX _I16_MAX
|
||||
#define INT32_MIN ((int32_t)_I32_MIN)
|
||||
#define INT32_MAX _I32_MAX
|
||||
#define INT64_MIN ((int64_t)_I64_MIN)
|
||||
#define INT64_MAX _I64_MAX
|
||||
#define UINT8_MAX _UI8_MAX
|
||||
#define UINT16_MAX _UI16_MAX
|
||||
#define UINT32_MAX _UI32_MAX
|
||||
#define UINT64_MAX _UI64_MAX
|
||||
|
||||
// 7.18.2.2 Limits of minimum-width integer types
|
||||
#define INT_LEAST8_MIN INT8_MIN
|
||||
#define INT_LEAST8_MAX INT8_MAX
|
||||
#define INT_LEAST16_MIN INT16_MIN
|
||||
#define INT_LEAST16_MAX INT16_MAX
|
||||
#define INT_LEAST32_MIN INT32_MIN
|
||||
#define INT_LEAST32_MAX INT32_MAX
|
||||
#define INT_LEAST64_MIN INT64_MIN
|
||||
#define INT_LEAST64_MAX INT64_MAX
|
||||
#define UINT_LEAST8_MAX UINT8_MAX
|
||||
#define UINT_LEAST16_MAX UINT16_MAX
|
||||
#define UINT_LEAST32_MAX UINT32_MAX
|
||||
#define UINT_LEAST64_MAX UINT64_MAX
|
||||
|
||||
// 7.18.2.3 Limits of fastest minimum-width integer types
|
||||
#define INT_FAST8_MIN INT8_MIN
|
||||
#define INT_FAST8_MAX INT8_MAX
|
||||
#define INT_FAST16_MIN INT16_MIN
|
||||
#define INT_FAST16_MAX INT16_MAX
|
||||
#define INT_FAST32_MIN INT32_MIN
|
||||
#define INT_FAST32_MAX INT32_MAX
|
||||
#define INT_FAST64_MIN INT64_MIN
|
||||
#define INT_FAST64_MAX INT64_MAX
|
||||
#define UINT_FAST8_MAX UINT8_MAX
|
||||
#define UINT_FAST16_MAX UINT16_MAX
|
||||
#define UINT_FAST32_MAX UINT32_MAX
|
||||
#define UINT_FAST64_MAX UINT64_MAX
|
||||
|
||||
// 7.18.2.4 Limits of integer types capable of holding object pointers
|
||||
#ifdef _WIN64 // [
|
||||
# define INTPTR_MIN INT64_MIN
|
||||
# define INTPTR_MAX INT64_MAX
|
||||
# define UINTPTR_MAX UINT64_MAX
|
||||
#else // _WIN64 ][
|
||||
# define INTPTR_MIN INT32_MIN
|
||||
# define INTPTR_MAX INT32_MAX
|
||||
# define UINTPTR_MAX UINT32_MAX
|
||||
#endif // _WIN64 ]
|
||||
|
||||
// 7.18.2.5 Limits of greatest-width integer types
|
||||
#define INTMAX_MIN INT64_MIN
|
||||
#define INTMAX_MAX INT64_MAX
|
||||
#define UINTMAX_MAX UINT64_MAX
|
||||
|
||||
// 7.18.3 Limits of other integer types
|
||||
|
||||
#ifdef _WIN64 // [
|
||||
# define PTRDIFF_MIN _I64_MIN
|
||||
# define PTRDIFF_MAX _I64_MAX
|
||||
#else // _WIN64 ][
|
||||
# define PTRDIFF_MIN _I32_MIN
|
||||
# define PTRDIFF_MAX _I32_MAX
|
||||
#endif // _WIN64 ]
|
||||
|
||||
#define SIG_ATOMIC_MIN INT_MIN
|
||||
#define SIG_ATOMIC_MAX INT_MAX
|
||||
|
||||
#ifndef SIZE_MAX // [
|
||||
# ifdef _WIN64 // [
|
||||
# define SIZE_MAX _UI64_MAX
|
||||
# else // _WIN64 ][
|
||||
# define SIZE_MAX _UI32_MAX
|
||||
# endif // _WIN64 ]
|
||||
#endif // SIZE_MAX ]
|
||||
|
||||
// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
|
||||
#ifndef WCHAR_MIN // [
|
||||
# define WCHAR_MIN 0
|
||||
#endif // WCHAR_MIN ]
|
||||
#ifndef WCHAR_MAX // [
|
||||
# define WCHAR_MAX _UI16_MAX
|
||||
#endif // WCHAR_MAX ]
|
||||
|
||||
#define WINT_MIN 0
|
||||
#define WINT_MAX _UI16_MAX
|
||||
|
||||
#endif // __STDC_LIMIT_MACROS ]
|
||||
|
||||
|
||||
// 7.18.4 Limits of other integer types
|
||||
|
||||
#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260
|
||||
|
||||
// 7.18.4.1 Macros for minimum-width integer constants
|
||||
|
||||
#define INT8_C(val) val##i8
|
||||
#define INT16_C(val) val##i16
|
||||
#define INT32_C(val) val##i32
|
||||
#define INT64_C(val) val##i64
|
||||
|
||||
#define UINT8_C(val) val##ui8
|
||||
#define UINT16_C(val) val##ui16
|
||||
#define UINT32_C(val) val##ui32
|
||||
#define UINT64_C(val) val##ui64
|
||||
|
||||
// 7.18.4.2 Macros for greatest-width integer constants
|
||||
#define INTMAX_C INT64_C
|
||||
#define UINTMAX_C UINT64_C
|
||||
|
||||
#endif // __STDC_CONSTANT_MACROS ]
|
||||
|
||||
|
||||
#endif // _MSC_STDINT_H_ ]
|
||||
@@ -0,0 +1,16 @@
|
||||
#ifndef _PANDAS_PORTABLE_H_
|
||||
#define _PANDAS_PORTABLE_H_
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define strcasecmp( s1, s2 ) _stricmp( s1, s2 )
|
||||
#endif
|
||||
|
||||
// GH-23516 - works around locale perf issues
|
||||
// from MUSL libc, MIT Licensed - see LICENSES
|
||||
#define isdigit_ascii(c) (((unsigned)(c) - '0') < 10u)
|
||||
#define getdigit_ascii(c, default) (isdigit_ascii(c) ? ((int)((c) - '0')) : default)
|
||||
#define isspace_ascii(c) (((c) == ' ') || (((unsigned)(c) - '\t') < 5))
|
||||
#define toupper_ascii(c) ((((unsigned)(c) - 'a') < 26) ? ((c) & 0x5f) : (c))
|
||||
#define tolower_ascii(c) ((((unsigned)(c) - 'A') < 26) ? ((c) | 0x20) : (c))
|
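These rely on unsigned wrap-around: subtracting '0' maps ASCII digits onto 0-9 and everything else onto a huge unsigned value, so a single compare suffices, and the case macros just flip ASCII bit 0x20. The same trick rendered in Python for illustration (names match the macros):

    def isdigit_ascii(ch):
        # unsigned wrap-around compare: only '0'..'9' land below 10
        return ((ord(ch) - ord('0')) & 0xFFFFFFFF) < 10

    def toupper_ascii(ch):
        o = ord(ch)
        return chr(o & 0x5F) if ((o - ord('a')) & 0xFFFFFFFF) < 26 else ch

    isdigit_ascii('7'), isdigit_ascii('x')   # (True, False)
    toupper_ascii('a'), toupper_ascii('[')   # ('A', '[')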
||||
|
||||
#endif
|
||||
@@ -0,0 +1,10 @@
|
||||
#ifndef _PANDAS_STDINT_H_
|
||||
#define _PANDAS_STDINT_H_
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||
#include "ms_stdint.h"
|
||||
#else
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
Copyright (c) 2016, PyData Development Team
|
||||
All rights reserved.
|
||||
|
||||
Distributed under the terms of the BSD Simplified License.
|
||||
|
||||
The full license is in the LICENSE file, distributed with this software.
|
||||
*/
|
||||
|
||||
#ifndef PANDAS__LIBS_SRC_INLINE_HELPER_H_
|
||||
#define PANDAS__LIBS_SRC_INLINE_HELPER_H_
|
||||
|
||||
#ifndef PANDAS_INLINE
|
||||
#if defined(__clang__)
|
||||
#define PANDAS_INLINE static __inline__ __attribute__ ((__unused__))
|
||||
#elif defined(__GNUC__)
|
||||
#define PANDAS_INLINE static __inline__
|
||||
#elif defined(_MSC_VER)
|
||||
#define PANDAS_INLINE static __inline
|
||||
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
|
||||
#define PANDAS_INLINE static inline
|
||||
#else
|
||||
#define PANDAS_INLINE
|
||||
#endif // __GNUC__
|
||||
#endif // PANDAS_INLINE
|
||||
|
||||
#endif // PANDAS__LIBS_SRC_INLINE_HELPER_H_
|
||||
719
.venv/lib/python3.7/site-packages/pandas/_libs/src/klib/khash.h
Normal file
@@ -0,0 +1,719 @@
|
||||
/* The MIT License
|
||||
|
||||
Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
An example:
|
||||
|
||||
#include "khash.h"
|
||||
KHASH_MAP_INIT_INT(32, char)
|
||||
int main() {
|
||||
int ret, is_missing;
|
||||
khiter_t k;
|
||||
khash_t(32) *h = kh_init(32);
|
||||
k = kh_put(32, h, 5, &ret);
|
||||
if (!ret) kh_del(32, h, k);
|
||||
kh_value(h, k) = 10;
|
||||
k = kh_get(32, h, 10);
|
||||
is_missing = (k == kh_end(h));
|
||||
k = kh_get(32, h, 5);
|
||||
kh_del(32, h, k);
|
||||
for (k = kh_begin(h); k != kh_end(h); ++k)
|
||||
if (kh_exist(h, k)) kh_value(h, k) = 1;
|
||||
kh_destroy(32, h);
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
2011-09-16 (0.2.6):
|
||||
|
||||
* The capacity is a power of 2. This seems to dramatically improve the
|
||||
speed for simple keys. Thanks to Zilong Tan for the suggestion. Reference:
|
||||
|
||||
- https://github.com/stefanocasazza/ULib
|
||||
- https://nothings.org/computer/judy/
|
||||
|
||||
* Optionally allow linear probing, which usually has better
|
||||
performance for random input. Double hashing is still the default as it
|
||||
is more robust to certain non-random input.
|
||||
|
||||
* Added Wang's integer hash function (not used by default). This hash
|
||||
function is more robust to certain non-random input.
|
||||
|
||||
2011-02-14 (0.2.5):
|
||||
|
||||
* Allow declaring global functions.
|
||||
|
||||
2009-09-26 (0.2.4):
|
||||
|
||||
* Improve portability
|
||||
|
||||
2008-09-19 (0.2.3):
|
||||
|
||||
* Corrected the example
|
||||
* Improved interfaces
|
||||
|
||||
2008-09-11 (0.2.2):
|
||||
|
||||
* Improved speed a little in kh_put()
|
||||
|
||||
2008-09-10 (0.2.1):
|
||||
|
||||
* Added kh_clear()
|
||||
* Fixed a compiling error
|
||||
|
||||
2008-09-02 (0.2.0):
|
||||
|
||||
* Changed to token concatenation which increases flexibility.
|
||||
|
||||
2008-08-31 (0.1.2):
|
||||
|
||||
* Fixed a bug in kh_get(), which has not been tested previously.
|
||||
|
||||
2008-08-31 (0.1.1):
|
||||
|
||||
* Added destructor
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __AC_KHASH_H
|
||||
#define __AC_KHASH_H
|
||||
|
||||
/*!
|
||||
@header
|
||||
|
||||
Generic hash table library.
|
||||
*/
|
||||
|
||||
#define AC_VERSION_KHASH_H "0.2.6"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include "../inline_helper.h"
|
||||
|
||||
|
||||
// hooks for the memory allocator; the C-runtime allocator is used by default
|
||||
#ifndef KHASH_MALLOC
|
||||
#define KHASH_MALLOC malloc
|
||||
#endif
|
||||
|
||||
#ifndef KHASH_REALLOC
|
||||
#define KHASH_REALLOC realloc
|
||||
#endif
|
||||
|
||||
#ifndef KHASH_CALLOC
|
||||
#define KHASH_CALLOC calloc
|
||||
#endif
|
||||
|
||||
#ifndef KHASH_FREE
|
||||
#define KHASH_FREE free
|
||||
#endif
|
||||
|
||||
|
||||
#if UINT_MAX == 0xffffffffu
|
||||
typedef unsigned int khuint32_t;
|
||||
typedef signed int khint32_t;
|
||||
#elif ULONG_MAX == 0xffffffffu
|
||||
typedef unsigned long khuint32_t;
|
||||
typedef signed long khint32_t;
|
||||
#endif
|
||||
|
||||
#if ULONG_MAX == ULLONG_MAX
|
||||
typedef unsigned long khuint64_t;
|
||||
typedef signed long khint64_t;
|
||||
#else
|
||||
typedef unsigned long long khuint64_t;
|
||||
typedef signed long long khint64_t;
|
||||
#endif
|
||||
|
||||
#if UINT_MAX == 0xffffu
|
||||
typedef unsigned int khuint16_t;
|
||||
typedef signed int khint16_t;
|
||||
#elif USHRT_MAX == 0xffffu
|
||||
typedef unsigned short khuint16_t;
|
||||
typedef signed short khint16_t;
|
||||
#endif
|
||||
|
||||
#if UCHAR_MAX == 0xffu
|
||||
typedef unsigned char khuint8_t;
|
||||
typedef signed char khint8_t;
|
||||
#endif
|
||||
|
||||
typedef double khfloat64_t;
|
||||
typedef float khfloat32_t;
|
||||
|
||||
typedef khuint32_t khuint_t;
|
||||
typedef khuint_t khiter_t;
|
||||
|
||||
#define __ac_isempty(flag, i) ((flag[i>>5]>>(i&0x1fU))&1)
|
||||
#define __ac_isdel(flag, i) (0)
|
||||
#define __ac_iseither(flag, i) __ac_isempty(flag, i)
|
||||
#define __ac_set_isdel_false(flag, i) (0)
|
||||
#define __ac_set_isempty_false(flag, i) (flag[i>>5]&=~(1ul<<(i&0x1fU)))
|
||||
#define __ac_set_isempty_true(flag, i) (flag[i>>5]|=(1ul<<(i&0x1fU)))
|
||||
#define __ac_set_isboth_false(flag, i) __ac_set_isempty_false(flag, i)
|
||||
#define __ac_set_isdel_true(flag, i) ((void)0)
|
||||
|
||||
|
||||
// specializations of https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp
|
||||
khuint32_t PANDAS_INLINE murmur2_32to32(khuint32_t k){
|
||||
const khuint32_t SEED = 0xc70f6907UL;
|
||||
// 'm' and 'r' are mixing constants generated offline.
|
||||
// They're not really 'magic', they just happen to work well.
|
||||
const khuint32_t M_32 = 0x5bd1e995;
|
||||
const int R_32 = 24;
|
||||
|
||||
// Initialize the hash to a 'random' value
|
||||
khuint32_t h = SEED ^ 4;
|
||||
|
||||
//handle 4 bytes:
|
||||
k *= M_32;
|
||||
k ^= k >> R_32;
|
||||
k *= M_32;
|
||||
|
||||
h *= M_32;
|
||||
h ^= k;
|
||||
|
||||
// Do a few final mixes of the hash to ensure the "last few
|
||||
// bytes" are well-incorporated. (Really needed here?)
|
||||
h ^= h >> 13;
|
||||
h *= M_32;
|
||||
h ^= h >> 15;
|
||||
return h;
|
||||
}
|
||||
|
||||
// it is possible to have a special x64-version, which would need less operations, but
|
||||
// using 32bit version always has also some benifits:
|
||||
// - one code for 32bit and 64bit builds
|
||||
// - the same case for 32bit and 64bit builds
|
||||
// - no performance difference could be measured compared to a possible x64-version
|
||||
|
||||
khuint32_t PANDAS_INLINE murmur2_32_32to32(khuint32_t k1, khuint32_t k2){
|
||||
const khuint32_t SEED = 0xc70f6907UL;
|
||||
// 'm' and 'r' are mixing constants generated offline.
|
||||
// They're not really 'magic', they just happen to work well.
|
||||
const khuint32_t M_32 = 0x5bd1e995;
|
||||
const int R_32 = 24;
|
||||
|
||||
// Initialize the hash to a 'random' value
|
||||
khuint32_t h = SEED ^ 4;
|
||||
|
||||
//handle first 4 bytes:
|
||||
k1 *= M_32;
|
||||
k1 ^= k1 >> R_32;
|
||||
k1 *= M_32;
|
||||
|
||||
h *= M_32;
|
||||
h ^= k1;
|
||||
|
||||
//handle second 4 bytes:
|
||||
k2 *= M_32;
|
||||
k2 ^= k2 >> R_32;
|
||||
k2 *= M_32;
|
||||
|
||||
h *= M_32;
|
||||
h ^= k2;
|
||||
|
||||
// Do a few final mixes of the hash to ensure the "last few
|
||||
// bytes" are well-incorporated.
|
||||
h ^= h >> 13;
|
||||
h *= M_32;
|
||||
h ^= h >> 15;
|
||||
return h;
|
||||
}
|
||||
|
||||
khuint32_t PANDAS_INLINE murmur2_64to32(khuint64_t k){
|
||||
khuint32_t k1 = (khuint32_t)k;
|
||||
khuint32_t k2 = (khuint32_t)(k >> 32);
|
||||
|
||||
return murmur2_32_32to32(k1, k2);
|
||||
}
|
||||
|
||||
|
||||
#ifdef KHASH_LINEAR
|
||||
#define __ac_inc(k, m) 1
|
||||
#else
|
||||
#define __ac_inc(k, m) (murmur2_32to32(k) | 1) & (m)
|
||||
#endif
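
// With double hashing the probe step is itself derived from a hash of the key.
// Table sizes are powers of two and m is the mask n_buckets - 1, so forcing
// bit 0 of the step to 1 makes the step odd and therefore coprime with the
// table size: the probe sequence i, i+inc, i+2*inc, ... (mod n_buckets) visits
// every bucket before repeating. E.g. for n_buckets = 8 and inc = 3, probing
// from i = 2 visits 2, 5, 0, 3, 6, 1, 4, 7.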

#define __ac_fsize(m) ((m) < 32? 1 : (m)>>5)

#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif
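
// kroundup32 rounds a 32-bit value up to the next power of two by smearing the
// highest set bit into every lower position. Worked example for x = 100:
// --x gives 99 (0b0110_0011; the decrement keeps exact powers of two fixed),
// the or-shift cascade turns it into 127 (0b0111_1111), and ++x yields 128.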

static const double __ac_HASH_UPPER = 0.77;

#define KHASH_DECLARE(name, khkey_t, khval_t)	 							\
	typedef struct {														\
		khuint_t n_buckets, size, n_occupied, upper_bound;					\
		khuint32_t *flags;													\
		khkey_t *keys;														\
		khval_t *vals;														\
	} kh_##name##_t;														\
	extern kh_##name##_t *kh_init_##name();									\
	extern void kh_destroy_##name(kh_##name##_t *h);						\
	extern void kh_clear_##name(kh_##name##_t *h);							\
	extern khuint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); 	\
	extern void kh_resize_##name(kh_##name##_t *h, khuint_t new_n_buckets); \
	extern khuint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
	extern void kh_del_##name(kh_##name##_t *h, khuint_t x);

#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
	typedef struct {														\
		khuint_t n_buckets, size, n_occupied, upper_bound;					\
		khuint32_t *flags;													\
		khkey_t *keys;														\
		khval_t *vals;														\
	} kh_##name##_t;														\
	SCOPE kh_##name##_t *kh_init_##name(void) {								\
		return (kh_##name##_t*)KHASH_CALLOC(1, sizeof(kh_##name##_t));		\
	}																		\
	SCOPE void kh_destroy_##name(kh_##name##_t *h)							\
	{																		\
		if (h) {															\
			KHASH_FREE(h->keys); KHASH_FREE(h->flags);						\
			KHASH_FREE(h->vals);											\
			KHASH_FREE(h);													\
		}																	\
	}																		\
	SCOPE void kh_clear_##name(kh_##name##_t *h)							\
	{																		\
		if (h && h->flags) {												\
			memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khuint32_t)); \
			h->size = h->n_occupied = 0;									\
		}																	\
	}																		\
	SCOPE khuint_t kh_get_##name(const kh_##name##_t *h, khkey_t key)		\
	{																		\
		if (h->n_buckets) {													\
			khuint_t inc, k, i, last, mask;									\
			mask = h->n_buckets - 1;										\
			k = __hash_func(key); i = k & mask;								\
			inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \
			while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
				i = (i + inc) & mask;										\
				if (i == last) return h->n_buckets;							\
			}																\
			return __ac_iseither(h->flags, i)? h->n_buckets : i;			\
		} else return 0;													\
	}																		\
	SCOPE void kh_resize_##name(kh_##name##_t *h, khuint_t new_n_buckets)	\
	{ /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
		khuint32_t *new_flags = 0;											\
		khuint_t j = 1;														\
		{																	\
			kroundup32(new_n_buckets); 										\
			if (new_n_buckets < 4) new_n_buckets = 4;						\
			if (h->size >= (khuint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0;	/* requested size is too small */ \
			else { /* hash table size to be changed (shrink or expand); rehash */ \
				new_flags = (khuint32_t*)KHASH_MALLOC(__ac_fsize(new_n_buckets) * sizeof(khuint32_t));	\
				memset(new_flags, 0xff, __ac_fsize(new_n_buckets) * sizeof(khuint32_t)); \
				if (h->n_buckets < new_n_buckets) {	/* expand */			\
					h->keys = (khkey_t*)KHASH_REALLOC(h->keys, new_n_buckets * sizeof(khkey_t)); \
					if (kh_is_map) h->vals = (khval_t*)KHASH_REALLOC(h->vals, new_n_buckets * sizeof(khval_t)); \
				} /* otherwise shrink */									\
			}																\
		}																	\
		if (j) { /* rehashing is needed */									\
			for (j = 0; j != h->n_buckets; ++j) {							\
				if (__ac_iseither(h->flags, j) == 0) {						\
					khkey_t key = h->keys[j];								\
					khval_t val;											\
					khuint_t new_mask;										\
					new_mask = new_n_buckets - 1; 							\
					if (kh_is_map) val = h->vals[j];						\
					__ac_set_isempty_true(h->flags, j);						\
					while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
						khuint_t inc, k, i;									\
						k = __hash_func(key);								\
						i = k & new_mask;									\
						inc = __ac_inc(k, new_mask);						\
						while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \
						__ac_set_isempty_false(new_flags, i);				\
						if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
							{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
							if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
							__ac_set_isempty_true(h->flags, i); /* mark it as deleted in the old hash table */ \
						} else { /* write the element and jump out of the loop */ \
							h->keys[i] = key;								\
							if (kh_is_map) h->vals[i] = val;				\
							break;											\
						}													\
					}														\
				}															\
			}																\
			if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
				h->keys = (khkey_t*)KHASH_REALLOC(h->keys, new_n_buckets * sizeof(khkey_t)); \
				if (kh_is_map) h->vals = (khval_t*)KHASH_REALLOC(h->vals, new_n_buckets * sizeof(khval_t)); \
			}																\
			KHASH_FREE(h->flags); /* free the working space */				\
			h->flags = new_flags;											\
			h->n_buckets = new_n_buckets;									\
			h->n_occupied = h->size;										\
			h->upper_bound = (khuint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
		}																	\
	}																		\
	SCOPE khuint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret)	\
	{																		\
		khuint_t x;															\
		if (h->n_occupied >= h->upper_bound) { /* update the hash table */	\
			if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); /* clear "deleted" elements */ \
			else kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \
		} /* TODO: implement automatic shrinking; resize() already supports shrinking */ \
		{																	\
			khuint_t inc, k, i, site, last, mask = h->n_buckets - 1;		\
			x = site = h->n_buckets; k = __hash_func(key); i = k & mask;	\
			if (__ac_isempty(h->flags, i)) x = i; /* for speed up */		\
			else {															\
				inc = __ac_inc(k, mask); last = i;							\
				while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
					if (__ac_isdel(h->flags, i)) site = i;					\
					i = (i + inc) & mask; 									\
					if (i == last) { x = site; break; }						\
				}															\
				if (x == h->n_buckets) {									\
					if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
					else x = i;												\
				}															\
			}																\
		}																	\
		if (__ac_isempty(h->flags, x)) { /* not present at all */			\
			h->keys[x] = key;												\
			__ac_set_isboth_false(h->flags, x);								\
			++h->size; ++h->n_occupied;										\
			*ret = 1;														\
		} else if (__ac_isdel(h->flags, x)) { /* deleted */					\
			h->keys[x] = key;												\
			__ac_set_isboth_false(h->flags, x);								\
			++h->size;														\
			*ret = 2;														\
		} else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
		return x;															\
	}																		\
	SCOPE void kh_del_##name(kh_##name##_t *h, khuint_t x)					\
	{																		\
		if (x != h->n_buckets && !__ac_iseither(h->flags, x)) {				\
			__ac_set_isdel_true(h->flags, x);								\
			--h->size;														\
		}																	\
	}

#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
	KHASH_INIT2(name, PANDAS_INLINE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)

/* --- BEGIN OF HASH FUNCTIONS --- */

/*! @function
  @abstract     Integer hash function
  @param  key   The integer [khuint32_t]
  @return       The hash value [khuint_t]
 */
#define kh_int_hash_func(key) (khuint32_t)(key)
/*! @function
  @abstract     Integer comparison function
 */
#define kh_int_hash_equal(a, b) ((a) == (b))
/*! @function
  @abstract     64-bit integer hash function
  @param  key   The integer [khuint64_t]
  @return       The hash value [khuint_t]
 */
PANDAS_INLINE khuint_t kh_int64_hash_func(khuint64_t key)
{
	return (khuint_t)((key)>>33^(key)^(key)<<11);
}
/*! @function
  @abstract     64-bit integer comparison function
 */
#define kh_int64_hash_equal(a, b) ((a) == (b))

/*! @function
  @abstract     const char* hash function
  @param  s     Pointer to a null terminated string
  @return       The hash value
 */
PANDAS_INLINE khuint_t __ac_X31_hash_string(const char *s)
{
	khuint_t h = *s;
	if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s;
	return h;
}
/*! @function
  @abstract     Another interface to const char* hash function
  @param  key   Pointer to a null terminated string [const char*]
  @return       The hash value [khuint_t]
 */
#define kh_str_hash_func(key) __ac_X31_hash_string(key)
/*! @function
  @abstract     Const char* comparison function
 */
#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)

PANDAS_INLINE khuint_t __ac_Wang_hash(khuint_t key)
{
	key += ~(key << 15);
	key ^= (key >> 10);
	key += (key << 3);
	key ^= (key >> 6);
	key += ~(key << 11);
	key ^= (key >> 16);
	return key;
}
#define kh_int_hash_func2(key) __ac_Wang_hash((khuint_t)key)

/* --- END OF HASH FUNCTIONS --- */

/* Other convenient macros... */

/*!
  @abstract Type of the hash table.
  @param  name  Name of the hash table [symbol]
 */
#define khash_t(name) kh_##name##_t

/*! @function
  @abstract     Initiate a hash table.
  @param  name  Name of the hash table [symbol]
  @return       Pointer to the hash table [khash_t(name)*]
 */
#define kh_init(name) kh_init_##name()

/*! @function
  @abstract     Destroy a hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
 */
#define kh_destroy(name, h) kh_destroy_##name(h)

/*! @function
  @abstract     Reset a hash table without deallocating memory.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
 */
#define kh_clear(name, h) kh_clear_##name(h)

/*! @function
  @abstract     Resize a hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  s     New size [khuint_t]
 */
#define kh_resize(name, h, s) kh_resize_##name(h, s)

/*! @function
  @abstract     Insert a key to the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Key [type of keys]
  @param  r     Extra return code: 0 if the key is present in the hash table;
                1 if the bucket is empty (never used); 2 if the element in
                the bucket has been deleted [int*]
  @return       Iterator to the inserted element [khuint_t]
 */
#define kh_put(name, h, k, r) kh_put_##name(h, k, r)

/*! @function
  @abstract     Retrieve a key from the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Key [type of keys]
  @return       Iterator to the found element, or kh_end(h) if the element is absent [khuint_t]
 */
#define kh_get(name, h, k) kh_get_##name(h, k)

/*! @function
  @abstract     Remove a key from the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Iterator to the element to be deleted [khuint_t]
 */
#define kh_del(name, h, k) kh_del_##name(h, k)

/*! @function
  @abstract     Test whether a bucket contains data.
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khuint_t]
  @return       1 if containing data; 0 otherwise [int]
 */
#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))

/*! @function
  @abstract     Get key given an iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khuint_t]
  @return       Key [type of keys]
 */
#define kh_key(h, x) ((h)->keys[x])

/*! @function
  @abstract     Get value given an iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khuint_t]
  @return       Value [type of values]
  @discussion   For hash sets, calling this results in segfault.
 */
#define kh_val(h, x) ((h)->vals[x])

/*! @function
  @abstract     Alias of kh_val()
 */
#define kh_value(h, x) ((h)->vals[x])

/*! @function
  @abstract     Get the start iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       The start iterator [khuint_t]
 */
#define kh_begin(h) (khuint_t)(0)

/*! @function
  @abstract     Get the end iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       The end iterator [khuint_t]
 */
#define kh_end(h) ((h)->n_buckets)

/*! @function
  @abstract     Get the number of elements in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       Number of elements in the hash table [khuint_t]
 */
#define kh_size(h) ((h)->size)

/*! @function
  @abstract     Get the number of buckets in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       Number of buckets in the hash table [khuint_t]
 */
#define kh_n_buckets(h) ((h)->n_buckets)

/* More convenient interfaces */

/*! @function
  @abstract     Instantiate a hash set containing integer keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_INT(name) \
	KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing integer keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_INT(name, khval_t) \
	KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)

#define KHASH_MAP_INIT_UINT(name, khval_t) \
	KHASH_INIT(name, khuint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)

/*! @function
  @abstract     Instantiate a hash set containing 64-bit integer keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_UINT64(name) \
	KHASH_INIT(name, khuint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)

#define KHASH_SET_INIT_INT64(name) \
	KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing 64-bit integer keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_UINT64(name, khval_t) \
	KHASH_INIT(name, khuint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)

#define KHASH_MAP_INIT_INT64(name, khval_t) \
	KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing 16-bit integer keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_INT16(name, khval_t) \
	KHASH_INIT(name, khint16_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)

#define KHASH_MAP_INIT_UINT16(name, khval_t) \
	KHASH_INIT(name, khuint16_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing 8-bit integer keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_INT8(name, khval_t) \
	KHASH_INIT(name, khint8_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)

#define KHASH_MAP_INIT_UINT8(name, khval_t) \
	KHASH_INIT(name, khuint8_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)


typedef const char *kh_cstr_t;
/*! @function
  @abstract     Instantiate a hash set containing const char* keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_STR(name) \
	KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing const char* keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_STR(name, khval_t) \
	KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
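
// Note on string keys: the table stores the const char* pointer itself, not a
// copy of the bytes (kh_put just assigns h->keys[x] = key), so the caller must
// keep every inserted string alive, unchanged, for the lifetime of the table.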

#define kh_exist_str(h, k) (kh_exist(h, k))
#define kh_exist_float64(h, k) (kh_exist(h, k))
#define kh_exist_uint64(h, k) (kh_exist(h, k))
#define kh_exist_int64(h, k) (kh_exist(h, k))
#define kh_exist_float32(h, k) (kh_exist(h, k))
#define kh_exist_int32(h, k) (kh_exist(h, k))
#define kh_exist_uint32(h, k) (kh_exist(h, k))
#define kh_exist_int16(h, k) (kh_exist(h, k))
#define kh_exist_uint16(h, k) (kh_exist(h, k))
#define kh_exist_int8(h, k) (kh_exist(h, k))
#define kh_exist_uint8(h, k) (kh_exist(h, k))

KHASH_MAP_INIT_STR(str, size_t)
KHASH_MAP_INIT_INT(int32, size_t)
KHASH_MAP_INIT_UINT(uint32, size_t)
KHASH_MAP_INIT_INT64(int64, size_t)
KHASH_MAP_INIT_UINT64(uint64, size_t)
KHASH_MAP_INIT_INT16(int16, size_t)
KHASH_MAP_INIT_UINT16(uint16, size_t)
KHASH_MAP_INIT_INT8(int8, size_t)
KHASH_MAP_INIT_UINT8(uint8, size_t)
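
/* A minimal usage sketch (illustration only, not part of the upstream header):
   it exercises the int64 -> size_t map instantiated by
   KHASH_MAP_INIT_INT64(int64, size_t) above, using only this file's macros
   plus <stdio.h>. Guarded so it is never compiled into the header itself. */
#ifdef KHASH_USAGE_EXAMPLE
#include <stdio.h>
static int khash_usage_example(void) {
	int ret;
	khash_t(int64) *h = kh_init(int64);        /* allocate an empty table */
	khiter_t it = kh_put(int64, h, 42, &ret);  /* ret: 1 new, 2 revived, 0 already present */
	kh_value(h, it) = 1000;                    /* set the value stored for key 42 */
	it = kh_get(int64, h, 42);                 /* lookup; equals kh_end(h) if absent */
	if (it != kh_end(h) && kh_exist(h, it)) {
		printf("42 -> %zu\n", kh_value(h, it));
		kh_del(int64, h, it);                  /* remove by iterator */
	}
	kh_destroy(int64, h);                      /* free keys, flags, vals and the table */
	return ret;
}
#endif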

#endif /* __AC_KHASH_H */
@@ -0,0 +1,446 @@
#include <string.h>
#include <Python.h>

// use numpy's definitions for complex
#include <numpy/arrayobject.h>
typedef npy_complex64 khcomplex64_t;
typedef npy_complex128 khcomplex128_t;


// khash should report usage to tracemalloc
#if PY_VERSION_HEX >= 0x03060000
#include <pymem.h>
#if PY_VERSION_HEX < 0x03070000
#define PyTraceMalloc_Track _PyTraceMalloc_Track
#define PyTraceMalloc_Untrack _PyTraceMalloc_Untrack
#endif
#else
#define PyTraceMalloc_Track(...)
#define PyTraceMalloc_Untrack(...)
#endif


static const int KHASH_TRACE_DOMAIN = 424242;
void *traced_malloc(size_t size){
    void *ptr = malloc(size);
    if (ptr != NULL) {
        PyTraceMalloc_Track(KHASH_TRACE_DOMAIN, (uintptr_t)ptr, size);
    }
    return ptr;
}

void *traced_calloc(size_t num, size_t size){
    void *ptr = calloc(num, size);
    if (ptr != NULL) {
        PyTraceMalloc_Track(KHASH_TRACE_DOMAIN, (uintptr_t)ptr, num * size);
    }
    return ptr;
}

void *traced_realloc(void *old_ptr, size_t size){
    void *ptr = realloc(old_ptr, size);
    if (ptr != NULL) {
        if (old_ptr != ptr) {
            PyTraceMalloc_Untrack(KHASH_TRACE_DOMAIN, (uintptr_t)old_ptr);
        }
        PyTraceMalloc_Track(KHASH_TRACE_DOMAIN, (uintptr_t)ptr, size);
    }
    return ptr;
}

void traced_free(void *ptr){
    if (ptr != NULL) {
        PyTraceMalloc_Untrack(KHASH_TRACE_DOMAIN, (uintptr_t)ptr);
    }
    free(ptr);
}


#define KHASH_MALLOC traced_malloc
#define KHASH_REALLOC traced_realloc
#define KHASH_CALLOC traced_calloc
#define KHASH_FREE traced_free
#include "khash.h"

// Previously we were using the built-in CPython hash function for doubles
// python 2.7 https://github.com/python/cpython/blob/2.7/Objects/object.c#L1021
// python 3.5 https://github.com/python/cpython/blob/3.5/Python/pyhash.c#L85

// The python 3 hash function has the invariant hash(x) == hash(int(x)) == hash(decimal(x))
// and the size of hash may be different by platform / version (long in py2, Py_ssize_t in py3).
// We don't need those invariants because types will be cast before hashing, and if Py_ssize_t
// is 64 bits the truncation causes collision issues. Given all that, we use our own
// simple hash, viewing the double bytes as an int64 and using khash's default
// hash for 64 bit integers.
// GH 13436 showed that _Py_HashDouble doesn't work well with khash
// GH 28303 showed that the simple xor version isn't good enough
// See GH 36729 for evaluation of the currently used murmur2 hash version
// An interesting alternative to the expensive murmur2 hash would be to change
// the probing strategy and use e.g. the probing strategy from CPython's
// implementation of dicts, which shines for smaller sizes but is more
// predisposed to superlinear running times (see GH 36729 for comparison)


khuint64_t PANDAS_INLINE asuint64(double key) {
    khuint64_t val;
    memcpy(&val, &key, sizeof(double));
    return val;
}

khuint32_t PANDAS_INLINE asuint32(float key) {
    khuint32_t val;
    memcpy(&val, &key, sizeof(float));
    return val;
}

#define ZERO_HASH 0
#define NAN_HASH 0

khuint32_t PANDAS_INLINE kh_float64_hash_func(double val){
    // 0.0 and -0.0 should have the same hash:
    if (val == 0.0) {
        return ZERO_HASH;
    }
    // all NaNs should have the same hash:
    if (val != val) {
        return NAN_HASH;
    }
    khuint64_t as_int = asuint64(val);
    return murmur2_64to32(as_int);
}
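
/* A small self-check sketch (illustration only, guarded out of the build):
   it demonstrates the two normalizations above -- +0.0 and -0.0 hash
   identically, and every NaN collapses to NAN_HASH. Assumes <math.h> for
   nan() and <assert.h>. */
#ifdef KHASH_FLOAT_HASH_EXAMPLE
#include <assert.h>
#include <math.h>
static void kh_float64_hash_func_example(void) {
    assert(kh_float64_hash_func(0.0) == kh_float64_hash_func(-0.0));  /* both ZERO_HASH */
    assert(kh_float64_hash_func(nan("")) == NAN_HASH);                /* any NaN payload */
    assert(kh_float64_hash_func(1.5) == kh_float64_hash_func(1.5));   /* deterministic */
}
#endif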

khuint32_t PANDAS_INLINE kh_float32_hash_func(float val){
    // 0.0 and -0.0 should have the same hash:
    if (val == 0.0f) {
        return ZERO_HASH;
    }
    // all NaNs should have the same hash:
    if (val != val) {
        return NAN_HASH;
    }
    khuint32_t as_int = asuint32(val);
    return murmur2_32to32(as_int);
}

#define kh_floats_hash_equal(a, b) ((a) == (b) || ((b) != (b) && (a) != (a)))

#define KHASH_MAP_INIT_FLOAT64(name, khval_t) \
    KHASH_INIT(name, khfloat64_t, khval_t, 1, kh_float64_hash_func, kh_floats_hash_equal)

KHASH_MAP_INIT_FLOAT64(float64, size_t)

#define KHASH_MAP_INIT_FLOAT32(name, khval_t) \
    KHASH_INIT(name, khfloat32_t, khval_t, 1, kh_float32_hash_func, kh_floats_hash_equal)

KHASH_MAP_INIT_FLOAT32(float32, size_t)

khint32_t PANDAS_INLINE kh_complex128_hash_func(khcomplex128_t val){
    return kh_float64_hash_func(val.real)^kh_float64_hash_func(val.imag);
}
khint32_t PANDAS_INLINE kh_complex64_hash_func(khcomplex64_t val){
    return kh_float32_hash_func(val.real)^kh_float32_hash_func(val.imag);
}

#define kh_complex_hash_equal(a, b) \
    (kh_floats_hash_equal(a.real, b.real) && kh_floats_hash_equal(a.imag, b.imag))


#define KHASH_MAP_INIT_COMPLEX64(name, khval_t) \
    KHASH_INIT(name, khcomplex64_t, khval_t, 1, kh_complex64_hash_func, kh_complex_hash_equal)

KHASH_MAP_INIT_COMPLEX64(complex64, size_t)


#define KHASH_MAP_INIT_COMPLEX128(name, khval_t) \
    KHASH_INIT(name, khcomplex128_t, khval_t, 1, kh_complex128_hash_func, kh_complex_hash_equal)

KHASH_MAP_INIT_COMPLEX128(complex128, size_t)


#define kh_exist_complex64(h, k) (kh_exist(h, k))
#define kh_exist_complex128(h, k) (kh_exist(h, k))


// NaN floats should be in the same equivalence class, see GH 22119
int PANDAS_INLINE floatobject_cmp(PyFloatObject* a, PyFloatObject* b){
    return (
        Py_IS_NAN(PyFloat_AS_DOUBLE(a)) &&
        Py_IS_NAN(PyFloat_AS_DOUBLE(b))
    )
    ||
    ( PyFloat_AS_DOUBLE(a) == PyFloat_AS_DOUBLE(b) );
}


// NaNs should be in the same equivalence class, see GH 41836
// PyObject_RichCompareBool for complex objects behaves differently
// and needs to be replaced
int PANDAS_INLINE complexobject_cmp(PyComplexObject* a, PyComplexObject* b){
    return (
        Py_IS_NAN(a->cval.real) &&
        Py_IS_NAN(b->cval.real) &&
        Py_IS_NAN(a->cval.imag) &&
        Py_IS_NAN(b->cval.imag)
    )
    ||
    (
        Py_IS_NAN(a->cval.real) &&
        Py_IS_NAN(b->cval.real) &&
        a->cval.imag == b->cval.imag
    )
    ||
    (
        a->cval.real == b->cval.real &&
        Py_IS_NAN(a->cval.imag) &&
        Py_IS_NAN(b->cval.imag)
    )
    ||
    (
        a->cval.real == b->cval.real &&
        a->cval.imag == b->cval.imag
    );
}

int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b);


// replacing PyObject_RichCompareBool (NaN != NaN) with pyobject_cmp (NaN == NaN),
// which treats NaNs as equivalent
// see GH 41836
int PANDAS_INLINE tupleobject_cmp(PyTupleObject* a, PyTupleObject* b){
    Py_ssize_t i;

    if (Py_SIZE(a) != Py_SIZE(b)) {
        return 0;
    }

    for (i = 0; i < Py_SIZE(a); ++i) {
        if (!pyobject_cmp(PyTuple_GET_ITEM(a, i), PyTuple_GET_ITEM(b, i))) {
            return 0;
        }
    }
    return 1;
}


int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) {
    if (a == b) {
        return 1;
    }
    if (Py_TYPE(a) == Py_TYPE(b)) {
        // special handling for some built-in types which could have NaNs,
        // as we would like to have them equivalent, but the usual
        // PyObject_RichCompareBool would return False
        if (PyFloat_CheckExact(a)) {
            return floatobject_cmp((PyFloatObject*)a, (PyFloatObject*)b);
        }
        if (PyComplex_CheckExact(a)) {
            return complexobject_cmp((PyComplexObject*)a, (PyComplexObject*)b);
        }
        if (PyTuple_CheckExact(a)) {
            return tupleobject_cmp((PyTupleObject*)a, (PyTupleObject*)b);
        }
        // frozenset isn't yet supported
    }

    int result = PyObject_RichCompareBool(a, b, Py_EQ);
    if (result < 0) {
        PyErr_Clear();
        return 0;
    }
    return result;
}


Py_hash_t PANDAS_INLINE _Pandas_HashDouble(double val) {
    // Since Python 3.10, NaN no longer has hash 0
    if (Py_IS_NAN(val)) {
        return 0;
    }
#if PY_VERSION_HEX < 0x030A0000
    return _Py_HashDouble(val);
#else
    return _Py_HashDouble(NULL, val);
#endif
}


Py_hash_t PANDAS_INLINE floatobject_hash(PyFloatObject* key) {
    return _Pandas_HashDouble(PyFloat_AS_DOUBLE(key));
}


#define _PandasHASH_IMAG 1000003UL

// replaces _Py_HashDouble with _Pandas_HashDouble
Py_hash_t PANDAS_INLINE complexobject_hash(PyComplexObject* key) {
    Py_uhash_t realhash = (Py_uhash_t)_Pandas_HashDouble(key->cval.real);
    Py_uhash_t imaghash = (Py_uhash_t)_Pandas_HashDouble(key->cval.imag);
    if (realhash == (Py_uhash_t)-1 || imaghash == (Py_uhash_t)-1) {
        return -1;
    }
    Py_uhash_t combined = realhash + _PandasHASH_IMAG * imaghash;
    if (combined == (Py_uhash_t)-1) {
        return -2;
    }
    return (Py_hash_t)combined;
}


khuint32_t PANDAS_INLINE kh_python_hash_func(PyObject* key);

// we could use any hashing algorithm; this one is CPython's original for tuples

#if SIZEOF_PY_UHASH_T > 4
#define _PandasHASH_XXPRIME_1 ((Py_uhash_t)11400714785074694791ULL)
#define _PandasHASH_XXPRIME_2 ((Py_uhash_t)14029467366897019727ULL)
#define _PandasHASH_XXPRIME_5 ((Py_uhash_t)2870177450012600261ULL)
#define _PandasHASH_XXROTATE(x) ((x << 31) | (x >> 33))  /* Rotate left 31 bits */
#else
#define _PandasHASH_XXPRIME_1 ((Py_uhash_t)2654435761UL)
#define _PandasHASH_XXPRIME_2 ((Py_uhash_t)2246822519UL)
#define _PandasHASH_XXPRIME_5 ((Py_uhash_t)374761393UL)
#define _PandasHASH_XXROTATE(x) ((x << 13) | (x >> 19))  /* Rotate left 13 bits */
#endif

Py_hash_t PANDAS_INLINE tupleobject_hash(PyTupleObject* key) {
    Py_ssize_t i, len = Py_SIZE(key);
    PyObject **item = key->ob_item;

    Py_uhash_t acc = _PandasHASH_XXPRIME_5;
    for (i = 0; i < len; i++) {
        Py_uhash_t lane = kh_python_hash_func(item[i]);
        if (lane == (Py_uhash_t)-1) {
            return -1;
        }
        acc += lane * _PandasHASH_XXPRIME_2;
        acc = _PandasHASH_XXROTATE(acc);
        acc *= _PandasHASH_XXPRIME_1;
    }

    /* Add input length, mangled to keep the historical value of hash(()). */
    acc += len ^ (_PandasHASH_XXPRIME_5 ^ 3527539UL);

    if (acc == (Py_uhash_t)-1) {
        return 1546275796;
    }
    return acc;
}


khuint32_t PANDAS_INLINE kh_python_hash_func(PyObject* key) {
    Py_hash_t hash;
    // For PyObject_Hash it holds that:
    // hash(0.0) == 0 == hash(-0.0),
    // yet different NaN objects may get different hash values
    if (PyFloat_CheckExact(key)) {
        // we cannot use kh_float64_hash_func
        // because float(k) == k holds for any int object k
        // and kh_float64_hash_func doesn't respect it
        hash = floatobject_hash((PyFloatObject*)key);
    }
    else if (PyComplex_CheckExact(key)) {
        // we cannot use kh_complex128_hash_func
        // because complex(k, 0) == k holds for any int object k
        // and kh_complex128_hash_func doesn't respect it
        hash = complexobject_hash((PyComplexObject*)key);
    }
    else if (PyTuple_CheckExact(key)) {
        hash = tupleobject_hash((PyTupleObject*)key);
    }
    else {
        hash = PyObject_Hash(key);
    }

    if (hash == -1) {
        PyErr_Clear();
        return 0;
    }
#if SIZEOF_PY_HASH_T == 4
    // it is already a 32-bit value
    return hash;
#else
    // for 64-bit builds,
    // we need the information of the upper 32 bits as well
    // see GH 37615
    khuint64_t as_uint = (khuint64_t) hash;
    // uints avoid undefined behavior of signed ints
    return (as_uint>>32)^as_uint;
#endif
}


#define kh_python_hash_equal(a, b) (pyobject_cmp(a, b))


// Python object

typedef PyObject* kh_pyobject_t;

#define KHASH_MAP_INIT_PYOBJECT(name, khval_t) \
    KHASH_INIT(name, kh_pyobject_t, khval_t, 1, \
               kh_python_hash_func, kh_python_hash_equal)

KHASH_MAP_INIT_PYOBJECT(pymap, Py_ssize_t)

#define KHASH_SET_INIT_PYOBJECT(name) \
    KHASH_INIT(name, kh_pyobject_t, char, 0, \
               kh_python_hash_func, kh_python_hash_equal)

KHASH_SET_INIT_PYOBJECT(pyset)

#define kh_exist_pymap(h, k) (kh_exist(h, k))
#define kh_exist_pyset(h, k) (kh_exist(h, k))

KHASH_MAP_INIT_STR(strbox, kh_pyobject_t)

typedef struct {
    kh_str_t *table;
    int starts[256];
} kh_str_starts_t;

typedef kh_str_starts_t* p_kh_str_starts_t;

p_kh_str_starts_t PANDAS_INLINE kh_init_str_starts(void) {
    kh_str_starts_t *result = (kh_str_starts_t*)KHASH_CALLOC(1, sizeof(kh_str_starts_t));
    result->table = kh_init_str();
    return result;
}

khuint_t PANDAS_INLINE kh_put_str_starts_item(kh_str_starts_t* table, char* key, int* ret) {
    khuint_t result = kh_put_str(table->table, key, ret);
    if (*ret != 0) {
        table->starts[(unsigned char)key[0]] = 1;
    }
    return result;
}

khuint_t PANDAS_INLINE kh_get_str_starts_item(const kh_str_starts_t* table, const char* key) {
    unsigned char ch = *key;
    if (table->starts[ch]) {
        if (ch == '\0' || kh_get_str(table->table, key) != table->table->n_buckets) return 1;
    }
    return 0;
}

void PANDAS_INLINE kh_destroy_str_starts(kh_str_starts_t* table) {
    kh_destroy_str(table->table);
    KHASH_FREE(table);
}

void PANDAS_INLINE kh_resize_str_starts(kh_str_starts_t* table, khuint_t val) {
    kh_resize_str(table->table, val);
}

// utility function: given the number of elements,
// returns the number of buckets needed to hold them
khuint_t PANDAS_INLINE kh_needed_n_buckets(khuint_t n_elements){
    khuint_t candidate = n_elements;
    kroundup32(candidate);
    khuint_t upper_bound = (khuint_t)(candidate * __ac_HASH_UPPER + 0.5);
    return (upper_bound < n_elements) ? 2*candidate : candidate;
}
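
// Worked example: for n_elements = 100, kroundup32 lifts the candidate to 128,
// but the load-factor ceiling (khuint_t)(128 * 0.77 + 0.5) == 99 is below 100,
// so 2 * 128 == 256 buckets are returned; for n_elements = 90 the answer would
// be 128, since 90 <= 99.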
@@ -0,0 +1,100 @@
/*
Copyright (c) 2016, PyData Development Team
All rights reserved.

Distributed under the terms of the BSD Simplified License.

The full license is in the LICENSE file, distributed with this software.
*/

#ifndef PANDAS__LIBS_SRC_PARSE_HELPER_H_
#define PANDAS__LIBS_SRC_PARSE_HELPER_H_

#include <float.h>
#include "parser/tokenizer.h"

int to_double(char *item, double *p_value, char sci, char decimal,
              int *maybe_int) {
    char *p_end = NULL;
    int error = 0;

    /* Switch to precise xstrtod GH 31364 */
    *p_value = precise_xstrtod(item, &p_end, decimal, sci, '\0', 1,
                               &error, maybe_int);

    return (error == 0) && (!*p_end);
}

int floatify(PyObject *str, double *result, int *maybe_int) {
    int status;
    char *data;
    PyObject *tmp = NULL;
    const char sci = 'E';
    const char dec = '.';

    if (PyBytes_Check(str)) {
        data = PyBytes_AS_STRING(str);
    } else if (PyUnicode_Check(str)) {
        tmp = PyUnicode_AsUTF8String(str);
        if (tmp == NULL) {
            return -1;
        }
        data = PyBytes_AS_STRING(tmp);
    } else {
        PyErr_SetString(PyExc_TypeError, "Invalid object type");
        return -1;
    }

    status = to_double(data, result, sci, dec, maybe_int);

    if (!status) {
        /* handle inf/-inf infinity/-infinity */
        if (strlen(data) == 3) {
            if (0 == strcasecmp(data, "inf")) {
                *result = HUGE_VAL;
                *maybe_int = 0;
            } else {
                goto parsingerror;
            }
        } else if (strlen(data) == 4) {
            if (0 == strcasecmp(data, "-inf")) {
                *result = -HUGE_VAL;
                *maybe_int = 0;
            } else if (0 == strcasecmp(data, "+inf")) {
                *result = HUGE_VAL;
                *maybe_int = 0;
            } else {
                goto parsingerror;
            }
        } else if (strlen(data) == 8) {
            if (0 == strcasecmp(data, "infinity")) {
                *result = HUGE_VAL;
                *maybe_int = 0;
            } else {
                goto parsingerror;
            }
        } else if (strlen(data) == 9) {
            if (0 == strcasecmp(data, "-infinity")) {
                *result = -HUGE_VAL;
                *maybe_int = 0;
            } else if (0 == strcasecmp(data, "+infinity")) {
                *result = HUGE_VAL;
                *maybe_int = 0;
            } else {
                goto parsingerror;
            }
        } else {
            goto parsingerror;
        }
    }

    Py_XDECREF(tmp);
    return 0;

parsingerror:
    PyErr_Format(PyExc_ValueError, "Unable to parse string \"%s\"", data);
    Py_XDECREF(tmp);
    return -1;
}
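
/* A hedged usage sketch (illustration only, guarded out of the build): how a
   caller might drive floatify() from C. It assumes an already-initialized
   Python interpreter; the string literal is arbitrary. */
#ifdef PANDAS_PARSE_HELPER_EXAMPLE
static int floatify_example(void) {
    double value;
    int maybe_int;
    PyObject *s = PyUnicode_FromString("1.5E3");
    if (s == NULL) return -1;
    int status = floatify(s, &value, &maybe_int);  /* 0 on success, -1 on error */
    Py_DECREF(s);
    /* on success: value == 1500.0 and maybe_int == 0, because a decimal
       point / exponent was seen during parsing */
    return status;
}
#endif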

#endif  // PANDAS__LIBS_SRC_PARSE_HELPER_H_
107
.venv/lib/python3.7/site-packages/pandas/_libs/src/parser/io.c
Normal file
@@ -0,0 +1,107 @@
/*
Copyright (c) 2016, PyData Development Team
All rights reserved.

Distributed under the terms of the BSD Simplified License.

The full license is in the LICENSE file, distributed with this software.
*/

#include "io.h"

/*
  On-disk FILE, uncompressed
*/

void *new_rd_source(PyObject *obj) {
    rd_source *rds = (rd_source *)malloc(sizeof(rd_source));

    if (rds == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    /* hold on to this object */
    Py_INCREF(obj);
    rds->obj = obj;
    rds->buffer = NULL;
    rds->position = 0;

    return (void *)rds;
}

/*
  Cleanup callbacks
*/

int del_rd_source(void *rds) {
    Py_XDECREF(RDS(rds)->obj);
    Py_XDECREF(RDS(rds)->buffer);
    free(rds);

    return 0;
}

/*
  IO callbacks
*/

void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
                      int *status, const char *encoding_errors) {
    PyGILState_STATE state;
    PyObject *result, *func, *args, *tmp;

    void *retval;

    size_t length;
    rd_source *src = RDS(source);
    state = PyGILState_Ensure();

    /* delete old object */
    Py_XDECREF(src->buffer);
    src->buffer = NULL;
    args = Py_BuildValue("(i)", nbytes);

    func = PyObject_GetAttrString(src->obj, "read");

    /* TODO: does this release the GIL? */
    result = PyObject_CallObject(func, args);
    Py_XDECREF(args);
    Py_XDECREF(func);

    if (result == NULL) {
        PyGILState_Release(state);
        *bytes_read = 0;
        *status = CALLING_READ_FAILED;
        return NULL;
    } else if (!PyBytes_Check(result)) {
        tmp = PyUnicode_AsEncodedString(result, "utf-8", encoding_errors);
        Py_DECREF(result);
        if (tmp == NULL) {
            PyGILState_Release(state);
            return NULL;
        }
        result = tmp;
    }

    length = PySequence_Length(result);

    if (length == 0)
        *status = REACHED_EOF;
    else
        *status = 0;

    /* hang on to the Python object */
    src->buffer = result;
    retval = (void *)PyBytes_AsString(result);

    PyGILState_Release(state);

    /* TODO: more error handling */
    *bytes_read = length;

    return retval;
}
@@ -0,0 +1,34 @@
/*
Copyright (c) 2016, PyData Development Team
All rights reserved.

Distributed under the terms of the BSD Simplified License.

The full license is in the LICENSE file, distributed with this software.
*/

#ifndef PANDAS__LIBS_SRC_PARSER_IO_H_
#define PANDAS__LIBS_SRC_PARSER_IO_H_

#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "tokenizer.h"

#define FS(source) ((file_source *)source)

typedef struct _rd_source {
    PyObject *obj;
    PyObject *buffer;
    size_t position;
} rd_source;

#define RDS(source) ((rd_source *)source)

void *new_rd_source(PyObject *obj);

int del_rd_source(void *src);

void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
                      int *status, const char *encoding_errors);

#endif  // PANDAS__LIBS_SRC_PARSER_IO_H_
File diff suppressed because it is too large
@@ -0,0 +1,236 @@
/*

Copyright (c) 2012, Lambda Foundry, Inc., except where noted

Incorporates components of WarrenWeckesser/textreader, licensed under 3-clause
BSD

See LICENSE for the license

*/

#ifndef PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_
#define PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_

#define PY_SSIZE_T_CLEAN
#include <Python.h>

#define ERROR_NO_DIGITS 1
#define ERROR_OVERFLOW 2
#define ERROR_INVALID_CHARS 3

#include "../headers/stdint.h"
#include "../inline_helper.h"
#include "../headers/portable.h"

#include "khash.h"

#define STREAM_INIT_SIZE 32

#define REACHED_EOF 1
#define CALLING_READ_FAILED 2


/*
  C flat file parsing low level code for pandas / NumPy
*/

/*
 * Common set of error types for the read_rows() and tokenize()
 * functions.
 */

// #define VERBOSE
#if defined(VERBOSE)
#define TRACE(X) printf X;
#else
#define TRACE(X)
#endif  // VERBOSE

#define PARSER_OUT_OF_MEMORY -1

/*
 * TODO: Might want to couple count_rows() with read_rows() to avoid
 * duplication of some file I/O.
 */

typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,
    EAT_CRNL_NOP,
    EAT_WHITESPACE,
    EAT_COMMENT,
    EAT_LINE_COMMENT,
    WHITESPACE_LINE,
    START_FIELD_IN_SKIP_LINE,
    IN_FIELD_IN_SKIP_LINE,
    IN_QUOTED_FIELD_IN_SKIP_LINE,
    QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE,
    FINISHED
} ParserState;

typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

typedef enum {
    ERROR,
    WARN,
    SKIP
} BadLineHandleMethod;

typedef void *(*io_callback)(void *src, size_t nbytes, size_t *bytes_read,
                             int *status, const char *encoding_errors);
typedef int (*io_cleanup)(void *src);

typedef struct parser_t {
    void *source;
    io_callback cb_io;
    io_cleanup cb_cleanup;

    int64_t chunksize;       // Number of bytes to prepare for each chunk
    char *data;              // pointer to data to be processed
    int64_t datalen;         // amount of data available
    int64_t datapos;

    // where to write out tokenized data
    char *stream;
    uint64_t stream_len;
    uint64_t stream_cap;

    // Store words in (potentially ragged) matrix for now, hmm
    char **words;
    int64_t *word_starts;    // where we are in the stream
    uint64_t words_len;
    uint64_t words_cap;
    uint64_t max_words_cap;  // maximum word cap encountered

    char *pword_start;       // pointer to stream start of current field
    int64_t word_start;      // position start of current field

    int64_t *line_start;     // position in words for start of line
    int64_t *line_fields;    // Number of fields in each line
    uint64_t lines;          // Number of (good) lines observed
    uint64_t file_lines;     // Number of lines (including bad or skipped)
    uint64_t lines_cap;      // Vector capacity

    // Tokenizing stuff
    ParserState state;
    int doublequote;         /* is " represented by ""? */
    char delimiter;          /* field separator */
    int delim_whitespace;    /* delimit by consuming space/tabs instead */
    char quotechar;          /* quote character */
    char escapechar;         /* escape character */
    char lineterminator;
    int skipinitialspace;    /* ignore spaces following delimiter? */
    int quoting;             /* style of quoting to write */

    char commentchar;
    int allow_embedded_newline;

    int usecols;             // Boolean: 1: usecols provided, 0: none provided

    int expected_fields;
    BadLineHandleMethod on_bad_lines;

    // floating point options
    char decimal;
    char sci;

    // thousands separator (comma, period)
    char thousands;

    int header;              // Boolean: 1: has header, 0: no header
    int64_t header_start;    // header row start
    uint64_t header_end;     // header row end

    void *skipset;
    PyObject *skipfunc;
    int64_t skip_first_N_rows;
    int64_t skip_footer;
    double (*double_converter)(const char *, char **,
                               char, char, char, int, int *, int *);

    // error handling
    char *warn_msg;
    char *error_msg;

    int skip_empty_lines;
} parser_t;

typedef struct coliter_t {
    char **words;
    int64_t *line_start;
    int64_t col;
} coliter_t;

void coliter_setup(coliter_t *self, parser_t *parser, int i, int start);

#define COLITER_NEXT(iter, word)                           \
    do {                                                   \
        const int64_t i = *iter.line_start++ + iter.col;   \
        word = i >= *iter.line_start ? "" : iter.words[i]; \
    } while (0)
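
/* A hedged usage sketch (illustration only, guarded out of the build):
   walking one column of an already-tokenized parser_t with the iterator
   above. `parser` and `col` are assumed to come from the caller. */
#ifdef PANDAS_COLITER_EXAMPLE
static void coliter_example(parser_t *parser, int col) {
    coliter_t it;
    uint64_t row;
    char *word;
    coliter_setup(&it, parser, col, 0);  /* start iterating at line 0 */
    for (row = 0; row < parser->lines; ++row) {
        COLITER_NEXT(it, word);          /* yields "" when the row has too few fields */
        TRACE(("row %llu: %s\n", (unsigned long long)row, word));
    }
}
#endif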

parser_t *parser_new(void);

int parser_init(parser_t *self);

int parser_consume_rows(parser_t *self, size_t nrows);

int parser_trim_buffers(parser_t *self);

int parser_add_skiprow(parser_t *self, int64_t row);

int parser_set_skipfirstnrows(parser_t *self, int64_t nrows);

void parser_free(parser_t *self);

void parser_del(parser_t *self);

void parser_set_default_options(parser_t *self);

int tokenize_nrows(parser_t *self, size_t nrows, const char *encoding_errors);

int tokenize_all_rows(parser_t *self, const char *encoding_errors);

// Have parsed / type-converted a chunk of data
// and want to free memory from the token stream

typedef struct uint_state {
    int seen_sint;
    int seen_uint;
    int seen_null;
} uint_state;

void uint_state_init(uint_state *self);

int uint64_conflict(uint_state *self);

uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
                       uint64_t uint_max, int *error, char tsep);
int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
                     int *error, char tsep);
double xstrtod(const char *p, char **q, char decimal, char sci, char tsep,
               int skip_trailing, int *error, int *maybe_int);
double precise_xstrtod(const char *p, char **q, char decimal,
                       char sci, char tsep, int skip_trailing,
                       int *error, int *maybe_int);

// GH-15140 - round_trip requires and acquires the GIL on its own
double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
                  int skip_trailing, int *error, int *maybe_int);
int to_boolean(const char *item, uint8_t *val);

#endif  // PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_
279
.venv/lib/python3.7/site-packages/pandas/_libs/src/skiplist.h
Normal file
@@ -0,0 +1,279 @@
/*
Copyright (c) 2016, PyData Development Team
All rights reserved.

Distributed under the terms of the BSD Simplified License.

The full license is in the LICENSE file, distributed with this software.

Flexibly-sized, index-able skiplist data structure for maintaining a sorted
list of values

Port of Wes McKinney's Cython version of Raymond Hettinger's original pure
Python recipe (https://rhettinger.wordpress.com/2010/02/06/lost-knowledge/)
*/

#ifndef PANDAS__LIBS_SRC_SKIPLIST_H_
#define PANDAS__LIBS_SRC_SKIPLIST_H_

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "inline_helper.h"

PANDAS_INLINE float __skiplist_nanf(void) {
    const union {
        int __i;
        float __f;
    } __bint = {0x7fc00000UL};
    return __bint.__f;
}
#define PANDAS_NAN ((double)__skiplist_nanf())

PANDAS_INLINE double Log2(double val) { return log(val) / log(2.); }

typedef struct node_t node_t;

struct node_t {
    node_t **next;
    int *width;
    double value;
    int is_nil;
    int levels;
    int ref_count;
};

typedef struct {
    node_t *head;
    node_t **tmp_chain;
    int *tmp_steps;
    int size;
    int maxlevels;
} skiplist_t;

PANDAS_INLINE double urand(void) {
    return ((double)rand() + 1) / ((double)RAND_MAX + 2);
}

PANDAS_INLINE int int_min(int a, int b) { return a < b ? a : b; }

PANDAS_INLINE node_t *node_init(double value, int levels) {
    node_t *result;
    result = (node_t *)malloc(sizeof(node_t));
    if (result) {
        result->value = value;
        result->levels = levels;
        result->is_nil = 0;
        result->ref_count = 0;
        result->next = (node_t **)malloc(levels * sizeof(node_t *));
        result->width = (int *)malloc(levels * sizeof(int));
        if (!(result->next && result->width) && (levels != 0)) {
            free(result->next);
            free(result->width);
            free(result);
            return NULL;
        }
    }
    return result;
}

// do this ourselves
PANDAS_INLINE void node_incref(node_t *node) { ++(node->ref_count); }

PANDAS_INLINE void node_decref(node_t *node) { --(node->ref_count); }

static void node_destroy(node_t *node) {
    int i;
    if (node) {
        if (node->ref_count <= 1) {
            for (i = 0; i < node->levels; ++i) {
                node_destroy(node->next[i]);
            }
            free(node->next);
            free(node->width);
            // printf("Reference count was 1, freeing\n");
            free(node);
        } else {
            node_decref(node);
        }
        // pretty sure that freeing the struct above will be enough
    }
}

PANDAS_INLINE void skiplist_destroy(skiplist_t *skp) {
    if (skp) {
        node_destroy(skp->head);
        free(skp->tmp_steps);
        free(skp->tmp_chain);
        free(skp);
    }
}

PANDAS_INLINE skiplist_t *skiplist_init(int expected_size) {
    skiplist_t *result;
    node_t *NIL, *head;
    int maxlevels, i;

    maxlevels = 1 + Log2((double)expected_size);
    result = (skiplist_t *)malloc(sizeof(skiplist_t));
    if (!result) {
        return NULL;
    }
    result->tmp_chain = (node_t **)malloc(maxlevels * sizeof(node_t *));
    result->tmp_steps = (int *)malloc(maxlevels * sizeof(int));
    result->maxlevels = maxlevels;
    result->size = 0;

    head = result->head = node_init(PANDAS_NAN, maxlevels);
    NIL = node_init(0.0, 0);

    if (!(result->tmp_chain && result->tmp_steps && result->head && NIL)) {
        skiplist_destroy(result);
        node_destroy(NIL);
        return NULL;
    }

    node_incref(head);

    NIL->is_nil = 1;

    for (i = 0; i < maxlevels; ++i) {
        head->next[i] = NIL;
        head->width[i] = 1;
        node_incref(NIL);
    }

    return result;
}

// 1 if node < value, 0 if node == value, -1 if node > value (NIL counts as +inf)
PANDAS_INLINE int _node_cmp(node_t *node, double value) {
    if (node->is_nil || node->value > value) {
        return -1;
    } else if (node->value < value) {
        return 1;
    } else {
        return 0;
    }
}

PANDAS_INLINE double skiplist_get(skiplist_t *skp, int i, int *ret) {
    node_t *node;
    int level;

    if (i < 0 || i >= skp->size) {
        *ret = 0;
        return 0;
    }

    node = skp->head;
    ++i;
    for (level = skp->maxlevels - 1; level >= 0; --level) {
        while (node->width[level] <= i) {
            i -= node->width[level];
            node = node->next[level];
        }
    }

    *ret = 1;
    return node->value;
}

PANDAS_INLINE int skiplist_insert(skiplist_t *skp, double value) {
    node_t *node, *prevnode, *newnode, *next_at_level;
    int *steps_at_level;
    int size, steps, level;
    node_t **chain;

    chain = skp->tmp_chain;

    steps_at_level = skp->tmp_steps;
    memset(steps_at_level, 0, skp->maxlevels * sizeof(int));

    node = skp->head;

    for (level = skp->maxlevels - 1; level >= 0; --level) {
        next_at_level = node->next[level];
        while (_node_cmp(next_at_level, value) >= 0) {
            steps_at_level[level] += node->width[level];
            node = next_at_level;
            next_at_level = node->next[level];
        }
        chain[level] = node;
    }

    size = int_min(skp->maxlevels, 1 - ((int)Log2(urand())));

    newnode = node_init(value, size);
    if (!newnode) {
        return -1;
    }
    steps = 0;

    for (level = 0; level < size; ++level) {
        prevnode = chain[level];
        newnode->next[level] = prevnode->next[level];

        prevnode->next[level] = newnode;
        node_incref(newnode);  // increment the reference count

        newnode->width[level] = prevnode->width[level] - steps;
        prevnode->width[level] = steps + 1;

        steps += steps_at_level[level];
    }

    for (level = size; level < skp->maxlevels; ++level) {
        chain[level]->width[level] += 1;
    }

    ++(skp->size);

    return 1;
}

PANDAS_INLINE int skiplist_remove(skiplist_t *skp, double value) {
    int level, size;
    node_t *node, *prevnode, *tmpnode, *next_at_level;
    node_t **chain;

    chain = skp->tmp_chain;
    node = skp->head;

    for (level = skp->maxlevels - 1; level >= 0; --level) {
        next_at_level = node->next[level];
        while (_node_cmp(next_at_level, value) > 0) {
            node = next_at_level;
            next_at_level = node->next[level];
        }
        chain[level] = node;
    }

    if (value != chain[0]->next[0]->value) {
        return 0;
    }

    size = chain[0]->next[0]->levels;

    for (level = 0; level < size; ++level) {
        prevnode = chain[level];

        tmpnode = prevnode->next[level];

        prevnode->width[level] += tmpnode->width[level] - 1;
        prevnode->next[level] = tmpnode->next[level];

        tmpnode->next[level] = NULL;
        node_destroy(tmpnode);  // decrement refcount or free
    }

    for (level = size; level < skp->maxlevels; ++level) {
        --(chain[level]->width[level]);
    }

    --(skp->size);
    return 1;
}
|
||||
|
||||
#endif // PANDAS__LIBS_SRC_SKIPLIST_H_
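
A quick illustration of how this structure is used: pandas' rolling median
keeps the current window in the skiplist and reads the middle rank after each
update. The following is a minimal sketch against the API above, not library
code; skiplist_init(int expected_size) and skiplist_destroy() are assumed from
the portion of the header before this excerpt, and the data values are
arbitrary.

// rolling_median.c - hedged usage sketch for skiplist.h
#include <stdio.h>
#include "skiplist.h"

int main(void) {
    double data[] = {3.0, 1.0, 4.0, 1.0, 5.0, 9.0, 2.0, 6.0};
    int n = 8, win = 3, ok;

    skiplist_t *sl = skiplist_init(win);
    if (!sl) return 1;

    for (int i = 0; i < n; ++i) {
        skiplist_insert(sl, data[i]);
        if (i >= win) skiplist_remove(sl, data[i - win]);  // evict oldest
        if (i >= win - 1) {
            // middle rank of the current window (win is odd here)
            double med = skiplist_get(sl, win / 2, &ok);
            if (ok) printf("median of window ending at %d: %g\n", i, med);
        }
    }
    skiplist_destroy(sl);
    return 0;
}
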
@@ -0,0 +1,315 @@
/*
Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the ESN Social Software AB nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc)
https://github.com/client9/stringencoders
Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved.

Numeric decoder derived from the TCL library
https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms
* Copyright (c) 1988-1993 The Regents of the University of California.
* Copyright (c) 1994 Sun Microsystems, Inc.
*/

/*
Ultra fast JSON encoder and decoder
Developed by Jonas Tarnstrom (jonas@esn.me).

Encoder notes:
------------------

:: Cyclic references ::
Cyclically referenced objects are not detected.
Set JSONObjectEncoder.recursionMax to a suitable value or make sure the input
object tree doesn't have cyclic references.

*/

#ifndef PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_
#define PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_

#include <stdio.h>
#include <wchar.h>

// Don't output any extra whitespace when encoding
#define JSON_NO_EXTRA_WHITESPACE

// Max decimals to encode double floating point numbers with
#ifndef JSON_DOUBLE_MAX_DECIMALS
#define JSON_DOUBLE_MAX_DECIMALS 15
#endif

// Max recursion depth, default for encoder
#ifndef JSON_MAX_RECURSION_DEPTH
#define JSON_MAX_RECURSION_DEPTH 1024
#endif

// Max recursion depth, default for decoder
#ifndef JSON_MAX_OBJECT_DEPTH
#define JSON_MAX_OBJECT_DEPTH 1024
#endif

/*
Dictates and limits how much stack space UltraJSON will use for buffers
before resorting to the provided heap functions */
#ifndef JSON_MAX_STACK_BUFFER_SIZE
#define JSON_MAX_STACK_BUFFER_SIZE 131072
#endif
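
/* Each limit above is wrapped in #ifndef, so a build can override it on the
   compiler command line (for example by defining JSON_DOUBLE_MAX_DECIMALS
   before this header is included). */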

#ifdef _WIN32

typedef __int64 JSINT64;
typedef unsigned __int64 JSUINT64;

typedef __int32 JSINT32;
typedef unsigned __int32 JSUINT32;
typedef unsigned __int8 JSUINT8;
typedef unsigned __int16 JSUTF16;
typedef unsigned __int32 JSUTF32;
typedef __int64 JSLONG;

#define EXPORTFUNCTION __declspec(dllexport)

#define FASTCALL_MSVC __fastcall

#define INLINE_PREFIX static __inline

#else

#include <stdint.h>
typedef int64_t JSINT64;
typedef uint64_t JSUINT64;

typedef int32_t JSINT32;
typedef uint32_t JSUINT32;

#define FASTCALL_MSVC

#define INLINE_PREFIX static inline

typedef uint8_t JSUINT8;
typedef uint16_t JSUTF16;
typedef uint32_t JSUTF32;

typedef int64_t JSLONG;

#define EXPORTFUNCTION
#endif

#if !(defined(__LITTLE_ENDIAN__) || defined(__BIG_ENDIAN__))

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define __LITTLE_ENDIAN__
#else

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define __BIG_ENDIAN__
#endif

#endif

#endif

#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)
#error "Endianness not supported"
#endif

enum JSTYPES {
    JT_NULL,     // NULL
    JT_TRUE,     // boolean true
    JT_FALSE,    // boolean false
    JT_INT,      // (JSINT32 (signed 32-bit))
    JT_LONG,     // (JSINT64 (signed 64-bit))
    JT_DOUBLE,   // (double)
    JT_BIGNUM,   // integer larger than sys.maxsize
    JT_UTF8,     // (char 8-bit)
    JT_ARRAY,    // Array structure
    JT_OBJECT,   // Key/Value structure
    JT_INVALID,  // Internal, do not return nor expect
    JT_POS_INF,  // Positive infinity
    JT_NEG_INF,  // Negative infinity
};
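
/* Each scalar tag above determines which getter the encoder calls on the
   object: JT_INT -> getIntValue, JT_LONG -> getLongValue, JT_DOUBLE ->
   getDoubleValue, JT_UTF8 -> getStringValue, JT_BIGNUM ->
   getBigNumStringValue (see JSONObjectEncoder below). */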

typedef void * JSOBJ;
typedef void * JSITER;

typedef struct __JSONTypeContext {
    int type;
    void *encoder;
    void *prv;
} JSONTypeContext;

/*
Function pointer declarations, suitable for implementing UltraJSON */
typedef void (*JSPFN_ITERBEGIN)(JSOBJ obj, JSONTypeContext *tc);
typedef int (*JSPFN_ITERNEXT)(JSOBJ obj, JSONTypeContext *tc);
typedef void (*JSPFN_ITEREND)(JSOBJ obj, JSONTypeContext *tc);
typedef JSOBJ (*JSPFN_ITERGETVALUE)(JSOBJ obj, JSONTypeContext *tc);
typedef char *(*JSPFN_ITERGETNAME)(JSOBJ obj, JSONTypeContext *tc,
                                   size_t *outLen);
typedef void *(*JSPFN_MALLOC)(size_t size);
typedef void (*JSPFN_FREE)(void *pptr);
typedef void *(*JSPFN_REALLOC)(void *base, size_t size);

typedef struct __JSONObjectEncoder {
    void (*beginTypeContext)(JSOBJ obj, JSONTypeContext *tc);
    void (*endTypeContext)(JSOBJ obj, JSONTypeContext *tc);
    const char *(*getStringValue)(JSOBJ obj, JSONTypeContext *tc,
                                  size_t *_outLen);
    JSINT64 (*getLongValue)(JSOBJ obj, JSONTypeContext *tc);
    JSINT32 (*getIntValue)(JSOBJ obj, JSONTypeContext *tc);
    double (*getDoubleValue)(JSOBJ obj, JSONTypeContext *tc);
    const char *(*getBigNumStringValue)(JSOBJ obj, JSONTypeContext *tc,
                                        size_t *_outLen);

    /*
    Begin iteration of an iterable object (JS_ARRAY or JS_OBJECT).
    The implementor should set up iteration state in ti->prv.
    */
    JSPFN_ITERBEGIN iterBegin;

    /*
    Retrieve the next object in an iteration. Should return 0 when the
    iteration has reached its end, or 1 if there are more items.
    The implementor is responsible for keeping the state of the iteration;
    use the ti->prv fields for this.
    */
    JSPFN_ITERNEXT iterNext;

    /*
    Ends the iteration of an iterable object.
    Any iteration state stored in ti->prv can be freed here.
    */
    JSPFN_ITEREND iterEnd;

    /*
    Returns a reference to the value object of an iterator.
    The implementor is responsible for the life-cycle of the returned value;
    use iterNext/iterEnd and ti->prv to keep track of the current object.
    */
    JSPFN_ITERGETVALUE iterGetValue;

    /*
    Returns the name (key) of the iterator's current item.
    The implementor is responsible for the life-cycle of the returned string;
    use iterNext/iterEnd and ti->prv to keep track of the current object.
    */
    JSPFN_ITERGETNAME iterGetName;

    /*
    Release a value as indicated by setting ti->release = 1 in the previous
    getValue call. The ti->prv array should contain the necessary context to
    release the value.
    */
    void (*releaseObject)(JSOBJ obj);

    /* Library functions
    Set to NULL to use STDLIB malloc, realloc, free */
    JSPFN_MALLOC malloc;
    JSPFN_REALLOC realloc;
    JSPFN_FREE free;

    /*
    Configuration for max recursion; set to 0 to use the default (see
    JSON_MAX_RECURSION_DEPTH) */
    int recursionMax;

    /*
    Configuration for max decimals of double floating point numbers to
    encode (0-9) */
    int doublePrecision;

    /*
    If true, output will be ASCII with all characters above 127 encoded as
    \uXXXX. If false, output will be UTF-8 or whatever charset the strings
    were supplied in */
    int forceASCII;

    /*
    If true, '<', '>', and '&' characters will be encoded as \u003c, \u003e,
    and \u0026, respectively. If false, no special encoding will be used. */
    int encodeHTMLChars;

    /*
    Configuration for spaces of indent */
    int indent;

    /*
    Set to an error message if an error occurred */
    const char *errorMsg;
    JSOBJ errorObj;

    /* Buffer stuff */
    char *start;
    char *offset;
    char *end;
    int heap;
    int level;
} JSONObjectEncoder;

/*
Encode an object structure into JSON.

Arguments:
obj - An anonymous type representing the object
enc - Function definitions for querying JSOBJ type
buffer - Preallocated buffer to store the result in. If NULL, the function
allocates its own buffer.
cbBuffer - Length of buffer (ignored if buffer is NULL)

Returns:
Encoded JSON object as a null-terminated char string.

NOTE:
If the supplied buffer wasn't large enough to hold the result, the function
allocates a new buffer. The life cycle of the provided buffer must still be
handled by the caller.

If the return value doesn't equal the supplied buffer, the caller must
release the memory using JSONObjectEncoder.free or free(), as configured
when calling this function.
*/
EXPORTFUNCTION char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc,
                                       char *buffer, size_t cbBuffer);
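
To make that contract concrete, here is a hedged sketch (not the library's
own code): a caller that encodes a single double by filling in only the
callbacks such a value needs, then honors the buffer-ownership rule from the
NOTE above. That the obj handle passed to the getters is the one given to
JSON_EncodeObject is inferred from the signatures, not stated in this excerpt.

// encode_sketch.c - assumes ultrajson.h and its implementation are linked in
#include <stdio.h>
#include <stdlib.h>
#include "ultrajson.h"

static void dbl_begin(JSOBJ obj, JSONTypeContext *tc) { tc->type = JT_DOUBLE; }
static void dbl_end(JSOBJ obj, JSONTypeContext *tc) {}
static double dbl_get(JSOBJ obj, JSONTypeContext *tc) {
    return *(const double *)obj;
}

int main(void) {
    double value = 3.14159;
    JSONObjectEncoder enc = {0};  // malloc/realloc/free NULL -> stdlib
    enc.beginTypeContext = dbl_begin;
    enc.endTypeContext = dbl_end;
    enc.getDoubleValue = dbl_get;
    enc.doublePrecision = 5;

    char stack_buf[64];
    char *json = JSON_EncodeObject(&value, &enc, stack_buf, sizeof(stack_buf));
    if (!json) {
        fprintf(stderr, "encode failed: %s\n", enc.errorMsg);
        return 1;
    }
    printf("%s\n", json);
    if (json != stack_buf) {
        free(json);  // the encoder outgrew our buffer and heap-allocated
    }
    return 0;
}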

typedef struct __JSONObjectDecoder {
    JSOBJ (*newString)(void *prv, wchar_t *start, wchar_t *end);
    int (*objectAddKey)(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value);
    int (*arrayAddItem)(void *prv, JSOBJ obj, JSOBJ value);
    JSOBJ (*newTrue)(void *prv);
    JSOBJ (*newFalse)(void *prv);
    JSOBJ (*newNull)(void *prv);
    JSOBJ (*newPosInf)(void *prv);
    JSOBJ (*newNegInf)(void *prv);
    JSOBJ (*newObject)(void *prv, void *decoder);
    JSOBJ (*endObject)(void *prv, JSOBJ obj);
    JSOBJ (*newArray)(void *prv, void *decoder);
    JSOBJ (*endArray)(void *prv, JSOBJ obj);
    JSOBJ (*newInt)(void *prv, JSINT32 value);
    JSOBJ (*newLong)(void *prv, JSINT64 value);
    JSOBJ (*newDouble)(void *prv, double value);
    void (*releaseObject)(void *prv, JSOBJ obj, void *decoder);
    JSPFN_MALLOC malloc;
    JSPFN_FREE free;
    JSPFN_REALLOC realloc;
    char *errorStr;
    char *errorOffset;
    int preciseFloat;
    void *prv;
} JSONObjectDecoder;

EXPORTFUNCTION JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec,
                                       const char *buffer, size_t cbBuffer);
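
The decoder is driven the same way in reverse: the parser walks the JSON text
and calls these constructors to build whatever object model the callbacks
implement. Below is a hedged sketch under stated assumptions: only the numeric
constructors are filled in, so it can decode a single top-level number, and
NULL allocator fields defaulting to stdlib is assumed by symmetry with the
encoder note above, not confirmed by this header.

// decode_sketch.c - numbers-only decoder against the struct above
#include <stdlib.h>

static JSOBJ num_new_double(void *prv, double value) {
    double *box = malloc(sizeof(double));  // boxed result; caller frees
    if (box) *box = value;
    return box;
}
static JSOBJ num_new_int(void *prv, JSINT32 value) {
    return num_new_double(prv, (double)value);
}
static JSOBJ num_new_long(void *prv, JSINT64 value) {
    return num_new_double(prv, (double)value);
}

static double decode_number(const char *json, size_t len, int *ok) {
    JSONObjectDecoder dec = {0};
    dec.newDouble = num_new_double;
    dec.newInt = num_new_int;
    dec.newLong = num_new_long;
    double *box = (double *)JSON_DecodeObject(&dec, json, len);
    *ok = (box != NULL);  // on failure, dec.errorStr carries a message
    double out = box ? *box : 0.0;
    free(box);
    return out;
}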

EXPORTFUNCTION void encode(JSOBJ, JSONObjectEncoder *, const char *, size_t);

#endif // PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_
File diff suppressed because it is too large
Some files were not shown because too many files have changed in this diff