libraries

2024-09-28 22:52:53 -07:00
parent 5cdaf1f76b
commit 4929d1fa66
7378 changed files with 1550978 additions and 14 deletions


@@ -0,0 +1,21 @@
# Import seaborn objects
from .rcmod import * # noqa: F401,F403
from .utils import * # noqa: F401,F403
from .palettes import * # noqa: F401,F403
from .relational import * # noqa: F401,F403
from .regression import * # noqa: F401,F403
from .categorical import * # noqa: F401,F403
from .distributions import * # noqa: F401,F403
from .matrix import * # noqa: F401,F403
from .miscplot import * # noqa: F401,F403
from .axisgrid import * # noqa: F401,F403
from .widgets import * # noqa: F401,F403
from .colors import xkcd_rgb, crayons # noqa: F401
from . import cm # noqa: F401
# Capture the original matplotlib rcParams
import matplotlib as mpl
_orig_rc_params = mpl.rcParams.copy()
# Define the seaborn version
__version__ = "0.13.2"
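
Example — a quick sketch of what this flat namespace provides once the package is imported (all names below are standard public seaborn API; `load_dataset` fetches sample data over the network):

import seaborn as sns
import matplotlib.pyplot as plt

print(sns.__version__)                       # "0.13.2"

tips = sns.load_dataset("tips")              # sample data; requires network access
sns.scatterplot(data=tips, x="total_bill", y="tip")
plt.show()

# _orig_rc_params is what allows sns.reset_orig() to restore matplotlib defaults
sns.set_theme()
sns.reset_orig()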

File diff suppressed because it is too large


@@ -0,0 +1,123 @@
from __future__ import annotations
from typing import Literal
import numpy as np
import pandas as pd
import matplotlib as mpl
from matplotlib.figure import Figure
from seaborn.utils import _version_predates
def norm_from_scale(scale, norm):
"""Produce a Normalize object given a Scale and min/max domain limits."""
    # This replicates an internal matplotlib function that is awkward to access.
    # It is likely to become part of the matplotlib API at some point:
# https://github.com/matplotlib/matplotlib/issues/20329
if isinstance(norm, mpl.colors.Normalize):
return norm
if scale is None:
return None
if norm is None:
vmin = vmax = None
else:
vmin, vmax = norm # TODO more helpful error if this fails?
class ScaledNorm(mpl.colors.Normalize):
def __call__(self, value, clip=None):
# From github.com/matplotlib/matplotlib/blob/v3.4.2/lib/matplotlib/colors.py
# See github.com/matplotlib/matplotlib/tree/v3.4.2/LICENSE
value, is_scalar = self.process_value(value)
self.autoscale_None(value)
if self.vmin > self.vmax:
raise ValueError("vmin must be less or equal to vmax")
if self.vmin == self.vmax:
return np.full_like(value, 0)
if clip is None:
clip = self.clip
if clip:
value = np.clip(value, self.vmin, self.vmax)
# ***** Seaborn changes start ****
t_value = self.transform(value).reshape(np.shape(value))
t_vmin, t_vmax = self.transform([self.vmin, self.vmax])
# ***** Seaborn changes end *****
if not np.isfinite([t_vmin, t_vmax]).all():
raise ValueError("Invalid vmin or vmax")
t_value -= t_vmin
t_value /= (t_vmax - t_vmin)
t_value = np.ma.masked_invalid(t_value, copy=False)
return t_value[0] if is_scalar else t_value
new_norm = ScaledNorm(vmin, vmax)
new_norm.transform = scale.get_transform().transform
return new_norm
def get_colormap(name):
"""Handle changes to matplotlib colormap interface in 3.6."""
try:
return mpl.colormaps[name]
except AttributeError:
return mpl.cm.get_cmap(name)
def register_colormap(name, cmap):
"""Handle changes to matplotlib colormap interface in 3.6."""
try:
if name not in mpl.colormaps:
mpl.colormaps.register(cmap, name=name)
except AttributeError:
mpl.cm.register_cmap(name, cmap)
def set_layout_engine(
fig: Figure,
engine: Literal["constrained", "compressed", "tight", "none"],
) -> None:
"""Handle changes to auto layout engine interface in 3.6"""
if hasattr(fig, "set_layout_engine"):
fig.set_layout_engine(engine)
else:
# _version_predates(mpl, 3.6)
if engine == "tight":
fig.set_tight_layout(True) # type: ignore # predates typing
elif engine == "constrained":
fig.set_constrained_layout(True) # type: ignore
elif engine == "none":
fig.set_tight_layout(False) # type: ignore
fig.set_constrained_layout(False) # type: ignore
def get_layout_engine(fig: Figure) -> mpl.layout_engine.LayoutEngine | None:
"""Handle changes to auto layout engine interface in 3.6"""
if hasattr(fig, "get_layout_engine"):
return fig.get_layout_engine()
else:
# _version_predates(mpl, 3.6)
return None
def share_axis(ax0, ax1, which):
"""Handle changes to post-hoc axis sharing."""
if _version_predates(mpl, "3.5"):
group = getattr(ax0, f"get_shared_{which}_axes")()
group.join(ax1, ax0)
else:
getattr(ax1, f"share{which}")(ax0)
def get_legend_handles(legend):
"""Handle legendHandles attribute rename."""
if _version_predates(mpl, "3.7"):
return legend.legendHandles
else:
return legend.legend_handles
def groupby_apply_include_groups(val):
if _version_predates(pd, "2.2.0"):
return {}
return {"include_groups": val}


@@ -0,0 +1,319 @@
"""
Components for parsing variable assignments and internally representing plot data.
"""
from __future__ import annotations
from collections.abc import Mapping, Sized
from typing import cast
import warnings
import pandas as pd
from pandas import DataFrame
from seaborn._core.typing import DataSource, VariableSpec, ColumnName
from seaborn.utils import _version_predates
class PlotData:
"""
Data table with plot variable schema and mapping to original names.
Contains logic for parsing variable specification arguments and updating
the table with layer-specific data and/or mappings.
Parameters
----------
data
Input data where variable names map to vector values.
variables
        Keys are names of plot variables (x, y, ...); each value is one of:
- name of a column (or index level, or dictionary entry) in `data`
- vector in any format that can construct a :class:`pandas.DataFrame`
Attributes
----------
frame
        Data table whose columns correspond to the defined plot variables.
names
Dictionary mapping plot variable names to names in source data structure(s).
ids
Dictionary mapping plot variable names to unique data source identifiers.
"""
frame: DataFrame
frames: dict[tuple, DataFrame]
names: dict[str, str | None]
ids: dict[str, str | int]
source_data: DataSource
source_vars: dict[str, VariableSpec]
def __init__(
self,
data: DataSource,
variables: dict[str, VariableSpec],
):
data = handle_data_source(data)
frame, names, ids = self._assign_variables(data, variables)
self.frame = frame
self.names = names
self.ids = ids
# The reason we possibly have a dictionary of frames is to support the
# Plot.pair operation, post scaling, where each x/y variable needs its
# own frame. This feels pretty clumsy and there are a bunch of places in
        # the client code with awkward if frame / elif frames constructions.
# It would be great to have a cleaner abstraction here.
self.frames = {}
self.source_data = data
self.source_vars = variables
def __contains__(self, key: str) -> bool:
"""Boolean check on whether a variable is defined in this dataset."""
if self.frame is None:
return any(key in df for df in self.frames.values())
return key in self.frame
def join(
self,
data: DataSource,
variables: dict[str, VariableSpec] | None,
) -> PlotData:
"""Add, replace, or drop variables and return as a new dataset."""
# Inherit the original source of the upstream data by default
if data is None:
data = self.source_data
# TODO allow `data` to be a function (that is called on the source data?)
if not variables:
variables = self.source_vars
# Passing var=None implies that we do not want that variable in this layer
disinherit = [k for k, v in variables.items() if v is None]
# Create a new dataset with just the info passed here
new = PlotData(data, variables)
# -- Update the inherited DataSource with this new information
drop_cols = [k for k in self.frame if k in new.frame or k in disinherit]
parts = [self.frame.drop(columns=drop_cols), new.frame]
        # Because we are combining distinct columns, this is perhaps more
        # naturally thought of as a "merge"/"join". But we use concat because
        # some simple testing suggests that it is marginally faster.
frame = pd.concat(parts, axis=1, sort=False, copy=False)
names = {k: v for k, v in self.names.items() if k not in disinherit}
names.update(new.names)
ids = {k: v for k, v in self.ids.items() if k not in disinherit}
ids.update(new.ids)
new.frame = frame
new.names = names
new.ids = ids
# Multiple chained operations should always inherit from the original object
new.source_data = self.source_data
new.source_vars = self.source_vars
return new
def _assign_variables(
self,
data: DataFrame | Mapping | None,
variables: dict[str, VariableSpec],
) -> tuple[DataFrame, dict[str, str | None], dict[str, str | int]]:
"""
Assign values for plot variables given long-form data and/or vector inputs.
Parameters
----------
data
Input data where variable names map to vector values.
variables
            Keys are names of plot variables (x, y, ...); each value is one of:
- name of a column (or index level, or dictionary entry) in `data`
- vector in any format that can construct a :class:`pandas.DataFrame`
Returns
-------
frame
Table mapping seaborn variables (x, y, color, ...) to data vectors.
names
Keys are defined seaborn variables; values are names inferred from
the inputs (or None when no name can be determined).
ids
Like the `names` dict, but `None` values are replaced by the `id()`
of the data object that defined the variable.
Raises
------
TypeError
When data source is not a DataFrame or Mapping.
ValueError
When variables are strings that don't appear in `data`, or when they are
non-indexed vector datatypes that have a different length from `data`.
"""
source_data: Mapping | DataFrame
frame: DataFrame
names: dict[str, str | None]
ids: dict[str, str | int]
plot_data = {}
names = {}
ids = {}
given_data = data is not None
if data is None:
# Data is optional; all variables can be defined as vectors
# But simplify downstream code by always having a usable source data object
source_data = {}
else:
source_data = data
# Variables can also be extracted from the index of a DataFrame
if isinstance(source_data, pd.DataFrame):
index = source_data.index.to_frame().to_dict("series")
else:
index = {}
for key, val in variables.items():
# Simply ignore variables with no specification
if val is None:
continue
# Try to treat the argument as a key for the data collection.
# But be flexible about what can be used as a key.
# Usually it will be a string, but allow other hashables when
# taking from the main data object. Allow only strings to reference
# fields in the index, because otherwise there is too much ambiguity.
# TODO this will be rendered unnecessary by the following pandas fix:
# https://github.com/pandas-dev/pandas/pull/41283
try:
hash(val)
val_is_hashable = True
except TypeError:
val_is_hashable = False
val_as_data_key = (
# See https://github.com/pandas-dev/pandas/pull/41283
# (isinstance(val, abc.Hashable) and val in source_data)
(val_is_hashable and val in source_data)
or (isinstance(val, str) and val in index)
)
if val_as_data_key:
val = cast(ColumnName, val)
if val in source_data:
plot_data[key] = source_data[val]
elif val in index:
plot_data[key] = index[val]
names[key] = ids[key] = str(val)
elif isinstance(val, str):
                # This looks like a column name, but the lookup failed.
err = f"Could not interpret value `{val}` for `{key}`. "
if not given_data:
err += "Value is a string, but `data` was not passed."
else:
err += "An entry with this name does not appear in `data`."
raise ValueError(err)
else:
# Otherwise, assume the value somehow represents data
# Ignore empty data structures
if isinstance(val, Sized) and len(val) == 0:
continue
# If vector has no index, it must match length of data table
if isinstance(data, pd.DataFrame) and not isinstance(val, pd.Series):
if isinstance(val, Sized) and len(data) != len(val):
val_cls = val.__class__.__name__
err = (
f"Length of {val_cls} vectors must match length of `data`"
f" when both are used, but `data` has length {len(data)}"
f" and the vector passed to `{key}` has length {len(val)}."
)
raise ValueError(err)
plot_data[key] = val
# Try to infer the original name using pandas-like metadata
if hasattr(val, "name"):
names[key] = ids[key] = str(val.name) # type: ignore # mypy/1424
else:
names[key] = None
ids[key] = id(val)
# Construct a tidy plot DataFrame. This will convert a number of
# types automatically, aligning on index in case of pandas objects
# TODO Note: this fails when variable specs *only* have scalars!
frame = pd.DataFrame(plot_data)
return frame, names, ids
def handle_data_source(data: object) -> pd.DataFrame | Mapping | None:
"""Convert the data source object to a common union representation."""
if isinstance(data, pd.DataFrame) or hasattr(data, "__dataframe__"):
        # The check for pd.DataFrame inheritance could be removed once the
        # minimal pandas version supports dataframe interchange (1.5.0).
data = convert_dataframe_to_pandas(data)
elif data is not None and not isinstance(data, Mapping):
err = f"Data source must be a DataFrame or Mapping, not {type(data)!r}."
raise TypeError(err)
return data
def convert_dataframe_to_pandas(data: object) -> pd.DataFrame:
"""Use the DataFrame exchange protocol, or fail gracefully."""
if isinstance(data, pd.DataFrame):
return data
if not hasattr(pd.api, "interchange"):
msg = (
"Support for non-pandas DataFrame objects requires a version of pandas "
"that implements the DataFrame interchange protocol. Please upgrade "
"your pandas version or coerce your data to pandas before passing "
"it to seaborn."
)
raise TypeError(msg)
if _version_predates(pd, "2.0.2"):
msg = (
"DataFrame interchange with pandas<2.0.2 has some known issues. "
f"You are using pandas {pd.__version__}. "
"Continuing, but it is recommended to carefully inspect the results and to "
"consider upgrading."
)
warnings.warn(msg, stacklevel=2)
try:
# This is going to convert all columns in the input dataframe, even though
# we may only need one or two of them. It would be more efficient to select
# the columns that are going to be used in the plot prior to interchange.
# Solving that in general is a hard problem, especially with the objects
# interface where variables passed in Plot() may only be referenced later
# in Plot.add(). But noting here in case this seems to be a bottleneck.
return pd.api.interchange.from_dataframe(data)
except Exception as err:
msg = (
"Encountered an exception when converting data source "
"to a pandas DataFrame. See traceback above for details."
)
raise RuntimeError(msg) from err
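
Example — a minimal sketch of how PlotData resolves a mix of column names and vectors (internal API, shown for illustration):

import pandas as pd
from seaborn._core.data import PlotData

df = pd.DataFrame({"a": [1, 2, 3], "b": ["p", "q", "r"]})

source = PlotData(df, {"x": "a", "color": "b"})
print(list(source.frame.columns))   # ['x', 'color']
print(source.names)                 # {'x': 'a', 'color': 'b'}

# join() layers new variables over the inherited ones and returns a new object
layer = source.join(None, {"y": df["a"] * 2})
print("y" in layer)                 # True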


@@ -0,0 +1,32 @@
"""
Custom exceptions for the seaborn.objects interface.
This is very lightweight, but it's a separate module to avoid circular imports.
"""
from __future__ import annotations
class PlotSpecError(RuntimeError):
"""
Error class raised from seaborn.objects.Plot for compile-time failures.
In the declarative Plot interface, exceptions may not be triggered immediately
by bad user input (and validation at input time may not be possible). This class
is used to signal that indirect dependency. It should be raised in an exception
chain when compile-time operations fail with an error message providing useful
    context (e.g., scaling errors could specify the variable that failed).
"""
@classmethod
def _during(cls, step: str, var: str = "") -> PlotSpecError:
"""
Initialize the class to report the failure of a specific operation.
"""
message = []
if var:
message.append(f"{step} failed for the `{var}` variable.")
else:
message.append(f"{step} failed.")
message.append("See the traceback above for more information.")
return cls(" ".join(message))
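
Example — how the library raises this in an exception chain during plot compilation (a minimal sketch; the failing operation here is a stand-in):

from seaborn._core.exceptions import PlotSpecError

try:
    float("not a number")   # stand-in for a scale transform that fails
except ValueError as err:
    raise PlotSpecError._during("Scaling operation", "x") from err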


@@ -0,0 +1,129 @@
"""Simplified split-apply-combine paradigm on dataframes for internal use."""
from __future__ import annotations
from typing import cast, Iterable
import pandas as pd
from seaborn._core.rules import categorical_order
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from typing import Callable
from pandas import DataFrame, MultiIndex, Index
class GroupBy:
"""
Interface for Pandas GroupBy operations allowing specified group order.
Writing our own class to do this has a few advantages:
- It constrains the interface between Plot and Stat/Move objects
- It allows control over the row order of the GroupBy result, which is
important when using in the context of some Move operations (dodge, stack, ...)
- It simplifies some complexities regarding the return type and Index contents
one encounters with Pandas, especially for DataFrame -> DataFrame applies
- It increases future flexibility regarding alternate DataFrame libraries
"""
def __init__(self, order: list[str] | dict[str, list | None]):
"""
Initialize the GroupBy from grouping variables and optional level orders.
Parameters
----------
order
List of variable names or dict mapping names to desired level orders.
Level order values can be None to use default ordering rules. The
variables can include names that are not expected to appear in the
data; these will be dropped before the groups are defined.
"""
if not order:
raise ValueError("GroupBy requires at least one grouping variable")
if isinstance(order, list):
order = {k: None for k in order}
self.order = order
def _get_groups(
self, data: DataFrame
) -> tuple[str | list[str], Index | MultiIndex]:
"""Return index with Cartesian product of ordered grouping variable levels."""
levels = {}
for var, order in self.order.items():
if var in data:
if order is None:
order = categorical_order(data[var])
levels[var] = order
grouper: str | list[str]
groups: Index | MultiIndex
if not levels:
grouper = []
groups = pd.Index([])
elif len(levels) > 1:
grouper = list(levels)
groups = pd.MultiIndex.from_product(levels.values(), names=grouper)
else:
grouper, = list(levels)
groups = pd.Index(levels[grouper], name=grouper)
return grouper, groups
def _reorder_columns(self, res, data):
"""Reorder result columns to match original order with new columns appended."""
cols = [c for c in data if c in res]
cols += [c for c in res if c not in data]
return res.reindex(columns=pd.Index(cols))
def agg(self, data: DataFrame, *args, **kwargs) -> DataFrame:
"""
Reduce each group to a single row in the output.
The output will have a row for each unique combination of the grouping
variable levels with null values for the aggregated variable(s) where
those combinations do not appear in the dataset.
"""
grouper, groups = self._get_groups(data)
if not grouper:
            # We will need to see whether there are valid use cases that end up here
raise ValueError("No grouping variables are present in dataframe")
res = (
data
.groupby(grouper, sort=False, observed=False)
.agg(*args, **kwargs)
.reindex(groups)
.reset_index()
.pipe(self._reorder_columns, data)
)
return res
def apply(
self, data: DataFrame, func: Callable[..., DataFrame],
*args, **kwargs,
) -> DataFrame:
"""Apply a DataFrame -> DataFrame mapping to each group."""
grouper, groups = self._get_groups(data)
if not grouper:
return self._reorder_columns(func(data, *args, **kwargs), data)
parts = {}
for key, part_df in data.groupby(grouper, sort=False, observed=False):
parts[key] = func(part_df, *args, **kwargs)
stack = []
for key in groups:
if key in parts:
if isinstance(grouper, list):
# Implies that we had a MultiIndex so key is iterable
group_ids = dict(zip(grouper, cast(Iterable, key)))
else:
group_ids = {grouper: key}
stack.append(parts[key].assign(**group_ids))
res = pd.concat(stack, ignore_index=True)
return self._reorder_columns(res, data)
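
Example — a minimal sketch of the order-preserving split-apply-combine this class provides (internal API):

import pandas as pd
from seaborn._core.groupby import GroupBy

df = pd.DataFrame({"g": ["b", "a", "b"], "v": [1.0, 2.0, 3.0]})

gb = GroupBy({"g": ["a", "b"]})    # explicit level order
print(gb.agg(df, {"v": "sum"}))    # rows come out in the order a, b

# apply() concatenates per-group results, also in the requested group order
print(gb.apply(df, lambda d: d.assign(v=d["v"].cumsum())))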


@@ -0,0 +1,274 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import ClassVar, Callable, Optional, Union, cast
import numpy as np
from pandas import DataFrame
from seaborn._core.groupby import GroupBy
from seaborn._core.scales import Scale
from seaborn._core.typing import Default
default = Default()
@dataclass
class Move:
"""Base class for objects that apply simple positional transforms."""
group_by_orient: ClassVar[bool] = True
def __call__(
self, data: DataFrame, groupby: GroupBy, orient: str, scales: dict[str, Scale],
) -> DataFrame:
raise NotImplementedError
@dataclass
class Jitter(Move):
"""
Random displacement along one or both axes to reduce overplotting.
Parameters
----------
width : float
Magnitude of jitter, relative to mark width, along the orientation axis.
        If not provided, the default value will be 0 when `x` or `y` is set; otherwise,
        a small amount of jitter is applied by default.
x : float
Magnitude of jitter, in data units, along the x axis.
y : float
        Magnitude of jitter, in data units, along the y axis.
    seed : int
        Seed for the random number generator, making the jitter reproducible.
Examples
--------
.. include:: ../docstrings/objects.Jitter.rst
"""
width: float | Default = default
x: float = 0
y: float = 0
seed: int | None = None
def __call__(
self, data: DataFrame, groupby: GroupBy, orient: str, scales: dict[str, Scale],
) -> DataFrame:
data = data.copy()
rng = np.random.default_rng(self.seed)
def jitter(data, col, scale):
noise = rng.uniform(-.5, +.5, len(data))
offsets = noise * scale
return data[col] + offsets
if self.width is default:
width = 0.0 if self.x or self.y else 0.2
else:
width = cast(float, self.width)
        if width:
data[orient] = jitter(data, orient, width * data["width"])
if self.x:
data["x"] = jitter(data, "x", self.x)
if self.y:
data["y"] = jitter(data, "y", self.y)
return data
@dataclass
class Dodge(Move):
"""
Displacement and narrowing of overlapping marks along orientation axis.
Parameters
----------
empty : {'keep', 'drop', 'fill'}
gap : float
Size of gap between dodged marks.
by : list of variable names
Variables to apply the movement to, otherwise use all.
Examples
--------
.. include:: ../docstrings/objects.Dodge.rst
"""
empty: str = "keep" # Options: keep, drop, fill
gap: float = 0
# TODO accept just a str here?
# TODO should this always be present?
# TODO should the default be an "all" singleton?
by: Optional[list[str]] = None
def __call__(
self, data: DataFrame, groupby: GroupBy, orient: str, scales: dict[str, Scale],
) -> DataFrame:
grouping_vars = [v for v in groupby.order if v in data]
groups = groupby.agg(data, {"width": "max"})
if self.empty == "fill":
groups = groups.dropna()
def groupby_pos(s):
grouper = [groups[v] for v in [orient, "col", "row"] if v in data]
return s.groupby(grouper, sort=False, observed=True)
def scale_widths(w):
# TODO what value to fill missing widths??? Hard problem...
# TODO short circuit this if outer widths has no variance?
empty = 0 if self.empty == "fill" else w.mean()
filled = w.fillna(empty)
scale = filled.max()
norm = filled.sum()
if self.empty == "keep":
w = filled
return w / norm * scale
def widths_to_offsets(w):
return w.shift(1).fillna(0).cumsum() + (w - w.sum()) / 2
new_widths = groupby_pos(groups["width"]).transform(scale_widths)
offsets = groupby_pos(new_widths).transform(widths_to_offsets)
if self.gap:
new_widths *= 1 - self.gap
groups["_dodged"] = groups[orient] + offsets
groups["width"] = new_widths
out = (
data
.drop("width", axis=1)
.merge(groups, on=grouping_vars, how="left")
.drop(orient, axis=1)
.rename(columns={"_dodged": orient})
)
return out
@dataclass
class Stack(Move):
"""
Displacement of overlapping bar or area marks along the value axis.
Examples
--------
.. include:: ../docstrings/objects.Stack.rst
"""
    # TODO center? (or should this be a different move, e.g. Stream())
def _stack(self, df, orient):
# TODO should stack do something with ymin/ymax style marks?
# Should there be an upstream conversion to baseline/height parameterization?
if df["baseline"].nunique() > 1:
err = "Stack move cannot be used when baselines are already heterogeneous"
raise RuntimeError(err)
other = {"x": "y", "y": "x"}[orient]
stacked_lengths = (df[other] - df["baseline"]).dropna().cumsum()
offsets = stacked_lengths.shift(1).fillna(0)
df[other] = stacked_lengths
df["baseline"] = df["baseline"] + offsets
return df
def __call__(
self, data: DataFrame, groupby: GroupBy, orient: str, scales: dict[str, Scale],
) -> DataFrame:
# TODO where to ensure that other semantic variables are sorted properly?
# TODO why are we not using the passed in groupby here?
groupers = ["col", "row", orient]
return GroupBy(groupers).apply(data, self._stack, orient)
@dataclass
class Shift(Move):
"""
Displacement of all marks with the same magnitude / direction.
Parameters
----------
x, y : float
Magnitude of shift, in data units, along each axis.
Examples
--------
.. include:: ../docstrings/objects.Shift.rst
"""
x: float = 0
y: float = 0
def __call__(
self, data: DataFrame, groupby: GroupBy, orient: str, scales: dict[str, Scale],
) -> DataFrame:
data = data.copy(deep=False)
data["x"] = data["x"] + self.x
data["y"] = data["y"] + self.y
return data
@dataclass
class Norm(Move):
"""
Divisive scaling on the value axis after aggregating within groups.
Parameters
----------
func : str or callable
Function called on each group to define the comparison value.
where : str
Query string defining the subset used to define the comparison values.
by : list of variables
Variables used to define aggregation groups.
percent : bool
If True, multiply the result by 100.
Examples
--------
.. include:: ../docstrings/objects.Norm.rst
"""
func: Union[Callable, str] = "max"
where: Optional[str] = None
by: Optional[list[str]] = None
percent: bool = False
group_by_orient: ClassVar[bool] = False
def _norm(self, df, var):
if self.where is None:
denom_data = df[var]
else:
denom_data = df.query(self.where)[var]
df[var] = df[var] / denom_data.agg(self.func)
if self.percent:
df[var] = df[var] * 100
return df
def __call__(
self, data: DataFrame, groupby: GroupBy, orient: str, scales: dict[str, Scale],
) -> DataFrame:
other = {"x": "y", "y": "x"}[orient]
return groupby.apply(data, self._norm, other)
# TODO
# @dataclass
# class Ridge(Move):
# ...
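
Example — how these transforms appear in the seaborn.objects interface (a sketch using the public Plot API; the dataset is seaborn's bundled sample data):

import seaborn.objects as so
from seaborn import load_dataset

tips = load_dataset("tips")

# Dodge displaces overlapping bars side by side within each x position
p1 = (
    so.Plot(tips, x="day", y="total_bill", color="sex")
    .add(so.Bar(), so.Agg(), so.Dodge(gap=.1))
)

# Jitter spreads point positions along the orientation axis
p2 = so.Plot(tips, x="day", y="total_bill").add(so.Dots(), so.Jitter(.5))
p1.show()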

File diff suppressed because it is too large


@@ -0,0 +1,834 @@
from __future__ import annotations
import itertools
import warnings
import numpy as np
from numpy.typing import ArrayLike
from pandas import Series
import matplotlib as mpl
from matplotlib.colors import to_rgb, to_rgba, to_rgba_array
from matplotlib.markers import MarkerStyle
from matplotlib.path import Path
from seaborn._core.scales import Scale, Boolean, Continuous, Nominal, Temporal
from seaborn._core.rules import categorical_order, variable_type
from seaborn.palettes import QUAL_PALETTES, color_palette, blend_palette
from seaborn.utils import get_color_cycle
from typing import Any, Callable, Tuple, List, Union, Optional
RGBTuple = Tuple[float, float, float]
RGBATuple = Tuple[float, float, float, float]
ColorSpec = Union[RGBTuple, RGBATuple, str]
DashPattern = Tuple[float, ...]
DashPatternWithOffset = Tuple[float, Optional[DashPattern]]
MarkerPattern = Union[
float,
str,
Tuple[int, int, float],
List[Tuple[float, float]],
Path,
MarkerStyle,
]
Mapping = Callable[[ArrayLike], ArrayLike]
# =================================================================================== #
# Base classes
# =================================================================================== #
class Property:
"""Base class for visual properties that can be set directly or be data scaling."""
# When True, scales for this property will populate the legend by default
legend = False
# When True, scales for this property normalize data to [0, 1] before mapping
normed = False
def __init__(self, variable: str | None = None):
"""Initialize the property with the name of the corresponding plot variable."""
if not variable:
variable = self.__class__.__name__.lower()
self.variable = variable
def default_scale(self, data: Series) -> Scale:
"""Given data, initialize appropriate scale class."""
var_type = variable_type(data, boolean_type="boolean", strict_boolean=True)
if var_type == "numeric":
return Continuous()
elif var_type == "datetime":
return Temporal()
elif var_type == "boolean":
return Boolean()
else:
return Nominal()
def infer_scale(self, arg: Any, data: Series) -> Scale:
"""Given data and a scaling argument, initialize appropriate scale class."""
# TODO put these somewhere external for validation
# TODO putting this here won't pick it up if subclasses define infer_scale
# (e.g. color). How best to handle that? One option is to call super after
# handling property-specific possibilities (e.g. for color check that the
# arg is not a valid palette name) but that could get tricky.
trans_args = ["log", "symlog", "logit", "pow", "sqrt"]
if isinstance(arg, str):
if any(arg.startswith(k) for k in trans_args):
# TODO validate numeric type? That should happen centrally somewhere
return Continuous(trans=arg)
else:
msg = f"Unknown magic arg for {self.variable} scale: '{arg}'."
raise ValueError(msg)
else:
arg_type = type(arg).__name__
msg = f"Magic arg for {self.variable} scale must be str, not {arg_type}."
raise TypeError(msg)
def get_mapping(self, scale: Scale, data: Series) -> Mapping:
"""Return a function that maps from data domain to property range."""
def identity(x):
return x
return identity
def standardize(self, val: Any) -> Any:
"""Coerce flexible property value to standardized representation."""
return val
def _check_dict_entries(self, levels: list, values: dict) -> None:
"""Input check when values are provided as a dictionary."""
missing = set(levels) - set(values)
if missing:
formatted = ", ".join(map(repr, sorted(missing, key=str)))
err = f"No entry in {self.variable} dictionary for {formatted}"
raise ValueError(err)
def _check_list_length(self, levels: list, values: list) -> list:
"""Input check when values are provided as a list."""
message = ""
if len(levels) > len(values):
message = " ".join([
f"\nThe {self.variable} list has fewer values ({len(values)})",
f"than needed ({len(levels)}) and will cycle, which may",
"produce an uninterpretable plot."
])
values = [x for _, x in zip(levels, itertools.cycle(values))]
elif len(values) > len(levels):
message = " ".join([
f"The {self.variable} list has more values ({len(values)})",
f"than needed ({len(levels)}), which may not be intended.",
])
values = values[:len(levels)]
# TODO look into custom PlotSpecWarning with better formatting
if message:
warnings.warn(message, UserWarning)
return values
# =================================================================================== #
# Properties relating to spatial position of marks on the plotting axes
# =================================================================================== #
class Coordinate(Property):
"""The position of visual marks with respect to the axes of the plot."""
legend = False
normed = False
# =================================================================================== #
# Properties with numeric values where scale range can be defined as an interval
# =================================================================================== #
class IntervalProperty(Property):
"""A numeric property where scale range can be defined as an interval."""
legend = True
normed = True
_default_range: tuple[float, float] = (0, 1)
@property
def default_range(self) -> tuple[float, float]:
"""Min and max values used by default for semantic mapping."""
return self._default_range
def _forward(self, values: ArrayLike) -> ArrayLike:
"""Transform applied to native values before linear mapping into interval."""
return values
def _inverse(self, values: ArrayLike) -> ArrayLike:
"""Transform applied to results of mapping that returns to native values."""
return values
def infer_scale(self, arg: Any, data: Series) -> Scale:
"""Given data and a scaling argument, initialize appropriate scale class."""
# TODO infer continuous based on log/sqrt etc?
var_type = variable_type(data, boolean_type="boolean", strict_boolean=True)
if var_type == "boolean":
return Boolean(arg)
elif isinstance(arg, (list, dict)):
return Nominal(arg)
elif var_type == "categorical":
return Nominal(arg)
elif var_type == "datetime":
return Temporal(arg)
# TODO other variable types
else:
return Continuous(arg)
def get_mapping(self, scale: Scale, data: Series) -> Mapping:
"""Return a function that maps from data domain to property range."""
if isinstance(scale, Nominal):
return self._get_nominal_mapping(scale, data)
elif isinstance(scale, Boolean):
return self._get_boolean_mapping(scale, data)
if scale.values is None:
vmin, vmax = self._forward(self.default_range)
elif isinstance(scale.values, tuple) and len(scale.values) == 2:
vmin, vmax = self._forward(scale.values)
else:
if isinstance(scale.values, tuple):
actual = f"{len(scale.values)}-tuple"
else:
actual = str(type(scale.values))
scale_class = scale.__class__.__name__
err = " ".join([
f"Values for {self.variable} variables with {scale_class} scale",
f"must be 2-tuple; not {actual}.",
])
raise TypeError(err)
def mapping(x):
return self._inverse(np.multiply(x, vmax - vmin) + vmin)
return mapping
def _get_nominal_mapping(self, scale: Nominal, data: Series) -> Mapping:
"""Identify evenly-spaced values using interval or explicit mapping."""
levels = categorical_order(data, scale.order)
values = self._get_values(scale, levels)
def mapping(x):
ixs = np.asarray(x, np.intp)
out = np.full(len(x), np.nan)
use = np.isfinite(x)
out[use] = np.take(values, ixs[use])
return out
return mapping
def _get_boolean_mapping(self, scale: Boolean, data: Series) -> Mapping:
"""Identify evenly-spaced values using interval or explicit mapping."""
values = self._get_values(scale, [True, False])
def mapping(x):
out = np.full(len(x), np.nan)
use = np.isfinite(x)
out[use] = np.where(x[use], *values)
return out
return mapping
def _get_values(self, scale: Scale, levels: list) -> list:
"""Validate scale.values and identify a value for each level."""
if isinstance(scale.values, dict):
self._check_dict_entries(levels, scale.values)
values = [scale.values[x] for x in levels]
elif isinstance(scale.values, list):
values = self._check_list_length(levels, scale.values)
else:
if scale.values is None:
vmin, vmax = self.default_range
elif isinstance(scale.values, tuple):
vmin, vmax = scale.values
else:
scale_class = scale.__class__.__name__
err = " ".join([
f"Values for {self.variable} variables with {scale_class} scale",
f"must be a dict, list or tuple; not {type(scale.values)}",
])
raise TypeError(err)
vmin, vmax = self._forward([vmin, vmax])
values = list(self._inverse(np.linspace(vmax, vmin, len(levels))))
return values
class PointSize(IntervalProperty):
"""Size (diameter) of a point mark, in points, with scaling by area."""
_default_range = 2, 8 # TODO use rcparams?
def _forward(self, values):
"""Square native values to implement linear scaling of point area."""
return np.square(values)
def _inverse(self, values):
"""Invert areal values back to point diameter."""
return np.sqrt(values)
class LineWidth(IntervalProperty):
"""Thickness of a line mark, in points."""
@property
def default_range(self) -> tuple[float, float]:
"""Min and max values used by default for semantic mapping."""
base = mpl.rcParams["lines.linewidth"]
return base * .5, base * 2
class EdgeWidth(IntervalProperty):
"""Thickness of the edges on a patch mark, in points."""
@property
def default_range(self) -> tuple[float, float]:
"""Min and max values used by default for semantic mapping."""
base = mpl.rcParams["patch.linewidth"]
return base * .5, base * 2
class Stroke(IntervalProperty):
"""Thickness of lines that define point glyphs."""
_default_range = .25, 2.5
class Alpha(IntervalProperty):
"""Opacity of the color values for an arbitrary mark."""
_default_range = .3, .95
# TODO validate / enforce that output is in [0, 1]
class Offset(IntervalProperty):
"""Offset for edge-aligned text, in point units."""
_default_range = 0, 5
_legend = False
class FontSize(IntervalProperty):
"""Font size for textual marks, in points."""
_legend = False
@property
def default_range(self) -> tuple[float, float]:
"""Min and max values used by default for semantic mapping."""
base = mpl.rcParams["font.size"]
return base * .5, base * 2
# =================================================================================== #
# Properties defined by arbitrary objects with inherently nominal scaling
# =================================================================================== #
class ObjectProperty(Property):
"""A property defined by arbitrary an object, with inherently nominal scaling."""
legend = True
normed = False
# Object representing null data, should appear invisible when drawn by matplotlib
# Note that we now drop nulls in Plot._plot_layer and thus may not need this
null_value: Any = None
def _default_values(self, n: int) -> list:
raise NotImplementedError()
def default_scale(self, data: Series) -> Scale:
var_type = variable_type(data, boolean_type="boolean", strict_boolean=True)
return Boolean() if var_type == "boolean" else Nominal()
def infer_scale(self, arg: Any, data: Series) -> Scale:
var_type = variable_type(data, boolean_type="boolean", strict_boolean=True)
return Boolean(arg) if var_type == "boolean" else Nominal(arg)
def get_mapping(self, scale: Scale, data: Series) -> Mapping:
"""Define mapping as lookup into list of object values."""
boolean_scale = isinstance(scale, Boolean)
order = getattr(scale, "order", [True, False] if boolean_scale else None)
levels = categorical_order(data, order)
values = self._get_values(scale, levels)
if boolean_scale:
values = values[::-1]
def mapping(x):
ixs = np.asarray(np.nan_to_num(x), np.intp)
return [
values[ix] if np.isfinite(x_i) else self.null_value
for x_i, ix in zip(x, ixs)
]
return mapping
def _get_values(self, scale: Scale, levels: list) -> list:
"""Validate scale.values and identify a value for each level."""
n = len(levels)
if isinstance(scale.values, dict):
self._check_dict_entries(levels, scale.values)
values = [scale.values[x] for x in levels]
elif isinstance(scale.values, list):
values = self._check_list_length(levels, scale.values)
elif scale.values is None:
values = self._default_values(n)
else:
msg = " ".join([
f"Scale values for a {self.variable} variable must be provided",
f"in a dict or list; not {type(scale.values)}."
])
raise TypeError(msg)
values = [self.standardize(x) for x in values]
return values
class Marker(ObjectProperty):
"""Shape of points in scatter-type marks or lines with data points marked."""
null_value = MarkerStyle("")
# TODO should we have named marker "palettes"? (e.g. see d3 options)
# TODO need some sort of "require_scale" functionality
# to raise when we get the wrong kind explicitly specified
def standardize(self, val: MarkerPattern) -> MarkerStyle:
return MarkerStyle(val)
def _default_values(self, n: int) -> list[MarkerStyle]:
"""Build an arbitrarily long list of unique marker styles.
Parameters
----------
n : int
Number of unique marker specs to generate.
Returns
-------
markers : list of string or tuples
Values for defining :class:`matplotlib.markers.MarkerStyle` objects.
All markers will be filled.
"""
# Start with marker specs that are well distinguishable
markers = [
"o", "X", (4, 0, 45), "P", (4, 0, 0), (4, 1, 0), "^", (4, 1, 45), "v",
]
# Now generate more from regular polygons of increasing order
s = 5
while len(markers) < n:
a = 360 / (s + 1) / 2
markers.extend([(s + 1, 1, a), (s + 1, 0, a), (s, 1, 0), (s, 0, 0)])
s += 1
markers = [MarkerStyle(m) for m in markers[:n]]
return markers
class LineStyle(ObjectProperty):
"""Dash pattern for line-type marks."""
null_value = ""
def standardize(self, val: str | DashPattern) -> DashPatternWithOffset:
return self._get_dash_pattern(val)
def _default_values(self, n: int) -> list[DashPatternWithOffset]:
"""Build an arbitrarily long list of unique dash styles for lines.
Parameters
----------
n : int
Number of unique dash specs to generate.
Returns
-------
dashes : list of strings or tuples
Valid arguments for the ``dashes`` parameter on
:class:`matplotlib.lines.Line2D`. The first spec is a solid
line (``""``), the remainder are sequences of long and short
dashes.
"""
# Start with dash specs that are well distinguishable
dashes: list[str | DashPattern] = [
"-", (4, 1.5), (1, 1), (3, 1.25, 1.5, 1.25), (5, 1, 1, 1),
]
# Now programmatically build as many as we need
p = 3
while len(dashes) < n:
# Take combinations of long and short dashes
a = itertools.combinations_with_replacement([3, 1.25], p)
b = itertools.combinations_with_replacement([4, 1], p)
# Interleave the combinations, reversing one of the streams
segment_list = itertools.chain(*zip(list(a)[1:-1][::-1], list(b)[1:-1]))
# Now insert the gaps
for segments in segment_list:
gap = min(segments)
spec = tuple(itertools.chain(*((seg, gap) for seg in segments)))
dashes.append(spec)
p += 1
return [self._get_dash_pattern(x) for x in dashes]
@staticmethod
def _get_dash_pattern(style: str | DashPattern) -> DashPatternWithOffset:
"""Convert linestyle arguments to dash pattern with offset."""
# Copied and modified from Matplotlib 3.4
# go from short hand -> full strings
ls_mapper = {"-": "solid", "--": "dashed", "-.": "dashdot", ":": "dotted"}
if isinstance(style, str):
style = ls_mapper.get(style, style)
# un-dashed styles
if style in ["solid", "none", "None"]:
offset = 0
dashes = None
# dashed styles
elif style in ["dashed", "dashdot", "dotted"]:
offset = 0
dashes = tuple(mpl.rcParams[f"lines.{style}_pattern"])
else:
options = [*ls_mapper.values(), *ls_mapper.keys()]
msg = f"Linestyle string must be one of {options}, not {repr(style)}."
raise ValueError(msg)
elif isinstance(style, tuple):
if len(style) > 1 and isinstance(style[1], tuple):
offset, dashes = style
elif len(style) > 1 and style[1] is None:
offset, dashes = style
else:
offset = 0
dashes = style
else:
val_type = type(style).__name__
msg = f"Linestyle must be str or tuple, not {val_type}."
raise TypeError(msg)
# Normalize offset to be positive and shorter than the dash cycle
if dashes is not None:
try:
dsum = sum(dashes)
except TypeError as err:
msg = f"Invalid dash pattern: {dashes}"
raise TypeError(msg) from err
if dsum:
offset %= dsum
return offset, dashes
class TextAlignment(ObjectProperty):
legend = False
class HorizontalAlignment(TextAlignment):
def _default_values(self, n: int) -> list:
vals = itertools.cycle(["left", "right"])
return [next(vals) for _ in range(n)]
class VerticalAlignment(TextAlignment):
def _default_values(self, n: int) -> list:
vals = itertools.cycle(["top", "bottom"])
return [next(vals) for _ in range(n)]
# =================================================================================== #
# Properties with RGB(A) color values
# =================================================================================== #
class Color(Property):
"""Color, as RGB(A), scalable with nominal palettes or continuous gradients."""
legend = True
normed = True
def standardize(self, val: ColorSpec) -> RGBTuple | RGBATuple:
# Return color with alpha channel only if the input spec has it
# This is so that RGBA colors can override the Alpha property
if to_rgba(val) != to_rgba(val, 1):
return to_rgba(val)
else:
return to_rgb(val)
def _standardize_color_sequence(self, colors: ArrayLike) -> ArrayLike:
"""Convert color sequence to RGB(A) array, preserving but not adding alpha."""
def has_alpha(x):
return to_rgba(x) != to_rgba(x, 1)
if isinstance(colors, np.ndarray):
needs_alpha = colors.shape[1] == 4
else:
needs_alpha = any(has_alpha(x) for x in colors)
if needs_alpha:
return to_rgba_array(colors)
else:
return to_rgba_array(colors)[:, :3]
def infer_scale(self, arg: Any, data: Series) -> Scale:
# TODO when inferring Continuous without data, verify type
# TODO need to rethink the variable type system
# (e.g. boolean, ordered categories as Ordinal, etc)..
var_type = variable_type(data, boolean_type="boolean", strict_boolean=True)
if var_type == "boolean":
return Boolean(arg)
if isinstance(arg, (dict, list)):
return Nominal(arg)
if isinstance(arg, tuple):
if var_type == "categorical":
# TODO It seems reasonable to allow a gradient mapping for nominal
# scale but it also feels "technically" wrong. Should this infer
# Ordinal with categorical data and, if so, verify orderedness?
return Nominal(arg)
return Continuous(arg)
if callable(arg):
return Continuous(arg)
# TODO Do we accept str like "log", "pow", etc. for semantics?
if not isinstance(arg, str):
msg = " ".join([
f"A single scale argument for {self.variable} variables must be",
f"a string, dict, tuple, list, or callable, not {type(arg)}."
])
raise TypeError(msg)
if arg in QUAL_PALETTES:
return Nominal(arg)
elif var_type == "numeric":
return Continuous(arg)
# TODO implement scales for date variables and any others.
else:
return Nominal(arg)
def get_mapping(self, scale: Scale, data: Series) -> Mapping:
"""Return a function that maps from data domain to color values."""
# TODO what is best way to do this conditional?
# Should it be class-based or should classes have behavioral attributes?
if isinstance(scale, Nominal):
return self._get_nominal_mapping(scale, data)
elif isinstance(scale, Boolean):
return self._get_boolean_mapping(scale, data)
if scale.values is None:
# TODO Rethink best default continuous color gradient
mapping = color_palette("ch:", as_cmap=True)
elif isinstance(scale.values, tuple):
# TODO blend_palette will strip alpha, but we should support
# interpolation on all four channels
mapping = blend_palette(scale.values, as_cmap=True)
elif isinstance(scale.values, str):
# TODO for matplotlib colormaps this will clip extremes, which is
# different from what using the named colormap directly would do
            # This may or may not be desirable.
mapping = color_palette(scale.values, as_cmap=True)
elif callable(scale.values):
mapping = scale.values
else:
scale_class = scale.__class__.__name__
msg = " ".join([
f"Scale values for {self.variable} with a {scale_class} mapping",
f"must be string, tuple, or callable; not {type(scale.values)}."
])
raise TypeError(msg)
def _mapping(x):
# Remove alpha channel so it does not override alpha property downstream
# TODO this will need to be more flexible to support RGBA tuples (see above)
invalid = ~np.isfinite(x)
out = mapping(x)[:, :3]
out[invalid] = np.nan
return out
return _mapping
def _get_nominal_mapping(self, scale: Nominal, data: Series) -> Mapping:
levels = categorical_order(data, scale.order)
colors = self._get_values(scale, levels)
def mapping(x):
ixs = np.asarray(np.nan_to_num(x), np.intp)
use = np.isfinite(x)
out = np.full((len(ixs), colors.shape[1]), np.nan)
out[use] = np.take(colors, ixs[use], axis=0)
return out
return mapping
def _get_boolean_mapping(self, scale: Boolean, data: Series) -> Mapping:
colors = self._get_values(scale, [True, False])
def mapping(x):
use = np.isfinite(x)
x = np.asarray(np.nan_to_num(x)).astype(bool)
out = np.full((len(x), colors.shape[1]), np.nan)
out[x & use] = colors[0]
out[~x & use] = colors[1]
return out
return mapping
def _get_values(self, scale: Scale, levels: list) -> ArrayLike:
"""Validate scale.values and identify a value for each level."""
n = len(levels)
values = scale.values
if isinstance(values, dict):
self._check_dict_entries(levels, values)
colors = [values[x] for x in levels]
elif isinstance(values, list):
colors = self._check_list_length(levels, values)
elif isinstance(values, tuple):
colors = blend_palette(values, n)
elif isinstance(values, str):
colors = color_palette(values, n)
elif values is None:
if n <= len(get_color_cycle()):
# Use current (global) default palette
colors = color_palette(n_colors=n)
else:
colors = color_palette("husl", n)
else:
scale_class = scale.__class__.__name__
msg = " ".join([
f"Scale values for {self.variable} with a {scale_class} mapping",
f"must be string, list, tuple, or dict; not {type(scale.values)}."
])
raise TypeError(msg)
return self._standardize_color_sequence(colors)
# =================================================================================== #
# Properties that can take only two states
# =================================================================================== #
class Fill(Property):
"""Boolean property of points/bars/patches that can be solid or outlined."""
legend = True
normed = False
def default_scale(self, data: Series) -> Scale:
var_type = variable_type(data, boolean_type="boolean", strict_boolean=True)
return Boolean() if var_type == "boolean" else Nominal()
def infer_scale(self, arg: Any, data: Series) -> Scale:
var_type = variable_type(data, boolean_type="boolean", strict_boolean=True)
return Boolean(arg) if var_type == "boolean" else Nominal(arg)
def standardize(self, val: Any) -> bool:
return bool(val)
def _default_values(self, n: int) -> list:
"""Return a list of n values, alternating True and False."""
if n > 2:
msg = " ".join([
f"The variable assigned to {self.variable} has more than two levels,",
f"so {self.variable} values will cycle and may be uninterpretable",
])
# TODO fire in a "nice" way (see above)
warnings.warn(msg, UserWarning)
return [x for x, _ in zip(itertools.cycle([True, False]), range(n))]
def get_mapping(self, scale: Scale, data: Series) -> Mapping:
"""Return a function that maps each data value to True or False."""
boolean_scale = isinstance(scale, Boolean)
order = getattr(scale, "order", [True, False] if boolean_scale else None)
levels = categorical_order(data, order)
values = self._get_values(scale, levels)
if boolean_scale:
values = values[::-1]
def mapping(x):
ixs = np.asarray(np.nan_to_num(x), np.intp)
return [
values[ix] if np.isfinite(x_i) else False
for x_i, ix in zip(x, ixs)
]
return mapping
def _get_values(self, scale: Scale, levels: list) -> list:
"""Validate scale.values and identify a value for each level."""
if isinstance(scale.values, list):
values = [bool(x) for x in scale.values]
elif isinstance(scale.values, dict):
values = [bool(scale.values[x]) for x in levels]
elif scale.values is None:
values = self._default_values(len(levels))
else:
msg = " ".join([
f"Scale values for {self.variable} must be passed in",
f"a list or dict; not {type(scale.values)}."
])
raise TypeError(msg)
return values
# =================================================================================== #
# Enumeration of properties for use by Plot and Mark classes
# =================================================================================== #
# TODO turn this into a property registry with hooks, etc.
# TODO Users do not interact directly with properties, so how to document them?
PROPERTY_CLASSES = {
"x": Coordinate,
"y": Coordinate,
"color": Color,
"alpha": Alpha,
"fill": Fill,
"marker": Marker,
"pointsize": PointSize,
"stroke": Stroke,
"linewidth": LineWidth,
"linestyle": LineStyle,
"fillcolor": Color,
"fillalpha": Alpha,
"edgewidth": EdgeWidth,
"edgestyle": LineStyle,
"edgecolor": Color,
"edgealpha": Alpha,
"text": Property,
"halign": HorizontalAlignment,
"valign": VerticalAlignment,
"offset": Offset,
"fontsize": FontSize,
"xmin": Coordinate,
"xmax": Coordinate,
"ymin": Coordinate,
"ymax": Coordinate,
"group": Property,
# TODO pattern?
# TODO gradient?
}
PROPERTIES = {var: cls(var) for var, cls in PROPERTY_CLASSES.items()}
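
Example — a minimal sketch of how a Property infers scales and default ranges (internal API, for illustration):

import pandas as pd
from seaborn._core.properties import PROPERTIES

color = PROPERTIES["color"]
scale = color.infer_scale("viridis", pd.Series(["a", "b", "c"]))
print(type(scale).__name__)      # Nominal: categorical data, non-qualitative palette

linewidth = PROPERTIES["linewidth"]
print(linewidth.default_range)   # derived from rcParams["lines.linewidth"]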


@@ -0,0 +1,173 @@
from __future__ import annotations
import warnings
from collections import UserString
from numbers import Number
from datetime import datetime
import numpy as np
import pandas as pd
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from typing import Literal
from pandas import Series
class VarType(UserString):
"""
Prevent comparisons elsewhere in the library from using the wrong name.
Errors are simple assertions because users should not be able to trigger
them. If that changes, they should be more verbose.
"""
# TODO VarType is an awfully overloaded name, but so is DataType ...
    # TODO adding unknown because we are using this for scales; is that right?
allowed = "numeric", "datetime", "categorical", "boolean", "unknown"
def __init__(self, data):
assert data in self.allowed, data
super().__init__(data)
def __eq__(self, other):
assert other in self.allowed, other
return self.data == other
def variable_type(
vector: Series,
boolean_type: Literal["numeric", "categorical", "boolean"] = "numeric",
strict_boolean: bool = False,
) -> VarType:
"""
Determine whether a vector contains numeric, categorical, or datetime data.
This function differs from the pandas typing API in a few ways:
- Python sequences or object-typed PyData objects are considered numeric if
all of their entries are numeric.
- String or mixed-type data are considered categorical even if not
explicitly represented as a :class:`pandas.api.types.CategoricalDtype`.
- There is some flexibility about how to treat binary / boolean data.
Parameters
----------
    vector : :class:`pandas.Series`, :class:`numpy.ndarray`, or Python sequence
Input data to test.
boolean_type : 'numeric', 'categorical', or 'boolean'
Type to use for vectors containing only 0s and 1s (and NAs).
strict_boolean : bool
If True, only consider data to be boolean when the dtype is bool or Boolean.
Returns
-------
    var_type : 'numeric', 'categorical', 'datetime', or 'boolean'
Name identifying the type of data in the vector.
"""
# If a categorical dtype is set, infer categorical
if isinstance(getattr(vector, 'dtype', None), pd.CategoricalDtype):
return VarType("categorical")
# Special-case all-na data, which is always "numeric"
if pd.isna(vector).all():
return VarType("numeric")
# Now drop nulls to simplify further type inference
vector = vector.dropna()
# Special-case binary/boolean data, allow caller to determine
# This triggers a numpy warning when vector has strings/objects
# https://github.com/numpy/numpy/issues/6784
# Because we reduce with .all(), we are agnostic about whether the
# comparison returns a scalar or vector, so we will ignore the warning.
# It triggers a separate DeprecationWarning when the vector has datetimes:
# https://github.com/numpy/numpy/issues/13548
# This is considered a bug by numpy and will likely go away.
with warnings.catch_warnings():
warnings.simplefilter(
action='ignore',
category=(FutureWarning, DeprecationWarning) # type: ignore # mypy bug?
)
if strict_boolean:
if isinstance(vector.dtype, pd.core.dtypes.base.ExtensionDtype):
boolean_dtypes = ["bool", "boolean"]
else:
boolean_dtypes = ["bool"]
boolean_vector = vector.dtype in boolean_dtypes
else:
try:
boolean_vector = bool(np.isin(vector, [0, 1]).all())
except TypeError:
# .isin comparison is not guaranteed to be possible under NumPy
# casting rules, depending on the (unknown) dtype of 'vector'
boolean_vector = False
if boolean_vector:
return VarType(boolean_type)
# Defer to positive pandas tests
if pd.api.types.is_numeric_dtype(vector):
return VarType("numeric")
if pd.api.types.is_datetime64_dtype(vector):
return VarType("datetime")
# --- If we get to here, we need to check the entries
# Check for a collection where everything is a number
def all_numeric(x):
for x_i in x:
if not isinstance(x_i, Number):
return False
return True
if all_numeric(vector):
return VarType("numeric")
# Check for a collection where everything is a datetime
def all_datetime(x):
for x_i in x:
if not isinstance(x_i, (datetime, np.datetime64)):
return False
return True
if all_datetime(vector):
return VarType("datetime")
# Otherwise, our final fallback is to consider things categorical
return VarType("categorical")
def categorical_order(vector: Series, order: list | None = None) -> list:
"""
Return a list of unique data values using seaborn's ordering rules.
Parameters
----------
vector : Series
Vector of "categorical" values
order : list
Desired order of category levels to override the order determined
from the `data` object.
Returns
-------
order : list
Ordered list of category levels not including null values.
"""
if order is not None:
return order
if vector.dtype.name == "category":
order = list(vector.cat.categories)
else:
order = list(filter(pd.notnull, vector.unique()))
if variable_type(pd.Series(order)) == "numeric":
order.sort()
return order
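
Example — how the type-inference rules behave on a few simple vectors (internal API):

import pandas as pd
from seaborn._core.rules import variable_type, categorical_order

print(variable_type(pd.Series([1.5, 2.5])))                      # numeric
print(variable_type(pd.Series([0, 1])))                          # numeric (binary default)
print(variable_type(pd.Series([0, 1]), boolean_type="boolean"))  # boolean
print(variable_type(pd.Series(["a", "b"])))                      # categorical

print(categorical_order(pd.Series([3, 1, 2])))                   # [1, 2, 3] (numeric sort)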

File diff suppressed because it is too large


@@ -0,0 +1,263 @@
from __future__ import annotations
from collections.abc import Generator
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
from matplotlib.figure import Figure
from typing import TYPE_CHECKING
if TYPE_CHECKING: # TODO move to seaborn._core.typing?
from seaborn._core.plot import FacetSpec, PairSpec
from matplotlib.figure import SubFigure
class Subplots:
"""
Interface for creating and using matplotlib subplots based on seaborn parameters.
Parameters
----------
subplot_spec : dict
Keyword args for :meth:`matplotlib.figure.Figure.subplots`.
facet_spec : dict
Parameters that control subplot faceting.
pair_spec : dict
Parameters that control subplot pairing.
data : PlotData
Data used to define figure setup.
"""
def __init__(
self,
subplot_spec: dict, # TODO define as TypedDict
facet_spec: FacetSpec,
pair_spec: PairSpec,
):
self.subplot_spec = subplot_spec
self._check_dimension_uniqueness(facet_spec, pair_spec)
self._determine_grid_dimensions(facet_spec, pair_spec)
self._handle_wrapping(facet_spec, pair_spec)
self._determine_axis_sharing(pair_spec)
def _check_dimension_uniqueness(
self, facet_spec: FacetSpec, pair_spec: PairSpec
) -> None:
"""Reject specs that pair and facet on (or wrap to) same figure dimension."""
err = None
facet_vars = facet_spec.get("variables", {})
if facet_spec.get("wrap") and {"col", "row"} <= set(facet_vars):
err = "Cannot wrap facets when specifying both `col` and `row`."
elif (
pair_spec.get("wrap")
and pair_spec.get("cross", True)
and len(pair_spec.get("structure", {}).get("x", [])) > 1
and len(pair_spec.get("structure", {}).get("y", [])) > 1
):
err = "Cannot wrap subplots when pairing on both `x` and `y`."
collisions = {"x": ["columns", "rows"], "y": ["rows", "columns"]}
for pair_axis, (multi_dim, wrap_dim) in collisions.items():
if pair_axis not in pair_spec.get("structure", {}):
continue
elif multi_dim[:3] in facet_vars:
err = f"Cannot facet the {multi_dim} while pairing on `{pair_axis}``."
elif wrap_dim[:3] in facet_vars and facet_spec.get("wrap"):
err = f"Cannot wrap the {wrap_dim} while pairing on `{pair_axis}``."
elif wrap_dim[:3] in facet_vars and pair_spec.get("wrap"):
err = f"Cannot wrap the {multi_dim} while faceting the {wrap_dim}."
if err is not None:
raise RuntimeError(err) # TODO what err class? Define PlotSpecError?
def _determine_grid_dimensions(
self, facet_spec: FacetSpec, pair_spec: PairSpec
) -> None:
"""Parse faceting and pairing information to define figure structure."""
self.grid_dimensions: dict[str, list] = {}
for dim, axis in zip(["col", "row"], ["x", "y"]):
facet_vars = facet_spec.get("variables", {})
if dim in facet_vars:
self.grid_dimensions[dim] = facet_spec["structure"][dim]
elif axis in pair_spec.get("structure", {}):
self.grid_dimensions[dim] = [
None for _ in pair_spec.get("structure", {})[axis]
]
else:
self.grid_dimensions[dim] = [None]
self.subplot_spec[f"n{dim}s"] = len(self.grid_dimensions[dim])
if not pair_spec.get("cross", True):
self.subplot_spec["nrows"] = 1
self.n_subplots = self.subplot_spec["ncols"] * self.subplot_spec["nrows"]
def _handle_wrapping(
self, facet_spec: FacetSpec, pair_spec: PairSpec
) -> None:
"""Update figure structure parameters based on facet/pair wrapping."""
self.wrap = wrap = facet_spec.get("wrap") or pair_spec.get("wrap")
if not wrap:
return
wrap_dim = "row" if self.subplot_spec["nrows"] > 1 else "col"
flow_dim = {"row": "col", "col": "row"}[wrap_dim]
n_subplots = self.subplot_spec[f"n{wrap_dim}s"]
flow = int(np.ceil(n_subplots / wrap))
if wrap < self.subplot_spec[f"n{wrap_dim}s"]:
self.subplot_spec[f"n{wrap_dim}s"] = wrap
self.subplot_spec[f"n{flow_dim}s"] = flow
self.n_subplots = n_subplots
self.wrap_dim = wrap_dim
def _determine_axis_sharing(self, pair_spec: PairSpec) -> None:
"""Update subplot spec with default or specified axis sharing parameters."""
axis_to_dim = {"x": "col", "y": "row"}
key: str
val: str | bool
for axis in "xy":
key = f"share{axis}"
# Always use user-specified value, if present
if key not in self.subplot_spec:
if axis in pair_spec.get("structure", {}):
# Paired axes are shared along one dimension by default
if self.wrap is None and pair_spec.get("cross", True):
val = axis_to_dim[axis]
else:
val = False
else:
# This will pick up faceted plots, as well as single subplot
# figures, where the value doesn't really matter
val = True
self.subplot_spec[key] = val
def init_figure(
self,
pair_spec: PairSpec,
pyplot: bool = False,
figure_kws: dict | None = None,
target: Axes | Figure | SubFigure | None = None,
) -> Figure:
"""Initialize matplotlib objects and add seaborn-relevant metadata."""
# TODO reduce need to pass pair_spec here?
if figure_kws is None:
figure_kws = {}
if isinstance(target, mpl.axes.Axes):
if max(self.subplot_spec["nrows"], self.subplot_spec["ncols"]) > 1:
err = " ".join([
"Cannot create multiple subplots after calling `Plot.on` with",
f"a {mpl.axes.Axes} object.",
f" You may want to use a {mpl.figure.SubFigure} instead.",
])
raise RuntimeError(err)
self._subplot_list = [{
"ax": target,
"left": True,
"right": True,
"top": True,
"bottom": True,
"col": None,
"row": None,
"x": "x",
"y": "y",
}]
self._figure = target.figure
return self._figure
elif isinstance(target, mpl.figure.SubFigure):
figure = target.figure
elif isinstance(target, mpl.figure.Figure):
figure = target
else:
if pyplot:
figure = plt.figure(**figure_kws)
else:
figure = mpl.figure.Figure(**figure_kws)
target = figure
self._figure = figure
axs = target.subplots(**self.subplot_spec, squeeze=False)
if self.wrap:
# Remove unused Axes and flatten the rest into a (2D) vector
axs_flat = axs.ravel({"col": "C", "row": "F"}[self.wrap_dim])
axs, extra = np.split(axs_flat, [self.n_subplots])
for ax in extra:
ax.remove()
if self.wrap_dim == "col":
axs = axs[np.newaxis, :]
else:
axs = axs[:, np.newaxis]
# Get i, j coordinates for each Axes object
# Note that i, j are with respect to faceting/pairing,
        # not the subplot grid itself (which only matters in the case of wrapping).
iter_axs: np.ndenumerate | zip
if not pair_spec.get("cross", True):
indices = np.arange(self.n_subplots)
iter_axs = zip(zip(indices, indices), axs.flat)
else:
iter_axs = np.ndenumerate(axs)
self._subplot_list = []
for (i, j), ax in iter_axs:
info = {"ax": ax}
nrows, ncols = self.subplot_spec["nrows"], self.subplot_spec["ncols"]
if not self.wrap:
info["left"] = j % ncols == 0
info["right"] = (j + 1) % ncols == 0
info["top"] = i == 0
info["bottom"] = i == nrows - 1
elif self.wrap_dim == "col":
info["left"] = j % ncols == 0
info["right"] = ((j + 1) % ncols == 0) or ((j + 1) == self.n_subplots)
info["top"] = j < ncols
info["bottom"] = j >= (self.n_subplots - ncols)
elif self.wrap_dim == "row":
info["left"] = i < nrows
info["right"] = i >= self.n_subplots - nrows
info["top"] = i % nrows == 0
info["bottom"] = ((i + 1) % nrows == 0) or ((i + 1) == self.n_subplots)
if not pair_spec.get("cross", True):
info["top"] = j < ncols
info["bottom"] = j >= self.n_subplots - ncols
for dim in ["row", "col"]:
idx = {"row": i, "col": j}[dim]
info[dim] = self.grid_dimensions[dim][idx]
for axis in "xy":
idx = {"x": j, "y": i}[axis]
if axis in pair_spec.get("structure", {}):
key = f"{axis}{idx}"
else:
key = axis
info[axis] = key
self._subplot_list.append(info)
return figure
def __iter__(self) -> Generator[dict, None, None]: # TODO TypedDict?
"""Yield each subplot dictionary with Axes object and metadata."""
yield from self._subplot_list
def __len__(self) -> int:
"""Return the number of subplots in this figure."""
return len(self._subplot_list)
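# --- Editor's illustrative sketch (not part of the committed file) ---
# Seven column facets wrapped at 3 become a 3x3 grid with the two surplus
# Axes removed; the "day" variable and its levels are made up for the demo.
if __name__ == "__main__":
    facet_spec = {
        "variables": {"col": "day"},
        "structure": {"col": ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]},
        "wrap": 3,
    }
    sp = Subplots({}, facet_spec, {})
    sp.init_figure(pair_spec={})
    assert (sp.subplot_spec["nrows"], sp.subplot_spec["ncols"]) == (3, 3)
    assert len(sp) == 7  # two unused Axes were removed by the wrap logic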

View File

@@ -0,0 +1,49 @@
from __future__ import annotations
from collections.abc import Iterable, Mapping
from datetime import date, datetime, timedelta
from typing import Any, Optional, Union, Tuple, List, Dict
from numpy import ndarray # TODO use ArrayLike?
from pandas import Series, Index, Timestamp, Timedelta
from matplotlib.colors import Colormap, Normalize
ColumnName = Union[
str, bytes, date, datetime, timedelta, bool, complex, Timestamp, Timedelta
]
Vector = Union[Series, Index, ndarray]
VariableSpec = Union[ColumnName, Vector, None]
VariableSpecList = Union[List[VariableSpec], Index, None]
# A DataSource can be an object implementing __dataframe__, or a Mapping
# (and is optional in all contexts where it is used).
# I don't think there's an abc for "has __dataframe__", so we type as object
# but keep the (slightly odd) Union alias for better user-facing annotations.
DataSource = Union[object, Mapping, None]
OrderSpec = Union[Iterable, None] # TODO technically str is iterable
NormSpec = Union[Tuple[Optional[float], Optional[float]], Normalize, None]
# TODO for discrete mappings, it would be ideal to use a parameterized type
# as the dict values / list entries should be of specific type(s) for each method
PaletteSpec = Union[str, list, dict, Colormap, None]
DiscreteValueSpec = Union[dict, list, None]
ContinuousValueSpec = Union[
Tuple[float, float], List[float], Dict[Any, float], None,
]
class Default:
def __repr__(self):
return "<default>"
class Deprecated:
def __repr__(self):
return "<deprecated>"
default = Default()
deprecated = Deprecated()
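# --- Editor's illustrative sketch (not part of the committed file) ---
# The `default` sentinel lets an API distinguish "argument omitted" from an
# explicit None; `demo` is a made-up function.
if __name__ == "__main__":
    def demo(legend=default):
        return "use rc setting" if legend is default else legend
    assert demo() == "use rc setting"
    assert demo(None) is None  # None is a real user value, not "unset"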

View File

@@ -0,0 +1,198 @@
import re
import pydoc
from .external.docscrape import NumpyDocString
class DocstringComponents:
regexp = re.compile(r"\n((\n|.)+)\n\s*", re.MULTILINE)
def __init__(self, comp_dict, strip_whitespace=True):
"""Read entries from a dict, optionally stripping outer whitespace."""
if strip_whitespace:
entries = {}
for key, val in comp_dict.items():
m = re.match(self.regexp, val)
if m is None:
entries[key] = val
else:
entries[key] = m.group(1)
else:
entries = comp_dict.copy()
self.entries = entries
def __getattr__(self, attr):
"""Provide dot access to entries for clean raw docstrings."""
if attr in self.entries:
return self.entries[attr]
else:
try:
return self.__getattribute__(attr)
except AttributeError as err:
# If Python is run with -OO, it will strip docstrings and our lookup
# from self.entries will fail. We check for __debug__, which is actually
# set to False by -O (it is True for normal execution).
# But we only want to see an error when building the docs;
# not something users should see, so this slight inconsistency is fine.
if __debug__:
raise err
else:
pass
@classmethod
def from_nested_components(cls, **kwargs):
"""Add multiple sub-sets of components."""
return cls(kwargs, strip_whitespace=False)
@classmethod
def from_function_params(cls, func):
"""Use the numpydoc parser to extract components from existing func."""
params = NumpyDocString(pydoc.getdoc(func))["Parameters"]
comp_dict = {}
for p in params:
name = p.name
type = p.type
desc = "\n ".join(p.desc)
comp_dict[name] = f"{name} : {type}\n {desc}"
return cls(comp_dict)
# TODO is "vector" the best term here? We mean to imply 1D data with a variety
# of types?
# TODO now that we can parse numpydoc style strings, do we need to define dicts
# of docstring components, or just write out a docstring?
_core_params = dict(
data="""
data : :class:`pandas.DataFrame`, :class:`numpy.ndarray`, mapping, or sequence
Input data structure. Either a long-form collection of vectors that can be
assigned to named variables or a wide-form dataset that will be internally
reshaped.
""", # TODO add link to user guide narrative when exists
xy="""
x, y : vectors or keys in ``data``
Variables that specify positions on the x and y axes.
""",
hue="""
hue : vector or key in ``data``
Semantic variable that is mapped to determine the color of plot elements.
""",
palette="""
palette : string, list, dict, or :class:`matplotlib.colors.Colormap`
Method for choosing the colors to use when mapping the ``hue`` semantic.
String values are passed to :func:`color_palette`. List or dict values
imply categorical mapping, while a colormap object implies numeric mapping.
""", # noqa: E501
hue_order="""
hue_order : vector of strings
Specify the order of processing and plotting for categorical levels of the
``hue`` semantic.
""",
hue_norm="""
hue_norm : tuple or :class:`matplotlib.colors.Normalize`
Either a pair of values that set the normalization range in data units
or an object that will map from data units into a [0, 1] interval. Usage
implies numeric mapping.
""",
color="""
color : :mod:`matplotlib color <matplotlib.colors>`
Single color specification for when hue mapping is not used. Otherwise, the
plot will try to hook into the matplotlib property cycle.
""",
ax="""
ax : :class:`matplotlib.axes.Axes`
Pre-existing axes for the plot. Otherwise, call :func:`matplotlib.pyplot.gca`
internally.
""", # noqa: E501
)
_core_returns = dict(
ax="""
:class:`matplotlib.axes.Axes`
The matplotlib axes containing the plot.
""",
facetgrid="""
:class:`FacetGrid`
An object managing one or more subplots that correspond to conditional data
subsets with convenient methods for batch-setting of axes attributes.
""",
jointgrid="""
:class:`JointGrid`
An object managing multiple subplots that correspond to joint and marginal axes
for plotting a bivariate relationship or distribution.
""",
pairgrid="""
:class:`PairGrid`
An object managing multiple subplots that correspond to joint and marginal axes
for pairwise combinations of multiple variables in a dataset.
""",
)
_seealso_blurbs = dict(
# Relational plots
scatterplot="""
scatterplot : Plot data using points.
""",
lineplot="""
lineplot : Plot data using lines.
""",
# Distribution plots
displot="""
displot : Figure-level interface to distribution plot functions.
""",
histplot="""
histplot : Plot a histogram of binned counts with optional normalization or smoothing.
""",
kdeplot="""
kdeplot : Plot univariate or bivariate distributions using kernel density estimation.
""",
ecdfplot="""
ecdfplot : Plot empirical cumulative distribution functions.
""",
rugplot="""
rugplot : Plot a tick at each observation value along the x and/or y axes.
""",
# Categorical plots
stripplot="""
stripplot : Plot a categorical scatter with jitter.
""",
swarmplot="""
swarmplot : Plot a categorical scatter with non-overlapping points.
""",
violinplot="""
violinplot : Draw an enhanced boxplot using kernel density estimation.
""",
pointplot="""
pointplot : Plot point estimates and CIs using markers and lines.
""",
# Multiples
jointplot="""
jointplot : Draw a bivariate plot with univariate marginal distributions.
""",
pairplot="""
pairplot : Draw multiple bivariate plots with univariate marginal distributions.
""",
jointgrid="""
JointGrid : Set up a figure with joint and marginal views on bivariate data.
""",
pairgrid="""
PairGrid : Set up a figure with joint and marginal views on multiple variables.
""",
)
_core_docs = dict(
params=DocstringComponents(_core_params),
returns=DocstringComponents(_core_returns),
seealso=DocstringComponents(_seealso_blurbs),
)
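# --- Editor's illustrative sketch (not part of the committed file) ---
# Shared components are spliced into a docstring with str.format, which
# supports attribute access on DocstringComponents; `my_plot` is made up.
if __name__ == "__main__":
    def my_plot(data=None, x=None, y=None, ax=None):
        ...
    my_plot.__doc__ = """Draw a demo plot.

    Parameters
    ----------
    {params.data}
    {params.xy}

    Returns
    -------
    {returns.ax}
    """.format(**_core_docs)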

View File

@@ -0,0 +1,170 @@
from __future__ import annotations
from collections import defaultdict
from dataclasses import dataclass
import numpy as np
import matplotlib as mpl
from seaborn._marks.base import (
Mark,
Mappable,
MappableBool,
MappableFloat,
MappableColor,
MappableStyle,
resolve_properties,
resolve_color,
document_properties,
)
class AreaBase:
def _plot(self, split_gen, scales, orient):
patches = defaultdict(list)
for keys, data, ax in split_gen():
kws = {}
data = self._standardize_coordinate_parameters(data, orient)
resolved = resolve_properties(self, keys, scales)
verts = self._get_verts(data, orient)
ax.update_datalim(verts)
# TODO should really move this logic into resolve_color
fc = resolve_color(self, keys, "", scales)
if not resolved["fill"]:
fc = mpl.colors.to_rgba(fc, 0)
kws["facecolor"] = fc
kws["edgecolor"] = resolve_color(self, keys, "edge", scales)
kws["linewidth"] = resolved["edgewidth"]
kws["linestyle"] = resolved["edgestyle"]
patches[ax].append(mpl.patches.Polygon(verts, **kws))
for ax, ax_patches in patches.items():
for patch in ax_patches:
self._postprocess_artist(patch, ax, orient)
ax.add_patch(patch)
def _standardize_coordinate_parameters(self, data, orient):
return data
def _postprocess_artist(self, artist, ax, orient):
pass
def _get_verts(self, data, orient):
dv = {"x": "y", "y": "x"}[orient]
data = data.sort_values(orient, kind="mergesort")
verts = np.concatenate([
data[[orient, f"{dv}min"]].to_numpy(),
data[[orient, f"{dv}max"]].to_numpy()[::-1],
])
if orient == "y":
verts = verts[:, ::-1]
return verts
def _legend_artist(self, variables, value, scales):
keys = {v: value for v in variables}
resolved = resolve_properties(self, keys, scales)
fc = resolve_color(self, keys, "", scales)
if not resolved["fill"]:
fc = mpl.colors.to_rgba(fc, 0)
return mpl.patches.Patch(
facecolor=fc,
edgecolor=resolve_color(self, keys, "edge", scales),
linewidth=resolved["edgewidth"],
linestyle=resolved["edgestyle"],
**self.artist_kws,
)
@document_properties
@dataclass
class Area(AreaBase, Mark):
"""
A fill mark drawn from a baseline to data values.
See also
--------
Band : A fill mark representing an interval between values.
Examples
--------
.. include:: ../docstrings/objects.Area.rst
"""
color: MappableColor = Mappable("C0", )
alpha: MappableFloat = Mappable(.2, )
fill: MappableBool = Mappable(True, )
edgecolor: MappableColor = Mappable(depend="color")
edgealpha: MappableFloat = Mappable(1, )
edgewidth: MappableFloat = Mappable(rc="patch.linewidth", )
edgestyle: MappableStyle = Mappable("-", )
# TODO should this be settable / mappable?
baseline: MappableFloat = Mappable(0, grouping=False)
def _standardize_coordinate_parameters(self, data, orient):
dv = {"x": "y", "y": "x"}[orient]
return data.rename(columns={"baseline": f"{dv}min", dv: f"{dv}max"})
def _postprocess_artist(self, artist, ax, orient):
# TODO copying a lot of code from Bar, let's abstract this
        # See comments there; I am not going to repeat them here
artist.set_linewidth(artist.get_linewidth() * 2)
linestyle = artist.get_linestyle()
if linestyle[1]:
linestyle = (linestyle[0], tuple(x / 2 for x in linestyle[1]))
artist.set_linestyle(linestyle)
artist.set_clip_path(artist.get_path(), artist.get_transform() + ax.transData)
if self.artist_kws.get("clip_on", True):
artist.set_clip_box(ax.bbox)
val_idx = ["y", "x"].index(orient)
artist.sticky_edges[val_idx][:] = (0, np.inf)
@document_properties
@dataclass
class Band(AreaBase, Mark):
"""
A fill mark representing an interval between values.
See also
--------
Area : A fill mark drawn from a baseline to data values.
Examples
--------
.. include:: ../docstrings/objects.Band.rst
"""
color: MappableColor = Mappable("C0", )
alpha: MappableFloat = Mappable(.2, )
fill: MappableBool = Mappable(True, )
edgecolor: MappableColor = Mappable(depend="color", )
edgealpha: MappableFloat = Mappable(1, )
edgewidth: MappableFloat = Mappable(0, )
edgestyle: MappableFloat = Mappable("-", )
def _standardize_coordinate_parameters(self, data, orient):
# dv = {"x": "y", "y": "x"}[orient]
# TODO assert that all(ymax >= ymin)?
        # TODO what if only one exists?
other = {"x": "y", "y": "x"}[orient]
if not set(data.columns) & {f"{other}min", f"{other}max"}:
agg = {f"{other}min": (other, "min"), f"{other}max": (other, "max")}
data = data.groupby(orient).agg(**agg).reset_index()
return data
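# --- Editor's illustrative sketch (not part of the committed file) ---
# When a Band layer receives only `y` values, the interval is derived per
# `x` position, exactly like the groupby/agg fallback above.
if __name__ == "__main__":
    import pandas as pd
    df = pd.DataFrame({"x": [1, 1, 2, 2], "y": [0, 2, 1, 3]})
    agg = {"ymin": ("y", "min"), "ymax": ("y", "max")}
    print(df.groupby("x").agg(**agg).reset_index())
    #    x  ymin  ymax
    # 0  1     0     2
    # 1  2     1     3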

View File

@@ -0,0 +1,252 @@
from __future__ import annotations
from collections import defaultdict
from dataclasses import dataclass
import numpy as np
import matplotlib as mpl
from seaborn._marks.base import (
Mark,
Mappable,
MappableBool,
MappableColor,
MappableFloat,
MappableStyle,
resolve_properties,
resolve_color,
document_properties
)
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from typing import Any
from matplotlib.artist import Artist
from seaborn._core.scales import Scale
class BarBase(Mark):
def _make_patches(self, data, scales, orient):
transform = scales[orient]._matplotlib_scale.get_transform()
forward = transform.transform
reverse = transform.inverted().transform
other = {"x": "y", "y": "x"}[orient]
pos = reverse(forward(data[orient]) - data["width"] / 2)
width = reverse(forward(data[orient]) + data["width"] / 2) - pos
val = (data[other] - data["baseline"]).to_numpy()
base = data["baseline"].to_numpy()
kws = self._resolve_properties(data, scales)
if orient == "x":
kws.update(x=pos, y=base, w=width, h=val)
else:
kws.update(x=base, y=pos, w=val, h=width)
kws.pop("width", None)
kws.pop("baseline", None)
val_dim = {"x": "h", "y": "w"}[orient]
bars, vals = [], []
for i in range(len(data)):
row = {k: v[i] for k, v in kws.items()}
# Skip bars with no value. It's possible we'll want to make this
            # an option (i.e. so you have an artist for animating or annotating),
# but let's keep things simple for now.
if not np.nan_to_num(row[val_dim]):
continue
bar = mpl.patches.Rectangle(
xy=(row["x"], row["y"]),
width=row["w"],
height=row["h"],
facecolor=row["facecolor"],
edgecolor=row["edgecolor"],
linestyle=row["edgestyle"],
linewidth=row["edgewidth"],
**self.artist_kws,
)
bars.append(bar)
vals.append(row[val_dim])
return bars, vals
def _resolve_properties(self, data, scales):
resolved = resolve_properties(self, data, scales)
resolved["facecolor"] = resolve_color(self, data, "", scales)
resolved["edgecolor"] = resolve_color(self, data, "edge", scales)
fc = resolved["facecolor"]
if isinstance(fc, tuple):
resolved["facecolor"] = fc[0], fc[1], fc[2], fc[3] * resolved["fill"]
else:
fc[:, 3] = fc[:, 3] * resolved["fill"] # TODO Is inplace mod a problem?
resolved["facecolor"] = fc
return resolved
def _legend_artist(
self, variables: list[str], value: Any, scales: dict[str, Scale],
) -> Artist:
# TODO return some sensible default?
key = {v: value for v in variables}
key = self._resolve_properties(key, scales)
artist = mpl.patches.Patch(
facecolor=key["facecolor"],
edgecolor=key["edgecolor"],
linewidth=key["edgewidth"],
linestyle=key["edgestyle"],
)
return artist
@document_properties
@dataclass
class Bar(BarBase):
"""
A bar mark drawn between baseline and data values.
See also
--------
Bars : A faster bar mark with defaults more suitable for histograms.
Examples
--------
.. include:: ../docstrings/objects.Bar.rst
"""
color: MappableColor = Mappable("C0", grouping=False)
alpha: MappableFloat = Mappable(.7, grouping=False)
fill: MappableBool = Mappable(True, grouping=False)
edgecolor: MappableColor = Mappable(depend="color", grouping=False)
edgealpha: MappableFloat = Mappable(1, grouping=False)
edgewidth: MappableFloat = Mappable(rc="patch.linewidth", grouping=False)
edgestyle: MappableStyle = Mappable("-", grouping=False)
# pattern: MappableString = Mappable(None) # TODO no Property yet
width: MappableFloat = Mappable(.8, grouping=False)
baseline: MappableFloat = Mappable(0, grouping=False) # TODO *is* this mappable?
def _plot(self, split_gen, scales, orient):
val_idx = ["y", "x"].index(orient)
for _, data, ax in split_gen():
bars, vals = self._make_patches(data, scales, orient)
for bar in bars:
# Because we are clipping the artist (see below), the edges end up
# looking half as wide as they actually are. I don't love this clumsy
# workaround, which is going to cause surprises if you work with the
# artists directly. We may need to revisit after feedback.
bar.set_linewidth(bar.get_linewidth() * 2)
linestyle = bar.get_linestyle()
if linestyle[1]:
linestyle = (linestyle[0], tuple(x / 2 for x in linestyle[1]))
bar.set_linestyle(linestyle)
# This is a bit of a hack to handle the fact that the edge lines are
# centered on the actual extents of the bar, and overlap when bars are
# stacked or dodged. We may discover that this causes problems and needs
# to be revisited at some point. Also it should be faster to clip with
            # a bbox than a path, but I can't work out how to get the intersection
# with the axes bbox.
bar.set_clip_path(bar.get_path(), bar.get_transform() + ax.transData)
if self.artist_kws.get("clip_on", True):
# It seems the above hack undoes the default axes clipping
bar.set_clip_box(ax.bbox)
bar.sticky_edges[val_idx][:] = (0, np.inf)
ax.add_patch(bar)
            # Add a container, which is useful for e.g. Axes.bar_label
orientation = {"x": "vertical", "y": "horizontal"}[orient]
container_kws = dict(datavalues=vals, orientation=orientation)
container = mpl.container.BarContainer(bars, **container_kws)
ax.add_container(container)
@document_properties
@dataclass
class Bars(BarBase):
"""
A faster bar mark with defaults more suitable for histograms.
See also
--------
Bar : A bar mark drawn between baseline and data values.
Examples
--------
.. include:: ../docstrings/objects.Bars.rst
"""
color: MappableColor = Mappable("C0", grouping=False)
alpha: MappableFloat = Mappable(.7, grouping=False)
fill: MappableBool = Mappable(True, grouping=False)
edgecolor: MappableColor = Mappable(rc="patch.edgecolor", grouping=False)
edgealpha: MappableFloat = Mappable(1, grouping=False)
edgewidth: MappableFloat = Mappable(auto=True, grouping=False)
edgestyle: MappableStyle = Mappable("-", grouping=False)
# pattern: MappableString = Mappable(None) # TODO no Property yet
width: MappableFloat = Mappable(1, grouping=False)
baseline: MappableFloat = Mappable(0, grouping=False) # TODO *is* this mappable?
def _plot(self, split_gen, scales, orient):
ori_idx = ["x", "y"].index(orient)
val_idx = ["y", "x"].index(orient)
patches = defaultdict(list)
for _, data, ax in split_gen():
bars, _ = self._make_patches(data, scales, orient)
patches[ax].extend(bars)
collections = {}
for ax, ax_patches in patches.items():
col = mpl.collections.PatchCollection(ax_patches, match_original=True)
col.sticky_edges[val_idx][:] = (0, np.inf)
ax.add_collection(col, autolim=False)
collections[ax] = col
# Workaround for matplotlib autoscaling bug
# https://github.com/matplotlib/matplotlib/issues/11898
# https://github.com/matplotlib/matplotlib/issues/23129
xys = np.vstack([path.vertices for path in col.get_paths()])
ax.update_datalim(xys)
if "edgewidth" not in scales and isinstance(self.edgewidth, Mappable):
for ax in collections:
ax.autoscale_view()
def get_dimensions(collection):
edges, widths = [], []
for verts in (path.vertices for path in collection.get_paths()):
edges.append(min(verts[:, ori_idx]))
widths.append(np.ptp(verts[:, ori_idx]))
return np.array(edges), np.array(widths)
min_width = np.inf
for ax, col in collections.items():
edges, widths = get_dimensions(col)
points = 72 / ax.figure.dpi * abs(
ax.transData.transform([edges + widths] * 2)
- ax.transData.transform([edges] * 2)
)
min_width = min(min_width, min(points[:, ori_idx]))
linewidth = min(.1 * min_width, mpl.rcParams["patch.linewidth"])
for _, col in collections.items():
col.set_linewidth(linewidth)
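# --- Editor's illustrative sketch (not part of the committed file) ---
# The automatic edgewidth above measures the narrowest bar in screen space;
# this shows the same data-units -> points conversion in isolation.
if __name__ == "__main__":
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots()
    ax.set_xlim(0, 10)
    edge, width = 2.0, 1.0  # a bar from x=2 to x=3, in data units
    pixels = (
        ax.transData.transform([(edge + width, 0)])
        - ax.transData.transform([(edge, 0)])
    )[0, 0]
    points = 72 / fig.dpi * pixels  # matplotlib linewidths are in points
    linewidth = min(.1 * points, mpl.rcParams["patch.linewidth"])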

View File

@@ -0,0 +1,317 @@
from __future__ import annotations
from dataclasses import dataclass, fields, field
import textwrap
from typing import Any, Callable, Union
from collections.abc import Generator
import numpy as np
import pandas as pd
import matplotlib as mpl
from numpy import ndarray
from pandas import DataFrame
from matplotlib.artist import Artist
from seaborn._core.scales import Scale
from seaborn._core.properties import (
PROPERTIES,
Property,
RGBATuple,
DashPattern,
DashPatternWithOffset,
)
from seaborn._core.exceptions import PlotSpecError
class Mappable:
def __init__(
self,
val: Any = None,
depend: str | None = None,
rc: str | None = None,
auto: bool = False,
grouping: bool = True,
):
"""
Property that can be mapped from data or set directly, with flexible defaults.
Parameters
----------
val : Any
Use this value as the default.
depend : str
Use the value of this feature as the default.
rc : str
Use the value of this rcParam as the default.
auto : bool
The default value will depend on other parameters at compile time.
grouping : bool
If True, use the mapped variable to define groups.
"""
if depend is not None:
assert depend in PROPERTIES
if rc is not None:
assert rc in mpl.rcParams
self._val = val
self._rc = rc
self._depend = depend
self._auto = auto
self._grouping = grouping
def __repr__(self):
"""Nice formatting for when object appears in Mark init signature."""
if self._val is not None:
s = f"<{repr(self._val)}>"
elif self._depend is not None:
s = f"<depend:{self._depend}>"
elif self._rc is not None:
s = f"<rc:{self._rc}>"
elif self._auto:
s = "<auto>"
else:
s = "<undefined>"
return s
@property
def depend(self) -> Any:
"""Return the name of the feature to source a default value from."""
return self._depend
@property
def grouping(self) -> bool:
return self._grouping
@property
def default(self) -> Any:
"""Get the default value for this feature, or access the relevant rcParam."""
if self._val is not None:
return self._val
elif self._rc is not None:
return mpl.rcParams.get(self._rc)
# TODO where is the right place to put this kind of type aliasing?
MappableBool = Union[bool, Mappable]
MappableString = Union[str, Mappable]
MappableFloat = Union[float, Mappable]
MappableColor = Union[str, tuple, Mappable]
MappableStyle = Union[str, DashPattern, DashPatternWithOffset, Mappable]
@dataclass
class Mark:
"""Base class for objects that visually represent data."""
artist_kws: dict = field(default_factory=dict)
@property
def _mappable_props(self):
return {
f.name: getattr(self, f.name) for f in fields(self)
if isinstance(f.default, Mappable)
}
@property
def _grouping_props(self):
# TODO does it make sense to have variation within a Mark's
# properties about whether they are grouping?
return [
f.name for f in fields(self)
if isinstance(f.default, Mappable) and f.default.grouping
]
    # TODO make this method private? Would an extender ever need to call it directly?
def _resolve(
self,
data: DataFrame | dict[str, Any],
name: str,
scales: dict[str, Scale] | None = None,
) -> Any:
"""Obtain default, specified, or mapped value for a named feature.
Parameters
----------
data : DataFrame or dict with scalar values
Container with data values for features that will be semantically mapped.
name : string
Identity of the feature / semantic.
scales: dict
Mapping from variable to corresponding scale object.
Returns
-------
value or array of values
Outer return type depends on whether `data` is a dict (implying that
we want a single value) or DataFrame (implying that we want an array
of values with matching length).
"""
feature = self._mappable_props[name]
prop = PROPERTIES.get(name, Property(name))
directly_specified = not isinstance(feature, Mappable)
return_multiple = isinstance(data, pd.DataFrame)
return_array = return_multiple and not name.endswith("style")
# Special case width because it needs to be resolved and added to the dataframe
# during layer prep (so the Move operations use it properly).
# TODO how does width *scaling* work, e.g. for violin width by count?
if name == "width":
directly_specified = directly_specified and name not in data
if directly_specified:
feature = prop.standardize(feature)
if return_multiple:
feature = [feature] * len(data)
if return_array:
feature = np.array(feature)
return feature
if name in data:
if scales is None or name not in scales:
# TODO Might this obviate the identity scale? Just don't add a scale?
feature = data[name]
else:
scale = scales[name]
value = data[name]
try:
feature = scale(value)
except Exception as err:
raise PlotSpecError._during("Scaling operation", name) from err
if return_array:
feature = np.asarray(feature)
return feature
if feature.depend is not None:
# TODO add source_func or similar to transform the source value?
# e.g. set linewidth as a proportion of pointsize?
return self._resolve(data, feature.depend, scales)
default = prop.standardize(feature.default)
if return_multiple:
default = [default] * len(data)
if return_array:
default = np.array(default)
return default
def _infer_orient(self, scales: dict) -> str: # TODO type scales
# TODO The original version of this (in seaborn._base) did more checking.
# Paring that down here for the prototype to see what restrictions make sense.
# TODO rethink this to map from scale type to "DV priority" and use that?
# e.g. Nominal > Discrete > Continuous
x = 0 if "x" not in scales else scales["x"]._priority
y = 0 if "y" not in scales else scales["y"]._priority
if y > x:
return "y"
else:
return "x"
def _plot(
self,
split_generator: Callable[[], Generator],
scales: dict[str, Scale],
orient: str,
) -> None:
"""Main interface for creating a plot."""
raise NotImplementedError()
def _legend_artist(
self, variables: list[str], value: Any, scales: dict[str, Scale],
) -> Artist | None:
return None
def resolve_properties(
mark: Mark, data: DataFrame, scales: dict[str, Scale]
) -> dict[str, Any]:
props = {
name: mark._resolve(data, name, scales) for name in mark._mappable_props
}
return props
def resolve_color(
mark: Mark,
data: DataFrame | dict,
prefix: str = "",
scales: dict[str, Scale] | None = None,
) -> RGBATuple | ndarray:
"""
Obtain a default, specified, or mapped value for a color feature.
This method exists separately to support the relationship between a
color and its corresponding alpha. We want to respect alpha values that
are passed in specified (or mapped) color values but also make use of a
separate `alpha` variable, which can be mapped. This approach may also
    be extended to support mapping of specific color channels (e.g.
luminance, chroma) in the future.
Parameters
----------
mark :
Mark with the color property.
data :
Container with data values for features that will be semantically mapped.
prefix :
Support "color", "fillcolor", etc.
"""
color = mark._resolve(data, f"{prefix}color", scales)
if f"{prefix}alpha" in mark._mappable_props:
alpha = mark._resolve(data, f"{prefix}alpha", scales)
else:
alpha = mark._resolve(data, "alpha", scales)
def visible(x, axis=None):
"""Detect "invisible" colors to set alpha appropriately."""
# TODO First clause only needed to handle non-rgba arrays,
# which we are trying to handle upstream
return np.array(x).dtype.kind != "f" or np.isfinite(x).all(axis)
# Second check here catches vectors of strings with identity scale
# It could probably be handled better upstream. This is a tricky problem
if np.ndim(color) < 2 and all(isinstance(x, float) for x in color):
if len(color) == 4:
return mpl.colors.to_rgba(color)
alpha = alpha if visible(color) else np.nan
return mpl.colors.to_rgba(color, alpha)
else:
if np.ndim(color) == 2 and color.shape[1] == 4:
return mpl.colors.to_rgba_array(color)
alpha = np.where(visible(color, axis=1), alpha, np.nan)
return mpl.colors.to_rgba_array(color, alpha)
# TODO should we be implementing fill here too?
# (i.e. set fillalpha to 0 when fill=False)
def document_properties(mark):
properties = [f.name for f in fields(mark) if isinstance(f.default, Mappable)]
text = [
"",
" This mark defines the following properties:",
textwrap.fill(
", ".join([f"|{p}|" for p in properties]),
width=78, initial_indent=" " * 8, subsequent_indent=" " * 8,
),
]
docstring_lines = mark.__doc__.split("\n")
new_docstring = "\n".join([
*docstring_lines[:2],
*text,
*docstring_lines[2:],
])
mark.__doc__ = new_docstring
return mark
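# --- Editor's illustrative sketch (not part of the committed file) ---
# A Mappable default can come from a literal value, an rcParam, or another
# property; _resolve falls back in that order when nothing is mapped.
if __name__ == "__main__":
    assert repr(Mappable(2.5)) == "<2.5>"
    assert Mappable(rc="lines.linewidth").default == mpl.rcParams["lines.linewidth"]
    assert Mappable(depend="color").depend == "color"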

View File

@@ -0,0 +1,200 @@
from __future__ import annotations
from dataclasses import dataclass
import numpy as np
import matplotlib as mpl
from seaborn._marks.base import (
Mark,
Mappable,
MappableBool,
MappableFloat,
MappableString,
MappableColor,
MappableStyle,
resolve_properties,
resolve_color,
document_properties,
)
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from typing import Any
from matplotlib.artist import Artist
from seaborn._core.scales import Scale
class DotBase(Mark):
def _resolve_paths(self, data):
paths = []
path_cache = {}
marker = data["marker"]
def get_transformed_path(m):
return m.get_path().transformed(m.get_transform())
if isinstance(marker, mpl.markers.MarkerStyle):
return get_transformed_path(marker)
for m in marker:
if m not in path_cache:
path_cache[m] = get_transformed_path(m)
paths.append(path_cache[m])
return paths
def _resolve_properties(self, data, scales):
resolved = resolve_properties(self, data, scales)
resolved["path"] = self._resolve_paths(resolved)
resolved["size"] = resolved["pointsize"] ** 2
if isinstance(data, dict): # Properties for single dot
filled_marker = resolved["marker"].is_filled()
else:
filled_marker = [m.is_filled() for m in resolved["marker"]]
resolved["fill"] = resolved["fill"] * filled_marker
return resolved
def _plot(self, split_gen, scales, orient):
# TODO Not backcompat with allowed (but nonfunctional) univariate plots
# (That should be solved upstream by defaulting to "" for unset x/y?)
# (Be mindful of xmin/xmax, etc!)
for _, data, ax in split_gen():
offsets = np.column_stack([data["x"], data["y"]])
data = self._resolve_properties(data, scales)
points = mpl.collections.PathCollection(
offsets=offsets,
paths=data["path"],
sizes=data["size"],
facecolors=data["facecolor"],
edgecolors=data["edgecolor"],
linewidths=data["linewidth"],
linestyles=data["edgestyle"],
transOffset=ax.transData,
transform=mpl.transforms.IdentityTransform(),
**self.artist_kws,
)
ax.add_collection(points)
def _legend_artist(
self, variables: list[str], value: Any, scales: dict[str, Scale],
) -> Artist:
key = {v: value for v in variables}
res = self._resolve_properties(key, scales)
return mpl.collections.PathCollection(
paths=[res["path"]],
sizes=[res["size"]],
facecolors=[res["facecolor"]],
edgecolors=[res["edgecolor"]],
linewidths=[res["linewidth"]],
linestyles=[res["edgestyle"]],
transform=mpl.transforms.IdentityTransform(),
**self.artist_kws,
)
@document_properties
@dataclass
class Dot(DotBase):
"""
A mark suitable for dot plots or less-dense scatterplots.
See also
--------
Dots : A dot mark defined by strokes to better handle overplotting.
Examples
--------
.. include:: ../docstrings/objects.Dot.rst
"""
marker: MappableString = Mappable("o", grouping=False)
pointsize: MappableFloat = Mappable(6, grouping=False) # TODO rcParam?
stroke: MappableFloat = Mappable(.75, grouping=False) # TODO rcParam?
color: MappableColor = Mappable("C0", grouping=False)
alpha: MappableFloat = Mappable(1, grouping=False)
fill: MappableBool = Mappable(True, grouping=False)
edgecolor: MappableColor = Mappable(depend="color", grouping=False)
edgealpha: MappableFloat = Mappable(depend="alpha", grouping=False)
edgewidth: MappableFloat = Mappable(.5, grouping=False) # TODO rcParam?
edgestyle: MappableStyle = Mappable("-", grouping=False)
def _resolve_properties(self, data, scales):
resolved = super()._resolve_properties(data, scales)
filled = resolved["fill"]
main_stroke = resolved["stroke"]
edge_stroke = resolved["edgewidth"]
resolved["linewidth"] = np.where(filled, edge_stroke, main_stroke)
main_color = resolve_color(self, data, "", scales)
edge_color = resolve_color(self, data, "edge", scales)
if not np.isscalar(filled):
# Expand dims to use in np.where with rgba arrays
filled = filled[:, None]
resolved["edgecolor"] = np.where(filled, edge_color, main_color)
filled = np.squeeze(filled)
if isinstance(main_color, tuple):
# TODO handle this in resolve_color
main_color = tuple([*main_color[:3], main_color[3] * filled])
else:
main_color = np.c_[main_color[:, :3], main_color[:, 3] * filled]
resolved["facecolor"] = main_color
return resolved
@document_properties
@dataclass
class Dots(DotBase):
"""
A dot mark defined by strokes to better handle overplotting.
See also
--------
Dot : A mark suitable for dot plots or less-dense scatterplots.
Examples
--------
.. include:: ../docstrings/objects.Dots.rst
"""
# TODO retype marker as MappableMarker
marker: MappableString = Mappable(rc="scatter.marker", grouping=False)
pointsize: MappableFloat = Mappable(4, grouping=False) # TODO rcParam?
stroke: MappableFloat = Mappable(.75, grouping=False) # TODO rcParam?
color: MappableColor = Mappable("C0", grouping=False)
alpha: MappableFloat = Mappable(1, grouping=False) # TODO auto alpha?
fill: MappableBool = Mappable(True, grouping=False)
fillcolor: MappableColor = Mappable(depend="color", grouping=False)
fillalpha: MappableFloat = Mappable(.2, grouping=False)
def _resolve_properties(self, data, scales):
resolved = super()._resolve_properties(data, scales)
resolved["linewidth"] = resolved.pop("stroke")
resolved["facecolor"] = resolve_color(self, data, "fill", scales)
resolved["edgecolor"] = resolve_color(self, data, "", scales)
resolved.setdefault("edgestyle", (0, None))
fc = resolved["facecolor"]
if isinstance(fc, tuple):
resolved["facecolor"] = fc[0], fc[1], fc[2], fc[3] * resolved["fill"]
else:
fc[:, 3] = fc[:, 3] * resolved["fill"] # TODO Is inplace mod a problem?
resolved["facecolor"] = fc
return resolved
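# --- Editor's illustrative sketch (not part of the committed file) ---
# Dot switches between the edge channel (filled markers) and the main color
# with `stroke` (unfilled markers); the same np.where pattern in isolation:
if __name__ == "__main__":
    filled = np.array([True, False])
    stroke, edgewidth = .75, .5
    assert list(np.where(filled, edgewidth, stroke)) == [.5, .75]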

View File

@@ -0,0 +1,285 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import ClassVar
import numpy as np
import matplotlib as mpl
from seaborn._marks.base import (
Mark,
Mappable,
MappableFloat,
MappableString,
MappableColor,
resolve_properties,
resolve_color,
document_properties,
)
@document_properties
@dataclass
class Path(Mark):
"""
A mark connecting data points in the order they appear.
See also
--------
Line : A mark connecting data points with sorting along the orientation axis.
Paths : A faster but less-flexible mark for drawing many paths.
Examples
--------
.. include:: ../docstrings/objects.Path.rst
"""
color: MappableColor = Mappable("C0")
alpha: MappableFloat = Mappable(1)
linewidth: MappableFloat = Mappable(rc="lines.linewidth")
linestyle: MappableString = Mappable(rc="lines.linestyle")
marker: MappableString = Mappable(rc="lines.marker")
pointsize: MappableFloat = Mappable(rc="lines.markersize")
fillcolor: MappableColor = Mappable(depend="color")
edgecolor: MappableColor = Mappable(depend="color")
edgewidth: MappableFloat = Mappable(rc="lines.markeredgewidth")
_sort: ClassVar[bool] = False
def _plot(self, split_gen, scales, orient):
for keys, data, ax in split_gen(keep_na=not self._sort):
vals = resolve_properties(self, keys, scales)
vals["color"] = resolve_color(self, keys, scales=scales)
vals["fillcolor"] = resolve_color(self, keys, prefix="fill", scales=scales)
vals["edgecolor"] = resolve_color(self, keys, prefix="edge", scales=scales)
if self._sort:
data = data.sort_values(orient, kind="mergesort")
artist_kws = self.artist_kws.copy()
self._handle_capstyle(artist_kws, vals)
line = mpl.lines.Line2D(
data["x"].to_numpy(),
data["y"].to_numpy(),
color=vals["color"],
linewidth=vals["linewidth"],
linestyle=vals["linestyle"],
marker=vals["marker"],
markersize=vals["pointsize"],
markerfacecolor=vals["fillcolor"],
markeredgecolor=vals["edgecolor"],
markeredgewidth=vals["edgewidth"],
**artist_kws,
)
ax.add_line(line)
def _legend_artist(self, variables, value, scales):
keys = {v: value for v in variables}
vals = resolve_properties(self, keys, scales)
vals["color"] = resolve_color(self, keys, scales=scales)
vals["fillcolor"] = resolve_color(self, keys, prefix="fill", scales=scales)
vals["edgecolor"] = resolve_color(self, keys, prefix="edge", scales=scales)
artist_kws = self.artist_kws.copy()
self._handle_capstyle(artist_kws, vals)
return mpl.lines.Line2D(
[], [],
color=vals["color"],
linewidth=vals["linewidth"],
linestyle=vals["linestyle"],
marker=vals["marker"],
markersize=vals["pointsize"],
markerfacecolor=vals["fillcolor"],
markeredgecolor=vals["edgecolor"],
markeredgewidth=vals["edgewidth"],
**artist_kws,
)
def _handle_capstyle(self, kws, vals):
# Work around for this matplotlib issue:
# https://github.com/matplotlib/matplotlib/issues/23437
if vals["linestyle"][1] is None:
capstyle = kws.get("solid_capstyle", mpl.rcParams["lines.solid_capstyle"])
kws["dash_capstyle"] = capstyle
@document_properties
@dataclass
class Line(Path):
"""
A mark connecting data points with sorting along the orientation axis.
See also
--------
Path : A mark connecting data points in the order they appear.
Lines : A faster but less-flexible mark for drawing many lines.
Examples
--------
.. include:: ../docstrings/objects.Line.rst
"""
_sort: ClassVar[bool] = True
@document_properties
@dataclass
class Paths(Mark):
"""
A faster but less-flexible mark for drawing many paths.
See also
--------
Path : A mark connecting data points in the order they appear.
Examples
--------
.. include:: ../docstrings/objects.Paths.rst
"""
color: MappableColor = Mappable("C0")
alpha: MappableFloat = Mappable(1)
linewidth: MappableFloat = Mappable(rc="lines.linewidth")
linestyle: MappableString = Mappable(rc="lines.linestyle")
_sort: ClassVar[bool] = False
def __post_init__(self):
# LineCollection artists have a capstyle property but don't source its value
# from the rc, so we do that manually here. Unfortunately, because we add
        # only one LineCollection, we have to use the same capstyle for all lines
# even when they are dashed. It's a slight inconsistency, but looks fine IMO.
self.artist_kws.setdefault("capstyle", mpl.rcParams["lines.solid_capstyle"])
def _plot(self, split_gen, scales, orient):
line_data = {}
for keys, data, ax in split_gen(keep_na=not self._sort):
if ax not in line_data:
line_data[ax] = {
"segments": [],
"colors": [],
"linewidths": [],
"linestyles": [],
}
segments = self._setup_segments(data, orient)
line_data[ax]["segments"].extend(segments)
n = len(segments)
vals = resolve_properties(self, keys, scales)
vals["color"] = resolve_color(self, keys, scales=scales)
line_data[ax]["colors"].extend([vals["color"]] * n)
line_data[ax]["linewidths"].extend([vals["linewidth"]] * n)
line_data[ax]["linestyles"].extend([vals["linestyle"]] * n)
for ax, ax_data in line_data.items():
lines = mpl.collections.LineCollection(**ax_data, **self.artist_kws)
# Handle datalim update manually
# https://github.com/matplotlib/matplotlib/issues/23129
ax.add_collection(lines, autolim=False)
if ax_data["segments"]:
xy = np.concatenate(ax_data["segments"])
ax.update_datalim(xy)
def _legend_artist(self, variables, value, scales):
key = resolve_properties(self, {v: value for v in variables}, scales)
artist_kws = self.artist_kws.copy()
capstyle = artist_kws.pop("capstyle")
artist_kws["solid_capstyle"] = capstyle
artist_kws["dash_capstyle"] = capstyle
return mpl.lines.Line2D(
[], [],
color=key["color"],
linewidth=key["linewidth"],
linestyle=key["linestyle"],
**artist_kws,
)
def _setup_segments(self, data, orient):
if self._sort:
data = data.sort_values(orient, kind="mergesort")
# Column stack to avoid block consolidation
xy = np.column_stack([data["x"], data["y"]])
return [xy]
@document_properties
@dataclass
class Lines(Paths):
"""
A faster but less-flexible mark for drawing many lines.
See also
--------
Line : A mark connecting data points with sorting along the orientation axis.
Examples
--------
.. include:: ../docstrings/objects.Lines.rst
"""
_sort: ClassVar[bool] = True
@document_properties
@dataclass
class Range(Paths):
"""
An oriented line mark drawn between min/max values.
Examples
--------
.. include:: ../docstrings/objects.Range.rst
"""
def _setup_segments(self, data, orient):
# TODO better checks on what variables we have
        # TODO what if only one exists?
val = {"x": "y", "y": "x"}[orient]
if not set(data.columns) & {f"{val}min", f"{val}max"}:
agg = {f"{val}min": (val, "min"), f"{val}max": (val, "max")}
data = data.groupby(orient).agg(**agg).reset_index()
cols = [orient, f"{val}min", f"{val}max"]
data = data[cols].melt(orient, value_name=val)[["x", "y"]]
segments = [d.to_numpy() for _, d in data.groupby(orient)]
return segments
@document_properties
@dataclass
class Dash(Paths):
"""
A line mark drawn as an oriented segment for each datapoint.
Examples
--------
.. include:: ../docstrings/objects.Dash.rst
"""
width: MappableFloat = Mappable(.8, grouping=False)
def _setup_segments(self, data, orient):
ori = ["x", "y"].index(orient)
xys = data[["x", "y"]].to_numpy().astype(float)
segments = np.stack([xys, xys], axis=1)
segments[:, 0, ori] -= data["width"] / 2
segments[:, 1, ori] += data["width"] / 2
return segments
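# --- Editor's illustrative sketch (not part of the committed file) ---
# Dash turns each point into a segment of `width` centered on it along the
# orientation axis; here the point (1, 5) becomes roughly (0.6, 5) -> (1.4, 5).
if __name__ == "__main__":
    xys = np.array([[1.0, 5.0], [2.0, 6.0]])
    segments = np.stack([xys, xys], axis=1)
    segments[:, 0, 0] -= .8 / 2  # shift the first copy left by half the width
    segments[:, 1, 0] += .8 / 2  # shift the second copy right by half the width
    print(segments[0])  # first point's segment endpoints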

View File

@@ -0,0 +1,76 @@
from __future__ import annotations
from collections import defaultdict
from dataclasses import dataclass
import numpy as np
import matplotlib as mpl
from matplotlib.transforms import ScaledTranslation
from seaborn._marks.base import (
Mark,
Mappable,
MappableFloat,
MappableString,
MappableColor,
resolve_properties,
resolve_color,
document_properties,
)
@document_properties
@dataclass
class Text(Mark):
"""
A textual mark to annotate or represent data values.
Examples
--------
.. include:: ../docstrings/objects.Text.rst
"""
text: MappableString = Mappable("")
color: MappableColor = Mappable("k")
alpha: MappableFloat = Mappable(1)
fontsize: MappableFloat = Mappable(rc="font.size")
halign: MappableString = Mappable("center")
valign: MappableString = Mappable("center_baseline")
offset: MappableFloat = Mappable(4)
def _plot(self, split_gen, scales, orient):
ax_data = defaultdict(list)
for keys, data, ax in split_gen():
vals = resolve_properties(self, keys, scales)
color = resolve_color(self, keys, "", scales)
halign = vals["halign"]
valign = vals["valign"]
fontsize = vals["fontsize"]
offset = vals["offset"] / 72
offset_trans = ScaledTranslation(
{"right": -offset, "left": +offset}.get(halign, 0),
{"top": -offset, "bottom": +offset, "baseline": +offset}.get(valign, 0),
ax.figure.dpi_scale_trans,
)
for row in data.to_dict("records"):
artist = mpl.text.Text(
x=row["x"],
y=row["y"],
text=str(row.get("text", vals["text"])),
color=color,
fontsize=fontsize,
horizontalalignment=halign,
verticalalignment=valign,
transform=ax.transData + offset_trans,
**self.artist_kws,
)
ax.add_artist(artist)
ax_data[ax].append([row["x"], row["y"]])
for ax, ax_vals in ax_data.items():
ax.update_datalim(np.array(ax_vals))
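# --- Editor's illustrative sketch (not part of the committed file) ---
# A 4-point nudge is expressed in inches (4 / 72) and routed through
# dpi_scale_trans, so it stays constant under zooming and resizing.
if __name__ == "__main__":
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots()
    offset = 4 / 72  # points -> inches
    trans = ax.transData + ScaledTranslation(offset, offset, fig.dpi_scale_trans)
    ax.text(.5, .5, "annotation", transform=trans)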

View File

@@ -0,0 +1,698 @@
"""Statistical transformations for visualization.
This module is currently private, but is being written to eventually form part
of the public API.
The classes should behave roughly in the style of scikit-learn.
- All data-independent parameters should be passed to the class constructor.
- Each class should implement a default transformation that is exposed through
__call__. These are currently written for vector arguments, but I think
  consuming a whole `plot_data` DataFrame and returning it with transformed
  variables would make more sense.
- Some classes have data-dependent preprocessing that should be cached and used
  multiple times (think defining histogram bins off all data and then counting
  observations within each bin multiple times per data subset). These currently
have unique names, but it would be good to have a common name. Not quite
`fit`, but something similar.
- Alternatively, the transform interface could take some information about grouping
variables and do a groupby internally.
- Some classes should define alternate transforms that might make the most sense
with a different function. For example, KDE usually evaluates the distribution
on a regular grid, but it would be useful for it to transform at the actual
datapoints. Then again, this could be controlled by a parameter at the time of
class instantiation.
"""
from numbers import Number
from statistics import NormalDist
import numpy as np
import pandas as pd
try:
from scipy.stats import gaussian_kde
_no_scipy = False
except ImportError:
from .external.kde import gaussian_kde
_no_scipy = True
from .algorithms import bootstrap
from .utils import _check_argument
class KDE:
"""Univariate and bivariate kernel density estimator."""
def __init__(
self, *,
bw_method=None,
bw_adjust=1,
gridsize=200,
cut=3,
clip=None,
cumulative=False,
):
"""Initialize the estimator with its parameters.
Parameters
----------
bw_method : string, scalar, or callable, optional
Method for determining the smoothing bandwidth to use; passed to
:class:`scipy.stats.gaussian_kde`.
bw_adjust : number, optional
Factor that multiplicatively scales the value chosen using
``bw_method``. Increasing will make the curve smoother. See Notes.
gridsize : int, optional
Number of points on each dimension of the evaluation grid.
cut : number, optional
Factor, multiplied by the smoothing bandwidth, that determines how
far the evaluation grid extends past the extreme datapoints. When
set to 0, truncate the curve at the data limits.
clip : pair of numbers or None, or a pair of such pairs
Do not evaluate the density outside of these limits.
cumulative : bool, optional
If True, estimate a cumulative distribution function. Requires scipy.
"""
if clip is None:
clip = None, None
self.bw_method = bw_method
self.bw_adjust = bw_adjust
self.gridsize = gridsize
self.cut = cut
self.clip = clip
self.cumulative = cumulative
if cumulative and _no_scipy:
raise RuntimeError("Cumulative KDE evaluation requires scipy")
self.support = None
def _define_support_grid(self, x, bw, cut, clip, gridsize):
"""Create the grid of evaluation points depending for vector x."""
clip_lo = -np.inf if clip[0] is None else clip[0]
clip_hi = +np.inf if clip[1] is None else clip[1]
gridmin = max(x.min() - bw * cut, clip_lo)
gridmax = min(x.max() + bw * cut, clip_hi)
return np.linspace(gridmin, gridmax, gridsize)
def _define_support_univariate(self, x, weights):
"""Create a 1D grid of evaluation points."""
kde = self._fit(x, weights)
bw = np.sqrt(kde.covariance.squeeze())
grid = self._define_support_grid(
x, bw, self.cut, self.clip, self.gridsize
)
return grid
def _define_support_bivariate(self, x1, x2, weights):
"""Create a 2D grid of evaluation points."""
clip = self.clip
if clip[0] is None or np.isscalar(clip[0]):
clip = (clip, clip)
kde = self._fit([x1, x2], weights)
bw = np.sqrt(np.diag(kde.covariance).squeeze())
grid1 = self._define_support_grid(
x1, bw[0], self.cut, clip[0], self.gridsize
)
grid2 = self._define_support_grid(
x2, bw[1], self.cut, clip[1], self.gridsize
)
return grid1, grid2
def define_support(self, x1, x2=None, weights=None, cache=True):
"""Create the evaluation grid for a given data set."""
if x2 is None:
support = self._define_support_univariate(x1, weights)
else:
support = self._define_support_bivariate(x1, x2, weights)
if cache:
self.support = support
return support
def _fit(self, fit_data, weights=None):
"""Fit the scipy kde while adding bw_adjust logic and version check."""
fit_kws = {"bw_method": self.bw_method}
if weights is not None:
fit_kws["weights"] = weights
kde = gaussian_kde(fit_data, **fit_kws)
kde.set_bandwidth(kde.factor * self.bw_adjust)
return kde
def _eval_univariate(self, x, weights=None):
"""Fit and evaluate a univariate on univariate data."""
support = self.support
if support is None:
support = self.define_support(x, cache=False)
kde = self._fit(x, weights)
if self.cumulative:
s_0 = support[0]
density = np.array([
kde.integrate_box_1d(s_0, s_i) for s_i in support
])
else:
density = kde(support)
return density, support
def _eval_bivariate(self, x1, x2, weights=None):
"""Fit and evaluate a univariate on bivariate data."""
support = self.support
if support is None:
support = self.define_support(x1, x2, cache=False)
kde = self._fit([x1, x2], weights)
if self.cumulative:
grid1, grid2 = support
density = np.zeros((grid1.size, grid2.size))
p0 = grid1.min(), grid2.min()
for i, xi in enumerate(grid1):
for j, xj in enumerate(grid2):
density[i, j] = kde.integrate_box(p0, (xi, xj))
else:
xx1, xx2 = np.meshgrid(*support)
density = kde([xx1.ravel(), xx2.ravel()]).reshape(xx1.shape)
return density, support
def __call__(self, x1, x2=None, weights=None):
"""Fit and evaluate on univariate or bivariate data."""
if x2 is None:
return self._eval_univariate(x1, weights)
else:
return self._eval_bivariate(x1, x2, weights)
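# --- Editor's illustrative sketch (not part of the committed file) ---
# Evaluate a smoothed density on the automatically defined support grid;
# bw_adjust > 1 yields a smoother curve.
if __name__ == "__main__":
    x = np.random.default_rng(0).normal(size=100)
    density, support = KDE(bw_adjust=2)(x)
    assert density.shape == support.shape == (200,)  # default gridsize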
# Note: we no longer use this for univariate histograms in histplot,
# preferring _stats.Hist. We'll deprecate this once we have a bivariate Stat class.
class Histogram:
"""Univariate and bivariate histogram estimator."""
def __init__(
self,
stat="count",
bins="auto",
binwidth=None,
binrange=None,
discrete=False,
cumulative=False,
):
"""Initialize the estimator with its parameters.
Parameters
----------
stat : str
Aggregate statistic to compute in each bin.
- `count`: show the number of observations in each bin
- `frequency`: show the number of observations divided by the bin width
- `probability` or `proportion`: normalize such that bar heights sum to 1
- `percent`: normalize such that bar heights sum to 100
- `density`: normalize such that the total area of the histogram equals 1
bins : str, number, vector, or a pair of such values
Generic bin parameter that can be the name of a reference rule,
the number of bins, or the breaks of the bins.
Passed to :func:`numpy.histogram_bin_edges`.
binwidth : number or pair of numbers
Width of each bin, overrides ``bins`` but can be used with
``binrange``.
binrange : pair of numbers or a pair of pairs
Lowest and highest value for bin edges; can be used either
with ``bins`` or ``binwidth``. Defaults to data extremes.
discrete : bool or pair of bools
If True, set ``binwidth`` and ``binrange`` such that bin
edges cover integer values in the dataset.
cumulative : bool
If True, return the cumulative statistic.
"""
stat_choices = [
"count", "frequency", "density", "probability", "proportion", "percent",
]
_check_argument("stat", stat_choices, stat)
self.stat = stat
self.bins = bins
self.binwidth = binwidth
self.binrange = binrange
self.discrete = discrete
self.cumulative = cumulative
self.bin_kws = None
def _define_bin_edges(self, x, weights, bins, binwidth, binrange, discrete):
"""Inner function that takes bin parameters as arguments."""
if binrange is None:
start, stop = x.min(), x.max()
else:
start, stop = binrange
if discrete:
bin_edges = np.arange(start - .5, stop + 1.5)
elif binwidth is not None:
step = binwidth
bin_edges = np.arange(start, stop + step, step)
# Handle roundoff error (maybe there is a less clumsy way?)
if bin_edges.max() < stop or len(bin_edges) < 2:
bin_edges = np.append(bin_edges, bin_edges.max() + step)
else:
bin_edges = np.histogram_bin_edges(
x, bins, binrange, weights,
)
return bin_edges
def define_bin_params(self, x1, x2=None, weights=None, cache=True):
"""Given data, return numpy.histogram parameters to define bins."""
if x2 is None:
bin_edges = self._define_bin_edges(
x1, weights, self.bins, self.binwidth, self.binrange, self.discrete,
)
if isinstance(self.bins, (str, Number)):
n_bins = len(bin_edges) - 1
bin_range = bin_edges.min(), bin_edges.max()
bin_kws = dict(bins=n_bins, range=bin_range)
else:
bin_kws = dict(bins=bin_edges)
else:
bin_edges = []
for i, x in enumerate([x1, x2]):
                # Work out whether bin parameters are shared
# or specific to each variable
bins = self.bins
if not bins or isinstance(bins, (str, Number)):
pass
elif isinstance(bins[i], str):
bins = bins[i]
elif len(bins) == 2:
bins = bins[i]
binwidth = self.binwidth
if binwidth is None:
pass
elif not isinstance(binwidth, Number):
binwidth = binwidth[i]
binrange = self.binrange
if binrange is None:
pass
elif not isinstance(binrange[0], Number):
binrange = binrange[i]
discrete = self.discrete
if not isinstance(discrete, bool):
discrete = discrete[i]
# Define the bins for this variable
bin_edges.append(self._define_bin_edges(
x, weights, bins, binwidth, binrange, discrete,
))
bin_kws = dict(bins=tuple(bin_edges))
if cache:
self.bin_kws = bin_kws
return bin_kws
def _eval_bivariate(self, x1, x2, weights):
"""Inner function for histogram of two variables."""
bin_kws = self.bin_kws
if bin_kws is None:
bin_kws = self.define_bin_params(x1, x2, cache=False)
density = self.stat == "density"
hist, *bin_edges = np.histogram2d(
x1, x2, **bin_kws, weights=weights, density=density
)
area = np.outer(
np.diff(bin_edges[0]),
np.diff(bin_edges[1]),
)
if self.stat == "probability" or self.stat == "proportion":
hist = hist.astype(float) / hist.sum()
elif self.stat == "percent":
hist = hist.astype(float) / hist.sum() * 100
elif self.stat == "frequency":
hist = hist.astype(float) / area
if self.cumulative:
if self.stat in ["density", "frequency"]:
hist = (hist * area).cumsum(axis=0).cumsum(axis=1)
else:
hist = hist.cumsum(axis=0).cumsum(axis=1)
return hist, bin_edges
def _eval_univariate(self, x, weights):
"""Inner function for histogram of one variable."""
bin_kws = self.bin_kws
if bin_kws is None:
bin_kws = self.define_bin_params(x, weights=weights, cache=False)
density = self.stat == "density"
hist, bin_edges = np.histogram(
x, **bin_kws, weights=weights, density=density,
)
if self.stat == "probability" or self.stat == "proportion":
hist = hist.astype(float) / hist.sum()
elif self.stat == "percent":
hist = hist.astype(float) / hist.sum() * 100
elif self.stat == "frequency":
hist = hist.astype(float) / np.diff(bin_edges)
if self.cumulative:
if self.stat in ["density", "frequency"]:
hist = (hist * np.diff(bin_edges)).cumsum()
else:
hist = hist.cumsum()
return hist, bin_edges
def __call__(self, x1, x2=None, weights=None):
"""Count the occurrences in each bin, maybe normalize."""
if x2 is None:
return self._eval_univariate(x1, weights)
else:
return self._eval_bivariate(x1, x2, weights)
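# A minimal usage sketch for the estimator above, assuming this module is
# importable as seaborn._statistics and using made-up data:
#
#     import numpy as np
#     from seaborn._statistics import Histogram
#
#     x = np.random.default_rng(0).normal(size=500)
#     heights, edges = Histogram(stat="density", bins=20)(x)
#     (heights * np.diff(edges)).sum()  # ~1.0, since bar areas integrate to 1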
class ECDF:
"""Univariate empirical cumulative distribution estimator."""
def __init__(self, stat="proportion", complementary=False):
"""Initialize the class with its parameters
Parameters
----------
stat : {{"proportion", "percent", "count"}}
Distribution statistic to compute.
complementary : bool
If True, use the complementary CDF (1 - CDF)
"""
_check_argument("stat", ["count", "percent", "proportion"], stat)
self.stat = stat
self.complementary = complementary
def _eval_bivariate(self, x1, x2, weights):
"""Inner function for ECDF of two variables."""
raise NotImplementedError("Bivariate ECDF is not implemented")
def _eval_univariate(self, x, weights):
"""Inner function for ECDF of one variable."""
sorter = x.argsort()
x = x[sorter]
weights = weights[sorter]
y = weights.cumsum()
if self.stat in ["percent", "proportion"]:
y = y / y.max()
if self.stat == "percent":
y = y * 100
x = np.r_[-np.inf, x]
y = np.r_[0, y]
if self.complementary:
y = y.max() - y
return y, x
def __call__(self, x1, x2=None, weights=None):
"""Return proportion or count of observations below each sorted datapoint."""
x1 = np.asarray(x1)
if weights is None:
weights = np.ones_like(x1)
else:
weights = np.asarray(weights)
if x2 is None:
return self._eval_univariate(x1, weights)
else:
return self._eval_bivariate(x1, x2, weights)
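# A minimal usage sketch, assuming this module is importable as
# seaborn._statistics (note the (y, x) return order and the leading -inf
# point that anchors the curve at 0):
#
#     import numpy as np
#     from seaborn._statistics import ECDF
#
#     y, xs = ECDF()(np.array([3.0, 1.0, 2.0]))
#     # xs -> [-inf, 1.0, 2.0, 3.0]; y -> [0.0, 1/3, 2/3, 1.0]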
class EstimateAggregator:
def __init__(self, estimator, errorbar=None, **boot_kws):
"""
Data aggregator that produces an estimate and error bar interval.
Parameters
----------
estimator : callable or string
Function (or method name) that maps a vector to a scalar.
errorbar : string, (string, number) tuple, or callable
Name of errorbar method (either "ci", "pi", "se", or "sd"), or a tuple
with a method name and a level parameter, or a function that maps from a
vector to a (min, max) interval, or None to hide errorbar. See the
:doc:`errorbar tutorial </tutorial/error_bars>` for more information.
boot_kws
Additional keywords are passed to bootstrap when error_method is "ci".
"""
self.estimator = estimator
method, level = _validate_errorbar_arg(errorbar)
self.error_method = method
self.error_level = level
self.boot_kws = boot_kws
def __call__(self, data, var):
"""Aggregate over `var` column of `data` with estimate and error interval."""
vals = data[var]
if callable(self.estimator):
# You would think we could pass to vals.agg, and yet:
# https://github.com/mwaskom/seaborn/issues/2943
estimate = self.estimator(vals)
else:
estimate = vals.agg(self.estimator)
# Options that produce no error bars
if self.error_method is None:
err_min = err_max = np.nan
elif len(data) <= 1:
err_min = err_max = np.nan
# Generic errorbars from user-supplied function
elif callable(self.error_method):
err_min, err_max = self.error_method(vals)
# Parametric options
elif self.error_method == "sd":
half_interval = vals.std() * self.error_level
err_min, err_max = estimate - half_interval, estimate + half_interval
elif self.error_method == "se":
half_interval = vals.sem() * self.error_level
err_min, err_max = estimate - half_interval, estimate + half_interval
# Nonparametric options
elif self.error_method == "pi":
err_min, err_max = _percentile_interval(vals, self.error_level)
elif self.error_method == "ci":
units = data.get("units", None)
boots = bootstrap(vals, units=units, func=self.estimator, **self.boot_kws)
err_min, err_max = _percentile_interval(boots, self.error_level)
return pd.Series({var: estimate, f"{var}min": err_min, f"{var}max": err_max})
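# A minimal usage sketch, assuming seaborn._statistics is importable; the
# ("sd", 2) interval is parametric, so no bootstrapping is involved:
#
#     import pandas as pd
#     from seaborn._statistics import EstimateAggregator
#
#     agg = EstimateAggregator("mean", ("sd", 2))
#     agg(pd.DataFrame({"y": [1.0, 2.0, 3.0]}), "y")
#     # -> Series(y=2.0, ymin=0.0, ymax=4.0), i.e. mean -/+ 2 * std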
class WeightedAggregator:
def __init__(self, estimator, errorbar=None, **boot_kws):
"""
Data aggregator that produces a weighted estimate and error bar interval.
Parameters
----------
estimator : string
Function (or method name) that maps a vector to a scalar. Currently
supports only "mean".
errorbar : string or (string, number) tuple
Name of errorbar method or a tuple with a method name and a level parameter.
Currently the only supported method is "ci".
boot_kws
Additional keywords are passed to bootstrap when error_method is "ci".
"""
if estimator != "mean":
# Note that, while other weighted estimators may make sense (e.g. median),
# I'm not aware of an implementation in our dependencies. We can add one
# in seaborn later, if there is sufficient interest. For now, limit to mean.
raise ValueError(f"Weighted estimator must be 'mean', not {estimator!r}.")
self.estimator = estimator
method, level = _validate_errorbar_arg(errorbar)
if method is not None and method != "ci":
# As with the estimator, weighted 'sd' or 'pi' error bars may make sense.
# But we'll keep things simple for now and limit to (bootstrap) CI.
raise ValueError(f"Error bar method must be 'ci', not {method!r}.")
self.error_method = method
self.error_level = level
self.boot_kws = boot_kws
def __call__(self, data, var):
"""Aggregate over `var` column of `data` with estimate and error interval."""
vals = data[var]
weights = data["weight"]
estimate = np.average(vals, weights=weights)
if self.error_method == "ci" and len(data) > 1:
def error_func(x, w):
return np.average(x, weights=w)
boots = bootstrap(vals, weights, func=error_func, **self.boot_kws)
err_min, err_max = _percentile_interval(boots, self.error_level)
else:
err_min = err_max = np.nan
return pd.Series({var: estimate, f"{var}min": err_min, f"{var}max": err_max})
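# A minimal usage sketch, assuming seaborn._statistics is importable; with
# errorbar=None the bootstrap is skipped and only the weighted mean remains:
#
#     import pandas as pd
#     from seaborn._statistics import WeightedAggregator
#
#     df = pd.DataFrame({"y": [1.0, 2.0], "weight": [3.0, 1.0]})
#     WeightedAggregator("mean")(df, "y")  # y == 1.25, ymin/ymax are NaN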
class LetterValues:
def __init__(self, k_depth, outlier_prop, trust_alpha):
"""
Compute percentiles of a distribution using various tail stopping rules.
Parameters
----------
k_depth: "tukey", "proportion", "trustworthy", or "full"
Stopping rule for choosing tail percentiles to show:
- tukey: Show a similar number of outliers as in a conventional boxplot.
- proportion: Show approximately `outlier_prop` outliers.
- trustworthy: Use the `trust_alpha` level for the most extreme tail percentile.
- full: Extend the boxes to cover the full range of the data.
outlier_prop: float
Parameter for `k_depth="proportion"` setting the expected outlier rate.
trust_alpha: float
Parameter for `k_depth="trustworthy"` setting the confidence threshold.
Notes
-----
Based on the proposal in this paper:
https://vita.had.co.nz/papers/letter-value-plot.pdf
"""
k_options = ["tukey", "proportion", "trustworthy", "full"]
if isinstance(k_depth, str):
_check_argument("k_depth", k_options, k_depth)
elif not isinstance(k_depth, int):
err = (
"The `k_depth` parameter must be either an integer or string "
f"(one of {k_options}), not {k_depth!r}."
)
raise TypeError(err)
self.k_depth = k_depth
self.outlier_prop = outlier_prop
self.trust_alpha = trust_alpha
def _compute_k(self, n):
# Select the depth, i.e. number of boxes to draw, based on the method
if self.k_depth == "full":
# extend boxes to 100% of the data
k = int(np.log2(n)) + 1
elif self.k_depth == "tukey":
# This results in 5-8 points in each tail
k = int(np.log2(n)) - 3
elif self.k_depth == "proportion":
k = int(np.log2(n)) - int(np.log2(n * self.outlier_prop)) + 1
elif self.k_depth == "trustworthy":
normal_quantile_func = np.vectorize(NormalDist().inv_cdf)
point_conf = 2 * normal_quantile_func(1 - self.trust_alpha / 2) ** 2
k = int(np.log2(n / point_conf)) + 1
else:
# Allow having k directly specified as input
k = int(self.k_depth)
return max(k, 1)
def __call__(self, x):
"""Evaluate the letter values."""
k = self._compute_k(len(x))
exp = np.arange(k + 1, 1, -1), np.arange(2, k + 2)
levels = k + 1 - np.concatenate([exp[0], exp[1][1:]])
percentiles = 100 * np.concatenate([0.5 ** exp[0], 1 - 0.5 ** exp[1]])
if self.k_depth == "full":
percentiles[0] = 0
percentiles[-1] = 100
values = np.percentile(x, percentiles)
fliers = np.asarray(x[(x < values.min()) | (x > values.max())])
median = np.percentile(x, 50)
return {
"k": k,
"levels": levels,
"percs": percentiles,
"values": values,
"fliers": fliers,
"median": median,
}
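# A minimal usage sketch, assuming seaborn._statistics is importable; the
# parameter values mirror the boxenplot defaults:
#
#     import numpy as np
#     from seaborn._statistics import LetterValues
#
#     x = np.random.default_rng(0).normal(size=1000)
#     lv = LetterValues(k_depth="tukey", outlier_prop=0.007, trust_alpha=0.05)
#     res = lv(x)  # dict with "k", "levels", "percs", "values", "fliers", "median"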
def _percentile_interval(data, width):
"""Return a percentile interval from data of a given width."""
edge = (100 - width) / 2
percentiles = edge, 100 - edge
return np.nanpercentile(data, percentiles)
def _validate_errorbar_arg(arg):
"""Check type and value of errorbar argument and assign default level."""
DEFAULT_LEVELS = {
"ci": 95,
"pi": 95,
"se": 1,
"sd": 1,
}
usage = "`errorbar` must be a callable, string, or (string, number) tuple"
if arg is None:
return None, None
elif callable(arg):
return arg, None
elif isinstance(arg, str):
method = arg
level = DEFAULT_LEVELS.get(method, None)
else:
try:
method, level = arg
except (ValueError, TypeError) as err:
raise err.__class__(usage) from err
_check_argument("errorbar", list(DEFAULT_LEVELS), method)
if level is not None and not isinstance(level, Number):
raise TypeError(usage)
return method, level
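# A behavior sketch for the validator above (np is numpy, as imported at the
# top of this module):
#
#     _validate_errorbar_arg("ci")        # -> ("ci", 95), default level filled in
#     _validate_errorbar_arg(("pi", 50))  # -> ("pi", 50)
#     _validate_errorbar_arg(None)        # -> (None, None), no error bars
#     _validate_errorbar_arg(np.ptp)      # -> (np.ptp, None), callables pass through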

View File

@@ -0,0 +1,130 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import ClassVar, Callable
import pandas as pd
from pandas import DataFrame
from seaborn._core.scales import Scale
from seaborn._core.groupby import GroupBy
from seaborn._stats.base import Stat
from seaborn._statistics import (
EstimateAggregator,
WeightedAggregator,
)
from seaborn._core.typing import Vector
@dataclass
class Agg(Stat):
"""
Aggregate data along the value axis using the given method.
Parameters
----------
func : str or callable
Name of a :class:`pandas.Series` method or a vector -> scalar function.
See Also
--------
objects.Est : Aggregation with error bars.
Examples
--------
.. include:: ../docstrings/objects.Agg.rst
"""
func: str | Callable[[Vector], float] = "mean"
group_by_orient: ClassVar[bool] = True
def __call__(
self, data: DataFrame, groupby: GroupBy, orient: str, scales: dict[str, Scale],
) -> DataFrame:
var = {"x": "y", "y": "x"}.get(orient)
res = (
groupby
.agg(data, {var: self.func})
.dropna(subset=[var])
.reset_index(drop=True)
)
return res
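# A minimal usage sketch with the objects interface, assuming the bundled
# "tips" dataset is available through load_dataset:
#
#     import seaborn as sns
#     import seaborn.objects as so
#
#     tips = sns.load_dataset("tips")
#     so.Plot(tips, x="day", y="total_bill").add(so.Bar(), so.Agg("median"))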
@dataclass
class Est(Stat):
"""
Calculate a point estimate and error bar interval.
For more information about the various `errorbar` choices, see the
:doc:`errorbar tutorial </tutorial/error_bars>`.
Additional variables:
- **weight**: When passed to a layer that uses this stat, a weighted estimate
will be computed. Note that use of weights currently limits the choice of
function and error bar method to `"mean"` and `"ci"`, respectively.
Parameters
----------
func : str or callable
Name of a :class:`numpy.ndarray` method or a vector -> scalar function.
errorbar : str, (str, float) tuple, or callable
Name of errorbar method (one of "ci", "pi", "se" or "sd"), or a tuple
with a method name and a level parameter, or a function that maps from a
vector to a (min, max) interval.
n_boot : int
Number of bootstrap samples to draw for "ci" errorbars.
seed : int
Seed for the PRNG used to draw bootstrap samples.
Examples
--------
.. include:: ../docstrings/objects.Est.rst
"""
func: str | Callable[[Vector], float] = "mean"
errorbar: str | tuple[str, float] = ("ci", 95)
n_boot: int = 1000
seed: int | None = None
group_by_orient: ClassVar[bool] = True
def _process(
self, data: DataFrame, var: str, estimator: EstimateAggregator
) -> DataFrame:
# Needed because GroupBy.apply assumes func is DataFrame -> DataFrame
# which we could probably make more general to allow Series return
res = estimator(data, var)
return pd.DataFrame([res])
def __call__(
self, data: DataFrame, groupby: GroupBy, orient: str, scales: dict[str, Scale],
) -> DataFrame:
boot_kws = {"n_boot": self.n_boot, "seed": self.seed}
if "weight" in data:
engine = WeightedAggregator(self.func, self.errorbar, **boot_kws)
else:
engine = EstimateAggregator(self.func, self.errorbar, **boot_kws)
var = {"x": "y", "y": "x"}[orient]
res = (
groupby
.apply(data, self._process, var, engine)
.dropna(subset=[var])
.reset_index(drop=True)
)
res = res.fillna({f"{var}min": res[var], f"{var}max": res[var]})
return res
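# A minimal usage sketch, assuming the bundled "tips" dataset; so.Range draws
# the (min, max) interval computed by Est:
#
#     import seaborn as sns
#     import seaborn.objects as so
#
#     tips = sns.load_dataset("tips")
#     (
#         so.Plot(tips, x="day", y="total_bill")
#         .add(so.Dot(), so.Agg())
#         .add(so.Range(), so.Est(errorbar="sd"))
#     )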
@dataclass
class Rolling(Stat):
...
def __call__(self, data, groupby, orient, scales):
...

View File

@@ -0,0 +1,65 @@
"""Base module for statistical transformations."""
from __future__ import annotations
from collections.abc import Iterable
from dataclasses import dataclass
from typing import ClassVar, Any
import warnings
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from pandas import DataFrame
from seaborn._core.groupby import GroupBy
from seaborn._core.scales import Scale
@dataclass
class Stat:
"""Base class for objects that apply statistical transformations."""
# The class supports a partial-function application pattern. The object is
# initialized with desired parameters and the result is a callable that
# accepts and returns dataframes.
# The statistical transformation logic should not add any state to the instance
# beyond what is defined with the initialization parameters.
# Subclasses can declare whether the orient dimension should be used in grouping
# TODO consider whether this should be a parameter. Motivating example:
# use the same KDE class for violin plots and univariate density estimation.
# In the former case, we would expect separate densities for each unique
# value on the orient axis, but we would not in the latter case.
group_by_orient: ClassVar[bool] = False
def _check_param_one_of(self, param: str, options: Iterable[Any]) -> None:
"""Raise when parameter value is not one of a specified set."""
value = getattr(self, param)
if value not in options:
*most, last = options
option_str = ", ".join(f"{x!r}" for x in most[:-1]) + f" or {last!r}"
err = " ".join([
f"The `{param}` parameter for `{self.__class__.__name__}` must be",
f"one of {option_str}; not {value!r}.",
])
raise ValueError(err)
def _check_grouping_vars(
self, param: str, data_vars: list[str], stacklevel: int = 2,
) -> None:
"""Warn if vars are named in parameter without being present in the data."""
param_vars = getattr(self, param)
undefined = set(param_vars) - set(data_vars)
if undefined:
param = f"{self.__class__.__name__}.{param}"
names = ", ".join(f"{x!r}" for x in undefined)
msg = f"Undefined variable(s) passed for {param}: {names}."
warnings.warn(msg, stacklevel=stacklevel)
def __call__(
self,
data: DataFrame,
groupby: GroupBy,
orient: str,
scales: dict[str, Scale],
) -> DataFrame:
"""Apply statistical transform to data subgroups and return combined result."""
return data
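# A minimal sketch of the subclassing pattern described above: a hypothetical
# stat (Demean is not part of seaborn) that centers the value axis, usable in
# a layer like .add(so.Dot(), Demean()):
#
#     from dataclasses import dataclass
#     from seaborn._stats.base import Stat
#
#     @dataclass
#     class Demean(Stat):
#         def __call__(self, data, groupby, orient, scales):
#             var = {"x": "y", "y": "x"}[orient]
#             return data.assign(**{var: data[var] - data[var].mean()})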

View File

@@ -0,0 +1,232 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import ClassVar
import numpy as np
import pandas as pd
from pandas import DataFrame
from seaborn._core.groupby import GroupBy
from seaborn._core.scales import Scale
from seaborn._stats.base import Stat
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from numpy.typing import ArrayLike
@dataclass
class Count(Stat):
"""
Count distinct observations within groups.
See Also
--------
Hist : A more fully-featured transform including binning and/or normalization.
Examples
--------
.. include:: ../docstrings/objects.Count.rst
"""
group_by_orient: ClassVar[bool] = True
def __call__(
self, data: DataFrame, groupby: GroupBy, orient: str, scales: dict[str, Scale],
) -> DataFrame:
var = {"x": "y", "y": "x"}[orient]
res = (
groupby
.agg(data.assign(**{var: data[orient]}), {var: len})
.dropna(subset=["x", "y"])
.reset_index(drop=True)
)
return res
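# A minimal usage sketch, assuming the bundled "tips" dataset; the y position
# becomes the number of observations per day:
#
#     import seaborn as sns
#     import seaborn.objects as so
#
#     tips = sns.load_dataset("tips")
#     so.Plot(tips, x="day").add(so.Bar(), so.Count())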
@dataclass
class Hist(Stat):
"""
Bin observations, count them, and optionally normalize or cumulate.
Parameters
----------
stat : str
Aggregate statistic to compute in each bin:
- `count`: the number of observations
- `density`: normalize so that the total area of the histogram equals 1
- `percent`: normalize so that bar heights sum to 100
- `probability` or `proportion`: normalize so that bar heights sum to 1
- `frequency`: divide the number of observations by the bin width
bins : str, int, or ArrayLike
Generic parameter that can be the name of a reference rule, the number
of bins, or the bin breaks. Passed to :func:`numpy.histogram_bin_edges`.
binwidth : float
Width of each bin; overrides `bins` but can be used with `binrange`.
Note that if `binwidth` does not evenly divide the bin range, the actual
bin width used will be only approximately equal to the parameter value.
binrange : (min, max)
Lowest and highest value for bin edges; can be used with either
`bins` (when a number) or `binwidth`. Defaults to data extremes.
common_norm : bool or list of variables
When not `False`, the normalization is applied across groups. Use
`True` to normalize across all groups, or pass variable name(s) that
define normalization groups.
common_bins : bool or list of variables
When not `False`, the same bins are used for all groups. Use `True` to
share bins across all groups, or pass variable name(s) to share within.
cumulative : bool
If True, cumulate the bin values.
discrete : bool
If True, set `binwidth` and `binrange` so that bins have unit width and
are centered on integer values.
Notes
-----
The choice of bins for computing and plotting a histogram can exert
substantial influence on the insights that one is able to draw from the
visualization. If the bins are too large, they may erase important features.
On the other hand, bins that are too small may be dominated by random
variability, obscuring the shape of the true underlying distribution. The
default bin size is determined using a reference rule that depends on the
sample size and variance. This works well in many cases (i.e., with
"well-behaved" data) but it fails in others. It is always a good idea to try
different bin sizes to be sure that you are not missing something important.
This function allows you to specify bins in several different ways, such as
by setting the total number of bins to use, the width of each bin, or the
specific locations where the bins should break.
Examples
--------
.. include:: ../docstrings/objects.Hist.rst
"""
stat: str = "count"
bins: str | int | ArrayLike = "auto"
binwidth: float | None = None
binrange: tuple[float, float] | None = None
common_norm: bool | list[str] = True
common_bins: bool | list[str] = True
cumulative: bool = False
discrete: bool = False
def __post_init__(self):
stat_options = [
"count", "density", "percent", "probability", "proportion", "frequency"
]
self._check_param_one_of("stat", stat_options)
def _define_bin_edges(self, vals, weight, bins, binwidth, binrange, discrete):
"""Inner function that takes bin parameters as arguments."""
vals = vals.replace(-np.inf, np.nan).replace(np.inf, np.nan).dropna()
if binrange is None:
start, stop = vals.min(), vals.max()
else:
start, stop = binrange
if discrete:
bin_edges = np.arange(start - .5, stop + 1.5)
else:
if binwidth is not None:
bins = int(round((stop - start) / binwidth))
bin_edges = np.histogram_bin_edges(vals, bins, binrange, weight)
# TODO warning or cap on too many bins?
return bin_edges
def _define_bin_params(self, data, orient, scale_type):
"""Given data, return numpy.histogram parameters to define bins."""
vals = data[orient]
weights = data.get("weight", None)
# TODO We'll want this for ordinal / discrete scales too
# (Do we need discrete as a parameter or just infer from scale?)
discrete = self.discrete or scale_type == "nominal"
bin_edges = self._define_bin_edges(
vals, weights, self.bins, self.binwidth, self.binrange, discrete,
)
if isinstance(self.bins, (str, int)):
n_bins = len(bin_edges) - 1
bin_range = bin_edges.min(), bin_edges.max()
bin_kws = dict(bins=n_bins, range=bin_range)
else:
bin_kws = dict(bins=bin_edges)
return bin_kws
def _get_bins_and_eval(self, data, orient, groupby, scale_type):
bin_kws = self._define_bin_params(data, orient, scale_type)
return groupby.apply(data, self._eval, orient, bin_kws)
def _eval(self, data, orient, bin_kws):
vals = data[orient]
weights = data.get("weight", None)
density = self.stat == "density"
hist, edges = np.histogram(vals, **bin_kws, weights=weights, density=density)
width = np.diff(edges)
center = edges[:-1] + width / 2
return pd.DataFrame({orient: center, "count": hist, "space": width})
def _normalize(self, data):
hist = data["count"]
if self.stat == "probability" or self.stat == "proportion":
hist = hist.astype(float) / hist.sum()
elif self.stat == "percent":
hist = hist.astype(float) / hist.sum() * 100
elif self.stat == "frequency":
hist = hist.astype(float) / data["space"]
if self.cumulative:
if self.stat in ["density", "frequency"]:
hist = (hist * data["space"]).cumsum()
else:
hist = hist.cumsum()
return data.assign(**{self.stat: hist})
def __call__(
self, data: DataFrame, groupby: GroupBy, orient: str, scales: dict[str, Scale],
) -> DataFrame:
scale_type = scales[orient].__class__.__name__.lower()
grouping_vars = [str(v) for v in data if v in groupby.order]
if not grouping_vars or self.common_bins is True:
bin_kws = self._define_bin_params(data, orient, scale_type)
data = groupby.apply(data, self._eval, orient, bin_kws)
else:
if self.common_bins is False:
bin_groupby = GroupBy(grouping_vars)
else:
bin_groupby = GroupBy(self.common_bins)
self._check_grouping_vars("common_bins", grouping_vars)
data = bin_groupby.apply(
data, self._get_bins_and_eval, orient, groupby, scale_type,
)
if not grouping_vars or self.common_norm is True:
data = self._normalize(data)
else:
if self.common_norm is False:
norm_groupby = GroupBy(grouping_vars)
else:
norm_groupby = GroupBy(self.common_norm)
self._check_grouping_vars("common_norm", grouping_vars)
data = norm_groupby.apply(data, self._normalize)
other = {"x": "y", "y": "x"}[orient]
return data.assign(**{other: data[self.stat]})
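# A minimal usage sketch, assuming the bundled "tips" dataset; so.Bars is the
# continuous-histogram mark:
#
#     import seaborn as sns
#     import seaborn.objects as so
#
#     tips = sns.load_dataset("tips")
#     so.Plot(tips, x="total_bill").add(so.Bars(), so.Hist("density"))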

View File

@@ -0,0 +1,214 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Callable
import numpy as np
from numpy import ndarray
import pandas as pd
from pandas import DataFrame
try:
from scipy.stats import gaussian_kde
_no_scipy = False
except ImportError:
from seaborn.external.kde import gaussian_kde
_no_scipy = True
from seaborn._core.groupby import GroupBy
from seaborn._core.scales import Scale
from seaborn._stats.base import Stat
@dataclass
class KDE(Stat):
"""
Compute a univariate kernel density estimate.
Parameters
----------
bw_adjust : float
Factor that multiplicatively scales the value chosen using
`bw_method`. Increasing will make the curve smoother. See Notes.
bw_method : string, scalar, or callable
Method for determining the smoothing bandwidth to use. Passed directly
to :class:`scipy.stats.gaussian_kde`; see there for options.
common_norm : bool or list of variables
If `True`, normalize so that the areas of all curves sum to 1.
If `False`, normalize each curve independently. If a list, defines
variable(s) to group by and normalize within.
common_grid : bool or list of variables
If `True`, all curves will share the same evaluation grid.
If `False`, each evaluation grid is independent. If a list, defines
variable(s) to group by and share a grid within.
gridsize : int or None
Number of points in the evaluation grid. If None, the density is
evaluated at the original datapoints.
cut : float
Factor, multiplied by the kernel bandwidth, that determines how far
the evaluation grid extends past the extreme datapoints. When set to 0,
the curve is truncated at the data limits.
cumulative : bool
If True, estimate a cumulative distribution function. Requires scipy.
Notes
-----
The *bandwidth*, or standard deviation of the smoothing kernel, is an
important parameter. Much like histogram bin width, using the wrong
bandwidth can produce a distorted representation. Over-smoothing can erase
true features, while under-smoothing can create false ones. The default
uses a rule-of-thumb that works best for distributions that are roughly
bell-shaped. It is a good idea to check the default by varying `bw_adjust`.
Because the smoothing is performed with a Gaussian kernel, the estimated
density curve can extend to values that may not make sense. For example, the
curve may be drawn over negative values when smoothing data that are naturally
positive. The `cut` parameter can be used to control the evaluation range,
but datasets that have many observations close to a natural boundary may be
better served by a different method.
Similar distortions may arise when a dataset is naturally discrete or "spiky"
(containing many repeated observations of the same value). KDEs will always
produce a smooth curve, which could be misleading.
The units on the density axis are a common source of confusion. While kernel
density estimation produces a probability distribution, the height of the curve
at each point gives a density, not a probability. A probability can be obtained
only by integrating the density across a range. The curve is normalized so
that the integral over all possible values is 1, meaning that the scale of
the density axis depends on the data values.
If scipy is installed, its cython-accelerated implementation will be used.
Examples
--------
.. include:: ../docstrings/objects.KDE.rst
"""
bw_adjust: float = 1
bw_method: str | float | Callable[[gaussian_kde], float] = "scott"
common_norm: bool | list[str] = True
common_grid: bool | list[str] = True
gridsize: int | None = 200
cut: float = 3
cumulative: bool = False
def __post_init__(self):
if self.cumulative and _no_scipy:
raise RuntimeError("Cumulative KDE evaluation requires scipy")
def _check_var_list_or_boolean(self, param: str, grouping_vars: Any) -> None:
"""Do input checks on grouping parameters."""
value = getattr(self, param)
if not (
isinstance(value, bool)
or (isinstance(value, list) and all(isinstance(v, str) for v in value))
):
param_name = f"{self.__class__.__name__}.{param}"
raise TypeError(f"{param_name} must be a boolean or list of strings.")
self._check_grouping_vars(param, grouping_vars, stacklevel=3)
def _fit(self, data: DataFrame, orient: str) -> gaussian_kde:
"""Fit and return a KDE object."""
# TODO need to handle singular data
fit_kws: dict[str, Any] = {"bw_method": self.bw_method}
if "weight" in data:
fit_kws["weights"] = data["weight"]
kde = gaussian_kde(data[orient], **fit_kws)
kde.set_bandwidth(kde.factor * self.bw_adjust)
return kde
def _get_support(self, data: DataFrame, orient: str) -> ndarray:
"""Define the grid that the KDE will be evaluated on."""
if self.gridsize is None:
return data[orient].to_numpy()
kde = self._fit(data, orient)
bw = np.sqrt(kde.covariance.squeeze())
gridmin = data[orient].min() - bw * self.cut
gridmax = data[orient].max() + bw * self.cut
return np.linspace(gridmin, gridmax, self.gridsize)
def _fit_and_evaluate(
self, data: DataFrame, orient: str, support: ndarray
) -> DataFrame:
"""Transform single group by fitting a KDE and evaluating on a support grid."""
empty = pd.DataFrame(columns=[orient, "weight", "density"], dtype=float)
if len(data) < 2:
return empty
try:
kde = self._fit(data, orient)
except np.linalg.LinAlgError:
return empty
if self.cumulative:
s_0 = support[0]
density = np.array([kde.integrate_box_1d(s_0, s_i) for s_i in support])
else:
density = kde(support)
weight = data["weight"].sum()
return pd.DataFrame({orient: support, "weight": weight, "density": density})
def _transform(
self, data: DataFrame, orient: str, grouping_vars: list[str]
) -> DataFrame:
"""Transform multiple groups by fitting KDEs and evaluating."""
empty = pd.DataFrame(columns=[*data.columns, "density"], dtype=float)
if len(data) < 2:
return empty
try:
support = self._get_support(data, orient)
except np.linalg.LinAlgError:
return empty
grouping_vars = [x for x in grouping_vars if data[x].nunique() > 1]
if not grouping_vars:
return self._fit_and_evaluate(data, orient, support)
groupby = GroupBy(grouping_vars)
return groupby.apply(data, self._fit_and_evaluate, orient, support)
def __call__(
self, data: DataFrame, groupby: GroupBy, orient: str, scales: dict[str, Scale],
) -> DataFrame:
if "weight" not in data:
data = data.assign(weight=1)
data = data.dropna(subset=[orient, "weight"])
# Transform each group separately
grouping_vars = [str(v) for v in data if v in groupby.order]
if not grouping_vars or self.common_grid is True:
res = self._transform(data, orient, grouping_vars)
else:
if self.common_grid is False:
grid_vars = grouping_vars
else:
self._check_var_list_or_boolean("common_grid", grouping_vars)
grid_vars = [v for v in self.common_grid if v in grouping_vars]
res = (
GroupBy(grid_vars)
.apply(data, self._transform, orient, grouping_vars)
)
# Normalize, potentially within groups
if not grouping_vars or self.common_norm is True:
res = res.assign(group_weight=data["weight"].sum())
else:
if self.common_norm is False:
norm_vars = grouping_vars
else:
self._check_var_list_or_boolean("common_norm", grouping_vars)
norm_vars = [v for v in self.common_norm if v in grouping_vars]
res = res.join(
data.groupby(norm_vars)["weight"].sum().rename("group_weight"),
on=norm_vars,
)
res["density"] *= res.eval("weight / group_weight")
value = {"x": "y", "y": "x"}[orient]
res[value] = res["density"]
return res.drop(["weight", "group_weight"], axis=1)
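# A minimal usage sketch, assuming the bundled "tips" dataset; bw_adjust < 1
# trades smoothness for detail:
#
#     import seaborn as sns
#     import seaborn.objects as so
#
#     tips = sns.load_dataset("tips")
#     so.Plot(tips, x="total_bill").add(so.Area(), so.KDE(bw_adjust=0.5))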

View File

@@ -0,0 +1,78 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import ClassVar, cast
try:
from typing import Literal
except ImportError:
from typing_extensions import Literal # type: ignore
import numpy as np
from pandas import DataFrame
from seaborn._core.scales import Scale
from seaborn._core.groupby import GroupBy
from seaborn._stats.base import Stat
from seaborn.utils import _version_predates
# From https://github.com/numpy/numpy/blob/main/numpy/lib/function_base.pyi
_MethodKind = Literal[
"inverted_cdf",
"averaged_inverted_cdf",
"closest_observation",
"interpolated_inverted_cdf",
"hazen",
"weibull",
"linear",
"median_unbiased",
"normal_unbiased",
"lower",
"higher",
"midpoint",
"nearest",
]
@dataclass
class Perc(Stat):
"""
Replace observations with percentile values.
Parameters
----------
k : list of numbers or int
If a list of numbers, this gives the percentiles (in [0, 100]) to compute.
If an integer, compute `k` evenly-spaced percentiles between 0 and 100.
For example, `k=5` computes the 0, 25, 50, 75, and 100th percentiles.
method : str
Method for interpolating percentiles between observed datapoints.
See :func:`numpy.percentile` for valid options and more information.
Examples
--------
.. include:: ../docstrings/objects.Perc.rst
"""
k: int | list[float] = 5
method: str = "linear"
group_by_orient: ClassVar[bool] = True
def _percentile(self, data: DataFrame, var: str) -> DataFrame:
k = list(np.linspace(0, 100, self.k)) if isinstance(self.k, int) else self.k
method = cast(_MethodKind, self.method)
values = data[var].dropna()
if _version_predates(np, "1.22"):
res = np.percentile(values, k, interpolation=method) # type: ignore
else:
res = np.percentile(values, k, method=method)
return DataFrame({var: res, "percentile": k})
def __call__(
self, data: DataFrame, groupby: GroupBy, orient: str, scales: dict[str, Scale],
) -> DataFrame:
var = {"x": "y", "y": "x"}[orient]
return groupby.apply(data, self._percentile, var)
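# A minimal usage sketch, assuming the bundled "tips" dataset; each group of
# observations is replaced by its quartiles:
#
#     import seaborn as sns
#     import seaborn.objects as so
#
#     tips = sns.load_dataset("tips")
#     so.Plot(tips, x="day", y="total_bill").add(so.Dot(), so.Perc([25, 50, 75]))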

View File

@@ -0,0 +1,50 @@
from __future__ import annotations
from dataclasses import dataclass
import numpy as np
import pandas as pd
from seaborn._stats.base import Stat
@dataclass
class PolyFit(Stat):
"""
Fit a polynomial of the given order and resample data onto predicted curve.
"""
# This is a provisional class that is useful for building out functionality.
# It may or may not change substantially in form or disappear as we think
# through the organization of the stats subpackage.
order: int = 2
gridsize: int = 100
def _fit_predict(self, data):
x = data["x"]
y = data["y"]
if x.nunique() <= self.order:
# TODO warn?
xx = yy = []
else:
p = np.polyfit(x, y, self.order)
xx = np.linspace(x.min(), x.max(), self.gridsize)
yy = np.polyval(p, xx)
return pd.DataFrame(dict(x=xx, y=yy))
# TODO we should have a way of identifying the method that will be applied
# and then only define __call__ on a base-class of stats with this pattern
def __call__(self, data, groupby, orient, scales):
return (
groupby
.apply(data.dropna(subset=["x", "y"]), self._fit_predict)
)
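# A minimal usage sketch, assuming the bundled "mpg" dataset:
#
#     import seaborn as sns
#     import seaborn.objects as so
#
#     mpg = sns.load_dataset("mpg")
#     so.Plot(mpg, x="weight", y="mpg").add(so.Line(), so.PolyFit(order=2))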
@dataclass
class OLSFit(Stat):
...

View File

@@ -0,0 +1,90 @@
import numpy as np
import matplotlib as mpl
from matplotlib.colors import to_rgb, to_rgba
from numpy.testing import assert_array_equal
USE_PROPS = [
"alpha",
"edgecolor",
"facecolor",
"fill",
"hatch",
"height",
"linestyle",
"linewidth",
"paths",
"xy",
"xydata",
"sizes",
"zorder",
]
def assert_artists_equal(list1, list2):
assert len(list1) == len(list2)
for a1, a2 in zip(list1, list2):
assert a1.__class__ == a2.__class__
prop1 = a1.properties()
prop2 = a2.properties()
for key in USE_PROPS:
if key not in prop1:
continue
v1 = prop1[key]
v2 = prop2[key]
if key == "paths":
for p1, p2 in zip(v1, v2):
assert_array_equal(p1.vertices, p2.vertices)
assert_array_equal(p1.codes, p2.codes)
elif key == "color":
v1 = mpl.colors.to_rgba(v1)
v2 = mpl.colors.to_rgba(v2)
assert v1 == v2
elif isinstance(v1, np.ndarray):
assert_array_equal(v1, v2)
else:
assert v1 == v2
def assert_legends_equal(leg1, leg2):
assert leg1.get_title().get_text() == leg2.get_title().get_text()
for t1, t2 in zip(leg1.get_texts(), leg2.get_texts()):
assert t1.get_text() == t2.get_text()
assert_artists_equal(
leg1.get_patches(), leg2.get_patches(),
)
assert_artists_equal(
leg1.get_lines(), leg2.get_lines(),
)
def assert_plots_equal(ax1, ax2, labels=True):
assert_artists_equal(ax1.patches, ax2.patches)
assert_artists_equal(ax1.lines, ax2.lines)
assert_artists_equal(ax1.collections, ax2.collections)
if labels:
assert ax1.get_xlabel() == ax2.get_xlabel()
assert ax1.get_ylabel() == ax2.get_ylabel()
def assert_colors_equal(a, b, check_alpha=True):
def handle_array(x):
if isinstance(x, np.ndarray):
if x.ndim > 1:
x = np.unique(x, axis=0).squeeze()
if x.ndim > 1:
raise ValueError("Color arrays must be 1 dimensional")
return x
a = handle_array(a)
b = handle_array(b)
f = to_rgba if check_alpha else to_rgb
assert f(a) == f(b)
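# A minimal usage sketch for these test helpers:
#
#     import matplotlib.pyplot as plt
#
#     _, ax1 = plt.subplots()
#     _, ax2 = plt.subplots()
#     ax1.plot([0, 1], [0, 1])
#     ax2.plot([0, 1], [0, 1])
#     assert_plots_equal(ax1, ax2)
#     assert_colors_equal("#ff0000", (1.0, 0.0, 0.0))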

View File

@@ -0,0 +1,120 @@
"""Algorithms to support fitting routines in seaborn plotting functions."""
import numpy as np
import warnings
def bootstrap(*args, **kwargs):
"""Resample one or more arrays with replacement and store aggregate values.
Positional arguments are a sequence of arrays to bootstrap along the first
axis and pass to a summary function.
Keyword arguments:
n_boot : int, default=10000
Number of iterations
axis : int, default=None
Will pass axis to ``func`` as a keyword argument.
units : array, default=None
Array of sampling unit IDs. When used the bootstrap resamples units
and then observations within units instead of individual
datapoints.
func : string or callable, default="mean"
Function to call on the args that are passed in. If string, uses as
name of function in the numpy namespace. If nans are present in the
data, will try to use nan-aware version of named function.
seed : Generator | SeedSequence | RandomState | int | None
Seed for the random number generator; useful if you want
reproducible resamples.
Returns
-------
boot_dist: array
array of bootstrapped statistic values
"""
# Ensure list of arrays are same length
if len(np.unique(list(map(len, args)))) > 1:
raise ValueError("All input arrays must have the same length")
n = len(args[0])
# Default keyword arguments
n_boot = kwargs.get("n_boot", 10000)
func = kwargs.get("func", "mean")
axis = kwargs.get("axis", None)
units = kwargs.get("units", None)
random_seed = kwargs.get("random_seed", None)
if random_seed is not None:
msg = "`random_seed` has been renamed to `seed` and will be removed"
warnings.warn(msg)
seed = kwargs.get("seed", random_seed)
if axis is None:
func_kwargs = dict()
else:
func_kwargs = dict(axis=axis)
# Initialize the resampler
if isinstance(seed, np.random.RandomState):
rng = seed
else:
rng = np.random.default_rng(seed)
# Coerce to arrays
args = list(map(np.asarray, args))
if units is not None:
units = np.asarray(units)
if isinstance(func, str):
# Allow named numpy functions
f = getattr(np, func)
# Try to use nan-aware version of function if necessary
missing_data = np.isnan(np.sum(np.column_stack(args)))
if missing_data and not func.startswith("nan"):
nanf = getattr(np, f"nan{func}", None)
if nanf is None:
msg = f"Data contain nans but no nan-aware version of `{func}` found"
warnings.warn(msg, UserWarning)
else:
f = nanf
else:
f = func
# Handle numpy changes
try:
integers = rng.integers
except AttributeError:
integers = rng.randint
# Do the bootstrap
if units is not None:
return _structured_bootstrap(args, n_boot, units, f,
func_kwargs, integers)
boot_dist = []
for i in range(int(n_boot)):
resampler = integers(0, n, n, dtype=np.intp) # intp is indexing dtype
sample = [a.take(resampler, axis=0) for a in args]
boot_dist.append(f(*sample, **func_kwargs))
return np.array(boot_dist)
def _structured_bootstrap(args, n_boot, units, func, func_kwargs, integers):
"""Resample units instead of datapoints."""
unique_units = np.unique(units)
n_units = len(unique_units)
args = [[a[units == unit] for unit in unique_units] for a in args]
boot_dist = []
for i in range(int(n_boot)):
resampler = integers(0, n_units, n_units, dtype=np.intp)
sample = [[a[i] for i in resampler] for a in args]
lengths = map(len, sample[0])
resampler = [integers(0, n, n, dtype=np.intp) for n in lengths]
sample = [[c.take(r, axis=0) for c, r in zip(a, resampler)] for a in sample]
sample = list(map(np.concatenate, sample))
boot_dist.append(func(*sample, **func_kwargs))
return np.array(boot_dist)
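# A minimal usage sketch (a percentile-method 95% CI for the mean), assuming
# this module is importable as seaborn.algorithms:
#
#     import numpy as np
#     from seaborn.algorithms import bootstrap
#
#     x = np.random.default_rng(0).normal(size=100)
#     boots = bootstrap(x, func="mean", n_boot=1000, seed=0)
#     lo, hi = np.percentile(boots, [2.5, 97.5])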

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,2 @@
from .xkcd_rgb import xkcd_rgb # noqa: F401
from .crayons import crayons # noqa: F401

View File

@@ -0,0 +1,120 @@
crayons = {'Almond': '#EFDECD',
'Antique Brass': '#CD9575',
'Apricot': '#FDD9B5',
'Aquamarine': '#78DBE2',
'Asparagus': '#87A96B',
'Atomic Tangerine': '#FFA474',
'Banana Mania': '#FAE7B5',
'Beaver': '#9F8170',
'Bittersweet': '#FD7C6E',
'Black': '#000000',
'Blue': '#1F75FE',
'Blue Bell': '#A2A2D0',
'Blue Green': '#0D98BA',
'Blue Violet': '#7366BD',
'Blush': '#DE5D83',
'Brick Red': '#CB4154',
'Brown': '#B4674D',
'Burnt Orange': '#FF7F49',
'Burnt Sienna': '#EA7E5D',
'Cadet Blue': '#B0B7C6',
'Canary': '#FFFF99',
'Caribbean Green': '#00CC99',
'Carnation Pink': '#FFAACC',
'Cerise': '#DD4492',
'Cerulean': '#1DACD6',
'Chestnut': '#BC5D58',
'Copper': '#DD9475',
'Cornflower': '#9ACEEB',
'Cotton Candy': '#FFBCD9',
'Dandelion': '#FDDB6D',
'Denim': '#2B6CC4',
'Desert Sand': '#EFCDB8',
'Eggplant': '#6E5160',
'Electric Lime': '#CEFF1D',
'Fern': '#71BC78',
'Forest Green': '#6DAE81',
'Fuchsia': '#C364C5',
'Fuzzy Wuzzy': '#CC6666',
'Gold': '#E7C697',
'Goldenrod': '#FCD975',
'Granny Smith Apple': '#A8E4A0',
'Gray': '#95918C',
'Green': '#1CAC78',
'Green Yellow': '#F0E891',
'Hot Magenta': '#FF1DCE',
'Inchworm': '#B2EC5D',
'Indigo': '#5D76CB',
'Jazzberry Jam': '#CA3767',
'Jungle Green': '#3BB08F',
'Laser Lemon': '#FEFE22',
'Lavender': '#FCB4D5',
'Macaroni and Cheese': '#FFBD88',
'Magenta': '#F664AF',
'Mahogany': '#CD4A4C',
'Manatee': '#979AAA',
'Mango Tango': '#FF8243',
'Maroon': '#C8385A',
'Mauvelous': '#EF98AA',
'Melon': '#FDBCB4',
'Midnight Blue': '#1A4876',
'Mountain Meadow': '#30BA8F',
'Navy Blue': '#1974D2',
'Neon Carrot': '#FFA343',
'Olive Green': '#BAB86C',
'Orange': '#FF7538',
'Orchid': '#E6A8D7',
'Outer Space': '#414A4C',
'Outrageous Orange': '#FF6E4A',
'Pacific Blue': '#1CA9C9',
'Peach': '#FFCFAB',
'Periwinkle': '#C5D0E6',
'Piggy Pink': '#FDDDE6',
'Pine Green': '#158078',
'Pink Flamingo': '#FC74FD',
'Pink Sherbert': '#F78FA7',
'Plum': '#8E4585',
'Purple Heart': '#7442C8',
"Purple Mountains' Majesty": '#9D81BA',
'Purple Pizzazz': '#FE4EDA',
'Radical Red': '#FF496C',
'Raw Sienna': '#D68A59',
'Razzle Dazzle Rose': '#FF48D0',
'Razzmatazz': '#E3256B',
'Red': '#EE204D',
'Red Orange': '#FF5349',
'Red Violet': '#C0448F',
"Robin's Egg Blue": '#1FCECB',
'Royal Purple': '#7851A9',
'Salmon': '#FF9BAA',
'Scarlet': '#FC2847',
"Screamin' Green": '#76FF7A',
'Sea Green': '#93DFB8',
'Sepia': '#A5694F',
'Shadow': '#8A795D',
'Shamrock': '#45CEA2',
'Shocking Pink': '#FB7EFD',
'Silver': '#CDC5C2',
'Sky Blue': '#80DAEB',
'Spring Green': '#ECEABE',
'Sunglow': '#FFCF48',
'Sunset Orange': '#FD5E53',
'Tan': '#FAA76C',
'Tickle Me Pink': '#FC89AC',
'Timberwolf': '#DBD7D2',
'Tropical Rain Forest': '#17806D',
'Tumbleweed': '#DEAA88',
'Turquoise Blue': '#77DDE7',
'Unmellow Yellow': '#FFFF66',
'Violet (Purple)': '#926EAE',
'Violet Red': '#F75394',
'Vivid Tangerine': '#FFA089',
'Vivid Violet': '#8F509D',
'White': '#FFFFFF',
'Wild Blue Yonder': '#A2ADD0',
'Wild Strawberry': '#FF43A4',
'Wild Watermelon': '#FC6C85',
'Wisteria': '#CDA4DE',
'Yellow': '#FCE883',
'Yellow Green': '#C5E384',
'Yellow Orange': '#FFAE42'}

View File

@@ -0,0 +1,949 @@
xkcd_rgb = {'acid green': '#8ffe09',
'adobe': '#bd6c48',
'algae': '#54ac68',
'algae green': '#21c36f',
'almost black': '#070d0d',
'amber': '#feb308',
'amethyst': '#9b5fc0',
'apple': '#6ecb3c',
'apple green': '#76cd26',
'apricot': '#ffb16d',
'aqua': '#13eac9',
'aqua blue': '#02d8e9',
'aqua green': '#12e193',
'aqua marine': '#2ee8bb',
'aquamarine': '#04d8b2',
'army green': '#4b5d16',
'asparagus': '#77ab56',
'aubergine': '#3d0734',
'auburn': '#9a3001',
'avocado': '#90b134',
'avocado green': '#87a922',
'azul': '#1d5dec',
'azure': '#069af3',
'baby blue': '#a2cffe',
'baby green': '#8cff9e',
'baby pink': '#ffb7ce',
'baby poo': '#ab9004',
'baby poop': '#937c00',
'baby poop green': '#8f9805',
'baby puke green': '#b6c406',
'baby purple': '#ca9bf7',
'baby shit brown': '#ad900d',
'baby shit green': '#889717',
'banana': '#ffff7e',
'banana yellow': '#fafe4b',
'barbie pink': '#fe46a5',
'barf green': '#94ac02',
'barney': '#ac1db8',
'barney purple': '#a00498',
'battleship grey': '#6b7c85',
'beige': '#e6daa6',
'berry': '#990f4b',
'bile': '#b5c306',
'black': '#000000',
'bland': '#afa88b',
'blood': '#770001',
'blood orange': '#fe4b03',
'blood red': '#980002',
'blue': '#0343df',
'blue blue': '#2242c7',
'blue green': '#137e6d',
'blue grey': '#607c8e',
'blue purple': '#5729ce',
'blue violet': '#5d06e9',
'blue with a hint of purple': '#533cc6',
'blue/green': '#0f9b8e',
'blue/grey': '#758da3',
'blue/purple': '#5a06ef',
'blueberry': '#464196',
'bluegreen': '#017a79',
'bluegrey': '#85a3b2',
'bluey green': '#2bb179',
'bluey grey': '#89a0b0',
'bluey purple': '#6241c7',
'bluish': '#2976bb',
'bluish green': '#10a674',
'bluish grey': '#748b97',
'bluish purple': '#703be7',
'blurple': '#5539cc',
'blush': '#f29e8e',
'blush pink': '#fe828c',
'booger': '#9bb53c',
'booger green': '#96b403',
'bordeaux': '#7b002c',
'boring green': '#63b365',
'bottle green': '#044a05',
'brick': '#a03623',
'brick orange': '#c14a09',
'brick red': '#8f1402',
'bright aqua': '#0bf9ea',
'bright blue': '#0165fc',
'bright cyan': '#41fdfe',
'bright green': '#01ff07',
'bright lavender': '#c760ff',
'bright light blue': '#26f7fd',
'bright light green': '#2dfe54',
'bright lilac': '#c95efb',
'bright lime': '#87fd05',
'bright lime green': '#65fe08',
'bright magenta': '#ff08e8',
'bright olive': '#9cbb04',
'bright orange': '#ff5b00',
'bright pink': '#fe01b1',
'bright purple': '#be03fd',
'bright red': '#ff000d',
'bright sea green': '#05ffa6',
'bright sky blue': '#02ccfe',
'bright teal': '#01f9c6',
'bright turquoise': '#0ffef9',
'bright violet': '#ad0afd',
'bright yellow': '#fffd01',
'bright yellow green': '#9dff00',
'british racing green': '#05480d',
'bronze': '#a87900',
'brown': '#653700',
'brown green': '#706c11',
'brown grey': '#8d8468',
'brown orange': '#b96902',
'brown red': '#922b05',
'brown yellow': '#b29705',
'brownish': '#9c6d57',
'brownish green': '#6a6e09',
'brownish grey': '#86775f',
'brownish orange': '#cb7723',
'brownish pink': '#c27e79',
'brownish purple': '#76424e',
'brownish red': '#9e3623',
'brownish yellow': '#c9b003',
'browny green': '#6f6c0a',
'browny orange': '#ca6b02',
'bruise': '#7e4071',
'bubble gum pink': '#ff69af',
'bubblegum': '#ff6cb5',
'bubblegum pink': '#fe83cc',
'buff': '#fef69e',
'burgundy': '#610023',
'burnt orange': '#c04e01',
'burnt red': '#9f2305',
'burnt siena': '#b75203',
'burnt sienna': '#b04e0f',
'burnt umber': '#a0450e',
'burnt yellow': '#d5ab09',
'burple': '#6832e3',
'butter': '#ffff81',
'butter yellow': '#fffd74',
'butterscotch': '#fdb147',
'cadet blue': '#4e7496',
'camel': '#c69f59',
'camo': '#7f8f4e',
'camo green': '#526525',
'camouflage green': '#4b6113',
'canary': '#fdff63',
'canary yellow': '#fffe40',
'candy pink': '#ff63e9',
'caramel': '#af6f09',
'carmine': '#9d0216',
'carnation': '#fd798f',
'carnation pink': '#ff7fa7',
'carolina blue': '#8ab8fe',
'celadon': '#befdb7',
'celery': '#c1fd95',
'cement': '#a5a391',
'cerise': '#de0c62',
'cerulean': '#0485d1',
'cerulean blue': '#056eee',
'charcoal': '#343837',
'charcoal grey': '#3c4142',
'chartreuse': '#c1f80a',
'cherry': '#cf0234',
'cherry red': '#f7022a',
'chestnut': '#742802',
'chocolate': '#3d1c02',
'chocolate brown': '#411900',
'cinnamon': '#ac4f06',
'claret': '#680018',
'clay': '#b66a50',
'clay brown': '#b2713d',
'clear blue': '#247afd',
'cloudy blue': '#acc2d9',
'cobalt': '#1e488f',
'cobalt blue': '#030aa7',
'cocoa': '#875f42',
'coffee': '#a6814c',
'cool blue': '#4984b8',
'cool green': '#33b864',
'cool grey': '#95a3a6',
'copper': '#b66325',
'coral': '#fc5a50',
'coral pink': '#ff6163',
'cornflower': '#6a79f7',
'cornflower blue': '#5170d7',
'cranberry': '#9e003a',
'cream': '#ffffc2',
'creme': '#ffffb6',
'crimson': '#8c000f',
'custard': '#fffd78',
'cyan': '#00ffff',
'dandelion': '#fedf08',
'dark': '#1b2431',
'dark aqua': '#05696b',
'dark aquamarine': '#017371',
'dark beige': '#ac9362',
'dark blue': '#00035b',
'dark blue green': '#005249',
'dark blue grey': '#1f3b4d',
'dark brown': '#341c02',
'dark coral': '#cf524e',
'dark cream': '#fff39a',
'dark cyan': '#0a888a',
'dark forest green': '#002d04',
'dark fuchsia': '#9d0759',
'dark gold': '#b59410',
'dark grass green': '#388004',
'dark green': '#033500',
'dark green blue': '#1f6357',
'dark grey': '#363737',
'dark grey blue': '#29465b',
'dark hot pink': '#d90166',
'dark indigo': '#1f0954',
'dark khaki': '#9b8f55',
'dark lavender': '#856798',
'dark lilac': '#9c6da5',
'dark lime': '#84b701',
'dark lime green': '#7ebd01',
'dark magenta': '#960056',
'dark maroon': '#3c0008',
'dark mauve': '#874c62',
'dark mint': '#48c072',
'dark mint green': '#20c073',
'dark mustard': '#a88905',
'dark navy': '#000435',
'dark navy blue': '#00022e',
'dark olive': '#373e02',
'dark olive green': '#3c4d03',
'dark orange': '#c65102',
'dark pastel green': '#56ae57',
'dark peach': '#de7e5d',
'dark periwinkle': '#665fd1',
'dark pink': '#cb416b',
'dark plum': '#3f012c',
'dark purple': '#35063e',
'dark red': '#840000',
'dark rose': '#b5485d',
'dark royal blue': '#02066f',
'dark sage': '#598556',
'dark salmon': '#c85a53',
'dark sand': '#a88f59',
'dark sea green': '#11875d',
'dark seafoam': '#1fb57a',
'dark seafoam green': '#3eaf76',
'dark sky blue': '#448ee4',
'dark slate blue': '#214761',
'dark tan': '#af884a',
'dark taupe': '#7f684e',
'dark teal': '#014d4e',
'dark turquoise': '#045c5a',
'dark violet': '#34013f',
'dark yellow': '#d5b60a',
'dark yellow green': '#728f02',
'darkblue': '#030764',
'darkgreen': '#054907',
'darkish blue': '#014182',
'darkish green': '#287c37',
'darkish pink': '#da467d',
'darkish purple': '#751973',
'darkish red': '#a90308',
'deep aqua': '#08787f',
'deep blue': '#040273',
'deep brown': '#410200',
'deep green': '#02590f',
'deep lavender': '#8d5eb7',
'deep lilac': '#966ebd',
'deep magenta': '#a0025c',
'deep orange': '#dc4d01',
'deep pink': '#cb0162',
'deep purple': '#36013f',
'deep red': '#9a0200',
'deep rose': '#c74767',
'deep sea blue': '#015482',
'deep sky blue': '#0d75f8',
'deep teal': '#00555a',
'deep turquoise': '#017374',
'deep violet': '#490648',
'denim': '#3b638c',
'denim blue': '#3b5b92',
'desert': '#ccad60',
'diarrhea': '#9f8303',
'dirt': '#8a6e45',
'dirt brown': '#836539',
'dirty blue': '#3f829d',
'dirty green': '#667e2c',
'dirty orange': '#c87606',
'dirty pink': '#ca7b80',
'dirty purple': '#734a65',
'dirty yellow': '#cdc50a',
'dodger blue': '#3e82fc',
'drab': '#828344',
'drab green': '#749551',
'dried blood': '#4b0101',
'duck egg blue': '#c3fbf4',
'dull blue': '#49759c',
'dull brown': '#876e4b',
'dull green': '#74a662',
'dull orange': '#d8863b',
'dull pink': '#d5869d',
'dull purple': '#84597e',
'dull red': '#bb3f3f',
'dull teal': '#5f9e8f',
'dull yellow': '#eedc5b',
'dusk': '#4e5481',
'dusk blue': '#26538d',
'dusky blue': '#475f94',
'dusky pink': '#cc7a8b',
'dusky purple': '#895b7b',
'dusky rose': '#ba6873',
'dust': '#b2996e',
'dusty blue': '#5a86ad',
'dusty green': '#76a973',
'dusty lavender': '#ac86a8',
'dusty orange': '#f0833a',
'dusty pink': '#d58a94',
'dusty purple': '#825f87',
'dusty red': '#b9484e',
'dusty rose': '#c0737a',
'dusty teal': '#4c9085',
'earth': '#a2653e',
'easter green': '#8cfd7e',
'easter purple': '#c071fe',
'ecru': '#feffca',
'egg shell': '#fffcc4',
'eggplant': '#380835',
'eggplant purple': '#430541',
'eggshell': '#ffffd4',
'eggshell blue': '#c4fff7',
'electric blue': '#0652ff',
'electric green': '#21fc0d',
'electric lime': '#a8ff04',
'electric pink': '#ff0490',
'electric purple': '#aa23ff',
'emerald': '#01a049',
'emerald green': '#028f1e',
'evergreen': '#05472a',
'faded blue': '#658cbb',
'faded green': '#7bb274',
'faded orange': '#f0944d',
'faded pink': '#de9dac',
'faded purple': '#916e99',
'faded red': '#d3494e',
'faded yellow': '#feff7f',
'fawn': '#cfaf7b',
'fern': '#63a950',
'fern green': '#548d44',
'fire engine red': '#fe0002',
'flat blue': '#3c73a8',
'flat green': '#699d4c',
'fluorescent green': '#08ff08',
'fluro green': '#0aff02',
'foam green': '#90fda9',
'forest': '#0b5509',
'forest green': '#06470c',
'forrest green': '#154406',
'french blue': '#436bad',
'fresh green': '#69d84f',
'frog green': '#58bc08',
'fuchsia': '#ed0dd9',
'gold': '#dbb40c',
'golden': '#f5bf03',
'golden brown': '#b27a01',
'golden rod': '#f9bc08',
'golden yellow': '#fec615',
'goldenrod': '#fac205',
'grape': '#6c3461',
'grape purple': '#5d1451',
'grapefruit': '#fd5956',
'grass': '#5cac2d',
'grass green': '#3f9b0b',
'grassy green': '#419c03',
'green': '#15b01a',
'green apple': '#5edc1f',
'green blue': '#06b48b',
'green brown': '#544e03',
'green grey': '#77926f',
'green teal': '#0cb577',
'green yellow': '#c9ff27',
'green/blue': '#01c08d',
'green/yellow': '#b5ce08',
'greenblue': '#23c48b',
'greenish': '#40a368',
'greenish beige': '#c9d179',
'greenish blue': '#0b8b87',
'greenish brown': '#696112',
'greenish cyan': '#2afeb7',
'greenish grey': '#96ae8d',
'greenish tan': '#bccb7a',
'greenish teal': '#32bf84',
'greenish turquoise': '#00fbb0',
'greenish yellow': '#cdfd02',
'greeny blue': '#42b395',
'greeny brown': '#696006',
'greeny grey': '#7ea07a',
'greeny yellow': '#c6f808',
'grey': '#929591',
'grey blue': '#6b8ba4',
'grey brown': '#7f7053',
'grey green': '#789b73',
'grey pink': '#c3909b',
'grey purple': '#826d8c',
'grey teal': '#5e9b8a',
'grey/blue': '#647d8e',
'grey/green': '#86a17d',
'greyblue': '#77a1b5',
'greyish': '#a8a495',
'greyish blue': '#5e819d',
'greyish brown': '#7a6a4f',
'greyish green': '#82a67d',
'greyish pink': '#c88d94',
'greyish purple': '#887191',
'greyish teal': '#719f91',
'gross green': '#a0bf16',
'gunmetal': '#536267',
'hazel': '#8e7618',
'heather': '#a484ac',
'heliotrope': '#d94ff5',
'highlighter green': '#1bfc06',
'hospital green': '#9be5aa',
'hot green': '#25ff29',
'hot magenta': '#f504c9',
'hot pink': '#ff028d',
'hot purple': '#cb00f5',
'hunter green': '#0b4008',
'ice': '#d6fffa',
'ice blue': '#d7fffe',
'icky green': '#8fae22',
'indian red': '#850e04',
'indigo': '#380282',
'indigo blue': '#3a18b1',
'iris': '#6258c4',
'irish green': '#019529',
'ivory': '#ffffcb',
'jade': '#1fa774',
'jade green': '#2baf6a',
'jungle green': '#048243',
'kelley green': '#009337',
'kelly green': '#02ab2e',
'kermit green': '#5cb200',
'key lime': '#aeff6e',
'khaki': '#aaa662',
'khaki green': '#728639',
'kiwi': '#9cef43',
'kiwi green': '#8ee53f',
'lavender': '#c79fef',
'lavender blue': '#8b88f8',
'lavender pink': '#dd85d7',
'lawn green': '#4da409',
'leaf': '#71aa34',
'leaf green': '#5ca904',
'leafy green': '#51b73b',
'leather': '#ac7434',
'lemon': '#fdff52',
'lemon green': '#adf802',
'lemon lime': '#bffe28',
'lemon yellow': '#fdff38',
'lichen': '#8fb67b',
'light aqua': '#8cffdb',
'light aquamarine': '#7bfdc7',
'light beige': '#fffeb6',
'light blue': '#95d0fc',
'light blue green': '#7efbb3',
'light blue grey': '#b7c9e2',
'light bluish green': '#76fda8',
'light bright green': '#53fe5c',
'light brown': '#ad8150',
'light burgundy': '#a8415b',
'light cyan': '#acfffc',
'light eggplant': '#894585',
'light forest green': '#4f9153',
'light gold': '#fddc5c',
'light grass green': '#9af764',
'light green': '#96f97b',
'light green blue': '#56fca2',
'light greenish blue': '#63f7b4',
'light grey': '#d8dcd6',
'light grey blue': '#9dbcd4',
'light grey green': '#b7e1a1',
'light indigo': '#6d5acf',
'light khaki': '#e6f2a2',
'light lavendar': '#efc0fe',
'light lavender': '#dfc5fe',
'light light blue': '#cafffb',
'light light green': '#c8ffb0',
'light lilac': '#edc8ff',
'light lime': '#aefd6c',
'light lime green': '#b9ff66',
'light magenta': '#fa5ff7',
'light maroon': '#a24857',
'light mauve': '#c292a1',
'light mint': '#b6ffbb',
'light mint green': '#a6fbb2',
'light moss green': '#a6c875',
'light mustard': '#f7d560',
'light navy': '#155084',
'light navy blue': '#2e5a88',
'light neon green': '#4efd54',
'light olive': '#acbf69',
'light olive green': '#a4be5c',
'light orange': '#fdaa48',
'light pastel green': '#b2fba5',
'light pea green': '#c4fe82',
'light peach': '#ffd8b1',
'light periwinkle': '#c1c6fc',
'light pink': '#ffd1df',
'light plum': '#9d5783',
'light purple': '#bf77f6',
'light red': '#ff474c',
'light rose': '#ffc5cb',
'light royal blue': '#3a2efe',
'light sage': '#bcecac',
'light salmon': '#fea993',
'light sea green': '#98f6b0',
'light seafoam': '#a0febf',
'light seafoam green': '#a7ffb5',
'light sky blue': '#c6fcff',
'light tan': '#fbeeac',
'light teal': '#90e4c1',
'light turquoise': '#7ef4cc',
'light urple': '#b36ff6',
'light violet': '#d6b4fc',
'light yellow': '#fffe7a',
'light yellow green': '#ccfd7f',
'light yellowish green': '#c2ff89',
'lightblue': '#7bc8f6',
'lighter green': '#75fd63',
'lighter purple': '#a55af4',
'lightgreen': '#76ff7b',
'lightish blue': '#3d7afd',
'lightish green': '#61e160',
'lightish purple': '#a552e6',
'lightish red': '#fe2f4a',
'lilac': '#cea2fd',
'liliac': '#c48efd',
'lime': '#aaff32',
'lime green': '#89fe05',
'lime yellow': '#d0fe1d',
'lipstick': '#d5174e',
'lipstick red': '#c0022f',
'macaroni and cheese': '#efb435',
'magenta': '#c20078',
'mahogany': '#4a0100',
'maize': '#f4d054',
'mango': '#ffa62b',
'manilla': '#fffa86',
'marigold': '#fcc006',
'marine': '#042e60',
'marine blue': '#01386a',
'maroon': '#650021',
'mauve': '#ae7181',
'medium blue': '#2c6fbb',
'medium brown': '#7f5112',
'medium green': '#39ad48',
'medium grey': '#7d7f7c',
'medium pink': '#f36196',
'medium purple': '#9e43a2',
'melon': '#ff7855',
'merlot': '#730039',
'metallic blue': '#4f738e',
'mid blue': '#276ab3',
'mid green': '#50a747',
'midnight': '#03012d',
'midnight blue': '#020035',
'midnight purple': '#280137',
'military green': '#667c3e',
'milk chocolate': '#7f4e1e',
'mint': '#9ffeb0',
'mint green': '#8fff9f',
'minty green': '#0bf77d',
'mocha': '#9d7651',
'moss': '#769958',
'moss green': '#658b38',
'mossy green': '#638b27',
'mud': '#735c12',
'mud brown': '#60460f',
'mud green': '#606602',
'muddy brown': '#886806',
'muddy green': '#657432',
'muddy yellow': '#bfac05',
'mulberry': '#920a4e',
'murky green': '#6c7a0e',
'mushroom': '#ba9e88',
'mustard': '#ceb301',
'mustard brown': '#ac7e04',
'mustard green': '#a8b504',
'mustard yellow': '#d2bd0a',
'muted blue': '#3b719f',
'muted green': '#5fa052',
'muted pink': '#d1768f',
'muted purple': '#805b87',
'nasty green': '#70b23f',
'navy': '#01153e',
'navy blue': '#001146',
'navy green': '#35530a',
'neon blue': '#04d9ff',
'neon green': '#0cff0c',
'neon pink': '#fe019a',
'neon purple': '#bc13fe',
'neon red': '#ff073a',
'neon yellow': '#cfff04',
'nice blue': '#107ab0',
'night blue': '#040348',
'ocean': '#017b92',
'ocean blue': '#03719c',
'ocean green': '#3d9973',
'ocher': '#bf9b0c',
'ochre': '#bf9005',
'ocre': '#c69c04',
'off blue': '#5684ae',
'off green': '#6ba353',
'off white': '#ffffe4',
'off yellow': '#f1f33f',
'old pink': '#c77986',
'old rose': '#c87f89',
'olive': '#6e750e',
'olive brown': '#645403',
'olive drab': '#6f7632',
'olive green': '#677a04',
'olive yellow': '#c2b709',
'orange': '#f97306',
'orange brown': '#be6400',
'orange pink': '#ff6f52',
'orange red': '#fd411e',
'orange yellow': '#ffad01',
'orangeish': '#fd8d49',
'orangered': '#fe420f',
'orangey brown': '#b16002',
'orangey red': '#fa4224',
'orangey yellow': '#fdb915',
'orangish': '#fc824a',
'orangish brown': '#b25f03',
'orangish red': '#f43605',
'orchid': '#c875c4',
'pale': '#fff9d0',
'pale aqua': '#b8ffeb',
'pale blue': '#d0fefe',
'pale brown': '#b1916e',
'pale cyan': '#b7fffa',
'pale gold': '#fdde6c',
'pale green': '#c7fdb5',
'pale grey': '#fdfdfe',
'pale lavender': '#eecffe',
'pale light green': '#b1fc99',
'pale lilac': '#e4cbff',
'pale lime': '#befd73',
'pale lime green': '#b1ff65',
'pale magenta': '#d767ad',
'pale mauve': '#fed0fc',
'pale olive': '#b9cc81',
'pale olive green': '#b1d27b',
'pale orange': '#ffa756',
'pale peach': '#ffe5ad',
'pale pink': '#ffcfdc',
'pale purple': '#b790d4',
'pale red': '#d9544d',
'pale rose': '#fdc1c5',
'pale salmon': '#ffb19a',
'pale sky blue': '#bdf6fe',
'pale teal': '#82cbb2',
'pale turquoise': '#a5fbd5',
'pale violet': '#ceaefa',
'pale yellow': '#ffff84',
'parchment': '#fefcaf',
'pastel blue': '#a2bffe',
'pastel green': '#b0ff9d',
'pastel orange': '#ff964f',
'pastel pink': '#ffbacd',
'pastel purple': '#caa0ff',
'pastel red': '#db5856',
'pastel yellow': '#fffe71',
'pea': '#a4bf20',
'pea green': '#8eab12',
'pea soup': '#929901',
'pea soup green': '#94a617',
'peach': '#ffb07c',
'peachy pink': '#ff9a8a',
'peacock blue': '#016795',
'pear': '#cbf85f',
'periwinkle': '#8e82fe',
'periwinkle blue': '#8f99fb',
'perrywinkle': '#8f8ce7',
'petrol': '#005f6a',
'pig pink': '#e78ea5',
'pine': '#2b5d34',
'pine green': '#0a481e',
'pink': '#ff81c0',
'pink purple': '#db4bda',
'pink red': '#f5054f',
'pink/purple': '#ef1de7',
'pinkish': '#d46a7e',
'pinkish brown': '#b17261',
'pinkish grey': '#c8aca9',
'pinkish orange': '#ff724c',
'pinkish purple': '#d648d7',
'pinkish red': '#f10c45',
'pinkish tan': '#d99b82',
'pinky': '#fc86aa',
'pinky purple': '#c94cbe',
'pinky red': '#fc2647',
'piss yellow': '#ddd618',
'pistachio': '#c0fa8b',
'plum': '#580f41',
'plum purple': '#4e0550',
'poison green': '#40fd14',
'poo': '#8f7303',
'poo brown': '#885f01',
'poop': '#7f5e00',
'poop brown': '#7a5901',
'poop green': '#6f7c00',
'powder blue': '#b1d1fc',
'powder pink': '#ffb2d0',
'primary blue': '#0804f9',
'prussian blue': '#004577',
'puce': '#a57e52',
'puke': '#a5a502',
'puke brown': '#947706',
'puke green': '#9aae07',
'puke yellow': '#c2be0e',
'pumpkin': '#e17701',
'pumpkin orange': '#fb7d07',
'pure blue': '#0203e2',
'purple': '#7e1e9c',
'purple blue': '#632de9',
'purple brown': '#673a3f',
'purple grey': '#866f85',
'purple pink': '#e03fd8',
'purple red': '#990147',
'purple/blue': '#5d21d0',
'purple/pink': '#d725de',
'purpleish': '#98568d',
'purpleish blue': '#6140ef',
'purpleish pink': '#df4ec8',
'purpley': '#8756e4',
'purpley blue': '#5f34e7',
'purpley grey': '#947e94',
'purpley pink': '#c83cb9',
'purplish': '#94568c',
'purplish blue': '#601ef9',
'purplish brown': '#6b4247',
'purplish grey': '#7a687f',
'purplish pink': '#ce5dae',
'purplish red': '#b0054b',
'purply': '#983fb2',
'purply blue': '#661aee',
'purply pink': '#f075e6',
'putty': '#beae8a',
'racing green': '#014600',
'radioactive green': '#2cfa1f',
'raspberry': '#b00149',
'raw sienna': '#9a6200',
'raw umber': '#a75e09',
'really light blue': '#d4ffff',
'red': '#e50000',
'red brown': '#8b2e16',
'red orange': '#fd3c06',
'red pink': '#fa2a55',
'red purple': '#820747',
'red violet': '#9e0168',
'red wine': '#8c0034',
'reddish': '#c44240',
'reddish brown': '#7f2b0a',
'reddish grey': '#997570',
'reddish orange': '#f8481c',
'reddish pink': '#fe2c54',
'reddish purple': '#910951',
'reddy brown': '#6e1005',
'rich blue': '#021bf9',
'rich purple': '#720058',
'robin egg blue': '#8af1fe',
"robin's egg": '#6dedfd',
"robin's egg blue": '#98eff9',
'rosa': '#fe86a4',
'rose': '#cf6275',
'rose pink': '#f7879a',
'rose red': '#be013c',
'rosy pink': '#f6688e',
'rouge': '#ab1239',
'royal': '#0c1793',
'royal blue': '#0504aa',
'royal purple': '#4b006e',
'ruby': '#ca0147',
'russet': '#a13905',
'rust': '#a83c09',
'rust brown': '#8b3103',
'rust orange': '#c45508',
'rust red': '#aa2704',
'rusty orange': '#cd5909',
'rusty red': '#af2f0d',
'saffron': '#feb209',
'sage': '#87ae73',
'sage green': '#88b378',
'salmon': '#ff796c',
'salmon pink': '#fe7b7c',
'sand': '#e2ca76',
'sand brown': '#cba560',
'sand yellow': '#fce166',
'sandstone': '#c9ae74',
'sandy': '#f1da7a',
'sandy brown': '#c4a661',
'sandy yellow': '#fdee73',
'sap green': '#5c8b15',
'sapphire': '#2138ab',
'scarlet': '#be0119',
'sea': '#3c9992',
'sea blue': '#047495',
'sea green': '#53fca1',
'seafoam': '#80f9ad',
'seafoam blue': '#78d1b6',
'seafoam green': '#7af9ab',
'seaweed': '#18d17b',
'seaweed green': '#35ad6b',
'sepia': '#985e2b',
'shamrock': '#01b44c',
'shamrock green': '#02c14d',
'shit': '#7f5f00',
'shit brown': '#7b5804',
'shit green': '#758000',
'shocking pink': '#fe02a2',
'sick green': '#9db92c',
'sickly green': '#94b21c',
'sickly yellow': '#d0e429',
'sienna': '#a9561e',
'silver': '#c5c9c7',
'sky': '#82cafc',
'sky blue': '#75bbfd',
'slate': '#516572',
'slate blue': '#5b7c99',
'slate green': '#658d6d',
'slate grey': '#59656d',
'slime green': '#99cc04',
'snot': '#acbb0d',
'snot green': '#9dc100',
'soft blue': '#6488ea',
'soft green': '#6fc276',
'soft pink': '#fdb0c0',
'soft purple': '#a66fb5',
'spearmint': '#1ef876',
'spring green': '#a9f971',
'spruce': '#0a5f38',
'squash': '#f2ab15',
'steel': '#738595',
'steel blue': '#5a7d9a',
'steel grey': '#6f828a',
'stone': '#ada587',
'stormy blue': '#507b9c',
'straw': '#fcf679',
'strawberry': '#fb2943',
'strong blue': '#0c06f7',
'strong pink': '#ff0789',
'sun yellow': '#ffdf22',
'sunflower': '#ffc512',
'sunflower yellow': '#ffda03',
'sunny yellow': '#fff917',
'sunshine yellow': '#fffd37',
'swamp': '#698339',
'swamp green': '#748500',
'tan': '#d1b26f',
'tan brown': '#ab7e4c',
'tan green': '#a9be70',
'tangerine': '#ff9408',
'taupe': '#b9a281',
'tea': '#65ab7c',
'tea green': '#bdf8a3',
'teal': '#029386',
'teal blue': '#01889f',
'teal green': '#25a36f',
'tealish': '#24bca8',
'tealish green': '#0cdc73',
'terra cotta': '#c9643b',
'terracota': '#cb6843',
'terracotta': '#ca6641',
'tiffany blue': '#7bf2da',
'tomato': '#ef4026',
'tomato red': '#ec2d01',
'topaz': '#13bbaf',
'toupe': '#c7ac7d',
'toxic green': '#61de2a',
'tree green': '#2a7e19',
'true blue': '#010fcc',
'true green': '#089404',
'turquoise': '#06c2ac',
'turquoise blue': '#06b1c4',
'turquoise green': '#04f489',
'turtle green': '#75b84f',
'twilight': '#4e518b',
'twilight blue': '#0a437a',
'ugly blue': '#31668a',
'ugly brown': '#7d7103',
'ugly green': '#7a9703',
'ugly pink': '#cd7584',
'ugly purple': '#a442a0',
'ugly yellow': '#d0c101',
'ultramarine': '#2000b1',
'ultramarine blue': '#1805db',
'umber': '#b26400',
'velvet': '#750851',
'vermillion': '#f4320c',
'very dark blue': '#000133',
'very dark brown': '#1d0200',
'very dark green': '#062e03',
'very dark purple': '#2a0134',
'very light blue': '#d5ffff',
'very light brown': '#d3b683',
'very light green': '#d1ffbd',
'very light pink': '#fff4f2',
'very light purple': '#f6cefc',
'very pale blue': '#d6fffe',
'very pale green': '#cffdbc',
'vibrant blue': '#0339f8',
'vibrant green': '#0add08',
'vibrant purple': '#ad03de',
'violet': '#9a0eea',
'violet blue': '#510ac9',
'violet pink': '#fb5ffc',
'violet red': '#a50055',
'viridian': '#1e9167',
'vivid blue': '#152eff',
'vivid green': '#2fef10',
'vivid purple': '#9900fa',
'vomit': '#a2a415',
'vomit green': '#89a203',
'vomit yellow': '#c7c10c',
'warm blue': '#4b57db',
'warm brown': '#964e02',
'warm grey': '#978a84',
'warm pink': '#fb5581',
'warm purple': '#952e8f',
'washed out green': '#bcf5a6',
'water blue': '#0e87cc',
'watermelon': '#fd4659',
'weird green': '#3ae57f',
'wheat': '#fbdd7e',
'white': '#ffffff',
'windows blue': '#3778bf',
'wine': '#80013f',
'wine red': '#7b0323',
'wintergreen': '#20f986',
'wisteria': '#a87dc2',
'yellow': '#ffff14',
'yellow brown': '#b79400',
'yellow green': '#c0fb2d',
'yellow ochre': '#cb9d06',
'yellow orange': '#fcb001',
'yellow tan': '#ffe36e',
'yellow/green': '#c8fd3d',
'yellowgreen': '#bbf90f',
'yellowish': '#faee66',
'yellowish brown': '#9b7a01',
'yellowish green': '#b0dd16',
'yellowish orange': '#ffab0f',
'yellowish tan': '#fcfc81',
'yellowy brown': '#ae8b0c',
'yellowy green': '#bff128'}
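
The mapping above pairs each xkcd color-survey name with its hex code. A minimal usage sketch (seaborn exposes this mapping as seaborn.xkcd_rgb; the plot call is purely illustrative):

import matplotlib.pyplot as plt
import seaborn as sns

# Named colors resolve to '#rrggbb' strings that matplotlib accepts directly.
blue = sns.xkcd_rgb["windows blue"]               # '#3778bf'
plt.plot([0, 1], [0, 1], color=blue)

# Converting a hex code to an RGB tuple of floats in [0, 1]:
r, g, b = (int(blue[i:i + 2], 16) / 255 for i in (1, 3, 5))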

File diff suppressed because it is too large

View File

@@ -0,0 +1,245 @@
#!/usr/bin/env python3
# Copyright (c) 2005-2010 ActiveState Software Inc.
# Copyright (c) 2013 Eddy Petrișor
# flake8: noqa
"""
This file is directly from
https://github.com/ActiveState/appdirs/blob/3fe6a83776843a46f20c2e5587afcffe05e03b39/appdirs.py
The license of https://github.com/ActiveState/appdirs is copied below:
# This is the MIT license
Copyright (c) 2010 ActiveState Software Inc.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
"""Utilities for determining application-specific dirs.
See <https://github.com/ActiveState/appdirs> for details and usage.
"""
# Dev Notes:
# - MSDN on where to store app data files:
# http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120
# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html
# - XDG spec for Un*x: https://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
__version__ = "1.4.4"
__version_info__ = tuple(int(segment) for segment in __version__.split("."))
import sys
import os
unicode = str
if sys.platform.startswith('java'):
import platform
os_name = platform.java_ver()[3][0]
if os_name.startswith('Windows'): # "Windows XP", "Windows 7", etc.
system = 'win32'
elif os_name.startswith('Mac'): # "Mac OS X", etc.
system = 'darwin'
else: # "Linux", "SunOS", "FreeBSD", etc.
# Setting this to "linux2" is not ideal, but only Windows or Mac
# are actually checked for and the rest of the module expects
# *sys.platform* style strings.
system = 'linux2'
else:
system = sys.platform
def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True):
r"""Return full path to the user-specific cache dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"opinion" (boolean) can be False to disable the appending of
"Cache" to the base app data dir for Windows. See
discussion below.
Typical user cache directories are:
Mac OS X: ~/Library/Caches/<AppName>
Unix: ~/.cache/<AppName> (XDG default)
Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Cache
Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache
On Windows the only suggestion in the MSDN docs is that local settings go in
the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming
app data dir (the default returned by `user_data_dir` above). Apps typically
put cache data somewhere *under* the given dir here. Some examples:
...\Mozilla\Firefox\Profiles\<ProfileName>\Cache
...\Acme\SuperApp\Cache\1.0
OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value.
This can be disabled with the `opinion=False` option.
"""
if system == "win32":
if appauthor is None:
appauthor = appname
path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA"))
if appname:
if appauthor is not False:
path = os.path.join(path, appauthor, appname)
else:
path = os.path.join(path, appname)
if opinion:
path = os.path.join(path, "Cache")
elif system == 'darwin':
path = os.path.expanduser('~/Library/Caches')
if appname:
path = os.path.join(path, appname)
else:
path = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache'))
if appname:
path = os.path.join(path, appname)
if appname and version:
path = os.path.join(path, version)
return path
#---- internal support stuff
def _get_win_folder_from_registry(csidl_name):
"""This is a fallback technique at best. I'm not sure if using the
registry for this guarantees us the correct answer for all CSIDL_*
names.
"""
import winreg as _winreg
shell_folder_name = {
"CSIDL_APPDATA": "AppData",
"CSIDL_COMMON_APPDATA": "Common AppData",
"CSIDL_LOCAL_APPDATA": "Local AppData",
}[csidl_name]
key = _winreg.OpenKey(
_winreg.HKEY_CURRENT_USER,
r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
)
dir, type = _winreg.QueryValueEx(key, shell_folder_name)
return dir
def _get_win_folder_with_pywin32(csidl_name):
from win32com.shell import shellcon, shell
dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0)
# Try to make this a unicode path because SHGetFolderPath does
# not return unicode strings when there is unicode data in the
# path.
try:
dir = unicode(dir)
# Downgrade to short path name if have highbit chars. See
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
has_high_char = False
for c in dir:
if ord(c) > 255:
has_high_char = True
break
if has_high_char:
try:
import win32api
dir = win32api.GetShortPathName(dir)
except ImportError:
pass
except UnicodeError:
pass
return dir
def _get_win_folder_with_ctypes(csidl_name):
import ctypes
csidl_const = {
"CSIDL_APPDATA": 26,
"CSIDL_COMMON_APPDATA": 35,
"CSIDL_LOCAL_APPDATA": 28,
}[csidl_name]
buf = ctypes.create_unicode_buffer(1024)
ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf)
# Downgrade to short path name if have highbit chars. See
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
has_high_char = False
for c in buf:
if ord(c) > 255:
has_high_char = True
break
if has_high_char:
buf2 = ctypes.create_unicode_buffer(1024)
if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024):
buf = buf2
return buf.value
def _get_win_folder_with_jna(csidl_name):
import array
from com.sun import jna
from com.sun.jna.platform import win32
buf_size = win32.WinDef.MAX_PATH * 2
buf = array.zeros('c', buf_size)
shell = win32.Shell32.INSTANCE
shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None, win32.ShlObj.SHGFP_TYPE_CURRENT, buf)
dir = jna.Native.toString(buf.tostring()).rstrip("\0")
# Downgrade to short path name if have highbit chars. See
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
has_high_char = False
for c in dir:
if ord(c) > 255:
has_high_char = True
break
if has_high_char:
buf = array.zeros('c', buf_size)
kernel = win32.Kernel32.INSTANCE
if kernel.GetShortPathName(dir, buf, buf_size):
dir = jna.Native.toString(buf.tostring()).rstrip("\0")
return dir
if system == "win32":
try:
import win32com.shell
_get_win_folder = _get_win_folder_with_pywin32
except ImportError:
try:
from ctypes import windll
_get_win_folder = _get_win_folder_with_ctypes
except ImportError:
try:
import com.sun.jna
_get_win_folder = _get_win_folder_with_jna
except ImportError:
_get_win_folder = _get_win_folder_from_registry
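
A minimal sketch of calling the vendored helper above (the import path assumes seaborn's vendored location; actual paths vary by user and platform):

from seaborn.external.appdirs import user_cache_dir

# Platform-appropriate cache directory for an app named "seaborn":
#   Linux:   ~/.cache/seaborn (or $XDG_CACHE_HOME/seaborn)
#   macOS:   ~/Library/Caches/seaborn
#   Windows: %LOCALAPPDATA%\seaborn\seaborn\Cache (appauthor defaults to appname)
cache_dir = user_cache_dir("seaborn", version="0.13")
# When "version" is given and appname is present, it is appended as a final
# path element, e.g. ~/.cache/seaborn/0.13 on Linux.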

View File

@@ -0,0 +1,715 @@
"""Extract reference documentation from the NumPy source tree.
Copyright (C) 2008 Stefan van der Walt <stefan@mentat.za.net>, Pauli Virtanen <pav@iki.fi>
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
"""
import inspect
import textwrap
import re
import pydoc
from warnings import warn
from collections import namedtuple
from collections.abc import Callable, Mapping
import copy
import sys
def strip_blank_lines(l):
"Remove leading and trailing blank lines from a list of lines"
while l and not l[0].strip():
del l[0]
while l and not l[-1].strip():
del l[-1]
return l
class Reader:
"""A line-based string reader.
"""
def __init__(self, data):
"""
Parameters
----------
data : str
String with lines separated by '\n'.
"""
if isinstance(data, list):
self._str = data
else:
self._str = data.split('\n') # store string as list of lines
self.reset()
def __getitem__(self, n):
return self._str[n]
def reset(self):
self._l = 0 # current line nr
def read(self):
if not self.eof():
out = self[self._l]
self._l += 1
return out
else:
return ''
def seek_next_non_empty_line(self):
for l in self[self._l:]:
if l.strip():
break
else:
self._l += 1
def eof(self):
return self._l >= len(self._str)
def read_to_condition(self, condition_func):
start = self._l
for line in self[start:]:
if condition_func(line):
return self[start:self._l]
self._l += 1
if self.eof():
return self[start:self._l+1]
return []
def read_to_next_empty_line(self):
self.seek_next_non_empty_line()
def is_empty(line):
return not line.strip()
return self.read_to_condition(is_empty)
def read_to_next_unindented_line(self):
def is_unindented(line):
return (line.strip() and (len(line.lstrip()) == len(line)))
return self.read_to_condition(is_unindented)
def peek(self, n=0):
if self._l + n < len(self._str):
return self[self._l + n]
else:
return ''
def is_empty(self):
return not ''.join(self._str).strip()
class ParseError(Exception):
def __str__(self):
message = self.args[0]
if hasattr(self, 'docstring'):
message = f"{message} in {self.docstring!r}"
return message
Parameter = namedtuple('Parameter', ['name', 'type', 'desc'])
class NumpyDocString(Mapping):
"""Parses a numpydoc string to an abstract representation
Instances define a mapping from section title to structured data.
"""
sections = {
'Signature': '',
'Summary': [''],
'Extended Summary': [],
'Parameters': [],
'Returns': [],
'Yields': [],
'Receives': [],
'Raises': [],
'Warns': [],
'Other Parameters': [],
'Attributes': [],
'Methods': [],
'See Also': [],
'Notes': [],
'Warnings': [],
'References': '',
'Examples': '',
'index': {}
}
def __init__(self, docstring, config={}):
orig_docstring = docstring
docstring = textwrap.dedent(docstring).split('\n')
self._doc = Reader(docstring)
self._parsed_data = copy.deepcopy(self.sections)
try:
self._parse()
except ParseError as e:
e.docstring = orig_docstring
raise
def __getitem__(self, key):
return self._parsed_data[key]
def __setitem__(self, key, val):
if key not in self._parsed_data:
self._error_location(f"Unknown section {key}", error=False)
else:
self._parsed_data[key] = val
def __iter__(self):
return iter(self._parsed_data)
def __len__(self):
return len(self._parsed_data)
def _is_at_section(self):
self._doc.seek_next_non_empty_line()
if self._doc.eof():
return False
l1 = self._doc.peek().strip() # e.g. Parameters
if l1.startswith('.. index::'):
return True
l2 = self._doc.peek(1).strip() # ---------- or ==========
return l2.startswith('-'*len(l1)) or l2.startswith('='*len(l1))
def _strip(self, doc):
i = 0
j = 0
for i, line in enumerate(doc):
if line.strip():
break
for j, line in enumerate(doc[::-1]):
if line.strip():
break
return doc[i:len(doc)-j]
def _read_to_next_section(self):
section = self._doc.read_to_next_empty_line()
while not self._is_at_section() and not self._doc.eof():
if not self._doc.peek(-1).strip(): # previous line was empty
section += ['']
section += self._doc.read_to_next_empty_line()
return section
def _read_sections(self):
while not self._doc.eof():
data = self._read_to_next_section()
name = data[0].strip()
if name.startswith('..'): # index section
yield name, data[1:]
elif len(data) < 2:
return
else:
yield name, self._strip(data[2:])
def _parse_param_list(self, content, single_element_is_type=False):
r = Reader(content)
params = []
while not r.eof():
header = r.read().strip()
if ' : ' in header:
arg_name, arg_type = header.split(' : ')[:2]
else:
if single_element_is_type:
arg_name, arg_type = '', header
else:
arg_name, arg_type = header, ''
desc = r.read_to_next_unindented_line()
desc = dedent_lines(desc)
desc = strip_blank_lines(desc)
params.append(Parameter(arg_name, arg_type, desc))
return params
# See also supports the following formats.
#
# <FUNCNAME>
# <FUNCNAME> SPACE* COLON SPACE+ <DESC> SPACE*
# <FUNCNAME> ( COMMA SPACE+ <FUNCNAME>)+ (COMMA | PERIOD)? SPACE*
# <FUNCNAME> ( COMMA SPACE+ <FUNCNAME>)* SPACE* COLON SPACE+ <DESC> SPACE*
# <FUNCNAME> is one of
# <PLAIN_FUNCNAME>
# COLON <ROLE> COLON BACKTICK <PLAIN_FUNCNAME> BACKTICK
# where
# <PLAIN_FUNCNAME> is a legal function name, and
# <ROLE> is any nonempty sequence of word characters.
# Examples: func_f1 :meth:`func_h1` :obj:`~baz.obj_r` :class:`class_j`
# <DESC> is a string describing the function.
_role = r":(?P<role>\w+):"
_funcbacktick = r"`(?P<name>(?:~\w+\.)?[a-zA-Z0-9_\.-]+)`"
_funcplain = r"(?P<name2>[a-zA-Z0-9_\.-]+)"
_funcname = r"(" + _role + _funcbacktick + r"|" + _funcplain + r")"
_funcnamenext = _funcname.replace('role', 'rolenext')
_funcnamenext = _funcnamenext.replace('name', 'namenext')
_description = r"(?P<description>\s*:(\s+(?P<desc>\S+.*))?)?\s*$"
_func_rgx = re.compile(r"^\s*" + _funcname + r"\s*")
_line_rgx = re.compile(
r"^\s*" +
r"(?P<allfuncs>" + # group for all function names
_funcname +
r"(?P<morefuncs>([,]\s+" + _funcnamenext + r")*)" +
r")" + # end of "allfuncs"
r"(?P<trailing>[,\.])?" + # Some function lists have a trailing comma (or period) '\s*'
_description)
# Empty <DESC> elements are replaced with '..'
empty_description = '..'
def _parse_see_also(self, content):
"""
func_name : Descriptive text
continued text
another_func_name : Descriptive text
func_name1, func_name2, :meth:`func_name`, func_name3
"""
items = []
def parse_item_name(text):
"""Match ':role:`name`' or 'name'."""
m = self._func_rgx.match(text)
if not m:
raise ParseError(f"{text} is not an item name")
role = m.group('role')
name = m.group('name') if role else m.group('name2')
return name, role, m.end()
rest = []
for line in content:
if not line.strip():
continue
line_match = self._line_rgx.match(line)
description = None
if line_match:
description = line_match.group('desc')
if line_match.group('trailing') and description:
self._error_location(
'Unexpected comma or period after function list at index %d of '
'line "%s"' % (line_match.end('trailing'), line),
error=False)
if not description and line.startswith(' '):
rest.append(line.strip())
elif line_match:
funcs = []
text = line_match.group('allfuncs')
while True:
if not text.strip():
break
name, role, match_end = parse_item_name(text)
funcs.append((name, role))
text = text[match_end:].strip()
if text and text[0] == ',':
text = text[1:].strip()
rest = list(filter(None, [description]))
items.append((funcs, rest))
else:
raise ParseError(f"{line} is not an item name")
return items
def _parse_index(self, section, content):
"""
.. index:: default
:refguide: something, else, and more
"""
def strip_each_in(lst):
return [s.strip() for s in lst]
out = {}
section = section.split('::')
if len(section) > 1:
out['default'] = strip_each_in(section[1].split(','))[0]
for line in content:
line = line.split(':')
if len(line) > 2:
out[line[1]] = strip_each_in(line[2].split(','))
return out
def _parse_summary(self):
"""Grab signature (if given) and summary"""
if self._is_at_section():
return
# If several signatures present, take the last one
while True:
summary = self._doc.read_to_next_empty_line()
summary_str = " ".join([s.strip() for s in summary]).strip()
compiled = re.compile(r'^([\w., ]+=)?\s*[\w\.]+\(.*\)$')
if compiled.match(summary_str):
self['Signature'] = summary_str
if not self._is_at_section():
continue
break
if summary is not None:
self['Summary'] = summary
if not self._is_at_section():
self['Extended Summary'] = self._read_to_next_section()
def _parse(self):
self._doc.reset()
self._parse_summary()
sections = list(self._read_sections())
section_names = {section for section, content in sections}
has_returns = 'Returns' in section_names
has_yields = 'Yields' in section_names
# We could run more consistency checks here, but we arbitrarily stop at these.
if has_returns and has_yields:
msg = 'Docstring contains both a Returns and Yields section.'
raise ValueError(msg)
if not has_yields and 'Receives' in section_names:
msg = 'Docstring contains a Receives section but not Yields.'
raise ValueError(msg)
for (section, content) in sections:
if not section.startswith('..'):
section = (s.capitalize() for s in section.split(' '))
section = ' '.join(section)
if self.get(section):
self._error_location(f"The section {section} appears twice")
if section in ('Parameters', 'Other Parameters', 'Attributes',
'Methods'):
self[section] = self._parse_param_list(content)
elif section in ('Returns', 'Yields', 'Raises', 'Warns', 'Receives'):
self[section] = self._parse_param_list(
content, single_element_is_type=True)
elif section.startswith('.. index::'):
self['index'] = self._parse_index(section, content)
elif section == 'See Also':
self['See Also'] = self._parse_see_also(content)
else:
self[section] = content
def _error_location(self, msg, error=True):
if hasattr(self, '_obj'):
# we know where the docs came from:
try:
filename = inspect.getsourcefile(self._obj)
except TypeError:
filename = None
msg = msg + f" in the docstring of {self._obj} in {filename}."
if error:
raise ValueError(msg)
else:
warn(msg)
# string conversion routines
def _str_header(self, name, symbol='-'):
return [name, len(name)*symbol]
def _str_indent(self, doc, indent=4):
out = []
for line in doc:
out += [' '*indent + line]
return out
def _str_signature(self):
if self['Signature']:
return [self['Signature'].replace('*', r'\*')] + ['']
else:
return ['']
def _str_summary(self):
if self['Summary']:
return self['Summary'] + ['']
else:
return []
def _str_extended_summary(self):
if self['Extended Summary']:
return self['Extended Summary'] + ['']
else:
return []
def _str_param_list(self, name):
out = []
if self[name]:
out += self._str_header(name)
for param in self[name]:
parts = []
if param.name:
parts.append(param.name)
if param.type:
parts.append(param.type)
out += [' : '.join(parts)]
if param.desc and ''.join(param.desc).strip():
out += self._str_indent(param.desc)
out += ['']
return out
def _str_section(self, name):
out = []
if self[name]:
out += self._str_header(name)
out += self[name]
out += ['']
return out
def _str_see_also(self, func_role):
if not self['See Also']:
return []
out = []
out += self._str_header("See Also")
out += ['']
last_had_desc = True
for funcs, desc in self['See Also']:
assert isinstance(funcs, list)
links = []
for func, role in funcs:
if role:
link = f':{role}:`{func}`'
elif func_role:
link = f':{func_role}:`{func}`'
else:
link = f"`{func}`_"
links.append(link)
link = ', '.join(links)
out += [link]
if desc:
out += self._str_indent([' '.join(desc)])
last_had_desc = True
else:
last_had_desc = False
out += self._str_indent([self.empty_description])
if last_had_desc:
out += ['']
out += ['']
return out
def _str_index(self):
idx = self['index']
out = []
output_index = False
default_index = idx.get('default', '')
if default_index:
output_index = True
out += [f'.. index:: {default_index}']
for section, references in idx.items():
if section == 'default':
continue
output_index = True
out += [f" :{section}: {', '.join(references)}"]
if output_index:
return out
else:
return ''
def __str__(self, func_role=''):
out = []
out += self._str_signature()
out += self._str_summary()
out += self._str_extended_summary()
for param_list in ('Parameters', 'Returns', 'Yields', 'Receives',
'Other Parameters', 'Raises', 'Warns'):
out += self._str_param_list(param_list)
out += self._str_section('Warnings')
out += self._str_see_also(func_role)
for s in ('Notes', 'References', 'Examples'):
out += self._str_section(s)
for param_list in ('Attributes', 'Methods'):
out += self._str_param_list(param_list)
out += self._str_index()
return '\n'.join(out)
def indent(str, indent=4):
indent_str = ' '*indent
if str is None:
return indent_str
lines = str.split('\n')
return '\n'.join(indent_str + l for l in lines)
def dedent_lines(lines):
"""Deindent a list of lines maximally"""
return textwrap.dedent("\n".join(lines)).split("\n")
def header(text, style='-'):
return text + '\n' + style*len(text) + '\n'
class FunctionDoc(NumpyDocString):
def __init__(self, func, role='func', doc=None, config={}):
self._f = func
self._role = role # e.g. "func" or "meth"
if doc is None:
if func is None:
raise ValueError("No function or docstring given")
doc = inspect.getdoc(func) or ''
NumpyDocString.__init__(self, doc, config)
if not self['Signature'] and func is not None:
func, func_name = self.get_func()
try:
try:
signature = str(inspect.signature(func))
except (AttributeError, ValueError):
# try to read signature, backward compat for older Python
if sys.version_info[0] >= 3:
argspec = inspect.getfullargspec(func)
else:
argspec = inspect.getargspec(func)
signature = inspect.formatargspec(*argspec)
signature = f'{func_name}{signature}'
except TypeError:
signature = f'{func_name}()'
self['Signature'] = signature
def get_func(self):
func_name = getattr(self._f, '__name__', self.__class__.__name__)
if inspect.isclass(self._f):
func = getattr(self._f, '__call__', self._f.__init__)
else:
func = self._f
return func, func_name
def __str__(self):
out = ''
func, func_name = self.get_func()
roles = {'func': 'function',
'meth': 'method'}
if self._role:
if self._role not in roles:
print(f"Warning: invalid role {self._role}")
out += f".. {roles.get(self._role, '')}:: {func_name}\n \n\n"
out += super().__str__(func_role=self._role)
return out
class ClassDoc(NumpyDocString):
extra_public_methods = ['__call__']
def __init__(self, cls, doc=None, modulename='', func_doc=FunctionDoc,
config={}):
if not inspect.isclass(cls) and cls is not None:
raise ValueError(f"Expected a class or None, but got {cls!r}")
self._cls = cls
if 'sphinx' in sys.modules:
from sphinx.ext.autodoc import ALL
else:
ALL = object()
self.show_inherited_members = config.get(
'show_inherited_class_members', True)
if modulename and not modulename.endswith('.'):
modulename += '.'
self._mod = modulename
if doc is None:
if cls is None:
raise ValueError("No class or documentation string given")
doc = pydoc.getdoc(cls)
NumpyDocString.__init__(self, doc)
_members = config.get('members', [])
if _members is ALL:
_members = None
_exclude = config.get('exclude-members', [])
if config.get('show_class_members', True) and _exclude is not ALL:
def splitlines_x(s):
if not s:
return []
else:
return s.splitlines()
for field, items in [('Methods', self.methods),
('Attributes', self.properties)]:
if not self[field]:
doc_list = []
for name in sorted(items):
if (name in _exclude or
(_members and name not in _members)):
continue
try:
doc_item = pydoc.getdoc(getattr(self._cls, name))
doc_list.append(
Parameter(name, '', splitlines_x(doc_item)))
except AttributeError:
pass # method doesn't exist
self[field] = doc_list
@property
def methods(self):
if self._cls is None:
return []
return [name for name, func in inspect.getmembers(self._cls)
if ((not name.startswith('_')
or name in self.extra_public_methods)
and isinstance(func, Callable)
and self._is_show_member(name))]
@property
def properties(self):
if self._cls is None:
return []
return [name for name, func in inspect.getmembers(self._cls)
if (not name.startswith('_') and
(func is None or isinstance(func, property) or
inspect.isdatadescriptor(func))
and self._is_show_member(name))]
def _is_show_member(self, name):
if self.show_inherited_members:
return True # show all class members
if name not in self._cls.__dict__:
return False # class member is inherited, we do not show it
return True
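
A short sketch of driving the NumpyDocString parser above; it behaves as a mapping from section title to parsed content. The docstring below is a made-up example, written flush-left because the parser expects dedented input (as inspect.getdoc would provide):

doc = NumpyDocString(
    "Add two numbers.\n"
    "\n"
    "Parameters\n"
    "----------\n"
    "a : int\n"
    "    First operand.\n"
    "\n"
    "Returns\n"
    "-------\n"
    "int\n"
    "    The sum.\n"
)
doc["Parameters"]   # [Parameter(name='a', type='int', desc=['First operand.'])]
doc["Returns"]      # [Parameter(name='', type='int', desc=['The sum.'])]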

View File

@@ -0,0 +1,313 @@
import operator
import math
__version__ = "2.1.0"
m = [
[3.2406, -1.5372, -0.4986],
[-0.9689, 1.8758, 0.0415],
[0.0557, -0.2040, 1.0570]
]
m_inv = [
[0.4124, 0.3576, 0.1805],
[0.2126, 0.7152, 0.0722],
[0.0193, 0.1192, 0.9505]
]
# Hard-coded D65 illuminant
refX = 0.95047
refY = 1.00000
refZ = 1.08883
refU = 0.19784
refV = 0.46834
lab_e = 0.008856
lab_k = 903.3
# Public API
def husl_to_rgb(h, s, l):
return lch_to_rgb(*husl_to_lch([h, s, l]))
def husl_to_hex(h, s, l):
return rgb_to_hex(husl_to_rgb(h, s, l))
def rgb_to_husl(r, g, b):
return lch_to_husl(rgb_to_lch(r, g, b))
def hex_to_husl(hex):
return rgb_to_husl(*hex_to_rgb(hex))
def huslp_to_rgb(h, s, l):
return lch_to_rgb(*huslp_to_lch([h, s, l]))
def huslp_to_hex(h, s, l):
return rgb_to_hex(huslp_to_rgb(h, s, l))
def rgb_to_huslp(r, g, b):
return lch_to_huslp(rgb_to_lch(r, g, b))
def hex_to_huslp(hex):
return rgb_to_huslp(*hex_to_rgb(hex))
def lch_to_rgb(l, c, h):
return xyz_to_rgb(luv_to_xyz(lch_to_luv([l, c, h])))
def rgb_to_lch(r, g, b):
return luv_to_lch(xyz_to_luv(rgb_to_xyz([r, g, b])))
def max_chroma(L, H):
hrad = math.radians(H)
sinH = (math.sin(hrad))
cosH = (math.cos(hrad))
sub1 = (math.pow(L + 16, 3.0) / 1560896.0)
sub2 = sub1 if sub1 > 0.008856 else (L / 903.3)
result = float("inf")
for row in m:
m1 = row[0]
m2 = row[1]
m3 = row[2]
top = ((0.99915 * m1 + 1.05122 * m2 + 1.14460 * m3) * sub2)
rbottom = (0.86330 * m3 - 0.17266 * m2)
lbottom = (0.12949 * m3 - 0.38848 * m1)
bottom = (rbottom * sinH + lbottom * cosH) * sub2
for t in (0.0, 1.0):
C = (L * (top - 1.05122 * t) / (bottom + 0.17266 * sinH * t))
if C > 0.0 and C < result:
result = C
return result
def _hrad_extremum(L):
lhs = (math.pow(L, 3.0) + 48.0 * math.pow(L, 2.0) + 768.0 * L + 4096.0) / 1560896.0
rhs = 1107.0 / 125000.0
sub = lhs if lhs > rhs else 10.0 * L / 9033.0
chroma = float("inf")
result = None
for row in m:
for limit in (0.0, 1.0):
[m1, m2, m3] = row
top = -3015466475.0 * m3 * sub + 603093295.0 * m2 * sub - 603093295.0 * limit
bottom = 1356959916.0 * m1 * sub - 452319972.0 * m3 * sub
hrad = math.atan2(top, bottom)
# Math hack to deal with atan2 quadrants: for the lower limit (limit == 0),
# shift the angle by pi to land in the correct quadrant.
if limit == 0.0:
hrad += math.pi
test = max_chroma(L, math.degrees(hrad))
if test < chroma:
chroma = test
result = hrad
return result
def max_chroma_pastel(L):
H = math.degrees(_hrad_extremum(L))
return max_chroma(L, H)
def dot_product(a, b):
return sum(map(operator.mul, a, b))
def f(t):
if t > lab_e:
return (math.pow(t, 1.0 / 3.0))
else:
return (7.787 * t + 16.0 / 116.0)
def f_inv(t):
if math.pow(t, 3.0) > lab_e:
return (math.pow(t, 3.0))
else:
return (116.0 * t - 16.0) / lab_k
def from_linear(c):
if c <= 0.0031308:
return 12.92 * c
else:
return (1.055 * math.pow(c, 1.0 / 2.4) - 0.055)
def to_linear(c):
a = 0.055
if c > 0.04045:
return (math.pow((c + a) / (1.0 + a), 2.4))
else:
return (c / 12.92)
def rgb_prepare(triple):
ret = []
for ch in triple:
ch = round(ch, 3)
if ch < -0.0001 or ch > 1.0001:
raise ValueError(f"Illegal RGB value {ch:f}")
if ch < 0:
ch = 0
if ch > 1:
ch = 1
# Fix for Python 3, which rounds half to even (round(4.5) == 4),
# whereas Python 2 rounded half away from zero (round(4.5) == 5.0).
# Adding the small epsilon before rounding avoids the resulting
# off-by-one errors; tests pass on both Python 2 and Python 3.
ret.append(int(round(ch * 255 + 0.001, 0)))
return ret
def hex_to_rgb(hex):
if hex.startswith('#'):
hex = hex[1:]
r = int(hex[0:2], 16) / 255.0
g = int(hex[2:4], 16) / 255.0
b = int(hex[4:6], 16) / 255.0
return [r, g, b]
def rgb_to_hex(triple):
[r, g, b] = triple
return '#%02x%02x%02x' % tuple(rgb_prepare([r, g, b]))
def xyz_to_rgb(triple):
xyz = map(lambda row: dot_product(row, triple), m)
return list(map(from_linear, xyz))
def rgb_to_xyz(triple):
rgbl = list(map(to_linear, triple))
return list(map(lambda row: dot_product(row, rgbl), m_inv))
def xyz_to_luv(triple):
X, Y, Z = triple
if X == Y == Z == 0.0:
return [0.0, 0.0, 0.0]
varU = (4.0 * X) / (X + (15.0 * Y) + (3.0 * Z))
varV = (9.0 * Y) / (X + (15.0 * Y) + (3.0 * Z))
L = 116.0 * f(Y / refY) - 16.0
# Black will create a divide-by-zero error
if L == 0.0:
return [0.0, 0.0, 0.0]
U = 13.0 * L * (varU - refU)
V = 13.0 * L * (varV - refV)
return [L, U, V]
def luv_to_xyz(triple):
L, U, V = triple
if L == 0:
return [0.0, 0.0, 0.0]
varY = f_inv((L + 16.0) / 116.0)
varU = U / (13.0 * L) + refU
varV = V / (13.0 * L) + refV
Y = varY * refY
X = 0.0 - (9.0 * Y * varU) / ((varU - 4.0) * varV - varU * varV)
Z = (9.0 * Y - (15.0 * varV * Y) - (varV * X)) / (3.0 * varV)
return [X, Y, Z]
def luv_to_lch(triple):
L, U, V = triple
C = (math.pow(math.pow(U, 2) + math.pow(V, 2), (1.0 / 2.0)))
hrad = (math.atan2(V, U))
H = math.degrees(hrad)
if H < 0.0:
H = 360.0 + H
return [L, C, H]
def lch_to_luv(triple):
L, C, H = triple
Hrad = math.radians(H)
U = (math.cos(Hrad) * C)
V = (math.sin(Hrad) * C)
return [L, U, V]
def husl_to_lch(triple):
H, S, L = triple
if L > 99.9999999:
return [100, 0.0, H]
if L < 0.00000001:
return [0.0, 0.0, H]
mx = max_chroma(L, H)
C = mx / 100.0 * S
return [L, C, H]
def lch_to_husl(triple):
L, C, H = triple
if L > 99.9999999:
return [H, 0.0, 100.0]
if L < 0.00000001:
return [H, 0.0, 0.0]
mx = max_chroma(L, H)
S = C / mx * 100.0
return [H, S, L]
def huslp_to_lch(triple):
H, S, L = triple
if L > 99.9999999:
return [100, 0.0, H]
if L < 0.00000001:
return [0.0, 0.0, H]
mx = max_chroma_pastel(L)
C = mx / 100.0 * S
return [L, C, H]
def lch_to_huslp(triple):
L, C, H = triple
if L > 99.9999999:
return [H, 0.0, 100.0]
if L < 0.00000001:
return [H, 0.0, 0.0]
mx = max_chroma_pastel(L)
S = C / mx * 100.0
return [H, S, L]
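
A quick round-trip through the public API above (illustrative values; expect small floating-point drift across the chained color-space conversions):

# HUSL: hue in [0, 360), saturation and lightness in [0, 100].
hex_code = husl_to_hex(210.0, 80.0, 60.0)   # a '#rrggbb' string
h, s, l = hex_to_husl(hex_code)             # approximately (210.0, 80.0, 60.0)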

View File

@@ -0,0 +1,380 @@
"""
This module was copied from the scipy project.
In the process of copying, some methods were removed because they depended on
other parts of scipy (especially on compiled components), allowing seaborn to
have a simple and pure Python implementation. These include:
- integrate_gaussian
- integrate_box
- integrate_box_1d
- integrate_kde
- logpdf
- resample
Additionally, the numpy.linalg module was substituted for scipy.linalg,
and the examples section (with doctests) was removed from the docstring.
The original scipy license is copied below:
Copyright (c) 2001-2002 Enthought, Inc. 2003-2019, SciPy Developers.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
# -------------------------------------------------------------------------------
#
# Define classes for (uni/multi)-variate kernel density estimation.
#
# Currently, only Gaussian kernels are implemented.
#
# Written by: Robert Kern
#
# Date: 2004-08-09
#
# Modified: 2005-02-10 by Robert Kern.
# Contributed to SciPy
# 2005-10-07 by Robert Kern.
# Some fixes to match the new scipy_core
#
# Copyright 2004-2005 by Enthought, Inc.
#
# -------------------------------------------------------------------------------
import numpy as np
from numpy import (asarray, atleast_2d, reshape, zeros, newaxis, dot, exp, pi,
sqrt, power, atleast_1d, sum, ones, cov)
from numpy import linalg
__all__ = ['gaussian_kde']
class gaussian_kde:
"""Representation of a kernel-density estimate using Gaussian kernels.
Kernel density estimation is a way to estimate the probability density
function (PDF) of a random variable in a non-parametric way.
`gaussian_kde` works for both uni-variate and multi-variate data. It
includes automatic bandwidth determination. The estimation works best for
a unimodal distribution; bimodal or multi-modal distributions tend to be
oversmoothed.
Parameters
----------
dataset : array_like
Datapoints to estimate from. In case of univariate data this is a 1-D
array, otherwise a 2-D array with shape (# of dims, # of data).
bw_method : str, scalar or callable, optional
The method used to calculate the estimator bandwidth. This can be
'scott', 'silverman', a scalar constant or a callable. If a scalar,
this will be used directly as `kde.factor`. If a callable, it should
take a `gaussian_kde` instance as only parameter and return a scalar.
If None (default), 'scott' is used. See Notes for more details.
weights : array_like, optional
Weights of datapoints. This must be the same shape as dataset.
If None (default), the samples are assumed to be equally weighted.
Attributes
----------
dataset : ndarray
The dataset with which `gaussian_kde` was initialized.
d : int
Number of dimensions.
n : int
Number of datapoints.
neff : int
Effective number of datapoints.
.. versionadded:: 1.2.0
factor : float
The bandwidth factor, obtained from `kde.covariance_factor`, with which
the covariance matrix is multiplied.
covariance : ndarray
The covariance matrix of `dataset`, scaled by the calculated bandwidth
(`kde.factor`).
inv_cov : ndarray
The inverse of `covariance`.
Methods
-------
evaluate
__call__
integrate_gaussian
integrate_box_1d
integrate_box
integrate_kde
pdf
logpdf
resample
set_bandwidth
covariance_factor
Notes
-----
Bandwidth selection strongly influences the estimate obtained from the KDE
(much more so than the actual shape of the kernel). Bandwidth selection
can be done by a "rule of thumb", by cross-validation, by "plug-in
methods" or by other means; see [3]_, [4]_ for reviews. `gaussian_kde`
uses a rule of thumb, the default is Scott's Rule.
Scott's Rule [1]_, implemented as `scotts_factor`, is::
n**(-1./(d+4)),
with ``n`` the number of data points and ``d`` the number of dimensions.
In the case of unequally weighted points, `scotts_factor` becomes::
neff**(-1./(d+4)),
with ``neff`` the effective number of datapoints.
Silverman's Rule [2]_, implemented as `silverman_factor`, is::
(n * (d + 2) / 4.)**(-1. / (d + 4)).
or in the case of unequally weighted points::
(neff * (d + 2) / 4.)**(-1. / (d + 4)).
Good general descriptions of kernel density estimation can be found in [1]_
and [2]_, the mathematics for this multi-dimensional implementation can be
found in [1]_.
With a set of weighted samples, the effective number of datapoints ``neff``
is defined by::
neff = sum(weights)^2 / sum(weights^2)
as detailed in [5]_.
References
----------
.. [1] D.W. Scott, "Multivariate Density Estimation: Theory, Practice, and
Visualization", John Wiley & Sons, New York, Chicester, 1992.
.. [2] B.W. Silverman, "Density Estimation for Statistics and Data
Analysis", Vol. 26, Monographs on Statistics and Applied Probability,
Chapman and Hall, London, 1986.
.. [3] B.A. Turlach, "Bandwidth Selection in Kernel Density Estimation: A
Review", CORE and Institut de Statistique, Vol. 19, pp. 1-33, 1993.
.. [4] D.M. Bashtannyk and R.J. Hyndman, "Bandwidth selection for kernel
conditional density estimation", Computational Statistics & Data
Analysis, Vol. 36, pp. 279-298, 2001.
.. [5] Gray P. G., 1969, Journal of the Royal Statistical Society.
Series A (General), 132, 272
"""
def __init__(self, dataset, bw_method=None, weights=None):
self.dataset = atleast_2d(asarray(dataset))
if not self.dataset.size > 1:
raise ValueError("`dataset` input should have multiple elements.")
self.d, self.n = self.dataset.shape
if weights is not None:
self._weights = atleast_1d(weights).astype(float)
self._weights /= sum(self._weights)
if self.weights.ndim != 1:
raise ValueError("`weights` input should be one-dimensional.")
if len(self._weights) != self.n:
raise ValueError("`weights` input should be of length n")
self._neff = 1/sum(self._weights**2)
self.set_bandwidth(bw_method=bw_method)
def evaluate(self, points):
"""Evaluate the estimated pdf on a set of points.
Parameters
----------
points : (# of dimensions, # of points)-array
Alternatively, a (# of dimensions,) vector can be passed in and
treated as a single point.
Returns
-------
values : (# of points,)-array
The values at each point.
Raises
------
ValueError : if the dimensionality of the input points is different than
the dimensionality of the KDE.
"""
points = atleast_2d(asarray(points))
d, m = points.shape
if d != self.d:
if d == 1 and m == self.d:
# points was passed in as a row vector
points = reshape(points, (self.d, 1))
m = 1
else:
msg = f"points have dimension {d}, dataset has dimension {self.d}"
raise ValueError(msg)
output_dtype = np.common_type(self.covariance, points)
result = zeros((m,), dtype=output_dtype)
whitening = linalg.cholesky(self.inv_cov)
scaled_dataset = dot(whitening, self.dataset)
scaled_points = dot(whitening, points)
if m >= self.n:
# there are more points than data, so loop over data
for i in range(self.n):
diff = scaled_dataset[:, i, newaxis] - scaled_points
energy = sum(diff * diff, axis=0) / 2.0
result += self.weights[i]*exp(-energy)
else:
# loop over points
for i in range(m):
diff = scaled_dataset - scaled_points[:, i, newaxis]
energy = sum(diff * diff, axis=0) / 2.0
result[i] = sum(exp(-energy)*self.weights, axis=0)
result = result / self._norm_factor
return result
__call__ = evaluate
def scotts_factor(self):
"""Compute Scott's factor.
Returns
-------
s : float
Scott's factor.
"""
return power(self.neff, -1./(self.d+4))
def silverman_factor(self):
"""Compute the Silverman factor.
Returns
-------
s : float
The silverman factor.
"""
return power(self.neff*(self.d+2.0)/4.0, -1./(self.d+4))
# Default method to calculate bandwidth, can be overwritten by subclass
covariance_factor = scotts_factor
covariance_factor.__doc__ = """Computes the coefficient (`kde.factor`) that
multiplies the data covariance matrix to obtain the kernel covariance
matrix. The default is `scotts_factor`. A subclass can overwrite this
method to provide a different method, or set it through a call to
`kde.set_bandwidth`."""
def set_bandwidth(self, bw_method=None):
"""Compute the estimator bandwidth with given method.
The new bandwidth calculated after a call to `set_bandwidth` is used
for subsequent evaluations of the estimated density.
Parameters
----------
bw_method : str, scalar or callable, optional
The method used to calculate the estimator bandwidth. This can be
'scott', 'silverman', a scalar constant or a callable. If a
scalar, this will be used directly as `kde.factor`. If a callable,
it should take a `gaussian_kde` instance as only parameter and
return a scalar. If None (default), nothing happens; the current
`kde.covariance_factor` method is kept.
Notes
-----
.. versionadded:: 0.11
"""
if bw_method is None:
pass
elif bw_method == 'scott':
self.covariance_factor = self.scotts_factor
elif bw_method == 'silverman':
self.covariance_factor = self.silverman_factor
elif np.isscalar(bw_method) and not isinstance(bw_method, str):
self._bw_method = 'use constant'
self.covariance_factor = lambda: bw_method
elif callable(bw_method):
self._bw_method = bw_method
self.covariance_factor = lambda: self._bw_method(self)
else:
msg = "`bw_method` should be 'scott', 'silverman', a scalar " \
"or a callable."
raise ValueError(msg)
self._compute_covariance()
def _compute_covariance(self):
"""Computes the covariance matrix for each Gaussian kernel using
covariance_factor().
"""
self.factor = self.covariance_factor()
# Cache covariance and inverse covariance of the data
if not hasattr(self, '_data_inv_cov'):
self._data_covariance = atleast_2d(cov(self.dataset, rowvar=1,
bias=False,
aweights=self.weights))
self._data_inv_cov = linalg.inv(self._data_covariance)
self.covariance = self._data_covariance * self.factor**2
self.inv_cov = self._data_inv_cov / self.factor**2
self._norm_factor = sqrt(linalg.det(2*pi*self.covariance))
def pdf(self, x):
"""
Evaluate the estimated pdf on a provided set of points.
Notes
-----
This is an alias for `gaussian_kde.evaluate`. See the ``evaluate``
docstring for more details.
"""
return self.evaluate(x)
@property
def weights(self):
try:
return self._weights
except AttributeError:
self._weights = ones(self.n)/self.n
return self._weights
@property
def neff(self):
try:
return self._neff
except AttributeError:
self._neff = 1/sum(self.weights**2)
return self._neff
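
A minimal sketch of fitting and evaluating the vendored gaussian_kde above (the data is illustrative):

import numpy as np

rng = np.random.default_rng(0)
samples = rng.normal(size=200)              # univariate data, shape (n,)
kde = gaussian_kde(samples)                 # bandwidth via Scott's rule by default
density = kde(np.linspace(-3, 3, 50))       # estimated pdf at 50 grid points
kde.set_bandwidth(bw_method="silverman")    # or a scalar, or a callable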

View File

@@ -0,0 +1,461 @@
"""Extract reference documentation from the pypa/packaging source tree.
In the process of copying, some unused methods / classes were removed.
These include:
- parse()
- anything involving LegacyVersion
This software is made available under the terms of *either* of the licenses
found in LICENSE.APACHE or LICENSE.BSD. Contributions to this software are made
under the terms of *both* these licenses.
Vendored from:
- https://github.com/pypa/packaging/
- commit ba07d8287b4554754ac7178d177033ea3f75d489 (09/09/2021)
"""
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
import collections
import itertools
import re
from typing import Callable, Optional, SupportsInt, Tuple, Union
__all__ = ["Version", "InvalidVersion", "VERSION_PATTERN"]
# Vendored from https://github.com/pypa/packaging/blob/main/packaging/_structures.py
class InfinityType:
def __repr__(self) -> str:
return "Infinity"
def __hash__(self) -> int:
return hash(repr(self))
def __lt__(self, other: object) -> bool:
return False
def __le__(self, other: object) -> bool:
return False
def __eq__(self, other: object) -> bool:
return isinstance(other, self.__class__)
def __ne__(self, other: object) -> bool:
return not isinstance(other, self.__class__)
def __gt__(self, other: object) -> bool:
return True
def __ge__(self, other: object) -> bool:
return True
def __neg__(self: object) -> "NegativeInfinityType":
return NegativeInfinity
Infinity = InfinityType()
class NegativeInfinityType:
def __repr__(self) -> str:
return "-Infinity"
def __hash__(self) -> int:
return hash(repr(self))
def __lt__(self, other: object) -> bool:
return True
def __le__(self, other: object) -> bool:
return True
def __eq__(self, other: object) -> bool:
return isinstance(other, self.__class__)
def __ne__(self, other: object) -> bool:
return not isinstance(other, self.__class__)
def __gt__(self, other: object) -> bool:
return False
def __ge__(self, other: object) -> bool:
return False
def __neg__(self: object) -> InfinityType:
return Infinity
NegativeInfinity = NegativeInfinityType()
# Vendored from https://github.com/pypa/packaging/blob/main/packaging/version.py
InfiniteTypes = Union[InfinityType, NegativeInfinityType]
PrePostDevType = Union[InfiniteTypes, Tuple[str, int]]
SubLocalType = Union[InfiniteTypes, int, str]
LocalType = Union[
NegativeInfinityType,
Tuple[
Union[
SubLocalType,
Tuple[SubLocalType, str],
Tuple[NegativeInfinityType, SubLocalType],
],
...,
],
]
CmpKey = Tuple[
int, Tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType
]
LegacyCmpKey = Tuple[int, Tuple[str, ...]]
VersionComparisonMethod = Callable[
[Union[CmpKey, LegacyCmpKey], Union[CmpKey, LegacyCmpKey]], bool
]
_Version = collections.namedtuple(
"_Version", ["epoch", "release", "dev", "pre", "post", "local"]
)
class InvalidVersion(ValueError):
"""
An invalid version was found; users should refer to PEP 440.
"""
class _BaseVersion:
_key: Union[CmpKey, LegacyCmpKey]
def __hash__(self) -> int:
return hash(self._key)
# Please keep the duplicated `isinstance` check
# in the six comparisons hereunder
# unless you find a way to avoid adding overhead function calls.
def __lt__(self, other: "_BaseVersion") -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key < other._key
def __le__(self, other: "_BaseVersion") -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key <= other._key
def __eq__(self, other: object) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key == other._key
def __ge__(self, other: "_BaseVersion") -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key >= other._key
def __gt__(self, other: "_BaseVersion") -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key > other._key
def __ne__(self, other: object) -> bool:
if not isinstance(other, _BaseVersion):
return NotImplemented
return self._key != other._key
# Deliberately not anchored to the start and end of the string, to make it
# easier for 3rd party code to reuse
VERSION_PATTERN = r"""
v?
(?:
(?:(?P<epoch>[0-9]+)!)? # epoch
(?P<release>[0-9]+(?:\.[0-9]+)*) # release segment
(?P<pre> # pre-release
[-_\.]?
(?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
[-_\.]?
(?P<pre_n>[0-9]+)?
)?
(?P<post> # post release
(?:-(?P<post_n1>[0-9]+))
|
(?:
[-_\.]?
(?P<post_l>post|rev|r)
[-_\.]?
(?P<post_n2>[0-9]+)?
)
)?
(?P<dev> # dev release
[-_\.]?
(?P<dev_l>dev)
[-_\.]?
(?P<dev_n>[0-9]+)?
)?
)
(?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))? # local version
"""
class Version(_BaseVersion):
_regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
def __init__(self, version: str) -> None:
# Validate the version and parse it into pieces
match = self._regex.search(version)
if not match:
raise InvalidVersion(f"Invalid version: '{version}'")
# Store the parsed out pieces of the version
self._version = _Version(
epoch=int(match.group("epoch")) if match.group("epoch") else 0,
release=tuple(int(i) for i in match.group("release").split(".")),
pre=_parse_letter_version(match.group("pre_l"), match.group("pre_n")),
post=_parse_letter_version(
match.group("post_l"), match.group("post_n1") or match.group("post_n2")
),
dev=_parse_letter_version(match.group("dev_l"), match.group("dev_n")),
local=_parse_local_version(match.group("local")),
)
# Generate a key which will be used for sorting
self._key = _cmpkey(
self._version.epoch,
self._version.release,
self._version.pre,
self._version.post,
self._version.dev,
self._version.local,
)
def __repr__(self) -> str:
return f"<Version('{self}')>"
def __str__(self) -> str:
parts = []
# Epoch
if self.epoch != 0:
parts.append(f"{self.epoch}!")
# Release segment
parts.append(".".join(str(x) for x in self.release))
# Pre-release
if self.pre is not None:
parts.append("".join(str(x) for x in self.pre))
# Post-release
if self.post is not None:
parts.append(f".post{self.post}")
# Development release
if self.dev is not None:
parts.append(f".dev{self.dev}")
# Local version segment
if self.local is not None:
parts.append(f"+{self.local}")
return "".join(parts)
@property
def epoch(self) -> int:
_epoch: int = self._version.epoch
return _epoch
@property
def release(self) -> Tuple[int, ...]:
_release: Tuple[int, ...] = self._version.release
return _release
@property
def pre(self) -> Optional[Tuple[str, int]]:
_pre: Optional[Tuple[str, int]] = self._version.pre
return _pre
@property
def post(self) -> Optional[int]:
return self._version.post[1] if self._version.post else None
@property
def dev(self) -> Optional[int]:
return self._version.dev[1] if self._version.dev else None
@property
def local(self) -> Optional[str]:
if self._version.local:
return ".".join(str(x) for x in self._version.local)
else:
return None
@property
def public(self) -> str:
return str(self).split("+", 1)[0]
@property
def base_version(self) -> str:
parts = []
# Epoch
if self.epoch != 0:
parts.append(f"{self.epoch}!")
# Release segment
parts.append(".".join(str(x) for x in self.release))
return "".join(parts)
@property
def is_prerelease(self) -> bool:
return self.dev is not None or self.pre is not None
@property
def is_postrelease(self) -> bool:
return self.post is not None
@property
def is_devrelease(self) -> bool:
return self.dev is not None
@property
def major(self) -> int:
return self.release[0] if len(self.release) >= 1 else 0
@property
def minor(self) -> int:
return self.release[1] if len(self.release) >= 2 else 0
@property
def micro(self) -> int:
return self.release[2] if len(self.release) >= 3 else 0
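# Illustrative usage (an added example, not part of the vendored upstream
# module), exercising the parsed fields and the derived properties:
#
#   >>> v = Version("2!1.2.3rc1.post2.dev3+ubuntu.1")
#   >>> v.epoch, v.release, v.pre, v.post, v.dev, v.local
#   (2, (1, 2, 3), ('rc', 1), 2, 3, 'ubuntu.1')
#   >>> v.public, v.base_version
#   ('2!1.2.3rc1.post2.dev3', '2!1.2.3')
#   >>> Version("not a version")   # doctest: +IGNORE_EXCEPTION_DETAIL
#   Traceback (most recent call last):
#   InvalidVersion: Invalid version: 'not a version'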
def _parse_letter_version(
letter: str, number: Union[str, bytes, SupportsInt]
) -> Optional[Tuple[str, int]]:
if letter:
# We consider there to be an implicit 0 in a pre-release if there is
# not a numeral associated with it.
if number is None:
number = 0
# We normalize any letters to their lower case form
letter = letter.lower()
# We consider some words to be alternate spellings of other words and
# in those cases we want to normalize the spellings to our preferred
# spelling.
if letter == "alpha":
letter = "a"
elif letter == "beta":
letter = "b"
elif letter in ["c", "pre", "preview"]:
letter = "rc"
elif letter in ["rev", "r"]:
letter = "post"
return letter, int(number)
if not letter and number:
        # We assume that if we are given a number but no letter, the version
        # is using the implicit post release syntax (e.g. 1.0-1).
letter = "post"
return letter, int(number)
return None
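# Illustrative examples (added, not part of the vendored upstream module),
# showing the normalizations described in the comments above:
#
#   >>> _parse_letter_version("alpha", None)   # implicit 0, "alpha" -> "a"
#   ('a', 0)
#   >>> _parse_letter_version("c", "3")        # "c" is an alternate rc spelling
#   ('rc', 3)
#   >>> _parse_letter_version(None, "1")       # implicit post release, e.g. 1.0-1
#   ('post', 1)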
_local_version_separators = re.compile(r"[\._-]")
def _parse_local_version(local: str) -> Optional[LocalType]:
"""
Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
"""
if local is not None:
return tuple(
part.lower() if not part.isdigit() else int(part)
for part in _local_version_separators.split(local)
)
return None
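# Illustrative examples (added, not part of the vendored upstream module):
# ".", "_", and "-" all separate segments, numeric segments become ints, and
# alphanumeric segments are lower-cased:
#
#   >>> _parse_local_version("ubuntu-1")
#   ('ubuntu', 1)
#   >>> _parse_local_version("ABC.1.Twelve")
#   ('abc', 1, 'twelve')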
def _cmpkey(
epoch: int,
release: Tuple[int, ...],
pre: Optional[Tuple[str, int]],
post: Optional[Tuple[str, int]],
dev: Optional[Tuple[str, int]],
local: Optional[Tuple[SubLocalType]],
) -> CmpKey:
    # When we compare a release version, we want to compare it with all of the
    # trailing zeros removed. So we'll reverse the list, drop all the
    # now-leading zeros until we come to something non-zero, then re-reverse
    # the rest back into the correct order and make it a tuple to use as our
    # sorting key.
_release = tuple(
reversed(list(itertools.dropwhile(lambda x: x == 0, reversed(release))))
)
# We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0.
# We'll do this by abusing the pre segment, but we _only_ want to do this
# if there is not a pre or a post segment. If we have one of those then
# the normal sorting rules will handle this case correctly.
if pre is None and post is None and dev is not None:
_pre: PrePostDevType = NegativeInfinity
# Versions without a pre-release (except as noted above) should sort after
# those with one.
elif pre is None:
_pre = Infinity
else:
_pre = pre
# Versions without a post segment should sort before those with one.
if post is None:
_post: PrePostDevType = NegativeInfinity
else:
_post = post
# Versions without a development segment should sort after those with one.
if dev is None:
_dev: PrePostDevType = Infinity
else:
_dev = dev
if local is None:
# Versions without a local segment should sort before those with one.
_local: LocalType = NegativeInfinity
else:
        # Versions with a local segment need that segment parsed to implement
        # the sorting rules in PEP 440.
        # - Alphanumeric segments sort before numeric segments
        # - Alphanumeric segments sort lexicographically
        # - Numeric segments sort numerically
        # - Shorter versions sort before longer versions when the prefixes
        #   match exactly
_local = tuple(
(i, "") if isinstance(i, int) else (NegativeInfinity, i) for i in local
)
return epoch, _release, _pre, _post, _dev, _local
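# Illustrative consequences of the key construction above (added examples,
# not part of the vendored upstream module):
#
#   >>> Version("1.0.0") == Version("1.0")      # trailing zeros are dropped
#   True
#   >>> Version("1.0.dev0") < Version("1.0a0")  # dev-only sorts before any pre
#   True
#   >>> Version("1.0+abc") < Version("1.0+5")   # alphanumeric local < numeric
#   True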

File diff suppressed because it is too large

Some files were not shown because too many files have changed in this diff