Added variance as a unary reduction #593

Merged
Commits (29)
fa1fb4d
added variance as a unary reduction
jjwilke Sep 20, 2022
ee97269
fix variance eager implementation
jjwilke Sep 20, 2022
a990180
build fixes
jjwilke Aug 4, 2023
6246755
Added more tests.
aschaffer Aug 24, 2023
b0bb9ca
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 24, 2023
99b0171
Merge branch 'nv-legate:branch-23.09' into variance-unary-red-cherry-…
jjwilke Aug 25, 2023
9219d6e
Work-around (consistent) for 1D array.
aschaffer Aug 30, 2023
5aec1a2
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 30, 2023
eecb494
Fix for 1D arrays masquerading as Nd.
aschaffer Aug 30, 2023
dc6baf3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 30, 2023
920265e
Added relevant comment for 1D array branch.
aschaffer Aug 30, 2023
c504a05
Tests for keepdims.
aschaffer Aug 31, 2023
e5f5ad8
Clean-up.
aschaffer Aug 31, 2023
9cb06d0
Merge branch 'nv-legate:branch-23.09' into variance-unary-red-cherry-…
jjwilke Sep 1, 2023
d86cd4e
Fix for test_mean.py.
aschaffer Sep 6, 2023
9910935
Merge branch 'nv-legate:branch-23.09' into variance-unary-red-cherry-…
aschaffer Sep 7, 2023
3dc1e68
Dox fix: added var entry in RST file.
aschaffer Sep 12, 2023
68e41b4
Put ignore directive back.
aschaffer Sep 12, 2023
50baf92
Fixed doc.
aschaffer Sep 12, 2023
34003dd
More dox fixes.
aschaffer Sep 12, 2023
1e6befd
Merge branch 'nv-legate:branch-23.11' into variance-unary-red-cherry-…
aschaffer Sep 28, 2023
9924c75
Merge branch 'branch-23.11' into variance-unary-red-cherry-pick
manopapad Oct 5, 2023
1475898
Fix the check for the cases that trigger use of VARIANCE
manopapad Oct 6, 2023
3126836
Addressed minor review comments on dox.
aschaffer Oct 17, 2023
f244645
Commit fixes necessary for 1475898 to work
manopapad Oct 18, 2023
3f236a5
Addressed changes on np.square().
aschaffer Oct 18, 2023
cf309c8
Addressed changes on where arg.
aschaffer Oct 18, 2023
4d3294d
Addressed changes on module.py var doc string.
aschaffer Oct 18, 2023
52c73e2
Addressed changes on axis signature in var().
aschaffer Oct 18, 2023
136 changes: 116 additions & 20 deletions cunumeric/array.py
@@ -3084,12 +3084,40 @@ def max(
where=where,
)

def _summation_dtype(
self, dtype: Optional[np.dtype[Any]]
) -> np.dtype[Any]:
# Pick our dtype if it wasn't picked yet
if dtype is None:
if self.dtype.kind != "f" and self.dtype.kind != "c":
return np.dtype(np.float64)
else:
return self.dtype
return dtype

def _normalize_summation(
self, sum_array: Any, axis: Any, dtype: np.dtype[Any], ddof: int = 0
) -> None:
if axis is None:
divisor = reduce(lambda x, y: x * y, self.shape, 1) - ddof
else:
divisor = self.shape[axis] - ddof

# Divide by the number of things in the collapsed dimensions
# Pick the right kinds of division based on the dtype
if dtype.kind == "f" or dtype.kind == "c":
sum_array.__itruediv__(
np.array(divisor, dtype=sum_array.dtype),
)
else:
sum_array.__ifloordiv__(np.array(divisor, dtype=sum_array.dtype))

@add_boilerplate()
def mean(
self,
axis: Any = None,
dtype: Union[np.dtype[Any], None] = None,
out: Union[ndarray, None] = None,
dtype: Optional[np.dtype[Any]] = None,
out: Optional[ndarray] = None,
keepdims: bool = False,
) -> ndarray:
"""a.mean(axis=None, dtype=None, out=None, keepdims=False)
@@ -3112,12 +3140,9 @@ def mean(
"cunumeric.mean only supports int types for "
"'axis' currently"
)
# Pick our dtype if it wasn't picked yet
if dtype is None:
if self.dtype.kind != "f" and self.dtype.kind != "c":
dtype = np.dtype(np.float64)
else:
dtype = self.dtype

dtype = self._summation_dtype(dtype)

# Do the sum
if out is not None and out.dtype == dtype:
sum_array = self.sum(
@@ -3132,18 +3157,9 @@
dtype=dtype,
keepdims=keepdims,
)
if axis is None:
divisor = reduce(lambda x, y: x * y, self.shape, 1)
else:
divisor = self.shape[axis]
# Divide by the number of things in the collapsed dimensions
# Pick the right kinds of division based on the dtype
if dtype.kind == "f" or dtype.kind == "c":
sum_array.__itruediv__(
np.array(divisor, dtype=sum_array.dtype),
)
else:
sum_array.__ifloordiv__(np.array(divisor, dtype=sum_array.dtype))

self._normalize_summation(sum_array, axis, dtype)

# Convert to the output if we didn't already put it there
if out is not None and sum_array is not out:
assert out.dtype != sum_array.dtype
@@ -3152,6 +3168,86 @@
else:
return sum_array

@add_boilerplate()
def var(
self,
axis: Any = None,
dtype: Optional[np.dtype[Any]] = None,
out: Optional[ndarray] = None,
ddof: int = 0,
keepdims: bool = False,
*,
where: Union[bool, ndarray] = True,
) -> ndarray:
"""a.var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False)

Returns the variance of the array elements along given axis.

Refer to :func:`cunumeric.var` for full documentation.

See Also
--------
cunumeric.var : equivalent function

Availability
--------
Multiple GPUs, Multiple CPUs

"""
# This could be computed in a single pass through the array by
# computing both <x^2> and <x> and then taking <x^2> - <x>^2, but
# that takes the difference of two large numbers and is numerically
# unstable. Instead, the mean must be computed first and the variance
# computed directly as <(x-mu)^2>, which requires two passes through
# the data: one to compute the mean, one to compute the variance.
# see https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
# TODO(https://github.com/nv-legate/cunumeric/issues/590)

dtype = self._summation_dtype(dtype)
# calculate the mean, but keep the dimensions so that the
# mean can be broadcast against the original array
mu = self.mean(axis=axis, dtype=dtype, keepdims=True)

# 1D arrays (or equivalent) should benefit from this unary reduction:
#
if axis is None or self.ndim == 1 or max(self.shape) == self.size:
# this is a scalar reduction and we can optimize this as a single
# pass through a scalar reduction
result = self._perform_unary_reduction(
UnaryRedCode.VARIANCE,
self,
axis=None,
dtype=dtype,
out=out,
keepdims=keepdims,
where=where,
args=(mu,),
)
else:
# TODO(https://github.com/nv-legate/cunumeric/issues/591)
# there isn't really support for generic binary reductions right now;
# all of the current binary reductions are boolean reductions like
# allclose. Implementing this in a single pass would require a variant
# of einsum/dot that produces (self-mu)*(self-mu) rather than self*mu.
# For now, we have to compute delta = self-mu in a first pass and then
# compute delta*delta in a second pass
delta = self - mu

result = self._perform_unary_reduction(
UnaryRedCode.SUM_SQUARES,
delta,
axis=axis,
dtype=dtype,
out=out,
keepdims=keepdims,
where=where,
)

self._normalize_summation(result, axis=axis, dtype=dtype, ddof=ddof)

return result

@add_boilerplate()
def min(
self,
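The comments in var() above motivate the two-pass formula. A minimal NumPy sketch (illustration only, not cunumeric internals) of the instability the comment describes:

```python
import numpy as np

rng = np.random.default_rng(0)
# Large mean, small spread: the worst case for the one-pass formula.
x = rng.normal(loc=1e8, scale=1.0, size=100_000)

# One-pass <x^2> - <x>^2: subtracts two numbers of order 1e16,
# so float64 keeps only a few significant digits of the difference.
one_pass = np.mean(x * x) - np.mean(x) ** 2

# Two-pass <(x - mu)^2>: compute the mean first, then the centered sum,
# which is what ndarray.var above does.
mu = np.mean(x)
two_pass = np.mean((x - mu) ** 2)

print(one_pass)   # inaccurate; can even come out negative
print(two_pass)   # ~1.0, matching np.var(x)
```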
4 changes: 4 additions & 0 deletions cunumeric/config.py
@@ -187,6 +187,8 @@ class _CunumericSharedLib:
CUNUMERIC_RED_NANSUM: int
CUNUMERIC_RED_PROD: int
CUNUMERIC_RED_SUM: int
CUNUMERIC_RED_SUM_SQUARES: int
CUNUMERIC_RED_VARIANCE: int
CUNUMERIC_REPEAT: int
CUNUMERIC_SCALAR_UNARY_RED: int
CUNUMERIC_SCAN_GLOBAL: int
@@ -452,6 +454,8 @@ class UnaryRedCode(IntEnum):
NANSUM = _cunumeric.CUNUMERIC_RED_NANSUM
PROD = _cunumeric.CUNUMERIC_RED_PROD
SUM = _cunumeric.CUNUMERIC_RED_SUM
SUM_SQUARES = _cunumeric.CUNUMERIC_RED_SUM_SQUARES
VARIANCE = _cunumeric.CUNUMERIC_RED_VARIANCE


# Match these to CuNumericBinaryOpCode in cunumeric_c.h
4 changes: 4 additions & 0 deletions cunumeric/deferred.py
@@ -158,6 +158,8 @@ def __init__(

_UNARY_RED_TO_REDUCTION_OPS: Dict[int, int] = {
UnaryRedCode.SUM: ReductionOp.ADD,
UnaryRedCode.SUM_SQUARES: ReductionOp.ADD,
UnaryRedCode.VARIANCE: ReductionOp.ADD,
UnaryRedCode.PROD: ReductionOp.MUL,
UnaryRedCode.MAX: ReductionOp.MAX,
UnaryRedCode.MIN: ReductionOp.MIN,
@@ -208,6 +210,8 @@ def min_identity(

_UNARY_RED_IDENTITIES: Dict[UnaryRedCode, Callable[[Any], Any]] = {
UnaryRedCode.SUM: lambda _: 0,
UnaryRedCode.SUM_SQUARES: lambda _: 0,
UnaryRedCode.VARIANCE: lambda _: 0,
UnaryRedCode.PROD: lambda _: 1,
UnaryRedCode.MIN: min_identity,
UnaryRedCode.MAX: max_identity,
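Both new codes map to ReductionOp.ADD with identity 0 because each element contributes a partial sum (of squares, or of squared deviations) that combines associatively. A schematic sketch of that fold (plain Python for illustration, not the Legate runtime):

```python
from functools import reduce

def add_fold(partials, identity=0.0):
    # ReductionOp.ADD with identity 0: combine per-element contributions.
    return reduce(lambda acc, v: acc + v, partials, identity)

x = [1.0, 2.0, 3.0, 4.0]
mu = sum(x) / len(x)

# SUM_SQUARES: each element contributes x_i**2 to the ADD fold.
sum_squares = add_fold(v * v for v in x)          # 30.0

# VARIANCE: each element contributes (x_i - mu)**2; division by
# N - ddof happens afterwards, in ndarray._normalize_summation.
sum_sq_dev = add_fold((v - mu) ** 2 for v in x)   # 5.0
```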
14 changes: 14 additions & 0 deletions cunumeric/eager.py
@@ -1524,6 +1524,20 @@ def unary_reduction(
else where.array,
**kws,
)
elif op == UnaryRedCode.SUM_SQUARES:
squared = np.multiply(rhs.array, rhs.array)
np.sum(squared, out=self.array, axis=orig_axis, keepdims=keepdims)
elif op == UnaryRedCode.VARIANCE:
(mu,) = args
centered = np.subtract(rhs.array, mu)
squares = np.multiply(centered, centered)
np.sum(
squares,
axis=orig_axis,
where=where,
keepdims=keepdims,
out=self.array,
)
elif op == UnaryRedCode.CONTAINS:
self.array.fill(args[0] in rhs.array)
elif op == UnaryRedCode.COUNT_NONZERO:
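The eager fallbacks above are plain NumPy; as a sanity sketch (assuming a 2-D input, ddof=0, and where=True), the VARIANCE path agrees with np.var once normalized:

```python
import numpy as np

x = np.arange(12, dtype=np.float64).reshape(3, 4)
mu = x.mean(axis=0, keepdims=True)

# VARIANCE path: center, square, sum along the axis...
centered = np.subtract(x, mu)
squares = np.multiply(centered, centered)
sum_sq = np.sum(squares, axis=0)

# ...then _normalize_summation divides by N - ddof (here N = 3, ddof = 0).
assert np.allclose(sum_sq / x.shape[0], np.var(x, axis=0))

# SUM_SQUARES path: just the sum of element-wise squares.
assert np.allclose(np.sum(np.multiply(x, x), axis=0), np.sum(x * x, axis=0))
```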
80 changes: 80 additions & 0 deletions cunumeric/module.py
@@ -7061,6 +7061,86 @@ def mean(
return a.mean(axis=axis, dtype=dtype, out=out, keepdims=keepdims)


@add_boilerplate("a")
def var(
a: ndarray,
axis: Optional[Union[int, tuple[int, ...]]] = None,
dtype: Optional[np.dtype[Any]] = None,
out: Optional[ndarray] = None,
ddof: int = 0,
keepdims: bool = False,
*,
where: Union[bool, ndarray] = True,
) -> ndarray:
"""
Compute the variance along the specified axis.

Returns the variance of the array elements, a measure of the spread of
a distribution. The variance is computed for the flattened array
by default, otherwise over the specified axis.

Parameters
----------
a : array_like
Array containing numbers whose variance is desired. If `a` is not an
array, a conversion is attempted.
axis : None or int or tuple[int], optional
Axis or axes along which the variance is computed. The default is to
compute the variance of the flattened array.

If this is a tuple of ints, a variance is performed over multiple axes,
instead of a single axis or all the axes as before.
dtype : data-type, optional
Type to use in computing the variance. For arrays of integer type
the default is float64; for arrays of float types
it is the same as the array type.
out : ndarray, optional
Alternate output array in which to place the result. It must have the
same shape as the expected output, but the type is cast if necessary.
See `ufuncs-output-type` for more details.
ddof : int, optional
“Delta Degrees of Freedom”: the divisor used in the calculation is
N - ddof, where N represents the number of elements. By default
ddof is zero.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left
in the result as dimensions with size one. With this option,
the result will broadcast correctly against the input array.

If the default value is passed, then `keepdims` will not be
passed through to the `var` method of sub-classes of
`ndarray`; however, any non-default value will be. If the
sub-class' method does not implement `keepdims`, any
exceptions will be raised.
where : array_like of bool, optional
A boolean array which is broadcast to match the dimensions of the array,
and selects elements to include in the reduction.

Returns
-------
m : ndarray, see dtype parameter above
If `out=None`, returns a new array of the dtype described above
containing the variance values, otherwise a reference to the output
array is returned.

See Also
--------
numpy.var

Availability
--------
Multiple GPUs, Multiple CPUs
"""
return a.var(
axis=axis,
dtype=dtype,
out=out,
ddof=ddof,
keepdims=keepdims,
where=where,
)
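
A short usage sketch for the new function (assuming cunumeric is importable; the same calls work against numpy for comparison):

```python
import cunumeric as num

a = num.array([[1.0, 2.0], [3.0, 4.0]])

print(num.var(a))                         # 1.25: variance of the flattened array
print(num.var(a, axis=0))                 # [1. 1.]: per-column variance
print(num.var(a, ddof=1))                 # ~1.6667: sample variance, divisor N - 1
print(num.var(a, axis=1, keepdims=True))  # shape (2, 1): reduced axis kept
```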


# Histograms


1 change: 1 addition & 0 deletions docs/cunumeric/source/api/statistics.rst
@@ -10,6 +10,7 @@ Averages and variances
:toctree: generated/

mean
var


Histograms
2 changes: 2 additions & 0 deletions src/cunumeric/cunumeric_c.h
@@ -150,6 +150,8 @@ enum CuNumericUnaryRedCode {
CUNUMERIC_RED_NANSUM,
CUNUMERIC_RED_PROD,
CUNUMERIC_RED_SUM,
CUNUMERIC_RED_SUM_SQUARES,
CUNUMERIC_RED_VARIANCE
Contributor review comment, suggested change (add a trailing comma):
CUNUMERIC_RED_VARIANCE
CUNUMERIC_RED_VARIANCE,

};

// Match these to BinaryOpCode in config.py
12 changes: 8 additions & 4 deletions src/cunumeric/unary/scalar_unary_red_template.inl
@@ -46,6 +46,7 @@ struct ScalarUnaryRed {
Point<DIM> origin;
Point<DIM> shape;
RHS to_find;
RHS mu;
bool dense;

struct DenseReduction {};
@@ -61,6 +62,7 @@

out = args.out.reduce_accessor<LG_OP, true, 1>();
if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { to_find = args.args[0].scalar<RHS>(); }
if constexpr (OP_CODE == UnaryRedCode::VARIANCE) { mu = args.args[0].scalar<RHS>(); }

#ifndef LEGATE_BOUNDS_CHECKS
// Check to see if this is dense or not
@@ -79,22 +81,24 @@
OP_CODE == UnaryRedCode::NANARGMAX || OP_CODE == UnaryRedCode::NANARGMIN) {
auto p = pitches.unflatten(idx, origin);
OP::template fold<true>(lhs, OP::convert(p, shape, identity, inptr[idx]));
} else if constexpr (OP_CODE == UnaryRedCode::VARIANCE) {
OP::template fold<true>(lhs, OP::convert(inptr[idx] - mu, identity));
} else {
OP::template fold<true>(lhs, OP::convert(inptr[idx], identity));
}
}

__CUDA_HD__ void operator()(LHS& lhs, size_t idx, LHS identity, SparseReduction) const noexcept
{
auto p = pitches.unflatten(idx, origin);
if constexpr (OP_CODE == UnaryRedCode::CONTAINS) {
auto point = pitches.unflatten(idx, origin);
if (in[point] == to_find) { lhs = true; }
if (in[p] == to_find) { lhs = true; }
} else if constexpr (OP_CODE == UnaryRedCode::ARGMAX || OP_CODE == UnaryRedCode::ARGMIN ||
OP_CODE == UnaryRedCode::NANARGMAX || OP_CODE == UnaryRedCode::NANARGMIN) {
auto p = pitches.unflatten(idx, origin);
OP::template fold<true>(lhs, OP::convert(p, shape, identity, in[p]));
} else if constexpr (OP_CODE == UnaryRedCode::VARIANCE) {
OP::template fold<true>(lhs, OP::convert(in[p] - mu, identity));
} else {
auto p = pitches.unflatten(idx, origin);
OP::template fold<true>(lhs, OP::convert(in[p], identity));
}
}