Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

patsy pickles #104

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions patsy/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,20 +46,33 @@
pandas_Categorical_categories,
pandas_Categorical_codes,
safe_issubdtype,
no_pickling, assert_no_pickling)
no_pickling, assert_no_pickling, check_pickle_version)

if have_pandas:
import pandas

# Objects of this type will always be treated as categorical, with the
# specified levels and contrast (if given).

class _CategoricalBox(object):
    """Container pairing ``data`` with an optional contrast and levels.

    Objects of this type are always treated as categorical, with the
    specified levels and contrast (if given). The class only stores the
    three values; it performs no validation of its own.
    """

    def __init__(self, data, contrast, levels):
        self.data = data
        self.contrast = contrast
        self.levels = levels

    def __getstate__(self):
        """Return the pickle payload: a format version plus the attributes.

        The 'version' entry is what ``__setstate__`` hands to
        ``check_pickle_version`` when the object is restored.
        """
        return {'version': 0, 'data': self.data, 'contrast': self.contrast,
                'levels': self.levels}

    def __setstate__(self, pickle):
        """Restore ``data``, ``contrast`` and ``levels`` from a state dict.

        ``pickle`` is the dict built by ``__getstate__``. The stored
        version is passed to ``check_pickle_version`` together with the
        expected version 0 and this class's name before any attribute is
        assigned.
        """
        # NOTE(review): check_pickle_version lives in patsy.util and is not
        # shown here; presumably it raises on a version mismatch -- confirm.
        check_pickle_version(pickle['version'], 0, self.__class__.__name__)
        self.data = pickle['data']
        self.contrast = pickle['contrast']
        self.levels = pickle['levels']


def C(data, contrast=None, levels=None):
"""
Expand Down Expand Up @@ -120,7 +133,19 @@ def test_C():
assert c4.contrast == "NEW CONTRAST"
assert c4.levels == "LEVELS"

assert_no_pickling(c4)

def test_C_pickle():
    """Round-trip several ``C(...)`` results through pickle and compare."""
    from six.moves import cPickle as pickle
    from patsy.util import assert_pickled_equals

    def check_roundtrip(obj):
        # Dump and immediately reload, then compare with the original.
        assert_pickled_equals(obj, pickle.loads(pickle.dumps(obj)))

    data_only = C("asdf")
    check_roundtrip(data_only)

    fully_specified = C("DATA", "CONTRAST", "LEVELS")
    check_roundtrip(fully_specified)

    # C applied to the result of an earlier C call, overriding one field
    # at a time.
    new_levels = C(fully_specified, levels="NEW LEVELS")
    check_roundtrip(new_levels)

    new_contrast = C(fully_specified, "NEW CONTRAST")
    check_roundtrip(new_contrast)


def guess_categorical(data):
if safe_is_pandas_categorical(data):
Expand Down
15 changes: 11 additions & 4 deletions patsy/constraint.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from patsy.origin import Origin
from patsy.util import (atleast_2d_column_default,
repr_pretty_delegate, repr_pretty_impl,
no_pickling, assert_no_pickling)
no_pickling, assert_no_pickling, check_pickle_version)
from patsy.infix_parser import Token, Operator, infix_parse
from patsy.parse_formula import _parsing_error_test

Expand Down Expand Up @@ -69,7 +69,16 @@ def _repr_pretty_(self, p, cycle):
return repr_pretty_impl(p, self,
[self.variable_names, self.coefs, self.constants])

def __getstate__(self):
    """Return a versioned dict holding the state needed for pickling.

    The dict carries ``variable_names``, ``coefs`` and ``constants``
    alongside a 'version' entry of 0.
    """
    return {'version': 0, 'variable_names': self.variable_names,
            'coefs': self.coefs, 'constants': self.constants}

def __setstate__(self, pickle):
    """Restore ``variable_names``, ``coefs`` and ``constants``.

    ``pickle`` is the state dict; its 'version' entry is handed to
    ``check_pickle_version`` with the expected version 0 and this class's
    name before any attribute is assigned.
    """
    owner = self.__class__.__name__
    check_pickle_version(pickle['version'], 0, owner)
    for field in ('variable_names', 'coefs', 'constants'):
        setattr(self, field, pickle[field])


@classmethod
def combine(cls, constraints):
Expand Down Expand Up @@ -125,8 +134,6 @@ def test_LinearConstraint():
assert_raises(ValueError, LinearConstraint, ["a", "b"],
np.zeros((0, 2)))

assert_no_pickling(lc)

def test_LinearConstraint_combine():
comb = LinearConstraint.combine([LinearConstraint(["a", "b"], [1, 0]),
LinearConstraint(["a", "b"], [0, 1], [1])])
Expand Down
14 changes: 11 additions & 3 deletions patsy/contrasts.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from patsy import PatsyError
from patsy.util import (repr_pretty_delegate, repr_pretty_impl,
safe_issubdtype,
no_pickling, assert_no_pickling)
no_pickling, assert_no_pickling, check_pickle_version)

class ContrastMatrix(object):
"""A simple container for a matrix used for coding categorical factors.
Expand Down Expand Up @@ -47,7 +47,16 @@ def __init__(self, matrix, column_suffixes):
def _repr_pretty_(self, p, cycle):
repr_pretty_impl(p, self, [self.matrix, self.column_suffixes])

def __getstate__(self):
    """Return a versioned dict with ``matrix`` and ``column_suffixes``."""
    return {'version': 0, 'matrix': self.matrix,
            'column_suffixes': self.column_suffixes}

def __setstate__(self, pickle):
    """Restore ``matrix`` and ``column_suffixes`` from a state dict.

    The stored 'version' is passed to ``check_pickle_version`` with the
    expected version 0 and this class's name (as the ``name`` keyword)
    before the attributes are assigned.
    """
    stored_version = pickle['version']
    check_pickle_version(stored_version, 0, name=self.__class__.__name__)
    for field in ('matrix', 'column_suffixes'):
        setattr(self, field, pickle[field])


def test_ContrastMatrix():
cm = ContrastMatrix([[1, 0], [0, 1]], ["a", "b"])
Expand All @@ -59,7 +68,6 @@ def test_ContrastMatrix():
from nose.tools import assert_raises
assert_raises(PatsyError, ContrastMatrix, [[1], [0]], ["a", "b"])

assert_no_pickling(cm)

# This always produces an object of the type that Python calls 'str' (whether
# that be a Python 2 string-of-bytes or a Python 3 string-of-unicode). It does
Expand Down
42 changes: 36 additions & 6 deletions patsy/desc.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from patsy.eval import EvalEnvironment, EvalFactor
from patsy.util import uniqueify_list
from patsy.util import repr_pretty_delegate, repr_pretty_impl
from patsy.util import no_pickling, assert_no_pickling
from patsy.util import no_pickling, assert_no_pickling, check_pickle_version

# These are made available in the patsy.* namespace
__all__ = ["Term", "ModelDesc", "INTERCEPT"]
Expand Down Expand Up @@ -65,17 +65,25 @@ def name(self):
else:
return "Intercept"

def __getstate__(self):
    """Return a versioned dict containing only ``factors``."""
    return {'version': 0, 'factors': self.factors}

def __setstate__(self, pickle):
    """Restore ``factors`` from a state dict.

    The stored 'version' is passed to ``check_pickle_version`` with the
    expected version 0 and this class's name before ``factors`` is set.
    """
    owner = self.__class__.__name__
    check_pickle_version(pickle['version'], 0, owner)
    restored_factors = pickle['factors']
    self.factors = restored_factors


INTERCEPT = Term([])


class _MockFactor(object):
    """Test double that stores a name and returns it from ``name()``."""

    def __init__(self, name):
        # Kept on a private attribute; read back through name().
        self._name = name

    def name(self):
        """Return the name supplied to the constructor."""
        return self._name


def test_Term():
assert Term([1, 2, 1]).factors == (1, 2)
assert Term([1, 2]) == Term([2, 1])
Expand All @@ -86,7 +94,12 @@ def test_Term():
assert Term([f2, f1]).name() == "b:a"
assert Term([]).name() == "Intercept"

assert_no_pickling(Term([]))
from six.moves import cPickle as pickle
from patsy.util import assert_pickled_equals
t = Term([f1, f2])
t2 = pickle.loads(pickle.dumps(t, pickle.HIGHEST_PROTOCOL))
assert_pickled_equals(t, t2)


class ModelDesc(object):
"""A simple container representing the termlists parsed from a formula.
Expand Down Expand Up @@ -148,7 +161,7 @@ def term_code(term):
if term != INTERCEPT]
result += " + ".join(term_names)
return result

@classmethod
def from_formula(cls, tree_or_string):
"""Construct a :class:`ModelDesc` from a formula string.
Expand All @@ -166,7 +179,15 @@ def from_formula(cls, tree_or_string):
assert isinstance(value, cls)
return value

def __getstate__(self):
    """Return a versioned dict with ``lhs_termlist`` and ``rhs_termlist``."""
    return {'version': 0, 'lhs_termlist': self.lhs_termlist,
            'rhs_termlist': self.rhs_termlist}

def __setstate__(self, pickle):
    """Restore ``lhs_termlist`` and ``rhs_termlist`` from a state dict.

    The stored 'version' is passed to ``check_pickle_version`` with the
    expected version 0 and this class's name before the attributes are
    assigned.
    """
    owner = self.__class__.__name__
    check_pickle_version(pickle['version'], 0, owner)
    for field in ('lhs_termlist', 'rhs_termlist'):
        setattr(self, field, pickle[field])


def test_ModelDesc():
f1 = _MockFactor("a")
Expand All @@ -177,7 +198,11 @@ def test_ModelDesc():
print(m.describe())
assert m.describe() == "1 + a ~ 0 + a + a:b"

assert_no_pickling(m)
# assert_no_pickling(m)
from six.moves import cPickle as pickle
from patsy.util import assert_pickled_equals
m2 = pickle.loads(pickle.dumps(m, pickle.HIGHEST_PROTOCOL))
assert_pickled_equals(m, m2)

assert ModelDesc([], []).describe() == "~ 0"
assert ModelDesc([INTERCEPT], []).describe() == "1 ~ 0"
Expand Down Expand Up @@ -211,6 +236,11 @@ def _pretty_repr_(self, p, cycle): # pragma: no cover

__getstate__ = no_pickling


def test_IntermediateExpr_smoke():
    """Smoke test: run ``assert_no_pickling`` on an ``IntermediateExpr``."""
    expr = IntermediateExpr(False, None, True, [])
    assert_no_pickling(expr)


def _maybe_add_intercept(doit, terms):
if doit:
return (INTERCEPT,) + terms
Expand Down
68 changes: 57 additions & 11 deletions patsy/design_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,11 @@
from patsy.compat import OrderedDict
from patsy.util import (repr_pretty_delegate, repr_pretty_impl,
safe_issubdtype,
no_pickling, assert_no_pickling)
no_pickling, assert_no_pickling, check_pickle_version)
from patsy.constraint import linear_constraint
from patsy.contrasts import ContrastMatrix
from patsy.desc import ModelDesc, Term
from patsy import __version__

class FactorInfo(object):
"""A FactorInfo object is a simple class that provides some metadata about
Expand Down Expand Up @@ -120,7 +121,19 @@ def __repr__(self):
kwlist.append(("categories", self.categories))
repr_pretty_impl(p, self, [], kwlist)

__getstate__ = no_pickling
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This class is also using the default __getstate__?

def __getstate__(self):
    """Return a versioned dict of the attributes stored for pickling.

    The result holds 'version' (0) followed by ``factor``, ``type``,
    ``state``, ``num_columns`` and ``categories``.
    """
    payload = {'version': 0}
    for field in ('factor', 'type', 'state', 'num_columns', 'categories'):
        payload[field] = getattr(self, field)
    return payload

def __setstate__(self, pickle):
    """Restore the five pickled attributes from a state dict.

    The stored 'version' is passed to ``check_pickle_version`` with the
    expected version 0 and this class's name; then ``factor``, ``type``,
    ``state``, ``num_columns`` and ``categories`` are assigned in that
    order.
    """
    owner = self.__class__.__name__
    check_pickle_version(pickle['version'], 0, owner)
    for field in ('factor', 'type', 'state', 'num_columns', 'categories'):
        setattr(self, field, pickle[field])


def test_FactorInfo():
fi1 = FactorInfo("asdf", "numerical", {"a": 1}, num_columns=10)
Expand Down Expand Up @@ -234,7 +247,17 @@ def _repr_pretty_(self, p, cycle):
("contrast_matrices", self.contrast_matrices),
("num_columns", self.num_columns)])

def __getstate__(self):
    """Return a versioned dict of the attributes stored for pickling.

    The dict holds ``factors``, ``contrast_matrices`` and ``num_columns``
    alongside a 'version' entry of 0.
    """
    return {'version': 0, 'factors': self.factors,
            'contrast_matrices': self.contrast_matrices,
            'num_columns': self.num_columns}

def __setstate__(self, pickle):
    """Restore ``factors``, ``contrast_matrices`` and ``num_columns``.

    The stored 'version' is passed to ``check_pickle_version`` with the
    expected version 0 and this class's name before the attributes are
    assigned.
    """
    owner = self.__class__.__name__
    check_pickle_version(pickle['version'], 0, owner)
    for field in ('factors', 'contrast_matrices', 'num_columns'):
        setattr(self, field, pickle[field])


def test_SubtermInfo():
cm = ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"])
Expand Down Expand Up @@ -691,16 +714,32 @@ def from_array(cls, array_like, default_column_prefix="column"):
for i in columns]
return DesignInfo(column_names)

def __getstate__(self):
    """Return a versioned dict of the attributes stored for pickling.

    The dict holds ``column_name_indexes``, ``factor_infos``,
    ``term_codings``, ``term_slices`` and ``term_name_slices`` alongside a
    'version' entry of 0.
    """
    return {'version': 0, 'column_name_indexes': self.column_name_indexes,
            'factor_infos': self.factor_infos,
            'term_codings': self.term_codings,
            'term_slices': self.term_slices,
            'term_name_slices': self.term_name_slices}

def __setstate__(self, pickle):
    """Restore the five pickled attributes from a state dict.

    The stored 'version' is passed to ``check_pickle_version`` with the
    expected version 0 and this class's name; then
    ``column_name_indexes``, ``factor_infos``, ``term_codings``,
    ``term_slices`` and ``term_name_slices`` are assigned in that order.
    """
    owner = self.__class__.__name__
    check_pickle_version(pickle['version'], 0, owner)
    restored_fields = ('column_name_indexes', 'factor_infos',
                       'term_codings', 'term_slices', 'term_name_slices')
    for field in restored_fields:
        setattr(self, field, pickle[field])


class _MockFactor(object):
    """Module-level test double exposing only a ``name()`` method."""

    def __init__(self, name):
        # The name is held privately and handed back by name().
        self._name = name

    def name(self):
        """Return the name this mock was created with."""
        return self._name


def test_DesignInfo():
from nose.tools import assert_raises
class _MockFactor(object):
def __init__(self, name):
self._name = name

def name(self):
return self._name
f_x = _MockFactor("x")
f_y = _MockFactor("y")
t_x = Term([f_x])
Expand Down Expand Up @@ -735,7 +774,11 @@ def name(self):
# smoke test
repr(di)

assert_no_pickling(di)
# Pickling check
from six.moves import cPickle as pickle
from patsy.util import assert_pickled_equals
di2 = pickle.loads(pickle.dumps(di, pickle.HIGHEST_PROTOCOL))
assert_pickled_equals(di, di2)

# One without term objects
di = DesignInfo(["a1", "a2", "a3", "b"])
Expand All @@ -756,6 +799,9 @@ def name(self):
assert di.slice("a3") == slice(2, 3)
assert di.slice("b") == slice(3, 4)

di2 = pickle.loads(pickle.dumps(di, pickle.HIGHEST_PROTOCOL))
assert_pickled_equals(di, di2)

# Check intercept handling in describe()
assert DesignInfo(["Intercept", "a", "b"]).describe() == "1 + a + b"

Expand Down
Loading