Skip to content
This repository has been archived by the owner on Jan 9, 2023. It is now read-only.

Commit

Permalink
Merge pull request #54 from chrisburr/develop
Browse files Browse the repository at this point in the history
Add support for columns of arrays and general tidying
  • Loading branch information
chrisburr authored Feb 3, 2018
2 parents 1dc249c + 354bc15 commit 4ab672f
Show file tree
Hide file tree
Showing 56 changed files with 215 additions and 84 deletions.
52 changes: 26 additions & 26 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,36 +1,27 @@
#sudo: false
# travis-ci.org build & test configuration
language: python

matrix:
include:
- python: 2.7
env: PYTHON=2.7 ROOT=5.34.32
- python: 2.7
env: PYTHON=2.7 ROOT=6.04
- python: 3.4
env: PYTHON=3.4 ROOT=5.34.32
- python: 3.4
env: PYTHON=3.4 ROOT=6.04
- python: 3.5
env: PYTHON=3.4 ROOT=5.34.32
- python: 3.5
env: PYTHON=3.4 ROOT=6.04
- python: 3.6
env: PYTHON=3.4 ROOT=5.34.32
- python: 3.6
env: PYTHON=3.4 ROOT=6.04
#install: source ci/install.sh
install:
- if [ "${TRAVIS_OS_NAME}" == "osx" ]; then curl --silent http://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh -o miniconda.sh; fi
- if [ "${TRAVIS_OS_NAME}" == "linux" ]; then wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; fi
include:
- python: 2.7
env: PYTHON=2.7 ROOT=5.34.32
- python: 2.7
env: PYTHON=2.7 ROOT=6.04
- python: 3.4
env: PYTHON=3.4 ROOT=5.34.32
- python: 3.4
env: PYTHON=3.4 ROOT=6.04

install:
- if [ "${TRAVIS_OS_NAME}" == "osx" ]; then curl --silent http://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh
-o miniconda.sh; fi
- if [ "${TRAVIS_OS_NAME}" == "linux" ]; then wget -nv http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
-O miniconda.sh; fi
- bash miniconda.sh -b -p $HOME/miniconda
- export PATH="$HOME/miniconda/bin:$PATH"
- hash -r
- conda config --set always_yes yes --set changeps1 no
- conda update -q conda
- conda info -a # Useful for debugging any issues with conda
- conda info -a
- conda config --add channels http://conda.anaconda.org/NLeSC
- conda config --set show_channel_urls yes
- conda create -q -n testenv python=${PYTHON} root=${ROOT} rootpy pandas nose
Expand All @@ -41,7 +32,16 @@ install:
script: nosetests --with-coverage --cover-package=root_pandas

after_success:
- time coveralls
- time coveralls

notifications:
email: false
email: false

deploy:
provider: pypi
user: chrisburr
password:
secure: MyD2Q4zASzpXWaOBnbkGGm7luYB2SrrBVdX4faN0JmSmDcssn/exu2XDAIwhbZhg3uZC4bq7mBUpPiw/3Mx1f5kFgWlnjpnSRDaGhGLLc6rBp9Kqt6IOWcQ64yQ+S6LIuJ+tjbTMJAlNZgy3HDEwBWXKBvectWKJPZdVCenfMPA=
on:
tags: true
branch: master
5 changes: 5 additions & 0 deletions root_pandas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
from .readwrite import read_root
from .readwrite import to_root

# Names exported by ``from root_pandas import *``; both are imported from
# ``.readwrite`` above.
__all__ = [
'read_root',
'to_root',
]
78 changes: 52 additions & 26 deletions root_pandas/readwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@


def expand_braces(orig):
r = r'.*(\{.+?[^\\]\})'
r = r'.*?(\{.+[^\\]\})'
p = re.compile(r)

s = orig[:]
Expand All @@ -40,12 +40,10 @@ def expand_braces(orig):
open_brace = s.find(sub)
close_brace = open_brace + len(sub) - 1
if sub.find(',') != -1:
for pat in sub.strip('{}').split(','):
for pat in sub[1:-1].split(','):
res.extend(expand_braces(s[:open_brace] + pat + s[close_brace+1:]))

else:
res.extend(expand_braces(s[:open_brace] + sub.replace('}', '\\}') + s[close_brace+1:]))

else:
res.append(s.replace('\\}', '}'))

Expand All @@ -59,6 +57,7 @@ def get_nonscalar_columns(array):
bad_names = col_names[bad_cols]
return list(bad_names)


def get_matching_variables(branches, patterns, fail=True):
selected = []

Expand Down Expand Up @@ -93,6 +92,30 @@ def filter_noexpand_columns(columns):
return other, noexpand


def do_flatten(arr, flatten):
    """Stretch the selected columns of ``arr`` and record the element index.

    Parameters
    ----------
    arr :
        Structured array; its ``dtype.names`` are used as the column names.
    flatten :
        Either ``True`` (deprecated, emits a ``FutureWarning``; ``stretch``
        is then called without a field selection) or an iterable of column
        names to flatten.

    Returns
    -------
    The output of ``stretch`` with the indices it returned appended as an
    extra ``__array_index`` field (via ``append_fields`` with
    ``asrecarray=True``).

    Raises
    ------
    ValueError
        If a name in ``flatten`` is not reported by
        ``get_nonscalar_columns``: either it is a scalar column of ``arr``
        or it is not a column of ``arr`` at all.
    """
    if flatten is True:
        warnings.warn(" The option flatten=True is deprecated. Please specify the branches you would like "
                      "to flatten in a list: flatten=['foo', 'bar']", FutureWarning)
        stretched, indices = stretch(arr, return_indices=True)
        return append_fields(stretched, '__array_index', indices, usemask=False, asrecarray=True)

    array_columns = get_nonscalar_columns(arr)

    # Keep every scalar column, plus the non-scalar ones explicitly requested;
    # non-scalar columns that were not requested are left out.
    selected = []
    for name in arr.dtype.names:
        if name not in array_columns or name in flatten:
            selected.append(name)

    # Validate the request before doing any work.
    for requested in flatten:
        if requested in array_columns:
            continue
        if requested in selected:
            raise ValueError("Requested to flatten {col} but it has a scalar type"
                             .format(col=requested))
        raise ValueError("Requested to flatten {col} but it wasn't loaded from the input file"
                         .format(col=requested))

    stretched, indices = stretch(arr, fields=selected, return_indices=True)
    return append_fields(stretched, '__array_index', indices, usemask=False, asrecarray=True)


def read_root(paths, key=None, columns=None, ignore=None, chunksize=None, where=None, flatten=False, *args, **kwargs):
"""
Read a ROOT file, or list of ROOT files, into a pandas DataFrame.
Expand Down Expand Up @@ -175,22 +198,6 @@ def read_root(paths, key=None, columns=None, ignore=None, chunksize=None, where=
for var in ignored:
all_vars.remove(var)

def do_flatten(arr, flatten):
    """Stretch the selected columns of ``arr`` and record the element index.

    ``flatten`` is either ``True`` (deprecated, emits a ``FutureWarning``;
    ``stretch`` is then called without a field selection) or a collection of
    column names. In the latter case the non-scalar columns reported by
    ``get_nonscalar_columns`` that are not named in ``flatten`` are left out
    of the fields passed to ``stretch``, and a ``UserWarning`` listing them
    is emitted.

    Returns the output of ``stretch`` with the indices it returned appended
    as an extra ``__array_index`` field (``append_fields`` with
    ``asrecarray=True``).
    """
    if flatten is not True:
        array_columns = get_nonscalar_columns(arr)
        column_names = arr.dtype.names

        # Scalar columns are always kept; array columns only when requested.
        kept = [name for name in column_names
                if (name not in array_columns or name in flatten)]
        dropped = [name for name in column_names if name not in kept]
        if dropped:
            warnings.warn("Ignored the following non-scalar branches: {bad_names}"
                          .format(bad_names=", ".join(dropped)), UserWarning)

        stretched, indices = stretch(arr, fields=kept, return_indices=True)
    else:
        warnings.warn(" The option flatten=True is deprecated. Please specify the branches you would like "
                      "to flatten in a list: flatten=['foo', 'bar']", FutureWarning)
        stretched, indices = stretch(arr, return_indices=True)

    return append_fields(stretched, '__array_index', indices, usemask=False, asrecarray=True)

if chunksize:
tchain = ROOT.TChain(key)
for path in paths:
Expand All @@ -216,26 +223,45 @@ def genchunks():

def convert_to_dataframe(array, start_index=None):
nonscalar_columns = get_nonscalar_columns(array)
if nonscalar_columns:
warnings.warn("Ignored the following non-scalar branches: {bad_names}"
.format(bad_names=", ".join(nonscalar_columns)), UserWarning)
indices = list(filter(lambda x: x.startswith('__index__') and x not in nonscalar_columns, array.dtype.names))

# Columns containing 2D arrays can't be loaded, so convert them to 1D arrays of arrays
reshaped_columns = {}
for col in nonscalar_columns:
if array[col].ndim >= 2:
reshaped = np.zeros(len(array[col]), dtype='O')
for i, row in enumerate(array[col]):
reshaped[i] = row
reshaped_columns[col] = reshaped

indices = list(filter(lambda x: x.startswith('__index__'), array.dtype.names))
if len(indices) == 0:
index = None
if start_index is not None:
index = RangeIndex(start=start_index, stop=start_index + len(array))
df = DataFrame.from_records(array, exclude=nonscalar_columns, index=index)
df = DataFrame.from_records(array, exclude=reshaped_columns, index=index)
elif len(indices) == 1:
# We store the index under the __index__* branch, where
# * is the name of the index
df = DataFrame.from_records(array, index=indices[0], exclude=nonscalar_columns)
df = DataFrame.from_records(array, exclude=reshaped_columns, index=indices[0])
index_name = indices[0][len('__index__'):]
if not index_name:
# None means the index has no name
index_name = None
df.index.name = index_name
else:
raise ValueError("More than one index found in file")

# Manually add the columns which were reshaped
for key, reshaped in reshaped_columns.items():
df[key] = reshaped

# Reshaping can cause the order of columns to change so we have to change it back
if reshaped_columns:
# Filter to remove __index__ columns
columns = [c for c in array.dtype.names if c in df.columns]
assert len(columns) == len(df.columns), (columns, df.columns)
df = df.reindex_axis(columns, axis=1, copy=False)

return df


Expand Down
9 changes: 5 additions & 4 deletions root_pandas/utils.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# Copyright (c) 2012 rootpy developers and contributors
#
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
Expand All @@ -24,6 +24,7 @@
import numpy as np
VLEN = np.vectorize(len)


def stretch(arr, fields=None, return_indices=False):
"""Stretch an array.
Stretch an array by ``hstack()``-ing multiple array fields while
Expand Down Expand Up @@ -104,5 +105,5 @@ def stretch(arr, fields=None, return_indices=False):
if return_indices:
idx = np.concatenate(list(map(np.arange, len_array)))
return ret, idx

return ret
Binary file added tests/samples/HZZ-lz4.root
Binary file not shown.
Binary file added tests/samples/HZZ-lzma.root
Binary file not shown.
Binary file added tests/samples/HZZ-uncompressed.root
Binary file not shown.
Binary file added tests/samples/HZZ-zlib.root
Binary file not shown.
Binary file added tests/samples/HZZ.root
Binary file not shown.
3 changes: 3 additions & 0 deletions tests/samples/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Files for testing `root_pandas`

These files are taken from [uproot](https://github.com/scikit-hep/uproot/).
Binary file added tests/samples/Zmumu-lz4.root
Binary file not shown.
Binary file added tests/samples/Zmumu-lzma.root
Binary file not shown.
Binary file added tests/samples/Zmumu-uncompressed.root
Binary file not shown.
Binary file added tests/samples/Zmumu-zlib.root
Binary file not shown.
Binary file added tests/samples/Zmumu.root
Binary file not shown.
Binary file added tests/samples/foriter.root
Binary file not shown.
Binary file added tests/samples/foriter2.root
Binary file not shown.
Binary file added tests/samples/histograms.root
Binary file not shown.
Binary file added tests/samples/issue21.root
Binary file not shown.
Binary file added tests/samples/issue30.root
Binary file not shown.
Binary file added tests/samples/issue31.root
Binary file not shown.
Binary file added tests/samples/issue33.root
Binary file not shown.
Binary file added tests/samples/issue38a.root
Binary file not shown.
Binary file added tests/samples/issue38b.root
Binary file not shown.
Binary file added tests/samples/issue49.root
Binary file not shown.
Binary file added tests/samples/mc10events.root
Binary file not shown.
Binary file added tests/samples/nesteddirs.root
Binary file not shown.
Binary file added tests/samples/sample-5.23.02-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-5.23.02-zlib.root
Binary file not shown.
Binary file added tests/samples/sample-5.24.00-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-5.24.00-zlib.root
Binary file not shown.
Binary file added tests/samples/sample-5.25.02-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-5.25.02-zlib.root
Binary file not shown.
Binary file added tests/samples/sample-5.26.00-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-5.26.00-zlib.root
Binary file not shown.
Binary file added tests/samples/sample-5.27.02-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-5.27.02-zlib.root
Binary file not shown.
Binary file added tests/samples/sample-5.28.00-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-5.28.00-zlib.root
Binary file not shown.
Binary file added tests/samples/sample-5.29.02-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-5.29.02-zlib.root
Binary file not shown.
Binary file added tests/samples/sample-5.30.00-lzma.root
Binary file not shown.
Binary file added tests/samples/sample-5.30.00-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-5.30.00-zlib.root
Binary file not shown.
Binary file added tests/samples/sample-6.08.04-lzma.root
Binary file not shown.
Binary file added tests/samples/sample-6.08.04-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-6.08.04-zlib.root
Binary file not shown.
Binary file added tests/samples/sample-6.10.05-lz4.root
Binary file not shown.
Binary file added tests/samples/sample-6.10.05-lzma.root
Binary file not shown.
Binary file added tests/samples/sample-6.10.05-uncompressed.root
Binary file not shown.
Binary file added tests/samples/sample-6.10.05-zlib.root
Binary file not shown.
Binary file added tests/samples/simple.root
Binary file not shown.
Binary file added tests/samples/small-evnt-tree-fullsplit.root
Binary file not shown.
Binary file added tests/samples/small-flat-tree.root
Binary file not shown.
Loading

0 comments on commit 4ab672f

Please sign in to comment.