Skip to content

Commit

Permalink
Add clustering functions (#688)
Browse files Browse the repository at this point in the history
* Add ClusterFinder classes

* Add clusterIntersecting to geosop

* GeosOp: Log runtime to stderr
  • Loading branch information
dbaston authored Sep 26, 2022
1 parent 8c0acc6 commit 6ea4e94
Show file tree
Hide file tree
Showing 19 changed files with 1,445 additions and 1 deletion.
136 changes: 136 additions & 0 deletions include/geos/operation/cluster/AbstractClusterFinder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
/**********************************************************************
*
* GEOS - Geometry Engine Open Source
* http://geos.osgeo.org
*
* Copyright (C) 2020-2022 Daniel Baston
*
* This is free software; you can redistribute and/or modify it under
* the terms of the GNU Lesser General Public Licence as published
* by the Free Software Foundation.
* See the COPYING file for more information.
*
**********************************************************************/

#ifndef GEOS_OPERATION_CLUSTER_ABSTRACTCLUSTERFINDER
#define GEOS_OPERATION_CLUSTER_ABSTRACTCLUSTERFINDER

#include <functional>
#include <memory>
#include <vector>

#include <geos/export.h>
#include <geos/index/strtree/TemplateSTRtree.h>
#include <geos/operation/cluster/Clusters.h>

// Forward declarations
namespace geos {
namespace geom {
class Envelope;
class Geometry;
}
namespace operation {
namespace cluster {
class UnionFind;
}
}
}

namespace geos {
namespace operation {
namespace cluster {

/** AbstractClusterFinder defines an interface for bottom-up clustering algorithms,
* where spatial index queries can be used to identify geometries that should be
* clustered together.
*/
class GEOS_DLL AbstractClusterFinder {

public:
/**
* Cluster the provided geometries, returning an object that provides access
* to the components of each cluster.
*
* @param g A vector of geometries to cluster
*/
Clusters cluster(const std::vector<const geom::Geometry*>& g);

/**
* Cluster the components of the provided geometry, returning a vector of clusters.
* This function will take ownership of the provided geometry. Any components that
* are included in a cluster will be returned. Components that are not included in
* any cluster will be destroyed.
*
* @param g A geometry whose components should be clustered.
* @return a Geometry vector, with each entry representing a single cluster.
*/
std::vector<std::unique_ptr<geom::Geometry>> clusterToVector(std::unique_ptr<geom::Geometry> && g);

/**
* Cluster the components of the provided geometry, returning a vector of clusters.
* The input geometry will not be modified.
*
* @param g A geometry whose components should be clustered.
* @return a Geometry vector, with each entry representing a single cluster.
*/
std::vector<std::unique_ptr<geom::Geometry>> clusterToVector(const geom::Geometry& g);

/**
* Cluster the components of the provided geometry, returning a GeometryCollection.
* This function will take ownership of the provided geometry. Any components that
* are included in a cluster will be returned. Components that are not included in
* any cluster will be destroyed.
*
* @param g A geometry whose components should be clustered.
* @return a GeometryCollection, with each sub-geometry representing a single cluster.
*/
std::unique_ptr<geom::Geometry> clusterToCollection(std::unique_ptr<geom::Geometry> && g);

/**
* Cluster the components of the provided geometry, returning a GeometryCollection.
* The input geometry will not be modified.
*
* @param g A geometry whose components should be clustered.
* @return a GeometryCollection, with each sub-geometry representing a single cluster.
*/
std::unique_ptr<geom::Geometry> clusterToCollection(const geom::Geometry & g);

protected:
/**
* Determine whether two geometries should be considered in the same cluster.
* @param a Geometry
* @param b Geometry
* @return `true` if the clusters associated with `a` and `b` should be merged.
*/
virtual bool shouldJoin(const geom::Geometry* a, const geom::Geometry* b) = 0;

/**
* Provide an query Envelope that can be used to find all geometries possibly
* in the same cluster as the input.
* @param a Geometry
* @return an Envelope suitable for querying
*/
virtual const geom::Envelope& queryEnvelope(const geom::Geometry* a) = 0;

/**
* Given a vector and index of components,
* @param components a vector of Geometry components
* @param index a spatial index storing pointers to those components
* @param uf a UnionFind
* @return a vector of with the indices of all components that should be included in a cluster
*/
virtual Clusters process(const std::vector<const geom::Geometry*> & components,
index::strtree::TemplateSTRtree<std::size_t> & index,
UnionFind & uf);

private:
static std::vector<std::unique_ptr<geom::Geometry>> getComponents(std::unique_ptr<geom::Geometry>&& g);
};



}
}
}

#endif
72 changes: 72 additions & 0 deletions include/geos/operation/cluster/Clusters.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/**********************************************************************
*
* GEOS - Geometry Engine Open Source
* http://geos.osgeo.org
*
* Copyright (C) 2021-2022 Daniel Baston
*
* This is free software; you can redistribute and/or modify it under
* the terms of the GNU Lesser General Public Licence as published
* by the Free Software Foundation.
* See the COPYING file for more information.
*
**********************************************************************/

#ifndef GEOS_OPERATION_CLUSTER_CLUSTERS
#define GEOS_OPERATION_CLUSTER_CLUSTERS

#include <geos/export.h>
#include <limits>
#include <vector>

namespace geos {
namespace operation {
namespace cluster {

class UnionFind;

class GEOS_DLL Clusters {
private:
std::vector<std::size_t> m_elemsInCluster; // The IDs of elements that are included in a cluster
std::vector<std::size_t> m_starts; // Start position of each cluster in m_elemsInCluster
std::size_t m_numElems; // The number of elements from which clusters were generated

public:
using const_iterator = decltype(m_elemsInCluster)::const_iterator;

explicit Clusters(UnionFind & uf, std::vector<std::size_t> elemsInCluster, std::size_t numElems);

// Get the number of clusters available
std::size_t getNumClusters() const {
return m_starts.size();
}

// Get the size of a given cluster
std::size_t getSize(std::size_t cluster) const {
return static_cast<std::size_t>(std::distance(begin(cluster), end(cluster)));
}

// Get a vector containing the cluster ID for each item in `elems`
std::vector<std::size_t> getClusterIds(std::size_t noClusterValue = std::numeric_limits<std::size_t>::max()) const;

// Get an iterator to the first element in a given cluster
const_iterator begin(std::size_t cluster) const {
return std::next(m_elemsInCluster.begin(), static_cast<std::ptrdiff_t>(m_starts[cluster]));
}

// Get an iterator beyond the last element in a given cluster
const_iterator end(std::size_t cluster) const {
if (cluster == static_cast<std::size_t>(m_starts.size() - 1)) {
return m_elemsInCluster.end();
}

return begin(cluster + 1);
}

};

}
}
}

#endif
59 changes: 59 additions & 0 deletions include/geos/operation/cluster/DBSCANClusterFinder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/**********************************************************************
*
* GEOS - Geometry Engine Open Source
* http://geos.osgeo.org
*
* Copyright (C) 2020-2021 Daniel Baston
*
* This is free software; you can redistribute and/or modify it under
* the terms of the GNU Lesser General Public Licence as published
* by the Free Software Foundation.
* See the COPYING file for more information.
*
**********************************************************************/

#ifndef GEOS_OPERATION_CLUSTER_DBSCANCLUSTERFINDER
#define GEOS_OPERATION_CLUSTER_DBSCANCLUSTERFINDER

#include <geos/operation/cluster/AbstractClusterFinder.h>
#include <geos/geom/Geometry.h>
#include <stdexcept>

namespace geos {
namespace operation {
namespace cluster {

/** DBSCANClusterFinder clusters geometries according to the DBSCAN algorithm.
*
*/
class GEOS_DLL DBSCANClusterFinder : public AbstractClusterFinder {
public:
DBSCANClusterFinder(double eps, size_t minPoints) : m_eps(eps), m_minPoints(minPoints) {}

protected:

const geom::Envelope& queryEnvelope(const geom::Geometry* a) override {
m_envelope = *a->getEnvelopeInternal();
m_envelope.expandBy(m_eps);
return m_envelope;
}

Clusters process(const std::vector<const geom::Geometry*> & components,
index::strtree::TemplateSTRtree<std::size_t> & index,
UnionFind & uf) override;

bool shouldJoin(const geom::Geometry*, const geom::Geometry*) override {
throw std::runtime_error("Never get here.");
}

private:
double m_eps;
size_t m_minPoints;
geom::Envelope m_envelope;
};

}
}
}

#endif
65 changes: 65 additions & 0 deletions include/geos/operation/cluster/DisjointOperation.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/**********************************************************************
*
* GEOS - Geometry Engine Open Source
* http://geos.osgeo.org
*
* Copyright (C) 2022 ISciences LLC
*
* This is free software; you can redistribute and/or modify it under
* the terms of the GNU Lesser General Public Licence as published
* by the Free Software Foundation.
* See the COPYING file for more information.
*
**********************************************************************/

#pragma once

#include <geos/export.h>
#include <geos/operation/cluster/AbstractClusterFinder.h>
#include <geos/operation/cluster/GeometryFlattener.h>
#include <geos/geom/Geometry.h>
#include <geos/geom/GeometryFactory.h>

namespace geos {
namespace operation {
namespace cluster {

class GEOS_DLL DisjointOperation {
public:
DisjointOperation(AbstractClusterFinder& finder) : m_finder(finder) {}

/** Decompose a geometry into disjoint subsets using the provided `ClusterFinder`,
* process each subset using `f`, and combine the results. It is assumed that
* the processed results of each subset will also be disjoint; therefore, this
* algorithm may not be suitable for operations such as buffering.
*
* @brief process
* @param g a geometry to be processed
* @param f function taking a single argument of `const Geometry&`
*/
template<typename Function>
std::unique_ptr<geom::Geometry> processDisjointSubsets(const geom::Geometry& g, Function&& f) {
if (g.getNumGeometries() == 1) {
return f(g);
}

auto flattened = operation::cluster::GeometryFlattener::flatten(g.clone());
auto clustered = m_finder.cluster(std::move(flattened));

for (auto& subset : clustered) {
subset = f(*subset);
}

auto coll = g.getFactory()->createGeometryCollection(std::move(clustered));

return operation::cluster::GeometryFlattener::flatten(std::move(coll));
}

private:
AbstractClusterFinder& m_finder;
};


}
}
}
53 changes: 53 additions & 0 deletions include/geos/operation/cluster/EnvelopeDistanceClusterFinder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/**********************************************************************
*
* GEOS - Geometry Engine Open Source
* http://geos.osgeo.org
*
* Copyright (C) 2020-2021 Daniel Baston
*
* This is free software; you can redistribute and/or modify it under
* the terms of the GNU Lesser General Public Licence as published
* by the Free Software Foundation.
* See the COPYING file for more information.
*
**********************************************************************/

#ifndef GEOS_OPERATION_CLUSTER_ENVELOPEDISTANCECLUSTERFINDER
#define GEOS_OPERATION_CLUSTER_ENVELOPEDISTANCECLUSTERFINDER

#include <geos/operation/cluster/AbstractClusterFinder.h>
#include <geos/geom/Envelope.h>

namespace geos {
namespace operation {
namespace cluster {

/** EnvelopeDistanceClusterFinder clusters geometries by the distance between their envelopes.
* Any two geometries whose envelopes are within the specified distance will be included in the same cluster.
*/
class GEOS_DLL EnvelopeDistanceClusterFinder : public AbstractClusterFinder {
public:
explicit EnvelopeDistanceClusterFinder(double d) : m_distance(d), m_distance_squared(d*d) {}

protected:
const geom::Envelope& queryEnvelope(const geom::Geometry* a) override {
m_envelope = *a->getEnvelopeInternal();
m_envelope.expandBy(m_distance);
return m_envelope;
}

bool shouldJoin(const geom::Geometry* a, const geom::Geometry *b) override {
return a->getEnvelopeInternal()->distanceSquared(*b->getEnvelopeInternal()) <= m_distance_squared;
}

private:
geom::Envelope m_envelope;
double m_distance;
double m_distance_squared;
};

}
}
}

#endif
Loading

0 comments on commit 6ea4e94

Please sign in to comment.