Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add clustering functions #688

Merged
merged 3 commits into from
Sep 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 136 additions & 0 deletions include/geos/operation/cluster/AbstractClusterFinder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
/**********************************************************************
*
* GEOS - Geometry Engine Open Source
* http://geos.osgeo.org
*
* Copyright (C) 2020-2022 Daniel Baston
*
* This is free software; you can redistribute and/or modify it under
* the terms of the GNU Lesser General Public Licence as published
* by the Free Software Foundation.
* See the COPYING file for more information.
*
**********************************************************************/

#ifndef GEOS_OPERATION_CLUSTER_ABSTRACTCLUSTERFINDER
#define GEOS_OPERATION_CLUSTER_ABSTRACTCLUSTERFINDER

#include <functional>
#include <memory>
#include <vector>

#include <geos/export.h>
#include <geos/index/strtree/TemplateSTRtree.h>
#include <geos/operation/cluster/Clusters.h>

// Forward declarations
namespace geos {
namespace geom {
class Envelope;
class Geometry;
}
namespace operation {
namespace cluster {
class UnionFind;
}
}
}

namespace geos {
namespace operation {
namespace cluster {

/** AbstractClusterFinder defines an interface for bottom-up clustering algorithms,
* where spatial index queries can be used to identify geometries that should be
* clustered together.
*/
class GEOS_DLL AbstractClusterFinder {

public:
/**
* Cluster the provided geometries, returning an object that provides access
* to the components of each cluster.
*
* @param g A vector of geometries to cluster
*/
Clusters cluster(const std::vector<const geom::Geometry*>& g);

/**
* Cluster the components of the provided geometry, returning a vector of clusters.
* This function will take ownership of the provided geometry. Any components that
* are included in a cluster will be returned. Components that are not included in
* any cluster will be destroyed.
dbaston marked this conversation as resolved.
Show resolved Hide resolved
*
* @param g A geometry whose components should be clustered.
* @return a Geometry vector, with each entry representing a single cluster.
*/
std::vector<std::unique_ptr<geom::Geometry>> clusterToVector(std::unique_ptr<geom::Geometry> && g);

/**
* Cluster the components of the provided geometry, returning a vector of clusters.
* The input geometry will not be modified.
*
* @param g A geometry whose components should be clustered.
* @return a Geometry vector, with each entry representing a single cluster.
*/
std::vector<std::unique_ptr<geom::Geometry>> clusterToVector(const geom::Geometry& g);

/**
* Cluster the components of the provided geometry, returning a GeometryCollection.
* This function will take ownership of the provided geometry. Any components that
* are included in a cluster will be returned. Components that are not included in
* any cluster will be destroyed.
*
* @param g A geometry whose components should be clustered.
* @return a GeometryCollection, with each sub-geometry representing a single cluster.
*/
std::unique_ptr<geom::Geometry> clusterToCollection(std::unique_ptr<geom::Geometry> && g);

/**
* Cluster the components of the provided geometry, returning a GeometryCollection.
* The input geometry will not be modified.
*
* @param g A geometry whose components should be clustered.
* @return a GeometryCollection, with each sub-geometry representing a single cluster.
*/
std::unique_ptr<geom::Geometry> clusterToCollection(const geom::Geometry & g);

protected:
/**
* Determine whether two geometries should be considered in the same cluster.
* @param a Geometry
* @param b Geometry
* @return `true` if the clusters associated with `a` and `b` should be merged.
*/
virtual bool shouldJoin(const geom::Geometry* a, const geom::Geometry* b) = 0;

/**
* Provide an query Envelope that can be used to find all geometries possibly
* in the same cluster as the input.
* @param a Geometry
* @return an Envelope suitable for querying
*/
virtual const geom::Envelope& queryEnvelope(const geom::Geometry* a) = 0;

/**
* Given a vector and index of components,
* @param components a vector of Geometry components
* @param index a spatial index storing pointers to those components
* @param uf a UnionFind
* @return a vector of with the indices of all components that should be included in a cluster
*/
virtual Clusters process(const std::vector<const geom::Geometry*> & components,
index::strtree::TemplateSTRtree<std::size_t> & index,
UnionFind & uf);

private:
static std::vector<std::unique_ptr<geom::Geometry>> getComponents(std::unique_ptr<geom::Geometry>&& g);
};



}
}
}

#endif
72 changes: 72 additions & 0 deletions include/geos/operation/cluster/Clusters.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/**********************************************************************
*
* GEOS - Geometry Engine Open Source
* http://geos.osgeo.org
*
* Copyright (C) 2021-2022 Daniel Baston
*
* This is free software; you can redistribute and/or modify it under
* the terms of the GNU Lesser General Public Licence as published
* by the Free Software Foundation.
* See the COPYING file for more information.
*
**********************************************************************/

#ifndef GEOS_OPERATION_CLUSTER_CLUSTERS
#define GEOS_OPERATION_CLUSTER_CLUSTERS

#include <geos/export.h>
#include <limits>
#include <vector>

namespace geos {
namespace operation {
namespace cluster {

class UnionFind;

class GEOS_DLL Clusters {
private:
std::vector<std::size_t> m_elemsInCluster; // The IDs of elements that are included in a cluster
std::vector<std::size_t> m_starts; // Start position of each cluster in m_elemsInCluster
std::size_t m_numElems; // The number of elements from which clusters were generated

public:
using const_iterator = decltype(m_elemsInCluster)::const_iterator;

explicit Clusters(UnionFind & uf, std::vector<std::size_t> elemsInCluster, std::size_t numElems);

// Get the number of clusters available
std::size_t getNumClusters() const {
return m_starts.size();
}

// Get the size of a given cluster
std::size_t getSize(std::size_t cluster) const {
return static_cast<std::size_t>(std::distance(begin(cluster), end(cluster)));
}

// Get a vector containing the cluster ID for each item in `elems`
std::vector<std::size_t> getClusterIds(std::size_t noClusterValue = std::numeric_limits<std::size_t>::max()) const;

// Get an iterator to the first element in a given cluster
const_iterator begin(std::size_t cluster) const {
return std::next(m_elemsInCluster.begin(), static_cast<std::ptrdiff_t>(m_starts[cluster]));
}

// Get an iterator beyond the last element in a given cluster
const_iterator end(std::size_t cluster) const {
if (cluster == static_cast<std::size_t>(m_starts.size() - 1)) {
return m_elemsInCluster.end();
}

return begin(cluster + 1);
}

};

}
}
}

#endif
59 changes: 59 additions & 0 deletions include/geos/operation/cluster/DBSCANClusterFinder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/**********************************************************************
*
* GEOS - Geometry Engine Open Source
* http://geos.osgeo.org
*
* Copyright (C) 2020-2021 Daniel Baston
*
* This is free software; you can redistribute and/or modify it under
* the terms of the GNU Lesser General Public Licence as published
* by the Free Software Foundation.
* See the COPYING file for more information.
*
**********************************************************************/

#ifndef GEOS_OPERATION_CLUSTER_DBSCANCLUSTERFINDER
#define GEOS_OPERATION_CLUSTER_DBSCANCLUSTERFINDER

#include <geos/operation/cluster/AbstractClusterFinder.h>
#include <geos/geom/Geometry.h>
#include <stdexcept>

namespace geos {
namespace operation {
namespace cluster {

/** DBSCANClusterFinder clusters geometries according to the DBSCAN algorithm.
*
*/
class GEOS_DLL DBSCANClusterFinder : public AbstractClusterFinder {
public:
DBSCANClusterFinder(double eps, size_t minPoints) : m_eps(eps), m_minPoints(minPoints) {}

protected:

const geom::Envelope& queryEnvelope(const geom::Geometry* a) override {
m_envelope = *a->getEnvelopeInternal();
m_envelope.expandBy(m_eps);
return m_envelope;
}

Clusters process(const std::vector<const geom::Geometry*> & components,
index::strtree::TemplateSTRtree<std::size_t> & index,
UnionFind & uf) override;

bool shouldJoin(const geom::Geometry*, const geom::Geometry*) override {
throw std::runtime_error("Never get here.");
}

private:
double m_eps;
size_t m_minPoints;
geom::Envelope m_envelope;
};

}
}
}

#endif
65 changes: 65 additions & 0 deletions include/geos/operation/cluster/DisjointOperation.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/**********************************************************************
*
* GEOS - Geometry Engine Open Source
* http://geos.osgeo.org
*
* Copyright (C) 2022 ISciences LLC
*
* This is free software; you can redistribute and/or modify it under
* the terms of the GNU Lesser General Public Licence as published
* by the Free Software Foundation.
* See the COPYING file for more information.
*
**********************************************************************/

#pragma once

#include <geos/export.h>
#include <geos/operation/cluster/AbstractClusterFinder.h>
#include <geos/operation/cluster/GeometryFlattener.h>
#include <geos/geom/Geometry.h>
#include <geos/geom/GeometryFactory.h>

namespace geos {
namespace operation {
namespace cluster {

class GEOS_DLL DisjointOperation {
public:
DisjointOperation(AbstractClusterFinder& finder) : m_finder(finder) {}

/** Decompose a geometry into disjoint subsets using the provided `ClusterFinder`,
* process each subset using `f`, and combine the results. It is assumed that
* the processed results of each subset will also be disjoint; therefore, this
* algorithm may not be suitable for operations such as buffering.
*
* @brief process
* @param g a geometry to be processed
* @param f function taking a single argument of `const Geometry&`
*/
template<typename Function>
std::unique_ptr<geom::Geometry> processDisjointSubsets(const geom::Geometry& g, Function&& f) {
if (g.getNumGeometries() == 1) {
return f(g);
}

auto flattened = operation::cluster::GeometryFlattener::flatten(g.clone());
auto clustered = m_finder.cluster(std::move(flattened));

for (auto& subset : clustered) {
subset = f(*subset);
}

auto coll = g.getFactory()->createGeometryCollection(std::move(clustered));

return operation::cluster::GeometryFlattener::flatten(std::move(coll));
}

private:
AbstractClusterFinder& m_finder;
};


}
}
}
53 changes: 53 additions & 0 deletions include/geos/operation/cluster/EnvelopeDistanceClusterFinder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/**********************************************************************
*
* GEOS - Geometry Engine Open Source
* http://geos.osgeo.org
*
* Copyright (C) 2020-2021 Daniel Baston
*
* This is free software; you can redistribute and/or modify it under
* the terms of the GNU Lesser General Public Licence as published
* by the Free Software Foundation.
* See the COPYING file for more information.
*
**********************************************************************/

#ifndef GEOS_OPERATION_CLUSTER_ENVELOPEDISTANCECLUSTERFINDER
#define GEOS_OPERATION_CLUSTER_ENVELOPEDISTANCECLUSTERFINDER

#include <geos/operation/cluster/AbstractClusterFinder.h>
#include <geos/geom/Envelope.h>

namespace geos {
namespace operation {
namespace cluster {

/** EnvelopeDistanceClusterFinder clusters geometries by the distance between their envelopes.
* Any two geometries whose envelopes are within the specified distance will be included in the same cluster.
*/
class GEOS_DLL EnvelopeDistanceClusterFinder : public AbstractClusterFinder {
public:
explicit EnvelopeDistanceClusterFinder(double d) : m_distance(d), m_distance_squared(d*d) {}

protected:
const geom::Envelope& queryEnvelope(const geom::Geometry* a) override {
m_envelope = *a->getEnvelopeInternal();
m_envelope.expandBy(m_distance);
return m_envelope;
}

bool shouldJoin(const geom::Geometry* a, const geom::Geometry *b) override {
return a->getEnvelopeInternal()->distanceSquared(*b->getEnvelopeInternal()) <= m_distance_squared;
}

private:
geom::Envelope m_envelope;
double m_distance;
double m_distance_squared;
};

}
}
}

#endif
Loading