Skip to content

Commit

Permalink
Fixing bug in Ulam distance (#411)
Browse files Browse the repository at this point in the history
* fixed bug and added test

* simplifying

* styling
  • Loading branch information
osorensen authored Mar 25, 2024
1 parent 83c505a commit 1d34d99
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 117 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: BayesMallows
Type: Package
Title: Bayesian Preference Learning with the Mallows Rank Model
Version: 2.1.1.9004
Version: 2.1.1.9005
Authors@R: c(person("Oystein", "Sorensen",
email = "oystein.sorensen.1985@gmail.com",
role = c("aut", "cre"),
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# BayesMallows (development versions)

* Fixed a bug in the Ulam distance. Thanks for Marta Crispino for discovering
it.
* Fixed a bug in SMC algorithm for pairwise preference data, where the proposal
distribution incorrectly was assumed to be uniform.
* It is now possible to report progress of MCMC more flexibly using
Expand Down
81 changes: 48 additions & 33 deletions src/distances.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "distances.h"
using namespace arma;

std::unique_ptr<Distance> choose_distance_function(std::string metric) {
if(metric == "cayley") {
Expand All @@ -19,75 +20,75 @@ std::unique_ptr<Distance> choose_distance_function(std::string metric) {
}

// [[Rcpp::export]]
arma::vec get_rank_distance(arma::mat rankings, arma::vec rho,
std::string metric) {
arma::vec get_rank_distance(
arma::mat rankings, arma::vec rho, std::string metric) {
auto distfun = choose_distance_function(metric);
return distfun->matdist(rankings, rho);
}

arma::vec Distance::matdist(const arma::mat& r1, const arma::vec& r2) {
return matdist(r1, r2, arma::regspace<arma::uvec>(0, r2.n_elem - 1));
vec Distance::matdist(const mat& r1, const vec& r2) {
return matdist(r1, r2, regspace<uvec>(0, r2.n_elem - 1));
}

arma::vec Distance::matdist(const arma::mat& r1, const arma::vec& r2,
const arma::uvec& inds) {
arma::vec result(r1.n_cols);
vec Distance::matdist(const mat& r1, const vec& r2,
const uvec& inds) {
vec result(r1.n_cols);
for(size_t i{}; i < r1.n_cols; i++) {
const arma::vec& v1 = r1.col(i);
const vec& v1 = r1.col(i);
result(i) = d(v1, r2, inds);
}
return result;
}

arma::vec Distance::scalardist(const arma::vec& r1, const double r2) {
arma::vec v2 = arma::ones(r1.size()) * r2;
arma::vec result = arma::zeros(r1.size());
vec Distance::scalardist(const vec& r1, const double r2) {
vec v2 = ones(r1.size()) * r2;
vec result = zeros(r1.size());
for(size_t i{}; i < r1.n_elem; i++) {
result(i) = d(arma::vec{r1(i)}, arma::vec{v2(i)});
result(i) = d(vec{r1(i)}, vec{v2(i)});
}
return result;
}

double CayleyDistance::d(const arma::vec& r1, const arma::vec& r2) {
double CayleyDistance::d(const vec& r1, const vec& r2) {
double distance{};
arma::vec tmp2 = r1;
vec tmp2 = r1;

for(size_t i{}; i < r1.n_elem; ++i){
if(tmp2(i) != r2(i)) {
distance += 1;
double tmp1 = tmp2(i);
tmp2(i) = r2(i);
arma::uvec inds = find(tmp2 == r2(i));
uvec inds = find(tmp2 == r2(i));
tmp2.elem(inds).fill(tmp1);
}
}
return distance;
}

double CayleyDistance::d(
const arma::vec& r1, const arma::vec& r2, const arma::uvec& inds) {
const vec& r1, const vec& r2, const uvec& inds) {
return d(r1, r2);
}

double FootruleDistance::d(const arma::vec& r1, const arma::vec& r2) {
return arma::norm(r1 - r2, 1);
double FootruleDistance::d(const vec& r1, const vec& r2) {
return norm(r1 - r2, 1);
}

double FootruleDistance::d(
const arma::vec& r1, const arma::vec& r2, const arma::uvec& inds) {
const vec& r1, const vec& r2, const uvec& inds) {
return d(r1(inds), r2(inds));
}

double HammingDistance::d(const arma::vec& r1, const arma::vec& r2) {
return arma::sum(r1 != r2);
double HammingDistance::d(const vec& r1, const vec& r2) {
return sum(r1 != r2);
}

double HammingDistance::d(
const arma::vec& r1, const arma::vec& r2, const arma::uvec& inds) {
const vec& r1, const vec& r2, const uvec& inds) {
return d(r1(inds), r2(inds));
}

double KendallDistance::d(const arma::vec& r1, const arma::vec& r2) {
double KendallDistance::d(const vec& r1, const vec& r2) {
double distance{};
for(size_t i{}; i < r1.n_elem; ++i){
for(size_t j{}; j < i; ++j){
Expand All @@ -100,25 +101,39 @@ double KendallDistance::d(const arma::vec& r1, const arma::vec& r2) {
return distance;
}

double KendallDistance::d(const arma::vec& r1, const arma::vec& r2, const arma::uvec& inds) {
double KendallDistance::d(const vec& r1, const vec& r2, const uvec& inds) {
return d(r1, r2);
}

double SpearmanDistance::d(const arma::vec& r1, const arma::vec& r2) {
return std::pow(arma::norm(r1 - r2, 2), 2);
double SpearmanDistance::d(const vec& r1, const vec& r2) {
return std::pow(norm(r1 - r2, 2), 2);
}

double SpearmanDistance::d(const arma::vec& r1, const arma::vec& r2, const arma::uvec& inds) {
double SpearmanDistance::d(const vec& r1, const vec& r2, const uvec& inds) {
return d(r1(inds), r2(inds));
}

double UlamDistance::d(const arma::vec& r1, const arma::vec& r2) {
arma::ivec a = arma::conv_to<arma::ivec>::from(r1) - 1;
arma::ivec b = arma::conv_to<arma::ivec>::from(r2) - 1;
auto distance = perm0_distance ( a, b );
return static_cast<double>(distance);
// Rewritten from https://www.geeksforgeeks.org/c-program-for-longest-increasing-subsequence/
double longest_increasing_subsequence(const vec& permutation) {
int n = permutation.n_elem;
vec lis(n, fill::ones);

for (int i = 1; i < n; i++) {
for (int j = 0; j < i; j++) {
if (permutation(i) > permutation(j) && lis(i) < lis(j) + 1) {
lis(i) = lis(j) + 1;
}
}
}
return max(lis);
}


double UlamDistance::d(const vec& r1, const vec& r2) {
uvec x = sort_index(r2);
return r1.size() - longest_increasing_subsequence(r1(x));
}

double UlamDistance::d(const arma::vec& r1, const arma::vec& r2, const arma::uvec& inds) {
double UlamDistance::d(const vec& r1, const vec& r2, const uvec& inds) {
return d(r1, r2);
}
4 changes: 0 additions & 4 deletions src/distances.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@
#include <memory>
#include <RcppArmadillo.h>

int perm0_distance (
const arma::ivec& a,
const arma::ivec& b);

struct Distance {
Distance() {};
virtual ~Distance() = default;
Expand Down
78 changes: 0 additions & 78 deletions src/subset.cpp

This file was deleted.

16 changes: 16 additions & 0 deletions tests/testthat/test-compute_rank_distance.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,19 @@ test_that("compute_rank_distance works", {
), c(6, 3)
)
})

test_that("distances are right-invariant", {
n_items <- 10
rho1 <- sample(n_items)
rho2 <- sample(n_items)
eta <- sample(n_items)

metrics <- c("footrule", "spearman", "kendall", "cayley", "hamming", "ulam")
names(expectations <- metrics)

for (metric in metrics) {
r1 <- compute_rank_distance(rho1, rho2, metric = metric)
r2 <- compute_rank_distance(rho1[eta], rho2[eta], metric = metric)
expect_equal(r1, r2)
}
})
2 changes: 1 addition & 1 deletion tests/testthat/test-smc_update_correctness.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ test_that("update_mallows is correct for new rankings", {
as.numeric(as.factor(smc_consensus$item)),
metric = "ulam"
),
2
4
)

set.seed(123)
Expand Down

0 comments on commit 1d34d99

Please sign in to comment.