Skip to content

Commit

Permalink
Merge pull request #2 from sc0Vu/add-utf8-encoding
Browse files Browse the repository at this point in the history
Add utf8 encoding
  • Loading branch information
sc0Vu authored Sep 4, 2023
2 parents 27965c2 + 9f8df67 commit 22d3156
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 20 deletions.
15 changes: 8 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
GOPATH= $(shell go env GOPATH)

.PHONY: default
default: lint fmt test
default: lint test fuzz

.PHONY: test
test:
go test -race ./...

.PHONY: fuzz
fuzz:
go test -run=FuzzFindEditDistance -fuzztime=1s ./...

.PHONY: lint
lint:
go get github.com/golangci/golangci-lint/cmd/golangci-lint@v1.37.0
$(GOPATH)/bin/golangci-lint run -e gosec ./...
# go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.37.0
# $(GOPATH)/bin/golangci-lint run -e gosec ./...
go mod tidy

.PHONY: fmt
fmt:
go vet ./...
go fmt ./...

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# didyoumean
![Go](https://github.com/sc0Vu/didyoumean/workflows/Go/badge.svg)
[![Go](https://github.com/sc0Vu/didyoumean/actions/workflows/go.yml/badge.svg)](https://github.com/sc0Vu/didyoumean/actions/workflows/go.yml)
[![Go Report Card](https://goreportcard.com/badge/github.com/sc0Vu/didyoumean)](https://goreportcard.com/report/github.com/sc0Vu/didyoumean)

Didyoumean, written in Go, finds similar strings in a given string list. It currently uses Levenshtein distance to calculate the edit distance between two strings. See: https://en.wikipedia.org/wiki/Levenshtein_distance
Expand Down
15 changes: 11 additions & 4 deletions didyoumean.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ package didyoumean

import (
"strings"
"unicode/utf8"
)

var (
// ThresholdRate is the rate the edit distance must be less than, e.g. 0.4
// means the edit distance less than 40%
// means the edit distance less than 40%
ThresholdRate float64
// CaseInsensitive compare the edit distance in case insensitive mode
CaseInsensitive bool
Expand All @@ -27,8 +28,13 @@ func minimum(values ...int) (min int) {
// it uses Levenshtein distance, see: https://en.wikipedia.org/wiki/Levenshtein_distance for
// more information
func findEditDistance(a, b string) (distance int) {
lenA := len(a)
lenB := len(b)
if !utf8.ValidString(a) && !utf8.ValidString(b) {
return
}
ra := []rune(a)
rb := []rune(b)
lenA := len(ra)
lenB := len(rb)
totalLen := lenB + 1
// only use two rows
v0 := make([]int, totalLen)
Expand All @@ -53,7 +59,8 @@ func findEditDistance(a, b string) (distance int) {
// copy v1 to v0
copy(v0, v1)
}
return v0[lenB]
distance = v0[lenB]
return
}

// FirstMatch returns first match of didyoumean
Expand Down
33 changes: 25 additions & 8 deletions didyoumean_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,29 +36,33 @@ var (
A: "Saturday",
B: "Sunday",
Distance: 3,
}, {
A: "台灣 Taiwan",
B: "台味",
Distance: 7,
},
}
firstMatchTests = []FirstMatchTest{
{
Key: "insargrm",
List: []string{"facebook", "twitter", "instagram", "linkedin"},
List: []string{"facebook", "twitter", "instagram", "linkedin", "台灣 Taiwan"},
Match: "instagram",
ThreadRate: 0.4,
}, {
Key: "insargrm",
List: []string{"facebook", "twitter", "instagram", "linkedin"},
List: []string{"facebook", "twitter", "instagram", "linkedin", "台灣 Taiwan"},
Match: "",
ThreadRate: 0.3,
},
{
Key: "insarGrm",
List: []string{"facebook", "twiTter", "InstaGram", "linkedin"},
List: []string{"facebook", "twiTter", "InstaGram", "linkedin", "台灣 Taiwan"},
Match: "",
ThreadRate: 0.4,
CaseInsensitive: false,
}, {
Key: "insarGrm",
List: []string{"facebook", "twitter", "InstaGram", "linkedin"},
List: []string{"facebook", "twitter", "InstaGram", "linkedin", "台灣 Taiwan"},
Match: "InstaGram",
ThreadRate: 0.5,
CaseInsensitive: false,
Expand All @@ -67,23 +71,23 @@ var (
matchTests = []MatchTest{
{
Key: "insargrm",
List: []string{"facebook", "twitter", "instagram", "linkedin"},
List: []string{"facebook", "twitter", "instagram", "linkedin", "台灣 Taiwan"},
Match: []string{"instagram"},
ThreadRate: 0.4,
}, {
Key: "insargrm",
List: []string{"facebook", "twitter", "instagram", "linkedin"},
List: []string{"facebook", "twitter", "instagram", "linkedin", "台灣 Taiwan"},
Match: []string{},
ThreadRate: 0.3,
}, {
Key: "insarGrm",
List: []string{"facebook", "twiTter", "InstaGram", "linkedin"},
List: []string{"facebook", "twiTter", "InstaGram", "linkedin", "台灣 Taiwan"},
Match: []string{},
ThreadRate: 0.4,
CaseInsensitive: false,
}, {
Key: "insarGrm",
List: []string{"facebook", "twitter", "InstaGram", "linkedin"},
List: []string{"facebook", "twitter", "InstaGram", "linkedin", "台灣 Taiwan"},
Match: []string{"InstaGram"},
ThreadRate: 0.5,
CaseInsensitive: false,
Expand All @@ -101,6 +105,19 @@ func TestFindEditDistance(t *testing.T) {
}
}

// FuzzFindEditDistance seeds the fuzzer with every pair from
// editDistanceTests and checks, for each generated pair of strings, that
// findEditDistance does not report a negative distance.
func FuzzFindEditDistance(f *testing.F) {
	// Register the table-driven cases as the seed corpus.
	for i := range editDistanceTests {
		seed := editDistanceTests[i]
		f.Add(seed.A, seed.B)
	}
	f.Fuzz(func(t *testing.T, a, b string) {
		if got := findEditDistance(a, b); got < 0 {
			t.Errorf("distance should not be smaller than 0: %d", got)
		}
	})
}

// TestFirstMatch
func TestFirstMatch(t *testing.T) {
for _, test := range firstMatchTests {
Expand Down

0 comments on commit 22d3156

Please sign in to comment.