[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [bridgedb/develop] Add Levenshtein Distance function to bridgedb.util module.
commit 55d05ac4bf876af85078de491de858ed8550dc29
Author: Isis Lovecruft <isis@xxxxxxxxxxxxxx>
Date: Sun Jul 6 19:43:09 2014 +0000
Add Levenshtein Distance function to bridgedb.util module.
* ADD new function, `bridgedb.util.levenshteinDistance()`.
---
lib/bridgedb/util.py | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)
diff --git a/lib/bridgedb/util.py b/lib/bridgedb/util.py
index 6d95129..63fd5e6 100644
--- a/lib/bridgedb/util.py
+++ b/lib/bridgedb/util.py
@@ -142,6 +142,46 @@ def configureLogging(cfg):
logging.info("Level: %s", logLevel)
logging.info("Safe Logging: %sabled" % ("En" if safelogging else "Dis"))
+def levenshteinDistance(s1, s2, len1=None, len2=None,
+ offset1=0, offset2=0, memo=None):
+ """Compute the Levenstein Distance between two strings.
+
+ The `Levenshtein String Distance Algorithm`_ efficiently computes the
+ number of characters which must be changed in **s1** to make it
+ identical to **s2**.
+
+ .. `Levenshtein String Distance Algorithm`:
+ https://en.wikipedia.org/wiki/Levenshtein_distance
+
+ >>> levenshteinDistance('cat', 'cat')
+ 0
+ >>> levenshteinDistance('cat', 'hat')
+ 1
+ >>> levenshteinDistance('arma', 'armadillo')
+ 5
+
+ :param str s1: The string which should be changed.
+ :param str s2: The string which **stringOne** should be compared to.
+ """
+ len1 = len(s1) if len1 is None else len1
+ len2 = len(s2) if len2 is None else len2
+ memo = {} if memo is None else memo
+
+ key = ','.join([str(offset1), str(len1), str(offset2), str(len2)])
+ if memo.get(key) is not None: return memo[key]
+
+ if len1 == 0: return len2
+ elif len2 == 0: return len1
+
+ cost = 0 if (s1[offset1] == s2[offset2]) else 1
+ distance = min(
+ levenshteinDistance(s1, s2, len1-1, len2, offset1+1, offset2, memo) + 1,
+ levenshteinDistance(s1, s2, len1, len2-1, offset1, offset2+1, memo) + 1,
+ levenshteinDistance(s1, s2, len1-1, len2-1, offset1+1, offset2+1, memo) + cost,
+ )
+ memo[key] = distance
+ return distance
+
class JustifiedLogFormatter(logging.Formatter):
"""A logging formatter which pretty prints thread and calling function
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits