detect differences between two strings with Javascript
With Javascript, I want to check how many differences there are between two strings.
Something like:
var oldName = "Alec";
var newName = "Alexander";
var differences = getDifference(oldName, newName) // differences = 6
- Any letters added to the name should count as one change per letter.
- Changing a letter should count as a change per letter. Swaping two
- letters should count as two changes as your really changing each
leter. - However, shifting a letter and inserting another should only count as one change.
For example:
Changing "Alex" to "Alexander" would be 5 changes as 5 letters have been added
Changing "Alex" to "Allex" would only be one change as you added an "l" and shifted the rest over but didnt change them
Changing "Alexander" to "Allesander"would be 2 changes (adding the "l" and changing "x" to a "s").
I can split each name into an array of letters and compare them easy enough like in this jsFiddle with the below function:
function compareNames(){
var oldName = $('#old').val().split("");
var newName = $('#new').val().split("");
var changeCount = 0;
var testLength = 0;
if(oldName.length > newName.length){
testLength=oldName.length;
}
else testLength=newName.length;
for(var i=0;i<testLength;i++){
if(oldName[i]!=newName[i]) {
changeCount++;
}
}
alert(changeCount);
}
But how can I account for the shifting of letters not counting as a change?
Update: Here's how I got it working
Levenshtein distance was exactly what I needed. Thanks to Peter!
Working jsFiddle
$(function () {
$('#compare').click(function () {
var oldName = $('.compare:eq(0)').val();
var newName = $('.compare:eq(1)').val();
var count = levDist(oldName, newName);
$('#display').html('There are ' + count + ' differences present');
});
});
function levDist(s, t) {
var d = []; //2d matrix
// Step 1
var n = s.length;
var m = t.length;
if (n == 0) return m;
if (m == 0) return n;
//Create an array of arrays in javascript (a descending loop is quicker)
for (var i = n; i >= 0; i--) d[i] = [];
// Step 2
for (var i = n; i >= 0; i--) d[i][0] = i;
for (var j = m; j >= 0; j--) d[0][j] = j;
// Step 3
for (var i = 1; i <= n; i++) {
var s_i = s.charAt(i - 1);
// Step 4
for (var j = 1; j <= m; j++) {
//Check the jagged ld total so far
if (i == j && d[i][j] > 4) return n;
var t_j = t.charAt(j - 1);
var cost = (s_i == t_j) ? 0 : 1; // Step 5
//Calculate the minimum
var mi = d[i - 1][j] + 1;
var b = d[i][j - 1] + 1;
var c = d[i - 1][j - 1] + cost;
if (b < mi) mi = b;
if (c < mi) mi = c;
d[i][j] = mi; // Step 6
//Damerau transposition
if (i > 1 && j > 1 && s_i == t.charAt(j - 2) && s.charAt(i - 2) == t_j) {
d[i][j] = Math.min(d[i][j], d[i - 2][j - 2] + cost);
}
}
}
// Step 7
return d[n][m];
}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.0/jquery.min.js"></script>
<input type="button" id="compare" value="Compare" /><br><br>
<input type="text" id="old" class="compare" value="Alec" />
<input type="text" id="new" class="compare" value="Alexander" />
<br>
<br>
<span id="display"></span>
Credit to James Westgate for the function:
Jame's post showing this function
I don't have a Javascript implementation on hand per se, but you're doing something for which well-established algorithms exist. Specifically, I believe you're looking for the "Levenshtein distance" between two strings -- i.e. the number of insertions, substitutions and deletions (assuming you are treating a deletion as a change).
The wikipedia page for Levenshtein distance has various pseudo-code implementations from which you could start, and references which may also help you.
Alternative implemenation:
/**
* Computes the Levenshtein edit distance between two strings.
* @param {string} a
* @param {string} b
* @return {number} The edit distance between the two strings.
*/
goog.string.editDistance = function(a, b) {
var v0 = [];
var v1 = [];
if (a == b) {
return 0;
}
if (!a.length || !b.length) {
return Math.max(a.length, b.length);
}
for (var i = 0; i < b.length + 1; i++) {
v0[i] = i;
}
for (var i = 0; i < a.length; i++) {
v1[0] = i + 1;
for (var j = 0; j < b.length; j++) {
var cost = Number(a[i] != b[j]);
// Cost for the substring is the minimum of adding one character, removing
// one character, or a swap.
v1[j + 1] = Math.min(v1[j] + 1, v0[j + 1] + 1, v0[j] + cost);
}
for (var j = 0; j < v0.length; j++) {
v0[j] = v1[j];
}
}
return v1[b.length];
};