Soundex Encoding Text in C Sharp

Soundex is a text encoding algorithm designed to create hashes of words according to their sound so that similar sounding words can be matched even though they might be spelled very differently. This sort of phonetic algorithm is what makes it possible for you to search for someone's name on the internet and be reasonably sure to find them, even if you don't know the exact spelling.


[TestFixture]
public class SoundexTests
{
	// Words that sound alike must produce the same Soundex code.
	[Test]
	[TestCase("Robert", "Rupert")]
	[TestCase("Campbel", "Cammmppppbbbeeelll")]
	[TestCase("Ravi", "Ravee")]
	[TestCase("robert", "ROBERT")]
	public void Soundex_Similar_Sounding_Words_Give_Identical_Scores(string word1, string word2)
	{
		Assert.AreEqual(word1.ToSoundex(), word2.ToSoundex());
	}

	// Words that sound different must produce different Soundex codes.
	[Test]
	[TestCase("Rupert", "Rubin")]
	public void Soundex_Different_Sounding_Words_Give_Different_Scores(string word1, string word2)
	{
		Assert.AreNotEqual(word1.ToSoundex(), word2.ToSoundex());
	}

	// Reference encodings for American Soundex.
	//  - Tymczak:  'z' and 'k' are separated by a vowel, so 2 is coded twice (T522).
	//  - Honeyman: every nasal is separated by a vowel, so 5 is coded three times (H555).
	//  - Pfister:  'f' shares the code of the leading 'P' and is therefore dropped (P236).
	//  - Ashcraft: 'h' does not separate 's' and 'c', so 2 is coded once (A261).
	[Test]
	[TestCase("Donald", "D543")]
	[TestCase("Zach", "Z200")]
	[TestCase("David", "D130")]
	[TestCase("Ashcraft", "A261")]
	[TestCase("Ashcroft", "A261")]
	[TestCase("Tymczak", "T522")]
	[TestCase("Honeyman", "H555")]
	[TestCase("Pfister", "P236")]
	[TestCase("", "0000")]
	public void Soundex_Word_Encodes_Correctly(string word, string encoding)
	{
		Assert.AreEqual(encoding, word.ToSoundex());
	}
}


/// <summary>
/// Extension methods for computing the American Soundex phonetic code of a string.
/// </summary>
public static class SoundexExtension
{
	const int NotFound = -1;
	const int SoundexLength = 4;
	const char PaddingChar = '0';

	/// <summary>
	/// Encodes <paramref name="text"/> as a four character Soundex code: the first
	/// character (upper-cased) followed by up to three digits, right-padded with '0'.
	/// </summary>
	/// <param name="text">The word to encode. May be null or empty.</param>
	/// <returns>
	/// The Soundex code, e.g. "R163" for "Robert". Null or empty input yields "0000".
	/// </returns>
	/// <remarks>
	/// Rules applied after the first character:
	/// consonants map to digits 1-6; consecutive letters with the same digit are coded once,
	/// including a letter that shares the digit of the first character; a vowel
	/// (a, e, i, o, u, y) separates letters so the same digit may be coded again;
	/// 'h', 'w' and any non-letter character are ignored and do not separate letters.
	/// Casing is done with the invariant culture so results do not depend on the current locale.
	/// </remarks>
	public static string ToSoundex(this string text)
	{
		if (string.IsNullOrEmpty(text)) return new string(PaddingChar, SoundexLength);

		var encoded = new StringBuilder(SoundexLength);

		// keep first letter
		encoded.Append(char.ToUpperInvariant(text[0]));

		// Seed with the first letter's own code so an immediately following letter
		// with the same code is not emitted again (Pfister -> P236, not P123).
		int lastValue = MapCharToSoundexCode(char.ToLowerInvariant(text[0]));

		for (int i = 1; i < text.Length && encoded.Length < SoundexLength; i++)
		{
			char letter = char.ToLowerInvariant(text[i]);
			int encodeValue = MapCharToSoundexCode(letter);

			if (encodeValue != NotFound)
			{
				if (encodeValue != lastValue)
					encoded.Append((char)('0' + encodeValue));

				lastValue = encodeValue;
			}
			else if (IsVowel(letter))
			{
				// A vowel breaks a run: the same code may be emitted again afterwards.
				lastValue = NotFound;
			}
			// Anything else ('h', 'w', digits, punctuation) is transparent:
			// it neither emits a digit nor breaks a run of equal codes.
		}

		if (encoded.Length < SoundexLength)
		{
			encoded.Append(PaddingChar, SoundexLength - encoded.Length);
		}

		return encoded.ToString();
	}

	/// <summary>
	/// Returns true for the letters that separate equal Soundex codes.
	/// Expects a lower-case character.
	/// </summary>
	private static bool IsVowel(char letter)
	{
		switch (letter)
		{
			case 'a': case 'e': case 'i': case 'o': case 'u': case 'y':
				return true;
			default:
				return false;
		}
	}

	/// <summary>
	/// Maps a lower-case letter to its Soundex digit (1-6), or <c>NotFound</c>
	/// when the character carries no code (vowels, 'h', 'w', 'y', non-letters).
	/// </summary>
	private static int MapCharToSoundexCode(char letter)
	{
		switch (letter)
		{
			case 'b': case 'f': case 'p': case 'v':
				return 1;
			case 'c': case 'g': case 'j': case 'k':
			case 'q': case 's': case 'x': case 'z':
				return 2;
			case 'd': case 't':
				return 3;
			case 'l':
				return 4;
			case 'm': case 'n':
				return 5;
			case 'r':
				return 6;
			default:
				return NotFound;
		}
	}
}