本文概述
- C
- C#
- D
- F#
- Go
- Java
- JavaScript
- Objective-C
- PHP
- Python
- Ruby
- Scala
- Swift
- VBScript
Soundex码的第一个字符是表达式的首字母, 并转换为大写。第二个到第四个字符是代表表达式中其余字母的数字。字母A, E, I, O, U, H, W和Y会被忽略, 除非它们是字符串的第一个字母。所有A-Z范围以外的国际字母字符都被视为元音。因此, 发音几乎相同的两个字符串应该具有相同的Soundex码。例如, 单词“text”和“tixt”都会生成Soundex码“T230”。
让我们开始吧 !
C
#include <stdio.h>

/*
 * Soundex class of every 7-bit ASCII character, filled in by init():
 *   -1   vowel (A, E, I, O, U): separates two consonants of the same class
 *    0   character that is skipped without separating (H, W, Y, anything
 *        that init() does not register)
 *   1-6  consonant class
 * Declared signed so that -1 survives on targets where plain char is unsigned.
 */
static signed char code[128] = { 0 };

/* Class of one input byte; bytes outside 7-bit ASCII count as vowels. */
static int code_of(char ch)
{
    unsigned char u = (unsigned char)ch;
    return u < 128 ? code[u] : -1;
}

/*
 * Compute the 4-character Soundex key of s.
 *
 * init() must have been called once beforehand, otherwise every letter is
 * treated as "skipped" and the key is the first letter followed by "000".
 *
 * Returns a pointer to a static buffer that is overwritten by the next call.
 * A NULL or empty input yields the empty string.
 */
const char* soundex(const char *s)
{
    static char out[5];
    int c, prev, i;

    out[0] = out[4] = 0;
    if (!s || !*s) return out;

    out[0] = *s++;
    /* the key always starts with an upper-case letter */
    if (out[0] >= 'a' && out[0] <= 'z') out[0] = (char)(out[0] - 0x20);

    /* first letter, though not coded, can still affect next letter: Pfister */
    prev = code_of(out[0]);
    for (i = 1; *s && i < 4; s++) {
        c = code_of(*s);
        if (c == prev) continue;

        if (c == -1) {
            prev = 0;               /* vowel as separator */
        } else if (c > 0) {
            out[i++] = (char)(c + '0');
            prev = c;
        }
    }
    while (i < 4) out[i++] = '0';
    return out;
}

/* Register class c for every letter of s, in both upper and lower case. */
void add_code(const char *s, int c)
{
    while (*s) {
        unsigned char u = (unsigned char)*s;
        if (u < 128) {
            /* toggling bit 0x20 flips the case of an ASCII letter */
            code[u] = code[0x20 ^ u] = (signed char)c;
        }
        s++;
    }
}

/* Populate the class table; call once before the first soundex() call. */
void init()
{
    /* index - 1 is the class: vowels -> -1, (nothing) -> 0, BFPV -> 1, ... */
    static const char *cls[] =
        { "AEIOU", "", "BFPV", "CGJKQSXZ", "DT", "L", "MN", "R", 0 };
    int i;

    for (i = 0; cls[i]; i++)
        add_code(cls[i], i - 1);
}
用法
/* Demo: build the lookup table, then print the key for "Javascript". */
int main()
{
    init();
    /* J126 -- printed through "%s" so the key is never used as a format string */
    printf("%s\n", soundex("Javascript"));
    return 0;
}
C#
using System.Text;
using System.Text.RegularExpressions;
/// <summary>American Soundex encoder.</summary>
public static class Soundex
{
    /// <summary>
    /// Returns the 4-character Soundex key of <paramref name="word"/>.
    /// Punctuation is stripped first; digits, whitespace and any other
    /// uncoded character behave like vowels (they separate equal codes).
    /// H and W are skipped without separating. A null, empty or
    /// punctuation-only input yields "0000".
    /// </summary>
    public static string For(string word)
    {
        const int MaxSoundexCodeLength = 4;

        word = Regex.Replace(
            word == null ? string.Empty : word.ToUpperInvariant(),
            @"[^\w\s]",
            string.Empty);

        if (string.IsNullOrEmpty(word))
            return string.Empty.PadRight(MaxSoundexCodeLength, '0');

        var soundexCode = new StringBuilder();
        soundexCode.Append(word[0]);

        // The first letter is emitted verbatim, but its code still suppresses
        // an immediately following letter of the same class (e.g. "Pfister").
        var previous = GetCharNumberForLetter(word[0]);

        for (var i = 1; i < word.Length && soundexCode.Length < MaxSoundexCodeLength; i++)
        {
            var letter = word[i];

            // H and W are transparent: they neither emit a digit nor reset
            // 'previous', so "Ashcraft" encodes S and C as a single 2.
            if (letter == 'H' || letter == 'W')
                continue;

            var current = GetCharNumberForLetter(letter);
            if (current != '0' && current != previous)
                soundexCode.Append(current);

            previous = current;
        }

        return soundexCode.ToString().PadRight(MaxSoundexCodeLength, '0');
    }

    /// <summary>
    /// Maps an upper-case letter to its Soundex digit, or '0' when the
    /// character carries no code.
    /// </summary>
    private static char GetCharNumberForLetter(char letter)
    {
        switch (letter)
        {
            case 'B': case 'F': case 'P': case 'V':
                return '1';
            case 'C': case 'G': case 'J': case 'K':
            case 'Q': case 'S': case 'X': case 'Z':
                return '2';
            case 'D': case 'T':
                return '3';
            case 'L':
                return '4';
            case 'M': case 'N':
                return '5';
            case 'R':
                return '6';
            default:
                return '0';
        }
    }
}
用法
// true: both phrases encode to C614.
// Stored in a local because a bare comparison is not a valid C# statement.
bool soundsAlike = Soundex.For("CSharp Language") == Soundex.For("CSherp Language");
D
D标准库(Phobos)已包含soundex函数。
import std.stdio: writeln;
import std.string: soundex;
/// Checks Phobos' soundex against a table of known name/key pairs.
void main()
{
    // Each row is [input, expected Soundex key].
    static immutable string[2][] expectations = [
        ["soundex",   "S532"],
        ["example",   "E251"],
        ["ciondecks", "C532"],
        ["ekzampul",  "E251"],
        ["Robert",    "R163"],
        ["Rupert",    "R163"],
        ["Rubin",     "R150"],
        ["Ashcraft",  "A261"],
        ["Ashcroft",  "A261"],
        ["Tymczak",   "T522"],
    ];

    foreach (row; expectations)
        assert(soundex(row[0]) == row[1]);
}
F#
/// Computes the 4-character American Soundex key of `x`.
///
/// * The first character is kept verbatim (upper-cased), including when it
///   is H or W.
/// * H and W after the first character are removed before coding, so they
///   never separate two consonants of the same class.
/// * Every other uncoded character (vowels, Y, digits, punctuation) maps to
///   '0': it separates equal codes and is then dropped.
/// * A null or empty input yields "0000".
let americanSoundex (x : string) =
    // Soundex digit of a coded consonant, '0' for everything else.
    let codeOf ch =
        match ch with
        | 'B' | 'F' | 'P' | 'V' -> '1'
        | 'C' | 'G' | 'J' | 'K' | 'Q' | 'S' | 'X' | 'Z' -> '2'
        | 'D' | 'T' -> '3'
        | 'L' -> '4'
        | 'M' | 'N' -> '5'
        | 'R' -> '6'
        | _ -> '0'

    // Collapses every run of equal neighbours to a single element.
    let rec collapse xs =
        match xs with
        | h0 :: h1 :: t when h0 = h1 -> collapse (h0 :: t)
        | h :: t -> h :: collapse t
        | [] -> []

    if System.String.IsNullOrEmpty x then
        "0000"
    else
        let letters = x.ToUpperInvariant() |> List.ofSeq
        let first = List.head letters
        let rest = List.tail letters |> List.filter (fun ch -> ch <> 'H' && ch <> 'W')
        let digits =
            (first :: rest)
            |> List.map codeOf
            |> collapse
            |> List.tail                          // the first letter is emitted as a letter, not as its code
            |> List.filter (fun d -> d <> '0')    // separators have done their job
        let code = new System.String(Array.ofList (first :: digits))
        code.Substring(0, System.Math.Min(4, code.Length)).PadRight(4, '0')

["Robert"; "Rupert"; "Robbert"; "Rubin";
 "Beer"; "Bear"; "Bearer";
 "Smith"; "Smyth";
 "Ashcraft"; "Ashcroft";
 "Tymczak"; "Pfister"]
|> List.map (fun x -> (x, americanSoundex x))
|> List.iter (fun (x, y) -> printfn "%-8s = %s" x y)

(*
Robert   = R163
Rupert   = R163
Robbert  = R163
Rubin    = R150
Beer     = B600
Bear     = B600
Bearer   = B660
Smith    = S530
Smyth    = S530
Ashcraft = A261
Ashcroft = A261
Tymczak  = T522
Pfister  = P236
*)
Go
package myPackageName

import (
	"bytes"
	"fmt"
	"strings"
)

// codeLen is the length of a Soundex key: one letter plus three digits.
const codeLen = 4

// codes maps a lower-case letter to its Soundex digit. Letters mapped to ""
// (vowels, h, w, y) never emit a digit.
var codes = map[string]string{
	"a": "", "b": "1", "c": "2", "d": "3", "e": "", "f": "1", "g": "2",
	"h": "", "i": "", "j": "2", "k": "2", "l": "4", "m": "5", "n": "5",
	"o": "", "p": "1", "q": "2", "r": "6", "s": "2", "t": "3", "u": "",
	"v": "1", "w": "", "x": "2", "y": "", "z": "2",
}

// Soundex returns the 4-character, upper-case Soundex key of s.
//
// The first byte of s is kept as-is (upper-cased at the end). Each following
// letter contributes its digit unless the previous coded position had the same
// digit; h and w are skipped without separating, while vowels, y and any byte
// not in the table separate equal digits. An empty s yields "0000".
func Soundex(s string) string {
	if len(s) == 0 {
		return strings.Repeat("0", codeLen)
	}

	var encoded bytes.Buffer
	encoded.WriteByte(s[0])

	// The first letter is not emitted as a digit, but its digit still
	// suppresses an immediately following letter of the same class.
	previous := codes[strings.ToLower(string(s[0]))]

	for i := 1; i < len(s) && encoded.Len() < codeLen; i++ {
		current := strings.ToLower(string(s[i]))
		if current == "h" || current == "w" {
			continue // transparent: keep comparing against the letter before it
		}

		code := codes[current] // "" for vowels and for bytes not in the table
		if code != "" && code != previous {
			encoded.WriteString(code)
		}
		previous = code
	}

	if encoded.Len() < codeLen {
		padding := strings.Repeat("0", codeLen-encoded.Len())
		encoded.WriteString(padding)
	}

	return strings.ToUpper(encoded.String())
}
用法
// main prints the Soundex key of a sample word.
func main() {
	// Expected output: J126
	fmt.Println(Soundex("Javascript"))
}
Java
private static String getCode(char c){switch(c){case 'B': case 'F': case 'P': case 'V':return "1";
case 'C': case 'G': case 'J': case 'K':case 'Q': case 'S': case 'X': case 'Z':return "2";
case 'D': case 'T':return "3";
case 'L':return "4";
case 'M': case 'N':return "5";
case 'R':return "6";
default:return "";
}} public static String soundex(String s){String code, previous, soundex;
code = s.toUpperCase().charAt(0) + "";
previous = "7";
for(int i = 1;
i <
s.length();
i++){String current = getCode(s.toUpperCase().charAt(i));
if(current.length() >
0 &
&
!current.equals(previous)){code = code + current;
}previous = current;
}soundex = (code + "0000").substring(0, 4);
return soundex;
}
用法
public static void main(String[] args){System.out.println(soundex("Soundex"));
//S532System.out.println(soundex("Example"));
//E251System.out.println(soundex("Sownteks"));
//S532System.out.println(soundex("Ekzampul"));
//E251}
JavaScript
/**
 * Compute the 4-character Soundex key of a string.
 *
 * The first character is kept (upper-cased). Each following letter adds its
 * digit unless the previous coded position had the same digit. "h" and "w" are
 * skipped without separating; vowels, "y" and any other uncoded character
 * separate equal digits.
 *
 * @param {string} s Text to encode.
 * @returns {string} The key, right-padded with "0"; "0000" for an empty string.
 */
var soundex = function (s) {
    var codes = {
        a: '', e: '', i: '', o: '', u: '',
        b: 1, f: 1, p: 1, v: 1,
        c: 2, g: 2, j: 2, k: 2, q: 2, s: 2, x: 2, z: 2,
        d: 3, t: 3,
        l: 4,
        m: 5, n: 5,
        r: 6
    };

    var letters = s.toLowerCase().split('');
    if (letters.length === 0) {
        return '0000';
    }

    var first = letters.shift();
    // The first letter's own digit suppresses a same-class neighbour.
    var previous = codes[first];
    var digits = '';

    letters.forEach(function (ch) {
        if (ch === 'h' || ch === 'w') {
            return; // transparent: does not reset `previous`
        }
        var code = codes[ch]; // '' for vowels, undefined for anything unlisted
        if (code && code !== previous) {
            digits += code;
        }
        previous = code;
    });

    return (first + digits + '000').slice(0, 4).toUpperCase();
};
用法
// Near-homophones share a key, so this comparison evaluates to true.
soundex("Javascript") == soundex("Jabascript");
// True as J126 == J126
Objective-C
你可以在Darkseed编写的GitHub gist中找到Soundex算法的Objective-C实现。
PHP
PHP已经将soundex作为内置函数提供, 用于计算字符串的soundex键。
用法
soundex("PHP Server Language") == soundex("PHP Serber language");
// True as P261 == P261 (soundex() skips the spaces, so letters after "PHP" are still coded)
Python
函数
def get_soundex(name):
    """Get the soundex code for the string.

    The first character is kept (upper-cased). Every following letter adds
    its digit unless the previous coded position had the same digit. ``H``
    and ``W`` are skipped without separating; the vowels and ``Y`` separate
    equal digits; characters outside A-Z are ignored entirely.

    :param name: text to encode
    :return: 4-character key, right-padded with ``"0"``; ``"0000"`` when
        ``name`` is empty
    """
    if not name:
        return "0000"

    name = name.upper()

    groups = {"BFPV": "1", "CGJKQSXZ": "2", "DT": "3", "L": "4", "MN": "5", "R": "6"}
    # Flatten to one entry per letter so each lookup is a single dict access.
    codes = {letter: digit for letters, digit in groups.items() for letter in letters}

    soundex = name[0]
    # The first letter's own digit suppresses a same-class neighbour ("Pfister").
    previous = codes.get(name[0], "")

    for char in name[1:]:
        if char in "HW":
            continue  # transparent: "Schultz" and "Shultz" both give S432
        if char in "AEIOUY":
            previous = ""  # a vowel lets the same digit be emitted again
            continue
        code = codes.get(char)
        if code is None:
            continue  # not a letter: ignored
        if code != previous:
            soundex += code
        previous = code

    return soundex[:4].ljust(4, "0")
用法
# Demo: print a two-column table of names and their Soundex keys.
# The list is called `names` so that it does not shadow the builtin `list`.
names = ["Smith", "Smythe", "Robert", "Rupert", "Schultz", "Shultz"]

print("NAME\t\tSOUNDEX")
for name in names:
    print("%s\t\t%s" % (name, get_soundex(name)))
库
如果你更喜欢使用库, 则可以使用Fuzzy包(它使用C扩展(通过Pyrex)来提高速度)。
Ruby
class String
  # Coded consonants and, position for position, their Soundex digits.
  SoundexChars = 'BFPVCGJKQSXZDTLMNR'
  SoundexNums  = '111122222222334556'
  SoundexCharsEx = '^' + SoundexChars
  SoundexCharsDel = '^A-Z'

  # desc: http://en.wikipedia.org/wiki/Soundex
  #
  # Returns the Soundex key of the receiver: its first letter followed by the
  # digits of the remaining letters. Characters outside A-Z are removed first.
  # H and W are skipped without separating; vowels and Y separate equal digits.
  #
  # census - when true (default) the key is cut to one letter + three digits;
  #          when false all digits are kept.
  #
  # Returns '' when the receiver contains no letters.
  def soundex(census = true)
    letters = upcase.delete(SoundexCharsDel)
    return '' if letters.empty?

    code_of = lambda do |ch|
      idx = SoundexChars.index(ch)
      idx ? SoundexNums[idx] : nil
    end

    digits = String.new
    # The first letter's own digit suppresses a same-class neighbour.
    previous = code_of.call(letters[0])
    letters[1..-1].each_char do |ch|
      next if ch == 'H' || ch == 'W' # transparent
      code = code_of.call(ch)        # nil for vowels and Y
      digits << code if code && code != previous
      previous = code
    end

    digits = digits[0, 3] if census
    letters[0] + digits.ljust(3, '0')
  end

  # True when the receiver and other have the same Soundex key.
  def sounds_like(other)
    self.soundex == other.soundex
  end
end
用法
# Demo: print each word's key, then whether the two words of a pair match.
%w(Soundex Sownteks Example Ekzampul foo bar).each_slice(2) do |word1, word2|
  [word1, word2].each { |word| puts '%-8s -> %s' % [word, word.soundex] }

  print "'#{word1}' "
  print word1.sounds_like(word2) ? "sounds" : "does not sound"
  print " like '#{word2}'\n"
end

#Soundex  -> S532
#Sownteks -> S532
#'Soundex' sounds like 'Sownteks'
#Example  -> E251
#Ekzampul -> E251
#'Example' sounds like 'Ekzampul'
#foo      -> F000
#bar      -> B600
#'foo' does not sound like 'bar'
Scala
/** Computes a 4-character Soundex key of `s`.
  *
  * The first character is kept (upper-cased). Each following letter adds its
  * digit unless the character directly before it had the same digit. In this
  * variant every uncoded character (vowels, H, W, Y, punctuation) separates
  * equal digits, so "Ashcraft" yields A226.
  *
  * @param s text to encode
  * @return the key, right-padded with '0'; "0000" for an empty string
  */
def soundex(s: String): String = {
  if (s.isEmpty) "0000"
  else {
    var code = s.head.toUpper.toString
    // The first letter's own digit suppresses a same-class neighbour.
    var previous = getCode(code.head)
    for (ch <- s.drop(1)) {
      val current = getCode(ch.toUpper)
      if (!current.isEmpty && current != previous)
        code += current
      previous = current
    }
    code += "0000"
    code.slice(0, 4)
  }
}

/** Maps an upper-case letter to its Soundex digit, or "" when it has none. */
def getCode(c: Char): String = c match {
  case 'B' | 'F' | 'P' | 'V' => "1"
  case 'C' | 'G' | 'J' | 'K' | 'Q' | 'S' | 'X' | 'Z' => "2"
  case 'D' | 'T' => "3"
  case 'L' => "4"
  case 'M' | 'N' => "5"
  case 'R' => "6"
  case _ => ""
}
用法
/** Runs soundex over a table of names and prints expected vs. computed key. */
def main(args: Array[String]): Unit = {
  val tests = Map(
    "Soundex"     -> "S532",
    "Euler"       -> "E460",
    "Gauss"       -> "G200",
    "Hilbert"     -> "H416",
    "Knuth"       -> "K530",
    "Lloyd"       -> "L300",
    "Lukasiewicz" -> "L222",
    "Ellery"      -> "E460",
    "Ghosh"       -> "G200",
    "Heilbronn"   -> "H416",
    "Kant"        -> "K530",
    "Ladd"        -> "L300",
    "Lissajous"   -> "L222",
    "Wheaton"     -> "W350",
    "Ashcraft"    -> "A226",
    "Burroughs"   -> "B622",
    "Burrows"     -> "B620",
    "O'Hara"      -> "O600")

  tests.foreach { v =>
    val code = soundex(v._1)
    val status = if (code == v._2) "OK" else "ERROR"
    // Column separators restored so the three fields do not run together.
    printf("Name: %-20s  Code: %s   Found: %s  - %s\n", v._1, v._2, code, status)
  }
}
Swift
在这个GitHub仓库中, cafford编写的类是原始Soundex算法的Swift语言实现。
//
//  Soundex.swift
//  speller
//
//  Created by Clifford Helsel on 4/28/16.
//
//  Based on standard Soundex algorithm and loosely ported from Apache Commons
//  https://commons.apache.org/proper/commons-codec/apidocs/src-html/org/apache/commons/codec/language/Soundex.html

/// American Soundex encoder for the letters A-Z.
public class Soundex {

    /// Soundex digit for each letter of `en_alphabet`, position for position.
    /// "0" marks letters that carry no code (vowels, H, W, Y).
    private static let en_mapping_string = Array("01230120022455012623010202")
    private static let en_alphabet = Array("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

    /// Letter -> digit lookup built once per instance from the two tables above.
    private let mapping: [Character: Character] =
        Soundex.buildMapping(codes: Soundex.en_alphabet, alphabet: Soundex.en_mapping_string)

    /// Pairs `codes[i]` (the key) with `alphabet[i]` (the value).
    private static func buildMapping(codes: Array<Character>, alphabet: Array<Character>) -> [Character: Character] {
        var retval: [Character: Character] = [:]
        for (index, code) in codes.enumerated() {
            retval[code] = alphabet[index]
        }
        return retval
    }

    /// Digit for `c`, or "0" when the character is not in the table.
    private func mapChar(c: Character) -> Character {
        if let val = mapping[c] {
            return val
        }
        return "0" // not specified in original Soundex specification, if character is not found, code is 0
    }

    /// Returns the 4-character Soundex key of `of`, or "" for an empty string.
    ///
    /// The first character is kept (upper-cased). Each following letter adds
    /// its digit unless the previous coded position had the same digit. H and
    /// W are skipped without separating; vowels and unknown characters
    /// separate equal digits. Short keys are padded with "0".
    public func soundex(of: String) -> String {
        guard !of.isEmpty else { return "" }

        let letters = Array(of.uppercased())
        var out: [Character] = [letters[0]]
        // The first letter's own digit suppresses a same-class neighbour.
        var last = mapChar(c: letters[0])

        for ch in letters.dropFirst() {
            if out.count == 4 { break }
            if ch == "H" || ch == "W" { continue } // transparent

            let mapped = mapChar(c: ch)
            if mapped != "0" && mapped != last {
                out.append(mapped)
            }
            last = mapped // also for "0", so a vowel lets the same digit repeat
        }

        while out.count < 4 {
            out.append("0")
        }
        return String(out)
    }
}
用法
let c = Soundex()
// Standard Soundex key for "Christopher": C, then R=6, S=2, T=3.
c.soundex(of:"Christopher") // C623
VBScript
' Maps an upper-case letter to its Soundex digit ("1".."6").
' Returns Empty for every character that carries no code.
Function getCode(c)
    Select Case c
        Case "B", "F", "P", "V"
            getCode = "1"
        Case "C", "G", "J", "K", "Q", "S", "X", "Z"
            getCode = "2"
        Case "D", "T"
            getCode = "3"
        Case "L"
            getCode = "4"
        Case "M", "N"
            getCode = "5"
        Case "R"
            getCode = "6"
    End Select
End Function

' Returns the 4-character Soundex key of s: the first character (upper-cased)
' followed by up to three digits, right-padded with "0".
' H and W are skipped without separating; vowels and other uncoded characters
' separate equal digits.
Function soundex(s)
    Dim code, previous, current, letter, i

    code = UCase(Mid(s, 1, 1))
    ' The first letter's own digit suppresses a same-class neighbour ("Pfister").
    previous = getCode(code)

    For i = 2 To Len(s)
        letter = UCase(Mid(s, i, 1))
        If letter <> "H" And letter <> "W" Then
            current = getCode(letter)
            If Len(current) > 0 And current <> previous Then
                code = code & current
            End If
            previous = current
        End If
    Next

    soundex = Mid(code, 1, 4)
    If Len(code) < 4 Then
        soundex = soundex & String(4 - Len(code), "0")
    End If
End Function
最后, 如果你知道Soundex算法在另一种语言中的实现(或者你对现有语言有更好的摘录), 请不要害羞, 并在评论框中与我们分享, 祝你玩得开心!
推荐阅读
- Windows XP下不能安装USB 2.0驱动
- 如何在PHP中使用pngquant
- 如何在Symfony 3中使用SnappyBundle(wkhtmltoimage)创建网站的屏幕截图
- 如何在Symfony 3中强制通过HTTPS(基于SSL的HTTP)进行访问
- 如何在Symfony 4中安装和配置FOSUserBundle
- 如何使用LFTP脚本使用LFTP(sftp)下载远程目录
- 如何使用Doctrine和Symfony 3实现Soundex搜索(在MySql中)
- 如何解决Ubuntu 16.04中的Plesk安装/升级错误(系统中没有/etc/localtime文件)
- 如何在AWS Ubuntu 16.04实例上以root身份允许SSH和SFTP访问