python简单实现字符串比对

需求源于 crnn识别效果统计,需要统计出识别正确的字符数
需要支持中间漏掉字符或者多出字符时 不影响后续判断
如果逐位(1v1)比较,一旦漏识别或者中途多出一个字符,后面的字符就会因为错位而全部判错。
所以就试着写了个比对方法,
尽可能接近文本比对软件的比对效果;区别在于它们以行为单位,这里以字符为单位。
修复上一版的问题:如果外循环中靠前的字符直接匹配到了内循环中靠后的字符,两者之间的字符就会被跳过,无法再参与匹配。

比如 str1 = "baaaaaaaa"、str2 = "aaaaaaaab",之前可能会出现只匹配到字符 b 的情况。

def compSTR(str1: str, str2: str) -> dict:
    """Align two strings character by character and report the matches.

    The comparison tolerates missing or extra characters: after a mismatch
    the scan keeps looking for the next pair of equal characters instead of
    comparing position by position. Each pass of the main loop only looks
    inside a limited window of both strings; the windows grow by one
    character whenever a pass finds nothing, so an early character is not
    paired with a far-away one while closer candidates are still untested.

    Args:
        str1: First string.
        str2: Second string.

    Returns:
        A dict with three keys:
        ``"str1"`` - indices in ``str1`` of the matched characters,
        ``"str2"`` - indices in ``str2`` of the matched characters
        (``"str1"[k]`` is paired with ``"str2"[k]``),
        ``"num"`` - number of matched pairs.
    """
    right_info = {"str1": [], "str2": [], "num": 0}

    # Nothing can match when either string is empty. Without this guard the
    # "reset to end - 1" step below produces index -1 and indexing the empty
    # string raises IndexError.
    if not str1 or not str2:
        return right_info

    # Next unmatched position and total length of each string.
    start1 = 0
    start2 = 0
    end1 = len(str1)
    end2 = len(str2)
    # Exclusive upper bounds of the windows currently being searched.
    limit1 = 1
    limit2 = 1

    def record_match(ind1: int, ind2: int):
        """Store the matched pair and return the next start positions."""
        right_info["num"] += 1
        right_info["str1"].append(ind1)
        right_info["str2"].append(ind2)
        next1 = ind1 + 1
        next2 = ind2 + 1
        # Once one string is used up, mark the other as finished as well so
        # the main loop terminates.
        if next2 >= end2:
            next1 = end1
        elif next1 >= end1:
            next2 = end2
        return next1, next2

    # Decides which string drives the outer loop; toggled on every pass.
    rotation = end1 < end2

    while start1 < end1 or start2 < end2:
        # If only one string ran past its end, step back onto its last
        # character so the other string can still be scanned against it.
        if start1 >= end1:
            start1 = end1 - 1
        if start2 >= end2:
            start2 = end2 - 1

        # Both strings are down to their final character: compare and stop.
        if end1 - start1 == 1 == end2 - start2:
            if str2[-1] == str1[-1]:
                right_info["num"] += 1
                right_info["str1"].append(start1)
                right_info["str2"].append(start2)
            break

        rotation = not rotation
        if rotation:
            # str1 drives the outer loop, str2 is searched for each character.
            for ind1 in range(start1, limit1):
                for ind2 in range(start2, limit2):
                    if str1[ind1] == str2[ind2]:
                        start1, start2 = record_match(ind1, ind2)
                        limit2 = min(max(limit2, start2 + 1), end2)
                        break
                else:
                    # The window already reached the end of str2, so this
                    # str1 character cannot match anything: skip it.
                    if ind2 == end2 - 1:
                        start1 = ind1 + 1
                    continue
                # Reached only after a match (inner loop ended via break).
                limit1 = min(max(limit1, start1 + 1), end1)
                break
            else:
                # No match in this pass: widen both windows by one.
                limit1 = min(limit1 + 1, end1)
                limit2 = min(limit2 + 1, end2)
        else:
            # Mirror image of the branch above with the roles swapped.
            for ind2 in range(start2, limit2):
                for ind1 in range(start1, limit1):
                    if str2[ind2] == str1[ind1]:
                        start1, start2 = record_match(ind1, ind2)
                        limit1 = min(max(limit1, start1 + 1), end1)
                        break
                else:
                    if ind1 == end1 - 1:
                        start2 = ind2 + 1
                    continue
                limit2 = min(max(limit2, start2 + 1), end2)
                break
            else:
                limit1 = min(limit1 + 1, end1)
                limit2 = min(limit2 + 1, end2)

    return right_info

旧代码留着吧
def compSTR(str1: str, str2: str) -> dict:
    """Legacy character-level comparison of two strings.

    Repeatedly picks one string as the reference, walks its remaining
    characters and pairs the first one that also occurs in the remainder of
    the other string. Unlike the windowed variant, each pass searches all
    the way to the end of both strings.

    NOTE: this function has the same name as the earlier ``compSTR`` in this
    file. If both definitions live in one module, this later one replaces
    the earlier one at import time.

    Args:
        str1: First string.
        str2: Second string.

    Returns:
        A dict with three keys:
        ``"str1"`` - indices in ``str1`` of the matched characters,
        ``"str2"`` - indices in ``str2`` of the matched characters
        (``"str1"[k]`` is paired with ``"str2"[k]``),
        ``"num"`` - number of matched pairs.
    """
    right_info = {"str1": [], "str2": [], "num": 0}

    # Nothing can match when either string is empty. Without this guard the
    # code below either indexes an empty string (IndexError) or divides by a
    # zero length (ZeroDivisionError).
    if not str1 or not str2:
        return right_info

    # Next unmatched position and total length of each string.
    start1 = 0
    start2 = 0
    end1 = len(str1)
    end2 = len(str2)

    def record_match(ind1: int, ind2: int):
        """Store the matched pair and return the next start positions."""
        right_info["num"] += 1
        right_info["str1"].append(ind1)
        right_info["str2"].append(ind2)
        next1 = ind1 + 1
        next2 = ind2 + 1
        # Once one string is used up, mark the other as finished as well so
        # the main loop terminates.
        if next2 >= end2:
            next1 = end1
        elif next1 >= end1:
            next2 = end2
        return next1, next2

    while start1 < end1 or start2 < end2:
        # If only one string ran past its end, step back onto its last
        # character so the other string can still be scanned against it.
        if start1 >= end1:
            start1 = end1 - 1
        if start2 >= end2:
            start2 = end2 - 1

        # Both strings are down to their final character: compare and stop.
        if end1 - start1 == 1 == end2 - start2:
            if str2[-1] == str1[-1]:
                right_info["num"] += 1
                right_info["str1"].append(start1)
                right_info["str2"].append(start2)
            break

        # Choose which string drives the outer loop. The expression is kept
        # exactly as originally written; the "- 1" appears on both sides of
        # the comparison, so it effectively compares start1 / end1 with
        # start2 / end2.
        str1_is_reference = start2 == end2 - 1 or (
            start1 / end1 - 1 > start2 / end2 - 1
            and start1 < end1 - 1
        )

        if str1_is_reference:
            # Take each remaining str1 character and search str2 for it.
            for ind1 in range(start1, end1):
                for ind2 in range(start2, end2):
                    if str1[ind1] == str2[ind2]:
                        start1, start2 = record_match(ind1, ind2)
                        break
                else:
                    # This str1 character matched nothing: skip it.
                    start1 = ind1 + 1
                    continue
                # Reached only after a match (inner loop ended via break).
                break
        else:
            # Mirror image of the branch above with the roles swapped.
            for ind2 in range(start2, end2):
                for ind1 in range(start1, end1):
                    if str2[ind2] == str1[ind1]:
                        start1, start2 = record_match(ind1, ind2)
                        break
                else:
                    start2 = ind2 + 1
                    continue
                break

    return right_info

# Small demo: compare two sample strings and print every matched pair.
# The guard must test the module attribute ``__name__``; the bare name
# ``name`` is undefined and raises NameError.
if __name__ == "__main__":
    str1 = "啊啊啊啊啊啊啊"
    str2 = "aaaaaaa"
    rightInfo = compSTR(str1, str2)
    print(rightInfo)
    # Show each matched character pair side by side.
    for i1, i2 in zip(rightInfo["str1"], rightInfo["str2"]):
        print((str1[i1], str2[i2]))

    推荐阅读