需求源于 crnn识别效果统计,需要统计出识别正确的字符数
需要支持中间漏掉字符或者多出字符时 不影响后续判断
1v1 判断如果漏认或者中途多出一个字符,后面的就会因为移位 全部错误。
【python简单实现字符串比对】所以就试着写了个比对方法,
尽可能接近文本比对软件的那种比对效果。它们以行为单位,这里以字符为单位。
修复上一版的问题:如果外循环中靠前的字符直接匹配到了内循环中靠后的字符,中间的字符就会被全部跳过。
比如
str1 = "baaaaaaaa"
str2 = "aaaaaaaab"
之前可能会出现只匹配到了字符 b 的情况。
def compSTR(str1: str, str2: str):
    """Align two strings character by character and report the matches.

    The comparison tolerates missing or extra characters: after a mismatch
    the search windows on both strings are widened step by step, so a later
    character can still be paired up instead of every following position
    being counted as wrong.

    Args:
        str1: First string (e.g. the recognised text).
        str2: Second string (e.g. the ground-truth text).

    Returns:
        A dict with three keys:
        ``"str1"``: indices into ``str1`` of the matched characters,
        ``"str2"``: indices into ``str2`` of the matched characters
        (pairwise aligned with ``"str1"``),
        ``"num"``: number of matched character pairs.
    """
    # Result record, filled in as matches are found.
    rightInfo = {"str1": [], "str2": [], "num": 0}

    st1_index_end = len(str1)
    st2_index_end = len(str2)

    # Nothing can match when either string is empty. Without this guard the
    # loop below would index an empty string with -1 and raise IndexError.
    if st1_index_end == 0 or st2_index_end == 0:
        return rightInfo

    # Cursors: everything before *_start has already been consumed.
    st1_index_start = 0
    st2_index_start = 0
    # Exclusive upper bounds of the current search windows. They grow
    # gradually rather than covering the whole remainder at once, so an
    # early character is not paired with a far-away one too eagerly.
    st1_index_spd = 1
    st2_index_spd = 1

    def update(ind1: int, ind2: int):
        """Record the match (ind1, ind2) and return the advanced cursors."""
        rightInfo["num"] += 1
        rightInfo["str1"].append(ind1)
        rightInfo["str2"].append(ind2)
        next1 = ind1 + 1
        next2 = ind2 + 1
        # Once one string is used up by a match, the other is finished too.
        if next2 >= st2_index_end:
            next1 = st1_index_end
        elif next1 >= st1_index_end:
            next2 = st2_index_end
        return next1, next2

    # The flag is flipped before use, so the first pass iterates the
    # shorter-or-equal string... as decided by this initial value.
    rotation = st1_index_end < st2_index_end

    while st1_index_end > st1_index_start or st2_index_end > st2_index_start:
        # If only one string ran past its end, park its cursor on the
        # last character so the other string can still be scanned.
        if st1_index_start >= st1_index_end:
            st1_index_start = st1_index_end - 1
        if st2_index_start >= st2_index_end:
            st2_index_start = st2_index_end - 1

        # Both cursors sit on the last character: one final comparison.
        if st1_index_end - st1_index_start == 1 == st2_index_end - st2_index_start:
            if str2[-1] == str1[-1]:
                rightInfo["num"] += 1
                rightInfo["str1"].append(st1_index_start)
                rightInfo["str2"].append(st2_index_start)
            break

        # Alternate which string drives the outer loop on every pass.
        # (The original author was unsure this is still needed; kept.)
        rotation = not rotation
        if rotation:
            # Take each character of str1 inside its window ...
            for ind1 in range(st1_index_start, st1_index_spd):
                # ... and look for it inside the str2 window.
                for ind2 in range(st2_index_start, st2_index_spd):
                    if str1[ind1] == str2[ind2]:
                        st1_index_start, st2_index_start = update(ind1, ind2)
                        st2_index_spd = min(
                            max(st2_index_spd, st2_index_start + 1),
                            st2_index_end)
                        # Cursors moved: restart from the while loop.
                        break
                else:
                    # No match in the window. ind2 is the last index tried
                    # (or left over from an earlier pass if the window was
                    # empty). If it is the end of str2, this str1 character
                    # is given up and skipped.
                    if ind2 == st2_index_end - 1:
                        st1_index_start = ind1 + 1
                    continue
                # Reached only after a match (inner break).
                st1_index_spd = min(
                    max(st1_index_spd, st1_index_start + 1), st1_index_end)
                break
            else:
                # Whole window scanned without a match: widen both windows.
                st1_index_spd = min(st1_index_spd + 1, st1_index_end)
                st2_index_spd = min(st2_index_spd + 1, st2_index_end)
        else:
            # Mirror image of the branch above with the roles swapped.
            for ind2 in range(st2_index_start, st2_index_spd):
                for ind1 in range(st1_index_start, st1_index_spd):
                    if str2[ind2] == str1[ind1]:
                        st1_index_start, st2_index_start = update(ind1, ind2)
                        st1_index_spd = min(
                            max(st1_index_spd, st1_index_start + 1),
                            st1_index_end)
                        break
                else:
                    if ind1 == st1_index_end - 1:
                        st2_index_start = ind2 + 1
                    continue
                st2_index_spd = min(
                    max(st2_index_spd, st2_index_start + 1), st2_index_end)
                break
            else:
                st1_index_spd = min(st1_index_spd + 1, st1_index_end)
                st2_index_spd = min(st2_index_spd + 1, st2_index_end)

    return rightInfo
旧代码留着吧(注意:它与上面的新版函数同名,如果放在同一个文件里,后定义的旧版会覆盖新版 compSTR,实际使用时请改名或删除):
def compSTR(str1: str, str2: str):
    """Align two strings character by character (older implementation).

    Each pass picks one string as the reference, takes its next unmatched
    character and scans the whole remainder of the other string for it.
    NOTE(review): because the scan is unbounded, an early character can be
    paired with a very late one, which skips everything in between; the
    accompanying article describes this as the problem of this version.

    Args:
        str1: First string.
        str2: Second string.

    Returns:
        A dict with three keys:
        ``"str1"``: indices into ``str1`` of the matched characters,
        ``"str2"``: indices into ``str2`` of the matched characters
        (pairwise aligned with ``"str1"``),
        ``"num"``: number of matched character pairs.
    """
    # Result record, filled in as matches are found.
    rightInfo = {"str1": [], "str2": [], "num": 0}

    st1_index_end = len(str1)
    st2_index_end = len(str2)

    # Nothing can match when either string is empty. Without this guard the
    # code below would index an empty string with -1 (IndexError) or divide
    # by a zero length (ZeroDivisionError).
    if st1_index_end == 0 or st2_index_end == 0:
        return rightInfo

    # Cursors: everything before *_start has already been consumed.
    st1_index_start = 0
    st2_index_start = 0

    def update(ind1: int, ind2: int):
        """Record the match (ind1, ind2) and return the advanced cursors."""
        rightInfo["num"] += 1
        rightInfo["str1"].append(ind1)
        rightInfo["str2"].append(ind2)
        next1 = ind1 + 1
        next2 = ind2 + 1
        # Once one string is used up by a match, the other is finished too.
        if next2 >= st2_index_end:
            next1 = st1_index_end
        elif next1 >= st1_index_end:
            next2 = st2_index_end
        return next1, next2

    while st1_index_end > st1_index_start or st2_index_end > st2_index_start:
        # If only one string ran past its end, park its cursor on the
        # last character so the other string can still be scanned.
        if st1_index_start >= st1_index_end:
            st1_index_start = st1_index_end - 1
        if st2_index_start >= st2_index_end:
            st2_index_start = st2_index_end - 1

        # Both cursors sit on the last character: one final comparison.
        if st1_index_end - st1_index_start == 1 == st2_index_end - st2_index_start:
            if str2[-1] == str1[-1]:
                rightInfo["num"] += 1
                rightInfo["str1"].append(st1_index_start)
                rightInfo["str2"].append(st2_index_start)
            break

        # Decide which string is the reference for this pass: str1 when
        # str2 is already on its last character, or when str1 is relatively
        # further along and not yet on its own last character.
        # (The original compared "ratio - 1" on both sides; the "- 1"
        # cancels, so the plain ratios are compared here.)
        str1_is_reference = st2_index_start == st2_index_end - 1 or (
            st1_index_start / st1_index_end > st2_index_start / st2_index_end
            and st1_index_start < st1_index_end - 1
        )

        if str1_is_reference:
            # Take each remaining character of str1 ...
            for ind1 in range(st1_index_start, st1_index_end):
                # ... and look for it in the remainder of str2.
                for ind2 in range(st2_index_start, st2_index_end):
                    if str1[ind1] == str2[ind2]:
                        st1_index_start, st2_index_start = update(ind1, ind2)
                        # Cursors moved: restart from the while loop.
                        break
                else:
                    # No match anywhere: skip this str1 character.
                    st1_index_start = ind1 + 1
                    continue
                # Reached only after a match (inner break).
                break
        else:
            # Mirror image of the branch above with the roles swapped.
            for ind2 in range(st2_index_start, st2_index_end):
                for ind1 in range(st1_index_start, st1_index_end):
                    if str2[ind2] == str1[ind1]:
                        st1_index_start, st2_index_start = update(ind1, ind2)
                        break
                else:
                    st2_index_start = ind2 + 1
                    continue
                break

    return rightInfo
if __name__ == "__main__":
    # Small demo: compare two strings and print the result record followed
    # by every matched character pair.
    str1 = "啊啊啊啊啊啊啊"
    str2 = "aaaaaaa"

    rightInfo = compSTR(str1, str2)
    print(rightInfo)

    for i1, i2 in zip(rightInfo["str1"], rightInfo["str2"]):
        print((str1[i1], str2[i2]))
推荐阅读
- 推荐系统论文进阶|CTR预估 论文精读(十一)--Deep Interest Evolution Network(DIEN)
- Python专栏|数据分析的常规流程
- Python|Win10下 Python开发环境搭建(PyCharm + Anaconda) && 环境变量配置 && 常用工具安装配置
- Python绘制小红花
- Pytorch学习|sklearn-SVM 模型保存、交叉验证与网格搜索
- OpenCV|OpenCV-Python实战(18)——深度学习简介与入门示例
- python|8. 文件系统——文件的删除、移动、复制过程以及链接文件
- 爬虫|若想拿下爬虫大单,怎能不会逆向爬虫,价值过万的逆向爬虫教程限时分享
- 分布式|《Python3网络爬虫开发实战(第二版)》内容介绍
- java|微软认真聆听了开源 .NET 开发社区的炮轰( 通过CLI 支持 Hot Reload 功能)