-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathJaro.py
More file actions
42 lines (34 loc) · 1.6 KB
/
Jaro.py
File metadata and controls
42 lines (34 loc) · 1.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#Jaro similarity function
#Compares two strings and returns their Jaro similarity as a float
#03/07/2021 Modlizard
def jaro(a, b):
    """Return the Jaro similarity of strings *a* and *b* as a float.

    The result is 1.0 for identical strings (including two empty strings),
    0.0 when no characters match, and otherwise

        (m / len(a) + m / len(b) + (m - t) / m) / 3

    where ``m`` is the number of matching characters and ``t`` is half the
    number of matched characters that appear in a different order in the
    two strings.

    Two characters match when they are equal and their positions differ by
    at most ``max(len(a), len(b)) // 2 - 1`` (never less than 0). Each
    character of either string can take part in at most one match.

    Note: ``t`` is computed with true division, so an odd number of
    out-of-order characters contributes a fractional transposition.
    Some reference implementations floor this value instead.

    Args:
        a: First string to compare.
        b: Second string to compare.

    Returns:
        The Jaro similarity, between 0.0 and 1.0.

    Raises:
        TypeError: If either argument is not a string.
    """
    if not isinstance(a, str) or not isinstance(b, str):
        raise TypeError('Non-string comparison parameters provided!')
    if a == b:
        return 1.0

    # Match counting
    # Maximum index distance at which two equal characters still count as
    # a match; clamped at 0 so very short strings compare same-index only.
    window = max(0, max(len(a), len(b)) // 2 - 1)
    matched_a = [False] * len(a)  # a[i] has been paired with some b[j]
    matched_b = [False] * len(b)  # b[j] has been paired with some a[i]
    matches = 0
    for i, char in enumerate(a):
        low = max(0, i - window)
        high = min(len(b), i + window + 1)
        for j in range(low, high):
            # Each character of a is visited once, so only b needs the
            # "already paired" check.
            if not matched_b[j] and char == b[j]:
                matched_a[i] = matched_b[j] = True
                matches += 1
                break  # a[i] is paired; move on to the next character
    if matches == 0:
        # Also covers the case where exactly one string is empty.
        return 0.0

    # Transposition counting
    # Walk the matched characters of both strings in order and compare them
    # pairwise: the k-th matched character of a against the k-th matched
    # character of b. The position in b must advance after every
    # comparison, whether or not the characters agree.
    paired_a = [char for char, hit in zip(a, matched_a) if hit]
    paired_b = [char for char, hit in zip(b, matched_b) if hit]
    out_of_order = sum(x != y for x, y in zip(paired_a, paired_b))
    t = out_of_order / 2  # each transposition is seen from both sides

    return (matches / len(a) + matches / len(b) + (matches - t) / matches) / 3.0