本文共 7900 字,大约阅读时间需要 26 分钟。
'''Implementation of a violent substring lookup algorithmArgs: pat: pattern string txt: textReturn: match successfully, return i which is the location of pat in txt failed, return the length of txt'''def search1(pat, txt): M = len(pat) # the length of pattern N = len( txt) # the length of text for i in range(N-M+1): # i is following txt m = 0 # the value of j for j in range(M): # j is following pat if txt[i+j] != pat[j]: break m += 1 if m == M: return i # we find the match return N # not found def search2(pat, txt): M = len(pat) # the length of pattern N = len(txt) # the length of text i = 0 # i is following txt j = 0 # j is following pat while(i
'''Implementation of a Knuth-Morris-Pratt substring lookup algorithmord() : Converts ASCII characters to corresponding values'''class KMP: def __init__(self, pat, txt, R = 256): self.__pat = pat self.__txt = txt self.R = R # ASCII num self.dfa = [[0 for i in range(len(self.__pat))] for j in range(self.R)] # DFA ''' Args: pat: pattern Return: dfa ''' def __initdfa(self): self.dfa[ord(self.__pat[0])][0] = 1 X = 0 # restarting index for j in range(1,len(self.__pat)): # count dfa[][j] for c in range(self.R): self.dfa[c][j] = self.dfa[c][X] self.dfa[ord(self.__pat[j])][j] = j+1 X = self.dfa[ord(self.__pat[j])][X] # update restarting index ''' Args: txt: text Return: match successfully, return i-M which is the location of pat in txt failed, return the length of txt N ''' def search(self): self.__initdfa() M = len(self.__pat) # the length of pattern N = len(self.__txt) # the length of text i = 0 # i is following txt j = 0 # j is following pat while(i
j | 0 | 1 | 2 | 3 | 4 |
---|---|---|---|---|---|
pat[j] | A | B | A | B | A |
s[j] | 0 | 0 | 1 | 2 | 3 |
'''Implementation of a Knuth-Morris-Pratt substring lookup algorithm'''class KMP: def __init__(self, pat, txt): self.__pat = pat self.__txt = txt self.s = [0 for j in range(len(self.__pat))] # s ''' Find the maximum same prefix length for a string Args: pat: pattern Return: s ''' def __inits(self): self.s[0] = 0 # Maximum prefix length of the first character of a template string is 0 X = 0 # restarting index for j in range(1,len(self.__pat)): # count s[j] while (X > 0 and self.__pat[j] != self.__pat[X]): X = self.s[X-1] # update restarting index if self.__pat[j] == self.__pat[X]: X += 1 self.s[j] = X ''' Args: txt: text pat: pattern Return: match successfully, return d which is the location of pat in txt failed, return the length of txt N j 0 1 2 3 4 pat[j] A B A B A s[j] 0 0 1 2 3 ''' def search(self): #s = [0,0,1,2,3] self.__inits() M = len(self.__pat) # the length of pattern N = len(self.__txt) # the length of text i = 0 # i is following txt j = 0 # j is following pat d = 0 # current distance dd = 0 # willing distance while(i < N): if self.__txt[i] == self.__pat[j]: dd = d + (j + 1 - self.s[j]) print i,j,d,dd i += 1 j += 1 else: d = dd j = i - d if j < 0: i += 1 j = 0 dd = d + (j + 1 - self.s[j]) print i,j,d,dd if j == M: return d # we find the match return N # not found