实现一个正则表达式引擎in Python(三)
项目地址:Regex in Python 前两篇已经完成的写了一个基于NFA的正则表达式引擎了,下面要做的就是更近一步,把NFA转换为DFA,并对DFA最小化 DFA的定义对于NFA转换为DFA的算法,主要就是将NFA中可以状态节点进行合并,进而让状态节点对于一个输入字符都有唯一的一个跳转节点 所以对于DFA的节点就含有一个nfa状态节点的集合和一个唯一的标识和对是否是接收状态的flag class Dfa(object): STATUS_NUM = 0 def __init__(self): self.nfa_sets = [] self.accepted = False self.status_num = -1 @classmethod def nfas_to_dfa(cls,nfas): dfa = cls() for n in nfas: dfa.nfa_sets.append(n) if n.next_1 is None and n.next_2 is None: dfa.accepted = True dfa.status_num = Dfa.STATUS_NUM Dfa.STATUS_NUM = Dfa.STATUS_NUM + 1 return dfa NFA转换为DFA将NFA转换为DFA的最终目标是获得一张跳转表,这个和之前C语言编译的语法分析表有点像 这个函数就是NFA转换为DFA的全部算法了,主要逻辑就是:
def convert_to_dfa(nfa_start_node): jump_table = list_dict(MAX_DFA_STATUS_NUM) ns = [nfa_start_node] n_closure = closure(ns) dfa = Dfa.nfas_to_dfa(n_closure) dfa_list.append(dfa) dfa_index = 0 while dfa_index < len(dfa_list): dfa = dfa_list[dfa_index] for i in range(ASCII_COUNT): c = chr(i) nfa_move = move(dfa.nfa_sets,c) if nfa_move is not None: nfa_closure = closure(nfa_move) if nfa_closure is None: continue new_dfa = convert_completed(dfa_list,nfa_closure) if new_dfa is None: new_dfa = Dfa.nfas_to_dfa(nfa_closure) dfa_list.append(new_dfa) next_state = new_dfa.status_num jump_table[dfa.status_num][c] = next_state if new_dfa.accepted: jump_table[new_dfa.status_num]['accepted'] = True dfa_index = dfa_index + 1 return jump_table DFA最小化DFA最小化本质上是也是对状态节点的合并,然后分区
Dfa分区定义DfaGroup和之前的定义大同小异,都是有一个唯一的标识和一个放DFA状态节点的list class DfaGroup(object): GROUP_COUNT = 0 def __init__(self): self.set_count() self.group = [] def set_count(self): self.group_num = DfaGroup.GROUP_COUNT DfaGroup.GROUP_COUNT = DfaGroup.GROUP_COUNT + 1 def remove(self,element): self.group.remove(element) def add(self,element): self.group.append(element) def get(self,count): if count > len(self.group) - 1: return None return self.group[count] def __len__(self): return len(self.group) Minimize DFApartition是最小化DFA算法最重要的部分
所以其实DFA最小化做的就是合并相同的下一个跳转状态的节点 def partition(jump_table,group,first,next,ch): goto_first = jump_table[first.status_num].get(ch) goto_next = jump_table[next.status_num].get(ch) if dfa_in_group(goto_first) != dfa_in_group(goto_next): new_group = DfaGroup() group_list.append(new_group) group.remove(next) new_group.add(next) return True return False 创建跳转表再分完区之后节点和节点间的跳转就变成了区和区间的跳转了
def create_mindfa_table(jump_table): trans_table = list_dict(ASCII_COUNT) for dfa in dfa_list: from_dfa = dfa.status_num for i in range(ASCII_COUNT): ch = chr(i) to_dfa = jump_table[from_dfa].get(ch) if to_dfa: from_group = dfa_in_group(from_dfa) to_group = dfa_in_group(to_dfa) trans_table[from_group.group_num][ch] = to_group.group_num if dfa.accepted: from_group = dfa_in_group(from_dfa) trans_table[from_group.group_num]['accepted'] = True return trans_table 匹配输入字符串利用跳转表进行对输入字符串的匹配的逻辑非常简单
def dfa_match(input_string,jump_table,minimize=True): if minimize: cur_status = dfa_in_group(0).group_num else: cur_status = 0 for i,c in enumerate(input_string): jump_dict = jump_table[cur_status] if jump_dict: js = jump_dict.get(c) if js is None: return False else: cur_status = js if i == len(input_string) - 1 and jump_dict.get('accepted'): return True return jump_table[cur_status].get('accepted') is not None 总结到此已经完成了一个简单的正则表达式引擎的所有过程 正则表达式 -> NFA -> DFA -> DFA最小化 -> 进行匹配 (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |