利用Java实现简单的词法分析器实例代码
发布时间:2020-12-14 14:38:42 所属栏目:Java 来源:网络整理
导读:首先看下我们要分析的代码段如下: 输出结果如下: 输出结果(a).PNG 输出结果(b).PNG 输出结果(c).PNG 括号里是一个二元式:(单词类别编码,单词位置编号) 代码如下: package Yue.LexicalAnalyzer;import java.io.*;/* * 主程序 */public class Main { pu
首先看下我们要分析的代码段如下: 输出结果如下: 输出结果(a).PNG 输出结果(b).PNG 输出结果(c).PNG 括号里是一个二元式:(单词类别编码,单词位置编号) 代码如下: package Yue.LexicalAnalyzer; import java.io.*; /* * 主程序 */ public class Main { public static void main(String[] args) throws IOException { Lexer lexer = new Lexer(); lexer.printToken(); lexer.printSymbolsTable(); } } package Yue.LexicalAnalyzer; import java.io.*; import java.util.*; /* * 词法分析并输出 */ public class Lexer { /*记录行号*/ public static int line = 1; /*存放最新读入的字符*/ char character = ' '; /*保留字*/ Hashtable<String,KeyWord> keywords = new Hashtable<String,KeyWord>(); /*token序列*/ private ArrayList<Token> tokens = new ArrayList<Token>(); /*符号表*/ private ArrayList<Symbol> symtable = new ArrayList<Symbol>(); /*读取文件变量*/ BufferedReader reader = null; /*保存当前是否读取到了文件的结尾*/ private Boolean isEnd = false; /* 是否读取到文件的结尾 */ public Boolean getReaderState() { return this.isEnd; } /*打印tokens序列*/ public void printToken() throws IOException { FileWriter writer = new FileWriter("E:lex.txt"); System.out.println("词法分析结果如下:"); System.out.print("杜悦-2015220201031rnn"); writer.write("杜悦-2015220201031rnrn"); while (getReaderState() == false) { Token tok = scan(); String str = "line " + tok.line + "t(" + tok.tag + "," + tok.pos + ")tt" + tok.name + ": " + tok.toString() + "rn"; writer.write(str); System.out.print(str); } writer.flush(); } /*打印符号表*/ public void printSymbolsTable() throws IOException { FileWriter writer = new FileWriter("E:symtab1.txt"); System.out.print("rnrn符号表rn"); System.out.print("编号t行号t名称rn"); writer.write("符号表rn"); writer.write("编号 " + "t行号 " + "t名称 rn"); Iterator<Symbol> e = symtable.iterator(); while (e.hasNext()) { Symbol symbol = e.next(); String desc = symbol.pos + "t" + symbol.line + "t" + symbol.toString(); System.out.print(desc + "rn"); writer.write(desc + "rn"); } writer.flush(); } /*打印错误*/ public void printError(Token tok) throws IOException{ FileWriter writer = new FileWriter("E:error.txt"); System.out.print("rnrn错误词法如下:rn"); writer.write("错误词法如下:rn"); String str = "line " + tok.line + "t(" + tok.tag + "," + tok.pos + ")tt" + tok.name + ": " + tok.toString() + "rn"; writer.write(str); } /*添加保留字*/ void reserve(KeyWord w) { keywords.put(w.lexme,w); } public Lexer() { /*初始化读取文件变量*/ try { reader = new BufferedReader(new FileReader("E:输入.txt")); } catch (IOException e) { System.out.print(e); } /*添加保留字*/ this.reserve(KeyWord.begin); this.reserve(KeyWord.end); this.reserve(KeyWord.integer); this.reserve(KeyWord.function); this.reserve(KeyWord.read); this.reserve(KeyWord.write); this.reserve(KeyWord.aIf); this.reserve(KeyWord.aThen); this.reserve(KeyWord.aElse); } /*按字符读*/ public void readch() throws IOException { character = (char) reader.read(); if ((int) character == 0xffff) { this.isEnd = true; } } /*判断是否匹配*/ public Boolean readch(char ch) throws IOException { readch(); if (this.character != ch) { return false; } this.character = ' '; return true; } /*数字的识别*/ public Boolean isDigit() throws IOException { if (Character.isDigit(character)) { int value = 0; while (Character.isDigit(character)) { value = 10 * value + Character.digit(character,10); readch(); } Num n = new Num(value); n.line = line; tokens.add(n); return true; } else return false; } /*保留字、标识符的识别*/ public Boolean isLetter() throws IOException { if (Character.isLetter(character)) { StringBuffer sb = new StringBuffer(); /*首先得到整个的一个分割*/ while (Character.isLetterOrDigit(character)) { sb.append(character); readch(); } /*判断是保留字还是标识符*/ String s = sb.toString(); KeyWord w = keywords.get(s); /*如果是保留字的话,w不应该是空的*/ if (w != null) { w.line = line; tokens.add(w); } else { /*否则就是标识符,此处多出记录标识符编号的语句*/ Symbol sy = new Symbol(s); Symbol mark = sy; //用于标记已存在标识符 Boolean isRepeat = false; sy.line = line; for (Symbol i : symtable) { if (sy.toString().equals(i.toString())) { mark = i; isRepeat = true; } } if (!isRepeat) { sy.pos = symtable.size() + 1; symtable.add(sy); } else if (isRepeat) { sy.pos = mark.pos; } tokens.add(sy); } return true; } else return false; } /*符号的识别*/ public Boolean isSign() throws IOException { switch (character) { case '#': readch(); AllEnd.allEnd.line = line; tokens.add(AllEnd.allEnd); return true; case 'r': if (readch('n')) { readch(); LineEnd.lineEnd.line = line; tokens.add(LineEnd.lineEnd); line++; return true; } case '(': readch(); Delimiter.lpar.line = line; tokens.add(Delimiter.lpar); return true; case ')': readch(); Delimiter.rpar.line = line; tokens.add(Delimiter.rpar); return true; case ';': readch(); Delimiter.sem.line = line; tokens.add(Delimiter.sem); return true; case '+': readch(); CalcWord.add.line = line; tokens.add(CalcWord.add); return true; case '-': readch(); CalcWord.sub.line = line; tokens.add(CalcWord.sub); return true; case '*': readch(); CalcWord.mul.line = line; tokens.add(CalcWord.mul); return true; case '/': readch(); CalcWord.div.line = line; tokens.add(CalcWord.div); return true; case ':': if (readch('=')) { readch(); CalcWord.assign.line = line; tokens.add(CalcWord.assign); return true; } break; case '>': if (readch('=')) { readch(); CalcWord.ge.line = line; tokens.add(CalcWord.ge); return true; } break; case '<': if (readch('=')) { readch(); CalcWord.le.line = line; tokens.add(CalcWord.le); return true; } break; case '!': if (readch('=')) { readch(); CalcWord.ne.line = line; tokens.add(CalcWord.ne); return true; } break; } return false; } /*下面开始分割关键字,标识符等信息*/ public Token scan() throws IOException { Token tok; while (character == ' ') readch(); if (isDigit() || isSign() || isLetter()) { tok = tokens.get(tokens.size() - 1); } else { tok = new Token(character); printError(tok); } return tok; } } package Yue.LexicalAnalyzer; /* * Token父类 */ public class Token { public final int tag; public int line = 1; public String name = ""; public int pos = 0; public Token(int t) { this.tag = t; } public String toString() { return "" + (char) tag; } } package Yue.LexicalAnalyzer; /* * 单词类别赋值 */ public class Tag { public final static int BEGIN = 1,//保留字 END = 2,//保留字 INTEGER = 3,//保留字 FUNCTION = 4,//保留字 READ = 5,//保留字 WRITE = 6,//保留字 IF = 7,//保留字 THEN = 8,//保留字 ELSE = 9,//保留字 SYMBOL = 11,//标识符 CONSTANT = 12,//常数 ADD = 13,//运算符 "+" SUB = 14,//运算符 "-" MUL = 15,//运算符 "*" DIV = 16,//运算符 "/" LE = 18,//运算符 "<=" GE = 19,//运算符 ">=" NE = 20,//运算符 "!=" ASSIGN = 23,//运算符 ":=" LPAR = 24,//界符 "(" RPAR = 25,//界符 ")" SEM = 26,//界符 ";" LINE_END = 27,//行尾符 ALL_END = 28; //结尾符 "#" } package Yue.LexicalAnalyzer; /** * 保留字 */ public class KeyWord extends Token { public String lexme = ""; public KeyWord(String s,int t) { super(t); this.lexme = s; this.name = "保留字"; } public String toString() { return this.lexme; } public static final KeyWord begin = new KeyWord("begin",Tag.BEGIN),end = new KeyWord("end",Tag.END),integer = new KeyWord("integer",Tag.INTEGER),function = new KeyWord("function",Tag.FUNCTION),read = new KeyWord("read",Tag.READ),write = new KeyWord("write",Tag.WRITE),aIf = new KeyWord("if",Tag.IF),aThen = new KeyWord("then",Tag.THEN),aElse = new KeyWord("else",Tag.ELSE); } package Yue.LexicalAnalyzer; /* * 标识符 */ public class Symbol extends Token { public String lexme = ""; public Symbol(String s) { super(Tag.SYMBOL); this.lexme = s; this.name = "标识符"; } public String toString() { return this.lexme; } } package Yue.LexicalAnalyzer; /** * 运算符 */ public class CalcWord extends Token { public String lexme = ""; public CalcWord(String s,int t) { super(t); this.lexme = s; this.name = "运算符"; } public String toString() { return this.lexme; } public static final CalcWord add = new CalcWord("+",Tag.ADD),sub = new CalcWord("-",Tag.SUB),mul = new CalcWord("*",Tag.MUL),div = new CalcWord("/",Tag.DIV),le = new CalcWord("<=",Tag.LE),ge = new CalcWord(">=",Tag.GE),ne = new CalcWord("!=",Tag.NE),assign = new CalcWord(":=",Tag.ASSIGN); } package Yue.LexicalAnalyzer; /** * 界符 */ public class Delimiter extends Token { public String lexme = ""; public Delimiter(String s,int t) { super(t); this.lexme = s; this.name = "界符"; } public String toString() { return this.lexme; } public static final Delimiter lpar = new Delimiter("(",Tag.LPAR),rpar = new Delimiter(")",Tag.RPAR),sem = new Delimiter(";",Tag.SEM); } package Yue.LexicalAnalyzer; /* * 常数 */ public class Num extends Token { public final int value; public Num(int v) { super(Tag.CONSTANT); this.value = v; this.name = "常数"; } public String toString() { return "" + value; } } package Yue.LexicalAnalyzer; /** * 行尾符 */ public class LineEnd extends Token { public String lexme = ""; public LineEnd(String s) { super(Tag.LINE_END); this.lexme = s; this.name = "行尾符"; } public String toString() { return this.lexme; } public static final LineEnd lineEnd = new LineEnd("rn"); } package Yue.LexicalAnalyzer; /** * 结尾符 */ public class AllEnd extends Token { public String lexme = ""; public AllEnd(String s) { super(Tag.ALL_END); this.lexme = s; this.name = "结尾符"; } public String toString() { return this.lexme; } public static final AllEnd allEnd = new AllEnd("#"); } 总结 以上就睡这篇文章的全部内容了,希望本文的内容对大家的学习或者工作能带来一定的帮助,如果有疑问大家可以留言交流。 (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |