一个简略词法分析器的实现代码(java实现)
一个简单词法分析器的实现代码(java实现)
一个简单词法分析器的实现代码(java实现)
Main.java
- /*
- * 主程序
- */
- import java.io.*;
- import lexer.*;
- public class Main {
- public static void main(String[] args) throws IOException {
- Lexer lexer = new Lexer();
- while (lexer.getReaderState() == false) {
- lexer.scan();
- }
- /* 保存相关信息 */
- lexer.saveTokens();
- lexer.saveSymbolsTable();
- }
- }
Lexer.java
- package lexer;
- import java.io.*;
- import java.util.*;
- import symbols.*;
- public class Lexer {
- public static int line = 1; /* 记录行号 */
- char peek = ' '; /* 下一个读入字符 */
- Hashtable<String, Word> words =
- new Hashtable<String, Word>();
- /* 符号表 */
- private Hashtable<Token, String> table =
- new Hashtable<Token, String>();
- /* token序列 */
- private List<String> tokens =
- new LinkedList<String> ();
- /* 读取文件变量 */
- BufferedReader reader = null;
- /* 保存当前是否读取到了文件的结尾 */
- private Boolean isEnd = false;
- /* 是否读取到文件的结尾 */
- public Boolean getReaderState() {
- return this.isEnd;
- }
- /* 保存存储在table中的 */
- public void saveSymbolsTable() throws IOException {
- FileWriter writer = new FileWriter("符号表.txt");
- writer.write("[符号] [符号类型信息]\n");
- writer.write("\r\n");
- Enumeration<Token> e = table.keys();
- while( e.hasMoreElements() ){
- Token token = (Token)e.nextElement();
- String desc = table.get(token);
- /* 写入文件 */
- writer.write(token + "\t\t\t" + desc + "\r\n");
- }
- writer.flush();
- }
- /* 保存Tokens */
- public void saveTokens() throws IOException {
- FileWriter writer = new FileWriter("Tokens表.txt");
- writer.write("[符号] \n");
- writer.write("\r\n");
- for(int i = 0; i < tokens.size(); ++i) {
- String tok = (String)tokens.get(i);
- /* 写入文件 */
- writer.write(tok + "\r\n");
- }
- writer.flush();
- }
- void reserve(Word w) {
- words.put(w.lexme, w);
- }
- /*
- * 构造函数中将关键字和类型添加到hashtable words中
- */
- public Lexer() {
- /* 初始化读取文件变量 */
- try {
- reader = new BufferedReader(new FileReader("输入.txt"));
- }
- catch(IOException e) {
- System.out.print(e);
- }
- /* 关键字 */
- this.reserve(new Word("if", Tag.IF));
- this.reserve(new Word("then", Tag.THEN));
- this.reserve(new Word("else", Tag.ELSE));
- this.reserve(new Word("while", Tag.WHILE));
- this.reserve(new Word("do", Tag.DO));
- /* 类型 */
- this.reserve(Word.True);
- this.reserve(Word.False);
- this.reserve(Type.Int);
- this.reserve(Type.Char);
- this.reserve(Type.Bool);
- this.reserve(Type.Float);
- }
- public void readch() throws IOException {
- /* 这里应该是使用的是 */
- peek = (char)reader.read();
- if((int)peek == 0xffff){
- this.isEnd = true;
- }
- // peek = (char)System.in.read();
- }
- public Boolean readch(char ch) throws IOException {
- readch();
- if (this.peek != ch) {
- return false;
- }
- this.peek = ' ';
- return true;
- }
- public Token scan() throws IOException {
- /* 消除空白 */
- for( ; ; readch() ) {
- if(peek == ' ' || peek == '\t')
- continue;
- else if (peek == '\n')
- line = line + 1;
- else
- break;
- }
- /* 下面开始分割关键字,标识符等信息 */
- switch (peek) {
- /* 对于 ==, >=, <=, !=的区分使用状态机实现 */
- case '=' :
- if (readch('=')) {
- tokens.add("==");
- return Word.eq;
- }
- else {
- tokens.add("=");
- return new Token('=');
- }
- case '>' :
- if (readch('=')) {
- tokens.add(">=");
- return Word.ge;
- }
- else {
- tokens.add(">");
- return new Token('>');
- }
- case '<' :
- if (readch('=')) {
- tokens.add("<=");
- return Word.le;
- }
- else {
- tokens.add("<");
- return new Token('<');
- }
- case '!' :
- if (readch('=')) {
- tokens.add("!=");
- return Word.ne;
- }
- else {
- tokens.add("!");
- return new Token('!');
- }
- }
- /* 下面是对数字的识别,根据文法的规定的话,这里的
- * 数字只要是能够识别整数就行.
- */
- if(Character.isDigit(peek)) {
- int value = 0;
- do {
- value = 10 * value + Character.digit(peek, 10);
- readch();
- } while (Character.isDigit(peek));
- Num n = new Num(value);
- tokens.add(n.toString());
- //table.put(n, "Num");
- return n;
- }
- /*
- * 关键字或者是标识符的识别
- */
- if(Character.isLetter(peek)) {
- StringBuffer sb = new StringBuffer();
- /* 首先得到整个的一个分割 */
- do {
- sb.append(peek);
- readch();
- } while (Character.isLetterOrDigit(peek));
- /* 判断是关键字还是标识符 */
- String s = sb.toString();
- Word w = (Word)words.get(s);
- /* 如果是关键字或者是类型的话,w不应该是空的 */
- if(w != null) {
- // table.put(w, "KeyWord or Type");
- tokens.add(w.toString());
- return w; /* 说明是关键字 或者是类型名 */
- }
- /* 否则就是一个标识符id */
- w = new Word(s, Tag.ID);
- tokens.add(w.toString());
- table.put(w, "id");
- words.put(s, w);
- return w;
- }
- /* peek中的任意字符都被认为是词法单元返回 */
- Token tok = new Token(peek);
- // table.put(tok, "Token or Seprator");
- if ((int)peek != 0xffff )
- tokens.add(tok.toString());
- peek = ' ';
- return tok;
- }
- }
Num.java
- package lexer;
- public class Num extends Token{
- public final int value;
- public Num(int v) {
- super(Tag.NUM);
- this.value = v;
- }
- public String toString() {
- return "" + value;
- }
- }
Tag.java
- package lexer;
/**
 * Integer tags identifying token kinds. The values start at 256; tokens
 * made of a single character use the character code itself as their tag.
 */
public class Tag {
    public static final int AND = 256;
    public static final int BASIC = 257;
    public static final int BREAK = 258;
    public static final int DO = 259;
    public static final int ELSE = 260;
    public static final int EQ = 261; // "=="
    public static final int FALSE = 262;
    public static final int GE = 263;
    public static final int ID = 264;
    public static final int IF = 265;
    public static final int INDEX = 266;
    public static final int LE = 267;
    public static final int MINUS = 268;
    public static final int NE = 269;
    public static final int NUM = 270;
    public static final int OR = 271;
    public static final int REAL = 272;
    public static final int TEMP = 273;
    public static final int TRUE = 274;
    public static final int WHILE = 275;
    // Added later.
    public static final int THEN = 276;
}
Token.java
- package lexer;
/**
 * A lexical token identified by an integer tag.
 */
public class Token {
    /** Tag identifying this token. */
    public final int tag;

    /**
     * Creates a token with the given tag.
     *
     * @param t the tag value
     */
    public Token(int t) {
        tag = t;
    }

    /**
     * Returns the tag interpreted as a single character.
     *
     * @return a one-character string holding {@code (char) tag}
     */
    @Override
    public String toString() {
        return String.valueOf((char) tag);
    }

    /**
     * Small manual check: prints the token built from the character 'a'.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        System.out.println(new Token('a'));
    }
}
Word.java
- /*
- * 类word用于管理保留字,标识符以及像&&这样的复合单词元素 。
- */
- package lexer;
- public class Word extends Token {
- public String lexme = "";
- public Word (String s, int t) {
- super(t);
- this.lexme = s;
- }
- public String toString() {
- return this.lexme;
- }
- public static final Word
- and = new Word("&&", Tag.AND),
- or = new Word("||", Tag.OR),
- eq = new Word ("==", Tag.EQ),
- ne = new Word("!=", Tag.NE),
- le = new Word("<=", Tag.LE),
- ge = new Word(">=", Tag.GE),
- minus = new Word("minus", Tag.MINUS),
- True = new Word("true", Tag.TRUE),
- False = new Word("false", Tag.FALSE),
- temp = new Word("t", Tag.TEMP);
- }
Type.java
- /*
- * 说明数据类型
- */
- package symbols;
- import lexer.*;
- public class Type extends Word{
- public Type(String s, int tag) {
- super(s, tag);
- }
- public static final Type
- Int = new Type("int", Tag.BASIC),
- Float = new Type("float", Tag.BASIC),
- Char = new Type ("char", Tag.BASIC),
- Bool = new Type("bool", Tag.BASIC);
- }