一个简略词法分析器的实现代码（Java 实现）

一个简单词法分析器的实现代码（Java 实现）

Main.java

[java] view plaincopyprint?

/*
* 主程序
*/
import java.io.*;
import lexer.*;
/**
 * Program entry point: runs the lexer until its reader reports end of
 * input, then asks it to save the token list and the symbol table.
 */
public class Main {
    public static void main(String[] args) throws IOException {
        final Lexer scanner = new Lexer();

        // Each scan() call consumes one token; the lexer records it internally.
        while (!scanner.getReaderState()) {
            scanner.scan();
        }

        // Persist what was collected during scanning.
        scanner.saveTokens();
        scanner.saveSymbolsTable();
    }
}

/*
 * Main program.
 */
import java.io.*;
import lexer.*;

/**
 * Program entry point: runs the lexer until its reader reports end of
 * input, then asks it to save the token list and the symbol table.
 */
public class Main {
    public static void main(String[] args) throws IOException {
        final Lexer scanner = new Lexer();

        // Each scan() call consumes one token; the lexer records it internally.
        while (!scanner.getReaderState()) {
            scanner.scan();
        }

        // Persist what was collected during scanning.
        scanner.saveTokens();
        scanner.saveSymbolsTable();
    }
}

Lexer.java

[java] view plaincopyprint?

package lexer;
import java.io.*;
import java.util.*;
import symbols.*;
/**
 * Hand-written lexical analyzer.
 *
 * <p>Reads characters from the file "输入.txt" and groups them into tokens:
 * the relational operators {@code == >= <= !=}, integer literals, reserved
 * words / type names, identifiers, and any other single character. The text
 * of every token is recorded in order, and every new identifier is entered
 * into a symbol table; both can be written to disk with
 * {@link #saveTokens()} and {@link #saveSymbolsTable()}.
 */
public class Lexer {
    /** Value that {@code (char) reader.read()} yields once the stream is exhausted. */
    private static final char EOF = (char) 0xffff;

    /** Line counter; starts at 1 and is incremented for each '\n' skipped by scan(). */
    public static int line = 1;

    /** Look-ahead character; starts as a blank so the first scan() reads from the input. */
    char peek = ' ';

    /** Reserved words, type names and identifiers seen so far, keyed by lexeme. */
    Hashtable<String, Word> words = new Hashtable<String, Word>();

    /** Symbol table: identifier token mapped to a description string. */
    private Hashtable<Token, String> table = new Hashtable<Token, String>();

    /** Text of every recognized token, in the order it was scanned. */
    private List<String> tokens = new LinkedList<String>();

    /** Character source; stays null if the input file could not be opened. */
    BufferedReader reader = null;

    /** Set once the end of the input has been reached. */
    private Boolean isEnd = false;

    /**
     * Reports whether the end of the input has been reached.
     *
     * @return true once a read has hit the end of the input
     */
    public Boolean getReaderState() {
        return this.isEnd;
    }

    /**
     * Writes the symbol table to "符号表.txt": a header, a blank line, then
     * one "token, three tabs, description" line per entry.
     *
     * @throws IOException if the file cannot be created or written
     */
    public void saveSymbolsTable() throws IOException {
        FileWriter writer = new FileWriter("符号表.txt");
        try {
            writer.write("[符号] [符号类型信息]\n");
            writer.write("\r\n");
            Enumeration<Token> keys = table.keys();
            while (keys.hasMoreElements()) {
                Token token = keys.nextElement();
                String desc = table.get(token);
                writer.write(token + "\t\t\t" + desc + "\r\n");
            }
            writer.flush();
        } finally {
            // Release the file handle even when a write fails.
            writer.close();
        }
    }

    /**
     * Writes the recorded token texts to "Tokens表.txt": a header, a blank
     * line, then one token per line.
     *
     * @throws IOException if the file cannot be created or written
     */
    public void saveTokens() throws IOException {
        FileWriter writer = new FileWriter("Tokens表.txt");
        try {
            writer.write("[符号] \n");
            writer.write("\r\n");
            // Iterate instead of get(i): indexed access on a LinkedList is linear per call.
            for (String tok : tokens) {
                writer.write(tok + "\r\n");
            }
            writer.flush();
        } finally {
            // Release the file handle even when a write fails.
            writer.close();
        }
    }

    /** Registers a word in the lexeme table under its own lexeme. */
    void reserve(Word w) {
        words.put(w.lexme, w);
    }

    /**
     * Opens the input file and pre-loads the keywords and type names into
     * the lexeme table. If the file cannot be opened the error is printed
     * and the lexer behaves as if the input were empty.
     */
    public Lexer() {
        try {
            reader = new BufferedReader(new FileReader("输入.txt"));
        }
        catch (IOException e) {
            // reader stays null; readch() reports end of input in that case.
            System.out.print(e);
        }
        /* keywords */
        this.reserve(new Word("if", Tag.IF));
        this.reserve(new Word("then", Tag.THEN));
        this.reserve(new Word("else", Tag.ELSE));
        this.reserve(new Word("while", Tag.WHILE));
        this.reserve(new Word("do", Tag.DO));
        /* boolean constants and type names */
        this.reserve(Word.True);
        this.reserve(Word.False);
        this.reserve(Type.Int);
        this.reserve(Type.Char);
        this.reserve(Type.Bool);
        this.reserve(Type.Float);
    }

    /**
     * Reads the next input character into {@code peek}. At the end of the
     * input {@code peek} becomes 0xffff, the end flag is set and the reader
     * is closed; later calls keep reporting the end without touching the
     * reader again.
     *
     * @throws IOException if reading or closing the input fails
     */
    public void readch() throws IOException {
        if (isEnd || reader == null) {
            // Already finished, or the input never opened: report end of input.
            peek = EOF;
            isEnd = true;
            return;
        }
        peek = (char) reader.read();
        if (peek == EOF) {
            isEnd = true;
            reader.close();
        }
    }

    /**
     * Reads the next character and tests it against {@code ch}.
     *
     * @param ch the character expected next
     * @return true if it matched (and {@code peek} was reset to a blank so
     *         the character counts as consumed); false otherwise, leaving
     *         the mismatching character in {@code peek}
     * @throws IOException if reading the input fails
     */
    public Boolean readch(char ch) throws IOException {
        readch();
        if (this.peek != ch) {
            return false;
        }
        this.peek = ' ';
        return true;
    }

    /**
     * Scans and returns the next token, recording its text in the token
     * list. At the end of the input a token with tag 0xffff is returned and
     * nothing is recorded.
     *
     * @return the token that was recognized
     * @throws IOException if reading the input fails
     */
    public Token scan() throws IOException {
        /* Skip whitespace; '\r' is included so CRLF input yields no stray tokens. */
        for ( ; ; readch()) {
            if (peek == ' ' || peek == '\t' || peek == '\r')
                continue;
            else if (peek == '\n')
                line = line + 1;
            else
                break;
        }

        /* Operators that may be followed by '=' to form a two-character token. */
        switch (peek) {
            case '=':
                return relop('=', Word.eq);
            case '>':
                return relop('>', Word.ge);
            case '<':
                return relop('<', Word.le);
            case '!':
                return relop('!', Word.ne);
            default:
                break;
        }

        /* Integer literal. NOTE: values beyond the int range wrap silently. */
        if (Character.isDigit(peek)) {
            int value = 0;
            do {
                value = 10 * value + Character.digit(peek, 10);
                readch();
            } while (Character.isDigit(peek));
            Num n = new Num(value);
            tokens.add(n.toString());
            return n;
        }

        /* Keyword, type name or identifier. */
        if (Character.isLetter(peek)) {
            StringBuilder sb = new StringBuilder();
            do {
                sb.append(peek);
                readch();
            } while (Character.isLetterOrDigit(peek));
            String s = sb.toString();
            Word w = words.get(s);
            if (w != null) {
                /* Reserved word, type name, or an identifier seen before. */
                tokens.add(w.toString());
                return w;
            }
            /* First occurrence of an identifier: enter it into both tables. */
            w = new Word(s, Tag.ID);
            tokens.add(w.toString());
            table.put(w, "id");
            words.put(s, w);
            return w;
        }

        /* Any other character is a token by itself; the EOF marker is not recorded. */
        Token tok = new Token(peek);
        if (peek != EOF)
            tokens.add(tok.toString());
        peek = ' ';
        return tok;
    }

    /**
     * Finishes an operator that starts with {@code first}: if the next
     * character is '=', records the two-character text and returns
     * {@code withEquals}; otherwise records and returns the single
     * character as its own token.
     */
    private Token relop(char first, Word withEquals) throws IOException {
        if (readch('=')) {
            tokens.add(first + "=");
            return withEquals;
        }
        tokens.add(String.valueOf(first));
        return new Token(first);
    }
}

package lexer; import java.io.*; import java.util.*; import symbols.*; public class Lexer { public static int line = 1; /* 记录行号 */ char peek = ' '; /* 下一个读入字符 */ Hashtable<String, Word> words = new Hashtable<String, Word>(); /* 符号表 */ private Hashtable<Token, String> table = new Hashtable<Token, String>(); /* token序列 */ private List<String> tokens = new LinkedList<String> (); /* 读取文件变量 */ BufferedReader reader = null; /* 保存当前是否读取到了文件的结尾 */ private Boolean isEnd = false; /* 是否读取到文件的结尾 */ public Boolean getReaderState() { return this.isEnd; } /* 保存存储在table中的 */ public void saveSymbolsTable() throws IOException { FileWriter writer = new FileWriter("符号表.txt"); writer.write("[符号] [符号类型信息]\n"); writer.write("\r\n"); Enumeration<Token> e = table.keys(); while( e.hasMoreElements() ){ Token token = (Token)e.nextElement(); String desc = table.get(token); /* 写入文件 */ writer.write(token + "\t\t\t" + desc + "\r\n"); } writer.flush(); } /* 保存Tokens */ public void saveTokens() throws IOException { FileWriter writer = new FileWriter("Tokens表.txt"); writer.write("[符号] \n"); writer.write("\r\n"); for(int i = 0; i < tokens.size(); ++i) { String tok = (String)tokens.get(i); /* 写入文件 */ writer.write(tok + "\r\n"); } writer.flush(); } void reserve(Word w) { words.put(w.lexme, w); } /* * 构造函数中将关键字和类型添加到hashtable words中 */ public Lexer() { /* 初始化读取文件变量 */ try { reader = new BufferedReader(new FileReader("输入.txt")); } catch(IOException e) { System.out.print(e); } /* 关键字 */ this.reserve(new Word("if", Tag.IF)); this.reserve(new Word("then", Tag.THEN)); this.reserve(new Word("else", Tag.ELSE)); this.reserve(new Word("while", Tag.WHILE)); this.reserve(new Word("do", Tag.DO)); /* 类型 */ this.reserve(Word.True); this.reserve(Word.False); this.reserve(Type.Int); this.reserve(Type.Char); this.reserve(Type.Bool); this.reserve(Type.Float); } public void readch() throws IOException { /* 这里应该是使用的是 */ peek = (char)reader.read(); if((int)peek == 0xffff){ this.isEnd = true; } // peek = 
(char)System.in.read(); } public Boolean readch(char ch) throws IOException { readch(); if (this.peek != ch) { return false; } this.peek = ' '; return true; } public Token scan() throws IOException { /* 消除空白 */ for( ; ; readch() ) { if(peek == ' ' || peek == '\t') continue; else if (peek == '\n') line = line + 1; else break; } /* 下面开始分割关键字，标识符等信息 */ switch (peek) { /* 对于 ==, >=, <=, !=的区分使用状态机实现 */ case '=' : if (readch('=')) { tokens.add("=="); return Word.eq; } else { tokens.add("="); return new Token('='); } case '>' : if (readch('=')) { tokens.add(">="); return Word.ge; } else { tokens.add(">"); return new Token('>'); } case '<' : if (readch('=')) { tokens.add("<="); return Word.le; } else { tokens.add("<"); return new Token('<'); } case '!' : if (readch('=')) { tokens.add("!="); return Word.ne; } else { tokens.add("!"); return new Token('!'); } } /* 下面是对数字的识别，根据文法的规定的话，这里的 * 数字只要是能够识别整数就行. */ if(Character.isDigit(peek)) { int value = 0; do { value = 10 * value + Character.digit(peek, 10); readch(); } while (Character.isDigit(peek)); Num n = new Num(value); tokens.add(n.toString()); //table.put(n, "Num"); return n; } /* * 关键字或者是标识符的识别 */ if(Character.isLetter(peek)) { StringBuffer sb = new StringBuffer(); /* 首先得到整个的一个分割 */ do { sb.append(peek); readch(); } while (Character.isLetterOrDigit(peek)); /* 判断是关键字还是标识符 */ String s = sb.toString(); Word w = (Word)words.get(s); /* 如果是关键字或者是类型的话，w不应该是空的 */ if(w != null) { // table.put(w, "KeyWord or Type"); tokens.add(w.toString()); return w; /* 说明是关键字或者是类型名 */ } /* 否则就是一个标识符id */ w = new Word(s, Tag.ID); tokens.add(w.toString()); table.put(w, "id"); words.put(s, w); return w; } /* peek中的任意字符都被认为是词法单元返回 */ Token tok = new Token(peek); // table.put(tok, "Token or Seprator"); if ((int)peek != 0xffff ) tokens.add(tok.toString()); peek = ' '; return tok; } }

Num.java

[java] view plaincopyprint?

package lexer;
public class Num extends Token{
public final int value;
public Num(int v) {
super(Tag.NUM);
this.value = v;
}
public String toString() {
return "" + value;
}
}

package lexer; public class Num extends Token{ public final int value; public Num(int v) { super(Tag.NUM); this.value = v; } public String toString() { return "" + value; } }

Tag.java

[java] view plaincopyprint?

package lexer;
/**
 * Integer tags identifying multi-character token kinds. Values start at
 * 256 and are consecutive.
 */
public class Tag {
    public static final int AND = 256;
    public static final int BASIC = 257;
    public static final int BREAK = 258;
    public static final int DO = 259;
    public static final int ELSE = 260;
    /** The "==" operator. */
    public static final int EQ = 261;
    public static final int FALSE = 262;
    public static final int GE = 263;
    public static final int ID = 264;
    public static final int IF = 265;
    public static final int INDEX = 266;
    public static final int LE = 267;
    public static final int MINUS = 268;
    public static final int NE = 269;
    public static final int NUM = 270;
    public static final int OR = 271;
    public static final int REAL = 272;
    public static final int TEMP = 273;
    public static final int TRUE = 274;
    public static final int WHILE = 275;
    /** Added later, after the original list. */
    public static final int THEN = 276;
}

package lexer;

/**
 * Integer tags identifying multi-character token kinds. Values start at
 * 256 and are consecutive.
 */
public class Tag {
    public static final int AND = 256;
    public static final int BASIC = 257;
    public static final int BREAK = 258;
    public static final int DO = 259;
    public static final int ELSE = 260;
    /** The "==" operator. */
    public static final int EQ = 261;
    public static final int FALSE = 262;
    public static final int GE = 263;
    public static final int ID = 264;
    public static final int IF = 265;
    public static final int INDEX = 266;
    public static final int LE = 267;
    public static final int MINUS = 268;
    public static final int NE = 269;
    public static final int NUM = 270;
    public static final int OR = 271;
    public static final int REAL = 272;
    public static final int TEMP = 273;
    public static final int TRUE = 274;
    public static final int WHILE = 275;
    /** Added later, after the original list. */
    public static final int THEN = 276;
}

Token.java

[java] view plaincopyprint?

package lexer;
/** A lexical token identified by an integer tag. */
public class Token {
    /** Tag identifying this token. */
    public final int tag;

    /**
     * @param t the tag to store
     */
    public Token(int t) {
        tag = t;
    }

    /** Returns the tag interpreted as a single character. */
    @Override
    public String toString() {
        return String.valueOf((char) tag);
    }

    /** Small demo: prints the token built from the character 'a'. */
    public static void main(String[] args) {
        System.out.println(new Token('a'));
    }
}

package lexer;

/** A lexical token identified by an integer tag. */
public class Token {
    /** Tag identifying this token. */
    public final int tag;

    /**
     * @param t the tag to store
     */
    public Token(int t) {
        tag = t;
    }

    /** Returns the tag interpreted as a single character. */
    @Override
    public String toString() {
        return String.valueOf((char) tag);
    }

    /** Small demo: prints the token built from the character 'a'. */
    public static void main(String[] args) {
        System.out.println(new Token('a'));
    }
}

Word.java

[java] view plaincopyprint?

/*
* 类word用于管理保留字，标识符以及像&&这样的复合单词元素。
*/
package lexer;
public class Word extends Token {
public String lexme = "";
public Word (String s, int t) {
super(t);
this.lexme = s;
}
public String toString() {
return this.lexme;
}
public static final Word
and = new Word("&&", Tag.AND),
or = new Word("||", Tag.OR),
eq = new Word ("==", Tag.EQ),
ne = new Word("!=", Tag.NE),
le = new Word("<=", Tag.LE),
ge = new Word(">=", Tag.GE),
minus = new Word("minus", Tag.MINUS),
True = new Word("true", Tag.TRUE),
False = new Word("false", Tag.FALSE),
temp = new Word("t", Tag.TEMP);
}

/* * 类word用于管理保留字，标识符以及像&&这样的复合单词元素。 */ package lexer; public class Word extends Token { public String lexme = ""; public Word (String s, int t) { super(t); this.lexme = s; } public String toString() { return this.lexme; } public static final Word and = new Word("&&", Tag.AND), or = new Word("||", Tag.OR), eq = new Word ("==", Tag.EQ), ne = new Word("!=", Tag.NE), le = new Word("<=", Tag.LE), ge = new Word(">=", Tag.GE), minus = new Word("minus", Tag.MINUS), True = new Word("true", Tag.TRUE), False = new Word("false", Tag.FALSE), temp = new Word("t", Tag.TEMP); }

Type.java

[java] view plaincopyprint?

/*
* 说明数据类型
*/
package symbols;
import lexer.*;
/**
 * Word describing a data type. The four predefined types all use the tag
 * {@code Tag.BASIC}.
 */
public class Type extends Word {
    /**
     * @param s   the type name
     * @param tag the tag passed on to {@code Word}
     */
    public Type(String s, int tag) {
        super(s, tag);
    }

    public static final Type Int = new Type("int", Tag.BASIC);
    public static final Type Float = new Type("float", Tag.BASIC);
    public static final Type Char = new Type("char", Tag.BASIC);
    public static final Type Bool = new Type("bool", Tag.BASIC);
}

一个简略词法分析器的实现代码（Java 实现）

一个简单词法分析器的实现代码（Java 实现）

相关推荐