编译技术上机实验1——词法分析

编译技术上机实验1——词法分析

大连理工大学编译技术课程——词法分析上机实验

实验目的：对循环语句和条件判断语句编写词法分析编译程序，只能通过一遍扫描完成。(用c++实现)

实验要求：

(1) 关键字：

for if then else while do

所有关键字都是小写。

(2) 运算符和分隔符：

: := = + - * / < > <= <> >= ; ( ) #

(3)其他标识符（ID）和整型常数（NUM），通过以下正规式定义：

ID=letter(letter | digit)*

NUM=digit digit*

(4)空格由空白、制表符和换行符组成。空格一般用来分隔ID、NUM、运算符、分隔符和关键字，词法分析阶段通常被忽略。

各种词法单元对应的词法记号如下：

词法单元	词法记号	词法单元	词法记号
for	1	:	17
if	2	:=	18
then	3	<	20
else	4	<>	21
while	5	<=	22
do	6	>	23
letter(letter+digit)*	10	>=	24
digit digit*	11	=	25
+	13	;	26
-	14	(	27
*	15	)	28
/	16	#	0

词法分析程序的功能

输入：源程序

输出：二元组（词法记号,属性值/其在符号表中的位置）构成的序列。

例如：对源程序

x:=5; if (x>0) then x:=2*x+1/3; else x:=2/x; #

经词法分析后输出如下序列：

(10,'x') (18,:=) (11,5) (26,;) (2,if) (27,() ……

1.几点说明：

（1）关键字表的初值。

关键字作为特殊标识符处理，把它们预先安排在一张表格中（称为关键字表）。当扫描程序识别出标识符后，查关键字表：如能查到匹配的单词，则该单词为关键字，否则为一般标识符。关键字表为一个字符串数组，其描述如下：

char *keyword[6] = {"for", "if", "then", "else", "while", "do"};

(2) 程序中需要用到的主要变量为 token , id和num.

1）id用来存放构成词法单元的字符串；

2）num用来存放整数（可以扩展到浮点数和科学计数法表示）；

3）token用来存放词法单元的词法记号。

可以参考下面的代码：

// Reference driver loop: scan one lexeme per iteration and print it as a
// (token code, attribute) pair until the end marker '#' (code 0) is reached.
do {
    lexical(); // Stores the token code in token and the attribute value in num or id.

    switch (token) {
    case 11: // Integer constant: the attribute is the numeric value in num.
        printf("(%d,%d)\n", token, num);
        break;
    case -1: // Unrecognised character.
        printf("error!\n");
        break;
    default: // Everything else: the attribute is the spelling held in id.
        printf("(%d,%s)\n", token, id);
    }
} while (token != 0);

#include <iostream>

#include <string.h>

#include <stdlib.h>

using namespace std;

char Input[100]; // Source text to be scanned; expected to be NUL-terminated.

char token[20]; // Spelling of the most recently scanned lexeme, NUL-terminated.

int mark; // Token code of the most recently scanned lexeme; -1 signals an error.

// Keyword table. KeyWords[i] has token code i + 1, so the order must stay
// for=1, if=2, then=3, else=4, while=5, do=6.
const char *KeyWords[6]={"for","if","then","else","while","do"};

char ch; // Character currently being examined by Scanner().

int p=0,n,sum; // p: read position in Input; n: keyword-table index; sum: value of the last integer constant.

int m; // Number of characters currently stored in token.

// Returns true when c is an ASCII letter.
static bool IsLetter(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

// Returns true when c is an ASCII decimal digit.
static bool IsDigit(char c)
{
    return c >= '0' && c <= '9';
}

// Appends c to token. Characters that would not leave room for the
// terminating NUL are dropped, so over-long lexemes are truncated instead of
// overflowing the buffer.
static void AppendToToken(char c)
{
    if (m < static_cast<int>(sizeof(token)) - 1)
        token[m++] = c;
}

// Scans the next lexeme from Input starting at position p.
//
// On return:
//   mark  - token code: 1..6 keywords, 10 identifier, 11 integer constant,
//           13 '+', 14 '-', 15 '*', 16 '/', 17 ':', 18 ':=', 20 '<',
//           21 '<>', 22 '<=', 23 '>', 24 '>=', 25 '=', 26 ';', 27 '(',
//           28 ')', 0 '#', or -1 for an unrecognised character or the end of
//           Input.
//   token - spelling of the lexeme (truncated to 19 characters).
//   sum   - numeric value when mark == 11, otherwise 0.
//   p     - advanced past the lexeme; left on the terminating NUL at the end
//           of Input so repeated calls never read beyond it.
void Scanner()
{
    sum = 0;
    for (m = 0; m < static_cast<int>(sizeof(token)); m++)
        token[m] = '\0';
    m = 0;

    // Skip the white space that separates lexemes.
    ch = Input[p++];
    while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')
        ch = Input[p++];

    if (IsLetter(ch))
    {
        // ID = letter (letter | digit)*
        while (IsLetter(ch) || IsDigit(ch))
        {
            AppendToToken(ch);
            ch = Input[p++];
        }
        p--; // Give back the character that ended the identifier.

        mark = 10;
        for (n = 0; n < 6; n++)
        {
            if (strcmp(token, KeyWords[n]) == 0) // Keywords are reserved identifiers.
            {
                mark = n + 1;
                break;
            }
        }
    }
    else if (IsDigit(ch))
    {
        // NUM = digit digit*
        while (IsDigit(ch))
        {
            AppendToToken(ch);
            sum = sum * 10 + (ch - '0');
            ch = Input[p++];
        }
        p--; // Give back the character that ended the number.
        mark = 11;
    }
    else
    {
        // Operators and delimiters. Two-character operators are detected by
        // peeking at Input[p], which is consumed only when it belongs to the
        // operator.
        switch (ch)
        {
        case ':':
            AppendToToken(ch);
            if (Input[p] == '=')
            {
                AppendToToken(Input[p++]);
                mark = 18;
            }
            else
            {
                mark = 17;
            }
            break;

        case '<':
            AppendToToken(ch);
            if (Input[p] == '=')
            {
                AppendToToken(Input[p++]);
                mark = 22;
            }
            else if (Input[p] == '>')
            {
                AppendToToken(Input[p++]);
                mark = 21;
            }
            else
            {
                mark = 20;
            }
            break;

        case '>':
            AppendToToken(ch);
            if (Input[p] == '=')
            {
                AppendToToken(Input[p++]);
                mark = 24;
            }
            else
            {
                mark = 23;
            }
            break;

        case '+':
            AppendToToken(ch);
            mark = 13;
            break;

        case '-':
            AppendToToken(ch);
            mark = 14;
            break;

        case '*':
            AppendToToken(ch);
            mark = 15;
            break;

        case '/':
            AppendToToken(ch);
            mark = 16;
            break;

        case '=':
            AppendToToken(ch);
            mark = 25;
            break;

        case ';':
            AppendToToken(ch);
            mark = 26;
            break;

        case '(':
            AppendToToken(ch);
            mark = 27;
            break;

        case ')':
            AppendToToken(ch);
            mark = 28;
            break;

        case '#':
            AppendToToken(ch);
            mark = 0;
            break;

        default:
            mark = -1;
            if (ch == '\0')
                p--; // Stay on the terminator so later calls cannot run past Input.
            break;
        }
    }

    token[m] = '\0';
}

// Reads source text from standard input up to the first '#', then prints one
// (token code, attribute) pair per lexeme until the end marker (code 0) is
// scanned. Returns 1 if the scanner reports an unrecognised character.
int main()
{
    int i = 0;

    std::cout << "Please input a string ended with'#':" << std::endl;

    // get() keeps white space, which the scanner needs to separate lexemes
    // (operator>> would silently drop it). Two bytes are reserved for the
    // end marker and the terminating NUL, so over-long input is truncated
    // rather than overflowing Input.
    while (i < static_cast<int>(sizeof(Input)) - 2 && std::cin.get(ch) && ch != '#')
    {
        Input[i++] = ch;
    }
    Input[i++] = '#'; // The scanner stops on '#', so it must be part of the text.
    Input[i] = '\0';

    do
    {
        Scanner(); // Scan the next lexeme into mark / token / sum.

        switch (mark)
        {
        case 11: // Integer constant: the attribute is its numeric value.
            std::cout << "(" << mark << "," << sum << ")" << std::endl;
            break;

        case -1: // Unrecognised character in the input.
            std::cout << "InputError" << std::endl;
            return 1;

        default: // Identifier, keyword, operator or delimiter: the attribute is the spelling.
            std::cout << "(" << mark << "," << token << ")" << std::endl;
            break;
        }
    } while (mark != 0);

    return 0;
}

编译技术下机实验1——词法分析

相关推荐