编译原理LL1语法分析算法的兑现(token转四元式)
#include <stdio.h>
#include <stack>
#include <vector>
#include <iostream>
#include <fstream>
#include <malloc.h>
using namespace std;
//token结构
struct token {
int code; //token的类别,code为1则是符号,为2则是数字
char value; //token的值
};
typedef struct token tokens;
vector<tokens> tokenBuffer; //用于存储token的缓冲区
stack<tokens> tokenBuffers;
//产生式结构
struct formula {
int id; //产生式编号
char left; //产生式左部
char right[256]; //产生式右部
int r_length; //产生式右部长度
};
typedef struct formula formulas;
formulas formulaBuffer[11]; //用于存储产生式的缓冲区
//四元式结构
struct expression {
char symbol; //操作符
char num1; //第一个操作数
char num2; //第二个操作数
char result; //结果变量
};
typedef struct expression expressions;
vector<expressions> expressionBuffer; //用于存储四元式的缓冲区
int expressionCount = 0; //四元式个数
//分析表中每一项的结构
struct analysisTable {
char currentState; //分析栈的栈顶符号
char currentToken; //当前字符
int expressionNum; //对应的产生式编号
};
typedef struct analysisTable analysisTables;
vector<analysisTables> analysisTableBuffer; //LL1分析表的缓冲区
stack<char> analysisStack; //分析栈
stack<char> sematicStack; //语义栈
//初始化LL1分析表
void initialAnalysisTableBuffer() {
analysisTables* temp1a = new analysisTable;
analysisTables* temp1b = new analysisTable;
analysisTables* temp1c = new analysisTable;
analysisTables* temp2 = new analysisTable;
analysisTables* temp3 = new analysisTable;
analysisTables* temp4 = new analysisTable;
analysisTables* temp5 = new analysisTable;
analysisTables* temp6 = new analysisTable;
analysisTables* temp7a = new analysisTable;
analysisTables* temp7b = new analysisTable;
analysisTables* temp7c = new analysisTable;
analysisTables* temp8 = new analysisTable;
analysisTables* temp9 = new analysisTable;
analysisTables* temp10 = new analysisTable;
analysisTables* temp11 = new analysisTable;
analysisTables* temp12 = new analysisTable;
analysisTables* temp13 = new analysisTable;
analysisTables* temp14 = new analysisTable;
analysisTables* temp15a = new analysisTable;
analysisTables* temp15b = new analysisTable;
analysisTables* temp15c = new analysisTable;
analysisTables* temp16 = new analysisTable;
temp1a->expressionNum = 1;
temp1a->currentState = 'E';
temp1a->currentToken = 'a';
temp1b->expressionNum = 1;
temp1b->currentState = 'E';
temp1b->currentToken = 'b';
temp1c->expressionNum = 1;
temp1c->currentState = 'E';
temp1c->currentToken = 'c';
temp2->expressionNum = 1;
temp2->currentState = 'E';
temp2->currentToken = '(';
temp3->expressionNum = 2;
temp3->currentState = 'L';
temp3->currentToken = '+';
temp4->expressionNum = 3;
temp4->currentState = 'L';
temp4->currentToken = '-';
temp5->expressionNum = 4;
temp5->currentState = 'L';
temp5->currentToken = ')';
temp6->expressionNum = 4;
temp6->currentState = 'L';
temp6->currentToken = '#';
temp7a->expressionNum = 5;
temp7a->currentState = 'T';
temp7a->currentToken = 'a';
temp7b->expressionNum = 5;
temp7b->currentState = 'T';
temp7b->currentToken = 'b';
temp7c->expressionNum = 5;
temp7c->currentState = 'T';
temp7c->currentToken = 'c';
temp8->expressionNum = 5;
temp8->currentState = 'T';
temp8->currentToken = '(';
temp9->expressionNum = 8;
temp9->currentState = 'M';
temp9->currentToken = '+';
temp10->expressionNum = 8;
temp10->currentState = 'M';
temp10->currentToken = '-';
temp11->expressionNum = 6;
temp11->currentState = 'M';
temp11->currentToken = '*';
temp12->expressionNum = 7;
temp12->currentState = 'M';
temp12->currentToken = '/';
temp13->expressionNum = 8;
temp13->currentState = 'M';
temp13->currentToken = ')';
temp14->expressionNum = 8;
temp14->currentState = 'M';
temp14->currentToken = '#';
temp15a->expressionNum = 9;
temp15a->currentState = 'F';
temp15a->currentToken = 'a';
temp15b->expressionNum = 9;
temp15b->currentState = 'F';
temp15b->currentToken = 'b';
temp15c->expressionNum = 9;
temp15c->currentState = 'F';
temp15c->currentToken = 'c';
temp16->expressionNum = 10;
temp16->currentState = 'F';
temp16->currentToken = '(';
analysisTableBuffer.push_back(*temp1a);
analysisTableBuffer.push_back(*temp1b);
analysisTableBuffer.push_back(*temp1c);
analysisTableBuffer.push_back(*temp2);
analysisTableBuffer.push_back(*temp3);
analysisTableBuffer.push_back(*temp4);
analysisTableBuffer.push_back(*temp5);
analysisTableBuffer.push_back(*temp6);
analysisTableBuffer.push_back(*temp7a);
analysisTableBuffer.push_back(*temp7b);
analysisTableBuffer.push_back(*temp7c);
analysisTableBuffer.push_back(*temp8);
analysisTableBuffer.push_back(*temp9);
analysisTableBuffer.push_back(*temp10);
analysisTableBuffer.push_back(*temp11);
analysisTableBuffer.push_back(*temp12);
analysisTableBuffer.push_back(*temp13);
analysisTableBuffer.push_back(*temp14);
analysisTableBuffer.push_back(*temp15a);
analysisTableBuffer.push_back(*temp15b);
analysisTableBuffer.push_back(*temp15c);
analysisTableBuffer.push_back(*temp16);
}
//由于本次实验主要是考察语法分析和语义分析,所以为了节省时间不采用查表的方式获取token,而是直接初始化token的值。
//初始化token缓冲区
void initialTokenBuffer() {
ifstream fin("tokens.txt");
char temp[10][5];
int i = 0;
while (!fin.eof()) {
fin.getline(temp[i], 4);
i++;
}
fin.close();
do {
i--;
tokens *token_temp = new tokens();
token_temp->code = atoi(&temp[i][0]);
token_temp->value = temp[i][2];
tokenBuffer.push_back(*token_temp);
tokenBuffers.push(*token_temp);
} while (i != 0);
// tokens* t1 = new tokens();
// tokens* t2 = new tokens();
// tokens* t3 = new tokens();
// tokens* t4 = new tokens();
// tokens* t5 = new tokens();
// tokens* t6 = new tokens();
// t1->code = 2;
// t1->value = 'a';
// t2->code = 1;
// t2->value = '+';
// t3->code = 2;
// t3->value = 'b';
// t4->code = 1;
// t4->value = '*';
// t5->code = 2;
// t5->value = 'c';
// t6->code = 1;
// t6->value = '#';
// tokenBuffer.push_back(*t1);
// tokenBuffer.push_back(*t2);
// tokenBuffer.push_back(*t3);
// tokenBuffer.push_back(*t4);
// tokenBuffer.push_back(*t5);
// tokenBuffer.push_back(*t6);
//
// tokenBuffers.push(*t6);
// tokenBuffers.push(*t5);
// tokenBuffers.push(*t4);
// tokenBuffers.push(*t3);
// tokenBuffers.push(*t2);
// tokenBuffers.push(*t1);
}
//初始化产生式缓冲区
void initialFormulaBuffer() {
formulas *fTemp1 = new formula;
formulas *fTemp2 = new formula;
formulas *fTemp3 = new formula;
formulas *fTemp4 = new formula;
formulas *fTemp5 = new formula;
formulas *fTemp6 = new formula;
formulas *fTemp7 = new formula;
formulas *fTemp8 = new formula;
formulas *fTemp9 = new formula;
formulas *fTemp10 = new formula;
fTemp1->id = 1;
fTemp1->left = 'E';
fTemp1->r_length = 2;
fTemp1->right[0] = 'T';
fTemp1->right[1] = 'L';
fTemp1->right[2] = '\0';
fTemp2->id = 2;
fTemp2->left = 'L';
fTemp2->r_length = 4;
fTemp2->right[0] = '+';
fTemp2->right[1] = 'T';
fTemp2->right[2] = 'Y';
fTemp2->right[3] = 'L';
fTemp2->right[4] = '\0';
fTemp3->id = 3;
fTemp3->left = 'L';
fTemp3->r_length = 4;
fTemp3->right[0] = '-';
fTemp3->right[1] = 'T';
fTemp3->right[2] = 'U';
fTemp3->right[3] = 'L';
fTemp3->right[4] = '\0';
fTemp4->id = 4;
fTemp4->left = 'L';
fTemp4->r_length = 0;
//fTemp->right[0] = "N";
fTemp5->id = 5;
fTemp5->left = 'T';
fTemp5->r_length = 2;
fTemp5->right[0] = 'F';
fTemp5->right[1] = 'M';
fTemp6->id = 6;
fTemp6->left = 'M';
fTemp6->r_length = 4;
fTemp6->right[0] = '*';
fTemp6->right[1] = 'F';
fTemp6->right[2] = 'I';
fTemp6->right[3] = 'M';
fTemp7->id = 7;
fTemp7->left = 'M';
fTemp7->r_length = 4;
fTemp7->right[0] = '/';
fTemp7->right[1] = 'F';
fTemp7->right[2] = 'O';
fTemp7->right[3] = 'M';
fTemp8->id = 8;
fTemp8->left = 'M';
fTemp8->r_length = 0;
fTemp9->id = 9;
fTemp9->left = 'F';
fTemp9->r_length = 2;
fTemp9->right[0] = 'i';
fTemp9->right[1] = 'P';
fTemp10->id = 10;
fTemp10->left = 'F';
fTemp10->r_length = 3;
fTemp10->right[0] = '(';
fTemp10->right[1] = 'E';
fTemp10->right[2] = ')';
formulaBuffer[0] = *fTemp1;
formulaBuffer[1] = *fTemp2;
formulaBuffer[2] = *fTemp3;
formulaBuffer[3] = *fTemp4;
formulaBuffer[4] = *fTemp5;
formulaBuffer[5] = *fTemp6;
formulaBuffer[6] = *fTemp7;
formulaBuffer[7] = *fTemp8;
formulaBuffer[8] = *fTemp9;
formulaBuffer[9] = *fTemp10;
}
//入语义栈操作
void accept(tokens temp) {
if (temp.code == 1) {
cout << temp.value << "匹配成功" << endl;
return;
}
cout << temp.value << " 匹配成功" << endl;
cout << temp.value << " 进入语义栈" << endl;
sematicStack.push(temp.value);
}
//产生四元式并添加到四元式缓冲区中
void produceExpression(char temp) {
expressions *eTemp = new expressions;
eTemp->num2 = sematicStack.top();
sematicStack.pop();
eTemp->num1 = sematicStack.top();
sematicStack.pop();
eTemp->symbol = temp;
eTemp->result = (char) ((int) 'u' + expressionCount);
sematicStack.push(eTemp->result);
expressionBuffer.push_back(*eTemp);
expressionCount++;
}
//输出四元式
void printExpression() {
for (vector<expressions>::iterator i = expressionBuffer.begin(); i != expressionBuffer.end(); i++) {
cout << "四元式:" << i->symbol << " " << (char) (i->num1) << " " << (char) (i->num2) << " " << i->result << endl;
}
}
//输出读取到的token
void printTokens() {
for (vector<tokens>::iterator i = tokenBuffer.begin(); i != tokenBuffer.end(); i++) {
cout << "token:" << i->code << " " << i->value << endl;
}
}
//查找分析表将相应产生式压入分析栈
bool searchAnalysiTable(char cState, tokens cToken) {
formulas *fTemp = new formula;
//查分析表,获取对应的产生式编号
int e_num;
vector<analysisTables>::iterator i;
for (i = analysisTableBuffer.begin(); i != analysisTableBuffer.end(); i++) {
if (i->currentState == cState && i->currentToken == cToken.value) {
e_num = i->expressionNum;
break;
}
}
if (i == analysisTableBuffer.end()) {
cout << "parse error" << endl;
return false;
}
cout << "产生式编号:" << e_num << endl;
//查找产生式缓冲区获得对应产生式
fTemp = &formulaBuffer[e_num - 1];
cout << analysisStack.top() << "出栈" << endl;
analysisStack.pop();
//将产生式右部逆序压栈
if (e_num == 9)
fTemp->right[0] = cToken.value;
int j = fTemp->r_length;
for (int i = 0; i < fTemp->r_length; i++) {
cout << fTemp->right[j - 1] << "进入分析栈" << endl;
analysisStack.push(fTemp->right[j - 1]);
j--;
}
return true;
}
int main(int argc, char *argv[]) {
initialAnalysisTableBuffer();
initialTokenBuffer();
initialFormulaBuffer();
printTokens();
analysisStack.push('#');
analysisStack.push('E');
bool b_temp;
while (!analysisStack.empty()) {
tokens currentToken = tokenBuffers.top();
if (currentToken.value == analysisStack.top()) {
accept(currentToken);
tokenBuffers.pop();
cout << analysisStack.top() << "出栈" << endl;
analysisStack.pop();
continue;
}
cout << "current state:" << analysisStack.top() << endl;
cout << "current token:" << currentToken.value << endl;
switch (analysisStack.top()) {
case 'Y':
produceExpression('+');
analysisStack.pop();
break;
case 'U':
produceExpression('-');
analysisStack.pop();
break;
case 'I':
produceExpression('*');
analysisStack.pop();
break;
case 'O':
produceExpression('/');
analysisStack.pop();
break;
case 'P':
analysisStack.pop();
cout << "P出栈" << endl;
break;
case '#':
analysisStack.pop();
break;
case 'E':
case 'F':
case 'L':
case 'M':
case 'N':
case 'T':
b_temp = searchAnalysiTable(analysisStack.top(), currentToken);
break;
default:
cout << "error" << endl;
getchar();
}
if (!b_temp)
return 0;
}
printExpression();
return 0;
}