编译原理实验一(词法分析)

Posted by nop on 2020-04-03
Words 2.3k In Total

说明

  1. 实验代码采用多文件的形式,即包含三部分:
    • lex.h : 结构体以及类(成员函数等)的声明
    • lex.cpp : 类的成员函数的实现
    • main.cpp : 主调函数,主要为交互的逻辑代码
  2. 此外,程序运行还需要一个必需的文件 lib_local.txt,用于存放用户新增的字符(关键字、边界符、运算符)
  3. test.txt 为测试文件,即内容为编写的“c代码”

实验流程图

Alt

实验代码

  • lex.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
using namespace std;

#ifndef LEX_H
#define LEX_H

// Highest table index occupied by the built-in entries of each kind.
// The macro bodies deliberately carry no trailing ';' so that they can be
// used inside expressions as well as in plain initialisations.
#define CURRENT_K 7  // last built-in keyword index in k[]
#define CURRENT_B 20 // last built-in delimiter index in s[]
#define CURRENT_C 9  // last built-in arithmetic-operator index in s[]

// One symbol-table entry (used for both the keyword table and the
// operator/delimiter table). Kept as a plain aggregate because the tables are
// filled with brace-enclosed initialiser lists.
struct t
{
int i; // number copied into result::type_num when the entry is matched
int type; // internal code
string value; // text of the symbol
};
// One recognised token. Nodes are chained through `next` into a singly
// linked list. Every field has a default so that a freshly allocated node
// (including the list head) never carries an indeterminate `next` pointer.
struct result
{
    std::string value;      // the lexeme as it appeared in the input
    int type_num = 0;       // category number
    int type_code = 0;      // internal code
    std::string type;       // human-readable category name
    int row = 0;            // line number
    int column = 0;         // column number
    result *next = nullptr; // following node, nullptr at the tail
};

// Lexical analyser: holds the keyword and symbol tables and the linked list
// of tokens recognised so far.
class LEX
{
public:
LEX(); // initialise the keyword table, delimiter table, etc.
void add(int i,int type, char value[]); // append a new keyword, delimiter or operator to lib_local.txt
void add2list(); // load the user-added entries into the symbol tables
void help(); // print the usage help
void scaner(string content,int len); // tokenise the first len characters of content
bool isLetter(char ch); // true when ch is a letter
bool isDigit(char ch); // true when ch is a digit
bool isKey(string ch); // true when ch is a keyword (returns a bool, not a table index)
int isDelimiter(char ch);// delimiter test: table index on a match, -1 otherwise
int isOpreator(char ch); // operator test: table index on a match, -1 otherwise

void doNum(string value, int row, int column); // record a numeric constant
void doKey(string value, int row, int column); // record a keyword
void doId(string value, int row, int column); // record an identifier
void doDelimiter(int row, int column,int num); // record a delimiter
void doOpreator(int row, int column, int num); // record an operator
bool doOther(string value, int row, int column); // record a relational operator; false for an illegal character
void show(); // print the results
private:
t k[40],s[30];
// k: keyword table, s: operator/delimiter table. Both arrays are larger than
// the built-in tables so that entries can be added.
char input[20]; // input buffer; line-buffered, i.e. processed whenever a newline is met
result *res,*p;// singly linked result list: res is the head node, p the current tail
int k_num,c_num,b_num;// counters for user-added keywords (k_num), operators (c_num) and delimiters (b_num)
};

#endif
  • lex.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
#include "lex.h"
#include<cstring>

// NOTE(review): class LEX declares a member with the same name, and inside
// LEX's member functions the member is the one found by name lookup, so this
// global looks unused (and is never freed) — confirm before removing it.
result *res = new result;// singly linked list holding the results

void LEX::show()
{
result *q;
q = res->next;
cout<<"value\t\ttype_num\ttype_code\ttpye\t\t\trow\t\tcolumn"<<endl;
do{
cout<<q->value<<"\t\t"<<q->type_num<<"\t\t"\
<<q->type_code<<"\t\t"<<q->type\
<<"\t"<<q->row<<"\t\t"<<q->column<<endl;
q = q->next;
}while(q);
}

// Try to record `value` as a relational operator.
//
// value  candidate lexeme (one or two characters)
// row    line number stored in the token
// colum  column number stored in the token
//
// Returns true and appends a "Relation Opreator" node to the result list when
// `value` equals one of the relational operators in s[0]..s[5]; returns false
// (recording nothing) otherwise.
bool LEX::doOther(string value, int row, int colum){
    for (int i = 0; i < 6; i++) {
        // Some table entries are padded with trailing blanks ("< ", "= ");
        // strip the padding and require an exact match. A substring search
        // would classify "=" as "<=" because "<=" precedes "= " in the table.
        string entry = s[i].value;
        const string::size_type last = entry.find_last_not_of(' ');
        entry.erase(last == string::npos ? 0 : last + 1);
        if (entry != value)
            continue;

        // Allocate only once a match is certain, so a failed lookup leaks nothing.
        result *r = new result;
        r->value = value;
        r->type_num = s[i].i;
        r->type_code = s[i].type;
        r->type = "Relation Opreator";
        r->row = row;
        r->column = colum;
        r->next = NULL;
        p->next = r;
        p = r;
        return true;
    }
    return false;
}

void LEX::doOpreator(int row, int column, int num){
result *r = new result;
r->value = s[num].value;
r->type_num = s[num].i;
r->type_code = s[num].type;
r->type = "Opreator ";
r->row = row;
r->column = column;
r->next = NULL;
p->next = r;
p = p->next;
}

void LEX::doDelimiter(int row, int column, int num){
result *r = new result;
r->value = s[num].value;
r->type_num = s[num].i;
r->type_code = s[num].type;
r->type = "Delimiter ";
r->row = row;
r->column = column;
r->next = NULL;
p->next = r;
p = p->next;
}

// Append an identifier token to the result list.
void LEX::doId(string value, int row,int column){
    result *node = new result;
    node->next = NULL;
    node->row = row;
    node->column = column;
    node->type = "ID ";
    node->type_code = 6; // identifiers use internal code 6
    node->type_num = 1;  // identifiers use category number 1
    node->value = value;

    // tail insertion
    p->next = node;
    p = node;
}

// Append a keyword token for `value` to the result list.
//
// The keyword table k[] is searched from index 0 up to the last slot that can
// currently hold an entry; the first matching entry supplies the category
// number and internal code. Nothing is recorded when `value` is not in the
// table.
void LEX::doKey(string value, int row, int column){
    const int capacity = sizeof(k) / sizeof(k[0]);
    int last = CURRENT_K;
    last += k_num;
    // never index past the end of k[], however large k_num has grown
    if (last >= capacity)
        last = capacity - 1;

    for (int i = 0; i <= last; i++) {
        if (k[i].value != value)
            continue;

        // Allocate only on a match (no leak on a miss) and stop at the first
        // hit: linking one node twice would turn the list into a cycle.
        result *node = new result;
        node->value = value;
        node->type_num = k[i].i;
        node->type_code = k[i].type;
        node->type = "key word ";
        node->row = row;
        node->column = column;
        node->next = NULL;
        p->next = node;
        p = node;
        return;
    }
}

// Append a numeric-constant token to the result list.
void LEX::doNum(string value, int row, int column){
    result *node = new result;
    node->next = NULL;
    node->row = row;
    node->column = column;
    node->type = "number ";
    node->type_code = 5; // constants use internal code 5
    node->type_num = 0;  // constants use category number 0
    node->value = value;

    // tail insertion keeps the tokens in input order
    p->next = node;
    p = node;
}

// Look ch up among the operator slots s[6]..s[14], comparing it with the
// first character of each entry. Returns the slot index, or -1 if none match.
int LEX::isOpreator(char ch){
    for (int slot = 6; slot < 15; ++slot) {
        if (s[slot].value[0] == ch)
            return slot;
    }
    return -1;
}

// Look ch up among the delimiter slots s[15]..s[29], comparing it with the
// first character of each entry. Returns the slot index, or -1 if none match.
int LEX::isDelimiter(char ch)
{
    for (int slot = 15; slot < 30; ++slot) {
        if (s[slot].value[0] == ch)
            return slot;
    }
    return -1;
}

// Report whether ch equals the text of any of the 40 keyword-table slots.
bool LEX::isKey(string ch)
{
    for (int slot = 0; slot < 40; ++slot) {
        if (k[slot].value == ch)
            return true;
    }
    return false;
}

// True when ch is one of the characters '0'..'9'.
bool LEX::isDigit(char ch)
{
    const bool below = ch < '0';
    const bool above = ch > '9';
    return !(below || above);
}

// True when ch lies in 'a'..'z' or 'A'..'Z'.
bool LEX::isLetter(char ch)
{
    const bool lower = (ch >= 'a') && (ch <= 'z');
    const bool upper = (ch >= 'A') && (ch <= 'Z');
    return lower || upper;
}
// Tokenise the first `len` characters of `content`, line by line, appending
// one node per token to the result list and printing an error line for every
// character that cannot be classified.
//
// content  text to analyse
// len      number of characters of `content` to look at (clamped to its size)
//
// `row` is the 1-based line number. `column` is the 1-based ordinal of the
// token within its line (it is incremented once per token, not per character).
//
// The text is scanned in place instead of being copied into the fixed
// 20-byte `input` member, so lines of any length are handled, and a final
// line that is not terminated by '\n' is processed as well.
void LEX::scaner(string content,int len)
{
    // Never read past the end of content, whatever length the caller passed.
    if (len < 0)
        len = 0;
    if (static_cast<string::size_type>(len) > content.size())
        len = static_cast<int>(content.size());

    int row = 0;
    int pos = 0; // index of the first character of the current line
    while (pos < len) {
        // [pos, eol) is the current line without its terminating '\n'
        int eol = pos;
        while (eol < len && content[eol] != '\n')
            eol++;
        row++;
        int column = 0;

        int i = pos;
        while (i < eol) {
            const char ch = content[i];
            int num = -1; // table index of a matched delimiter/operator

            if (ch == ' ' || ch == '\t' || ch == '\r') {
                // white space separates tokens but is not a token itself
                i++;
            } else if (isDigit(ch)) {
                // numeric constant: a maximal run of digits
                int j = i;
                while (j < eol && isDigit(content[j]))
                    j++;
                column++;
                doNum(content.substr(i, j - i), row, column);
                i = j;
            } else if (isLetter(ch)) {
                // keyword or identifier: a maximal run of letters
                int j = i;
                while (j < eol && isLetter(content[j]))
                    j++;
                const string word = content.substr(i, j - i);
                column++;
                if (isKey(word))
                    doKey(word, row, column);
                else
                    doId(word, row, column);
                i = j;
            } else if ((num = isDelimiter(ch)) != -1) {
                // single-character delimiter
                column++;
                doDelimiter(row, column, num);
                i++;
            } else if ((num = isOpreator(ch)) != -1) {
                // single-character operator
                column++;
                doOpreator(row, column, num);
                i++;
            } else {
                // relational operator (one or two characters) or illegal character
                column++;

                // A two-character operator is only attempted when the next
                // character cannot start a token of another class.
                bool pairable = false;
                if (i + 1 < eol) {
                    const char next = content[i + 1];
                    pairable = next != ' ' && next != '\t' && next != '\r'
                            && !isDigit(next) && !isLetter(next)
                            && isDelimiter(next) == -1
                            && isOpreator(next) == -1;
                }

                if (pairable && doOther(content.substr(i, 2), row, column)) {
                    i += 2;
                } else {
                    const string single(1, ch);
                    if (!doOther(single, row, column))
                        cout<<"Error: > '"<<single<<"' <\t at:("<<row<<", "<<column<<")"<<endl;
                    i++;
                }
            }
        }
        pos = eol + 1; // step over the '\n' (or past the end of the text)
    }
}

// Print the command-line usage summary to standard output.
void LEX::help()
{
    cout << "Usage: ./lex.exe [optinos] " << endl
         << "Options:" << endl
         << "\t-h\t\t\tDisplay this information." << endl
         << "\t-a [i type value]\tadd new characters." << endl
         << "\t-g [file]\t\tget content from file.txt and analysis it." << endl;
}

void LEX::add2list()
{
// 读取本地文件,添加用户新增字符
FILE *fp;
fp = fopen("lib_local.txt","r");
if(fp == NULL){
printf("Failed to open lib_local.txt\n");
exit(1);
}
int n(1);
char ch;
while((ch = getc(fp))!=EOF){
if(ch == '\n')
n++;
}
fclose(fp);
fp = fopen("lib_local.txt","r");
int type_num,type_code;
char value[10];
for(int i =0 ;i<n-1;i++){
fscanf(fp, "%d%d%s", &type_num, &type_code, &value);
if(type_code == 1){
// k_num -> max = 33
if(k_num<=33){
int index = CURRENT_K;
index += k_num;
k[index].i = type_num;
k[index].type = type_code;
k[index].value = value;
k_num++;
}else{
cout<<"Please remove some key words!";
exit(1);
}
}else if(type_code == 2){
// b_num -> max = 9
if(b_num <= 9){
int index = CURRENT_B;
index += b_num;
s[index].i = type_num;
s[index].type = type_code;
s[index].value = value;
b_num++;
}else{
cout<<"Please remove some Delimiters!";
exit(1);
}
}else if(type_code == 3){
// c_mun -> max =5
if(c_num <= 5){
int index = CURRENT_C;
index += c_num;
s[index].i = type_num;
s[index].type = type_code;
s[index].value = value;
c_num++;
}else{
cout<<"Please remove some Opreators!";
exit(1);
}
}else if(type_code == 4)
cout<< "Error:Cannot add arithmetic operators"<<endl;
else
cout<<"Error:This type is not supported!"<<endl;
}
fclose(fp);
}

void LEX::add(int i, int type, char value[])
{
FILE *fp;
fp = fopen("lib_local.txt","a");
if(fp == NULL){
printf("Failed to get local file\n");
exit(1);
}
int index;
if(type == 1 || type == 2 || type == 3){
fprintf(fp, "%d %d %s\n", i, type, value);
fclose(fp);
cout<<"Successful!"<<endl;
}else if(type == 4)
cout<< "Error:Cannot add arithmetic operators"<<endl;
else
cout<<"Error:This type is not supported!"<<endl;
}

// Build the built-in keyword and symbol tables, create the (empty) result
// list and then load the user-defined symbols via add2list().
LEX::LEX(){
    // built-in keywords: {category number, internal code 1, text}
    k[0] = {0, 1, "do"};
    k[1] = {1, 1, "end"};
    k[2] = {2, 1, "for"};
    k[3] = {3, 1, "if"};
    k[4] = {4, 1, "printf"};
    k[5] = {5, 1, "scanf"};
    k[6] = {6, 1, "then"};
    k[7] = {7, 1, "while"};

    /*
    Other symbols. Layout convention:
    s[0]-s[5]   relational operators, cannot be extended
    s[6]-s[14]  arithmetic operators, can be extended
    s[15]-s[29] delimiters, can be extended
    */
    // relational operators all carry internal code 4
    s[0] = {0x00, 4, "< "};
    s[1] = {0x01, 4, "<="};
    s[2] = {0x02, 4, "= "};
    s[3] = {0x03, 4, ">"};
    s[4] = {0x04, 4, ">="};
    s[5] = {0x05, 4, "<>"};

    // arithmetic operators, internal code 3
    s[6] = {0x10, 3, "+"};
    s[7] = {0x11, 3, "-"};
    s[8] = {0x20, 3, "*"};
    s[9] = {0x21, 3, "/"};

    // delimiters, internal code 2
    s[15] = {0, 2, ","};
    s[16] = {1, 2, ";"};
    s[17] = {2, 2, "("};
    s[18] = {3, 2, ")"};
    s[19] = {4, 2, "["};
    s[20] = {5, 2, "]"};

    // The counters start at 1: a new entry is stored at
    // (last built-in index + counter), i.e. directly behind the built-ins.
    k_num = c_num = b_num =1;

    // Dummy head node of the result list. Its next pointer must be null so
    // that an empty list can be recognised as empty.
    p = new result;
    p->next = NULL;
    res = p;
    add2list();
}
  • main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#include "lex.h"

// Reads the remaining content of fp into a string (defined below main).
string get_char_txt(FILE *fp);

// Command-line entry point.
//   (no arguments)      print the help text, exit status 1
//   -h                  print the help text
//   -a i type value     add a new symbol to the local library file
//   -g file             run the lexical analysis on `file` and print the result
// An unknown option prints the help text. An option given without the
// arguments it needs prints the help text and returns 1 instead of reading
// past the end of argv.
int main(int argc, char *argv[])
{
    LEX L;
    if(argc == 1){
        L.help();
        exit(1);
    }

    const string argv1 = argv[1];
    if(argv1 == "-a"){
        if(argc < 5){
            L.help();
            return 1;
        }
        L.add(atoi(argv[2]), atoi(argv[3]), argv[4]);
    }else if(argv1 == "-g"){
        if(argc < 3){
            L.help();
            return 1;
        }
        // start the lexical analysis
        char* file = argv[2];
        FILE *fp = fopen(file,"r");
        if(fp == NULL){
            printf("Failed to open %s\n",file);
            exit(1);
        }
        const string content = get_char_txt(fp);
        fclose(fp);
        L.scaner(content, static_cast<int>(content.length()));
        L.show();
    }else{
        // "-h" and every unrecognised option
        L.help();
    }
    return 0;
}

// Read everything from the current position of fp up to end-of-file and
// return it as a string. A null fp yields an empty string.
std::string get_char_txt(FILE *fp)
{
    std::string value;
    if (fp == NULL)
        return value;
    // getc() returns an int so that EOF can be told apart from every byte
    // value. Storing the result in a char would end the loop at a 0xFF byte
    // where char is signed, and never end it where char is unsigned.
    int ch;
    while ((ch = getc(fp)) != EOF)
        value += static_cast<char>(ch);
    return value;
}

You are welcome to share this blog, so that more people can participate in it. If the images used in the blog infringe your copyright, please contact the author to delete them.