编译原理实验一(词法分析)


说明

  1. 实验代码采用多文件的形式,即包含三部分:
    • lex.h : 结构体以及类(成员函数等)的声明
    • lex.cpp : 类的成员函数的实现
    • main.cpp : 主调函数,主要为交互的逻辑代码
  2. 此外,程序运行还需要一个必需文件 lib_local.txt(缺少该文件时程序会直接退出),用于存放用户新增的字符(关键字、边界符、运算符)
  3. test.txt 为测试文件,其内容为待分析的“c代码”

实验流程图

Alt

实验代码

  • lex.h
#include <iostream>
using namespace std;

#ifndef LEX_H
#define LEX_H

// Highest table index occupied by the built-in entries of each category.
// The replacement text deliberately has no trailing ';' so the macros can be
// used inside expressions as well as in plain initializers.
#define CURRENT_K 7 // keywords: last built-in index in k[]
#define CURRENT_B 20 // delimiters: last built-in index in s[]
#define CURRENT_C 9 // arithmetic operators: last built-in index in s[]

// One entry of a symbol table (keyword, operator or delimiter).
// Members are kept in this order because entries are filled with
// brace lists of the form {i, type, value}.
struct t
{
    int i;             // entry number; copied into result::type_num
    int type;          // internal code; copied into result::type_code
    std::string value; // spelling of the symbol
};
// One recognized token; nodes are chained into a singly linked list.
// Every scalar member has a default initializer so that a node created with
// a plain `new result` (e.g. a list head) never carries an indeterminate
// `next` pointer or garbage numbers.
struct result
{
    std::string value; // original text of the token
    int type_num = 0; // category number
    int type_code = 0; // internal code
    std::string type; // category name
    int row = 0; // line number
    int column = 0; // column number
    result *next = nullptr; // following node, or nullptr at the tail
};

// Lexical analyzer: owns the keyword table and the symbol table, and collects
// the recognized tokens in a singly linked list of `result` nodes.
class LEX
{
    public:
        LEX(); // initializes the keyword table, the symbol table and the result list
        void add(int i,int type, char value[]); // record a new keyword, delimiter or operator in lib_local.txt
        void add2list(); // load the user-added entries from lib_local.txt into the tables
        void help(); // print usage help
        void scaner(string content,int len); // tokenize the first len characters of content
        bool isLetter(char ch); // true if ch is a letter
        bool isDigit(char ch); // true if ch is a digit
        bool isKey(string ch); // true if ch equals an entry of the keyword table, false otherwise
        int isDelimiter(char ch);// index of the matching delimiter entry in s[], or -1 if none
        int isOpreator(char ch); // index of the matching operator entry in s[], or -1 if none

        void doNum(string value, int row, int column); // append a numeric-constant token
        void doKey(string value, int row, int column); // append a keyword token
        void doId(string value, int row, int column); // append an identifier token
        void doDelimiter(int row, int column,int num); // append the delimiter stored at s[num]
        void doOpreator(int row, int column, int num); // append the operator stored at s[num]
        bool doOther(string value, int row, int column); // append a relational operator; false if value is not one
        void show(); // print the collected tokens
    private:
        t k[40],s[30];
        // k holds keywords, s holds all other symbols; both arrays are larger
        // than the built-in entries so that user-added entries fit
        char input[20]; // input buffer; described as a line buffer that is flushed at each newline
        result *res,*p;// res: head of the singly linked result list, p: node to append after
        int k_num,c_num,b_num;// offsets for user-added keywords (k_num), operators (c_num) and delimiters (b_num)
};

#endif
  • lex.cpp
#include "lex.h"
#include<cstring>

// NOTE(review): LEX also declares a data member named `res`, and inside LEX
// member functions that member is found first, so this file-scope object
// looks unused by the code in this file -- confirm before relying on it.
result *res = new result;// singly linked list holding the results

void LEX::show()
{
    result *q;
    q = res->next;
    cout<<"value\t\ttype_num\ttype_code\ttpye\t\t\trow\t\tcolumn"<<endl;
    do{
        cout<<q->value<<"\t\t"<<q->type_num<<"\t\t"\
        <<q->type_code<<"\t\t"<<q->type\
        <<"\t"<<q->row<<"\t\t"<<q->column<<endl;
        q = q->next;
    }while(q);
}

bool LEX::doOther(string value, int row, int colum){
    result *r = new result;
    for(int i = 0; i<6 ;i++){
        if(s[i].value.find(value) != string::npos){
            r->value = value;
            r->type_num = s[i].i;
            r->type_code = s[i].type;
            r->type = "Relation Opreator";
            r->row = row;
            r->column = colum;
            r->next = NULL;
            p->next = r;
            p = p->next;
            return true;
        }else
            continue;
    }
    return false;
}

void LEX::doOpreator(int row, int column, int num){
    result *r = new result;
    r->value = s[num].value;
    r->type_num = s[num].i;
    r->type_code = s[num].type;
    r->type = "Opreator        ";
    r->row = row;
    r->column = column;
    r->next = NULL;
    p->next = r;
    p = p->next;
}

void LEX::doDelimiter(int row, int column, int num){
    result *r = new result;
    r->value = s[num].value;
    r->type_num = s[num].i;
    r->type_code = s[num].type;
    r->type = "Delimiter        ";
    r->row = row;
    r->column = column;
    r->next = NULL;
    p->next = r;
    p = p->next;
}

// Appends an identifier token to the tail of the result list.
// Identifiers always get category number 1 and internal code 6.
void LEX::doId(string value, int row,int column)
{
    result *node = new result;
    node->next = NULL;
    node->row = row;
    node->column = column;
    node->type = "ID               ";
    node->type_code = 6; // fixed internal code for identifiers
    node->type_num = 1;  // fixed category number for identifiers
    node->value = value;

    // Link behind the current tail and make the new node the tail.
    p->next = node;
    p = node;
}

// Appends a keyword token for `value` to the tail of the result list.
// Only the first keyword-table entry equal to `value` is used; if there is
// none, nothing is appended.
//
// Stopping at the first match matters: appending the same node for a second
// (duplicate) entry would link the node to itself and turn the list into a
// cycle. The node is allocated only once a match is found, and the scan never
// leaves the k[] array.
void LEX::doKey(string value, int row, int column){
    // CURRENT_K is a macro; it is used as the sole initializer so this
    // statement is valid however the macro's replacement text is terminated.
    int limit = CURRENT_K;
    limit += k_num; // one past the last filled slot
    const int capacity = static_cast<int>(sizeof(k) / sizeof(k[0]));
    if(limit > capacity)
        limit = capacity;

    for(int i = 0 ;i<limit ;i++){
        if(k[i].value != value)
            continue;

        result *node = new result;
        node->value = value;
        node->type_num = k[i].i;
        node->type_code = k[i].type;
        node->type = "key word         ";
        node->row = row;
        node->column = column;
        node->next = NULL;
        p->next = node;
        p = node;
        return;
    }
}

// Appends a numeric-constant token to the tail of the result list.
// Constants always get category number 0 and internal code 5.
void LEX::doNum(string value, int row, int column)
{
    result *node = new result;
    node->next = NULL;
    node->row = row;
    node->column = column;
    node->type = "number           ";
    node->type_code = 5; // fixed internal code for constants
    node->type_num = 0;  // fixed category number for constants
    node->value = value;

    // Tail insertion: link behind the current tail, then advance the tail.
    p->next = node;
    p = node;
}

// Looks ch up among the operator slots s[6]..s[14] by comparing it with the
// first character of each entry.
// Returns the index of the first matching slot, or -1 if there is none.
int LEX::isOpreator(char ch)
{
    for(int slot = 6; slot < 15; ++slot){
        if(s[slot].value[0] == ch)
            return slot;
    }
    return -1;
}

// Looks ch up among the delimiter slots s[15]..s[29] by comparing it with the
// first character of each entry.
// Returns the index of the first matching slot, or -1 if there is none.
int LEX::isDelimiter(char ch)
{
    for(int slot = 15; slot < 30; ++slot){
        if(s[slot].value[0] == ch)
            return slot;
    }
    return -1;
}

// Reports whether ch is equal to any of the 40 slots of the keyword table k.
bool LEX::isKey(string ch)
{
    for(int slot = 0; slot < 40; ++slot){
        if(ch == k[slot].value)
            return true;
    }
    return false;
}

// Reports whether ch is one of the characters '0'..'9'.
bool LEX::isDigit(char ch)
{
    return ('0' <= ch) && (ch <= '9');
}

// Reports whether ch is one of the characters 'a'..'z' or 'A'..'Z'.
bool LEX::isLetter(char ch)
{
    const bool lowerCase = ('a' <= ch) && (ch <= 'z');
    const bool upperCase = ('A' <= ch) && (ch <= 'Z');
    return lowerCase || upperCase;
}
// Tokenizes the first `len` characters of `content` line by line and appends
// every recognized token to the result list; unrecognized characters are
// reported on standard output.
//
//   content  text to analyze
//   len      number of leading characters of content to analyze; values
//            larger than the text are clamped, values <= 0 analyze nothing
//
// Positions reported with each token: `row` is the 1-based line number and
// `column` is the 1-based ordinal of the token within its line (it counts
// tokens, not characters).
//
// Differences from the previous implementation, all of them defect fixes:
//   * each line is held in a std::string instead of being copied into the
//     fixed 20-byte member buffer `input`, which overflowed on longer lines;
//   * a last line that is not terminated by '\n' is analyzed too;
//   * skipping blanks can no longer step past the end of the line;
//   * tab and carriage return are skipped like spaces instead of being
//     reported as illegal characters;
//   * two-character relational operators such as "<=" are really assembled
//     from both characters and consumed as one token.
void LEX::scaner(string content,int len)
{
    const int available = static_cast<int>(content.length());
    if(len > available)
        len = available;

    int row = 0;
    int lineStart = 0; // index of the first character of the current line
    for(int index = 0; index <= len; index++){
        const bool atEnd = (index == len);
        if(!atEnd && content[index] != '\n')
            continue;
        if(atEnd && lineStart >= len)
            break; // nothing left after the final newline

        const string line = content.substr(lineStart, index - lineStart);
        lineStart = index + 1;
        row++;

        int column = 0; // token counter, restarts on every line
        const string::size_type lineLen = line.length();
        string::size_type pos = 0;
        while(pos < lineLen){
            const char ch = line[pos];
            int num = -1; // table index of a matched delimiter / operator

            if(ch == ' ' || ch == '\t' || ch == '\r'){
                // Whitespace separates tokens and is never a token itself.
                pos++;
            }else if(isDigit(ch)){
                // Numeric constant: a maximal run of digits.
                string::size_type end = pos + 1;
                while(end < lineLen && isDigit(line[end]))
                    end++;
                column++;
                doNum(line.substr(pos, end - pos),row,column);
                pos = end;
            }else if(isLetter(ch)){
                // Keyword or identifier: a maximal run of letters.
                string::size_type end = pos + 1;
                while(end < lineLen && isLetter(line[end]))
                    end++;
                const string word = line.substr(pos, end - pos);
                column++;
                if(isKey(word))
                    doKey(word,row,column);
                else
                    doId(word,row,column);
                pos = end;
            }else if((num = isDelimiter(ch)) != -1){
                // Single-character delimiter.
                column++;
                doDelimiter(row,column,num);
                pos++;
            }else if((num = isOpreator(ch)) != -1){
                // Single-character operator.
                column++;
                doOpreator(row,column,num);
                pos++;
            }else{
                // Relational operator or illegal character.
                column++;

                // Try the two-character form first, but only when the next
                // character cannot start a token of another kind.
                if(pos + 1 < lineLen){
                    const char next = line[pos + 1];
                    const bool blank = (next == ' ' || next == '\t' || next == '\r');
                    if(!blank && !isDigit(next) && !isLetter(next) &&
                       isDelimiter(next) == -1 && isOpreator(next) == -1 &&
                       doOther(line.substr(pos, 2),row,column)){
                        pos += 2;
                        continue;
                    }
                }

                const string single(1, ch);
                if(!doOther(single,row,column))
                    cout<<"Error: > '"<<single<<"' <\t at:("<<row<<", "<<column<<")"<<endl;
                pos++;
            }
        }
    }
}

// Writes the command-line usage summary to standard output.
void LEX::help()
{
    cout<<"Usage: ./lex.exe [optinos] "<<endl
        <<"Options:"<<endl
        <<"\t-h\t\t\tDisplay this information."<<endl
        <<"\t-a [i type value]\tadd new characters."<<endl
        <<"\t-g [file]\t\tget content from file.txt and analysis it."<<endl;
}

void LEX::add2list()
{
     // 读取本地文件,添加用户新增字符
    FILE *fp;
    fp = fopen("lib_local.txt","r");
    if(fp == NULL){
        printf("Failed to open lib_local.txt\n");
        exit(1);
    }
    int n(1);
    char ch;
    while((ch = getc(fp))!=EOF){
        if(ch == '\n')
            n++;
    }
    fclose(fp);
    fp = fopen("lib_local.txt","r");
    int type_num,type_code;
    char value[10];
    for(int i =0 ;i<n-1;i++){
        fscanf(fp, "%d%d%s", &type_num, &type_code, &value);
        if(type_code == 1){
            // k_num -> max = 33
            if(k_num<=33){
            int index = CURRENT_K;
            index += k_num;
            k[index].i = type_num;
            k[index].type = type_code;
            k[index].value = value;
            k_num++;
            }else{
                cout<<"Please remove some key words!";
                exit(1);
            }
        }else if(type_code == 2){
            // b_num -> max = 9
            if(b_num <= 9){
            int index = CURRENT_B;
            index += b_num;
            s[index].i = type_num;
            s[index].type = type_code;
            s[index].value = value;
            b_num++;
            }else{
                cout<<"Please remove some Delimiters!";
                exit(1);
            }
        }else if(type_code == 3){
            // c_mun -> max =5
            if(c_num <= 5){
            int index = CURRENT_C;
            index += c_num;
            s[index].i = type_num;
            s[index].type = type_code;
            s[index].value = value;
            c_num++;
            }else{
                cout<<"Please remove some Opreators!";
                exit(1);
            }
        }else if(type_code == 4)
            cout<< "Error:Cannot add arithmetic operators"<<endl;
        else
            cout<<"Error:This type is not supported!"<<endl;
    }
    fclose(fp);
}

void LEX::add(int i, int type, char value[])
{
    FILE *fp;
    fp = fopen("lib_local.txt","a");
    if(fp == NULL){
        printf("Failed to get local file\n");
        exit(1);
    }
    int index;
    if(type == 1 || type == 2 || type == 3){
         fprintf(fp, "%d %d %s\n", i, type, value);
         fclose(fp);
         cout<<"Successful!"<<endl;
    }else if(type == 4)
        cout<< "Error:Cannot add arithmetic operators"<<endl;
    else
        cout<<"Error:This type is not supported!"<<endl;
}

// Fills the built-in keyword and symbol tables, creates the empty result list
// and then loads the user-added entries through add2list().
LEX::LEX(){
    // Built-in keywords: {number, internal code 1, spelling}.
    k[0] = {0, 1, "do"};
    k[1] = {1, 1, "end"};
    k[2] = {2, 1, "for"};
    k[3] = {3, 1, "if"};
    k[4] = {4, 1, "printf"};
    k[5] = {5, 1, "scanf"};
    k[6] = {6, 1, "then"};
    k[7] = {7, 1, "while"};

    /*
    Layout of the symbol table s:
    s[0]-s[5]   relational operators (internal code 4), not extensible
    s[6]-s[14]  arithmetic operators (internal code 3), extensible
    s[15]-s[29] delimiters (internal code 2), extensible
    */
    // "<" used to carry code 3 although it belongs to the relational group;
    // it now uses code 4 like the other five relational entries.
    s[0] = {0x00, 4, "< "};
    s[1] = {0x01, 4, "<="};
    s[2] = {0x02, 4, "= "};
    s[3] = {0x03, 4, ">"};
    s[4] = {0x04, 4, ">="};
    s[5] = {0x05, 4, "<>"};

    s[6] = {0x10, 3, "+"};
    s[7] = {0x11, 3, "-"};
    s[8] = {0x20, 3, "*"};
    s[9] = {0x21, 3, "/"};

    s[15] = {0, 2, ","};
    s[16] = {1, 2, ";"};
    s[17] = {2, 2, "("};
    s[18] = {3, 2, ")"};
    s[19] = {4, 2, "["};
    s[20] = {5, 2, "]"};

    // Offsets of the next free slot relative to CURRENT_K / CURRENT_C /
    // CURRENT_B; they start at 1 because those macros name the last used slot.
    k_num = c_num = b_num =1;

    // Sentinel head of the result list. It is value-initialized and its link
    // is cleared explicitly so an empty list is terminated by NULL instead of
    // an indeterminate pointer.
    p = new result();
    p->next = NULL;
    res = p;
    add2list();
}
  • main.cpp
#include "lex.h"

// Reads the rest of the stream fp into a string (defined below main).
string get_char_txt(FILE *fp);

// Command-line entry point.
//   -h                  print the usage text
//   -a i type value     record a new keyword / delimiter / operator
//   -g file             analyze `file` and print the token table
// Any other invocation prints the usage text. The process returns 1 when no
// option is given, when an option lacks its arguments, or when the input file
// cannot be opened; otherwise 0.
int main(int argc, char *argv[])
{
    string content;
    LEX L;
    if(argc < 2){
        L.help();
        exit(1);
    }

    const string argv1 = argv[1];
    if(argv1 == "-h")
        L.help();
    else if(argv1 =="-a"){
        // "-a" needs three more arguments; without this check argv[2..4]
        // could be null and atoi would dereference a null pointer.
        if(argc < 5){
            L.help();
            return 1;
        }
        const int number = atoi(argv[2]);
        const int type = atoi(argv[3]);
        L.add(number, type, argv[4]);
    }else if(argv1 == "-g"){
        // "-g" needs the name of the file to analyze.
        if(argc < 3){
            L.help();
            return 1;
        }
        // Run the lexical analysis.
        char* file = argv[2];
        FILE *fp = fopen(file,"r");
        if(fp == NULL){
            printf("Failed to open %s\n",file);
            exit(1);
        }
        content = get_char_txt(fp);
        fclose(fp);
        const int len = static_cast<int>(content.length());
        L.scaner(content,len);
        L.show();
    }else
        L.help();
    return 0;
}

// Reads every remaining byte of the stream fp and returns them as a string.
// A null stream yields an empty string.
//
// The character is kept in an int: getc returns an int so that EOF can be
// told apart from every byte value. Storing it in a char first made a 0xFF
// byte look like end of file where char is signed, and made the loop endless
// where char is unsigned.
std::string get_char_txt(FILE *fp)
{
    std::string value;
    if(fp == NULL)
        return value;

    int ch;
    while((ch = getc(fp)) != EOF)
        value += static_cast<char>(ch);
    return value;
}