编译原理之词法分析（C语言）_斯武丶风晴

http://blog.sina.com.cn/u/1741290544

首页博文目录关于我

个人资料

微博

加好友发纸条

写留言加关注

博客等级：
博客积分：

博客访问：
关注人气：
获赠金笔：0支
赠出金笔：0支
荣誉徽章：

正文字体大小：大中小

编译原理之词法分析（C语言）

(2011-04-17 23:55:37)

标签：

杂谈

编译原理之词法分析*（C语言）

一词法

（1）关键字：

begin if then while do end

所有的关键字都是小写。

（2）运算符和界符

: := + - * / < <= <> > >= = ; ( ) #

（3）其他单词是标识符（ID）和整型常数（NUM），通过以下正规式定义：

ID = letter (letter | digit)*

NUM = digit digit*

（4）空格由空白、制表符和换行符组成。空格一般用来分隔ID、NUM、运算符、界符和关键字，词法分析阶段通常被忽略。

二各种单词符号对应的种别码

单词符号	种别码	单词符号	种别码
begin	1	:	17
if	2	:=	18
then	3	<	20
while	4	<>	21
do	5	<=	22
end	6	>	23
letter(letter|digit)*	10	>=	24
digit digit*	11	=	25
+	13	;	26
-	14	(	27
*	15	)	28
/	16	#	0

三词法分析程序的功能：

输入：所给文法的源程序字符串。

输出：二元组（syn,token或sum）构成的序列。

其中：syn为单词种别码；

token为存放的单词自身字符串；

sum为整型常数。

例如：对源程序begin x:=9; if x>9 then x:=2*x+1/3; end #的源文件，经过词法分析后输出如下序列：

(1,begin)(10,x)(18,:=)(11,9)(26,;)(2,if)……

============================================================================
//词法分析
//main.c
//@author langx
#include "stdio.h"                  //定义I/O库所用的某些宏和变量
#include "string.h"                 //定义字符串库函数
#include "conio.h"                  //提供有关屏幕窗口操作函数
#include "ctype.h"                  //分类函数
#include <limits.h>                 // INT_MAX, used to range-check integer constants
#define LEN 6                       /* number of reserved words in rwtab */

/*
 * Scanner state. All of it is global: the caller fills prog, resets p to 0,
 * then calls scaner() repeatedly and reads syn / token / sum after each call.
 */
char prog[80] = {'\0'};             /* source text being scanned */
char token[80] = {'\0'};            /* spelling of the latest keyword, identifier or operator */
char ch;                            /* character most recently read from prog */
int syn;                            /* category code of the latest token (-1 = error, 0 = end) */
int n;                              /* scratch index used while searching rwtab */
int sum;                            /* value of the latest integer constant */
int m;                              /* write index into token */
int p;                              /* read index into prog */
char *rwtab[LEN] = {"begin", "if", "then", "while", "do", "end"};

/* Read the character at p and advance p; yields '\0' outside the buffer. */
static char scaner_getc(void)
{
    char c = (p >= 0 && p < (int)sizeof prog) ? prog[p] : '\0';

    p++;    /* always advance so that a later p-- un-reads exactly one char */
    return c;
}

/* Append c to token, always leaving room for the terminating NUL. */
static void scaner_put(char c)
{
    if (m < (int)sizeof token - 1)
        token[m++] = c;
}

/*
 * Scan the next token from prog, starting at index p.
 *
 * On return:
 *   syn   - category code: 1..6 reserved words (index in rwtab + 1),
 *           10 identifier, 11 integer constant, 13..28 operators and
 *           delimiters, 0 for '#' or for the end of the buffer,
 *           -1 for an unrecognised character or an integer constant
 *           that does not fit in an int.
 *   token - NUL-terminated spelling for reserved words, identifiers and
 *           operators; empty for integer constants and errors.
 *   sum   - value of the constant when syn == 11 (INT_MAX if out of range).
 *   p     - index of the first character after the token.
 *
 * Returns syn, so callers may either use the return value or read the global.
 */
int scaner(void)
{
    m = 0;
    sum = 0;
    memset(token, 0, sizeof token);

    /* Blanks, tabs and line breaks only separate tokens. */
    do {
        ch = scaner_getc();
    } while (isspace((unsigned char)ch));

    if (isalpha((unsigned char)ch)) {
        /* letter (letter | digit)* */
        while (isalnum((unsigned char)ch)) {
            scaner_put(ch);
            ch = scaner_getc();
        }
        p--;                        /* un-read the lookahead character */

        syn = 10;                   /* identifier unless found in rwtab */
        for (n = 0; n < LEN; n++) {
            if (strcmp(token, rwtab[n]) == 0) {
                syn = n + 1;
                break;
            }
        }
    } else if (isdigit((unsigned char)ch)) {
        /* digit digit* */
        int overflow = 0;

        while (isdigit((unsigned char)ch)) {
            int digit = ch - '0';

            /* Test before multiplying: signed overflow is undefined. */
            if (!overflow && sum > (INT_MAX - digit) / 10)
                overflow = 1;
            if (!overflow)
                sum = sum * 10 + digit;
            ch = scaner_getc();
        }
        p--;                        /* un-read the lookahead character */

        if (overflow) {
            sum = INT_MAX;
            syn = -1;
        } else {
            syn = 11;
        }
    } else {
        switch (ch) {
        case '<':                   /* <  <>  <= */
            scaner_put(ch);
            ch = scaner_getc();
            if (ch == '>') {
                syn = 21;
                scaner_put(ch);
            } else if (ch == '=') {
                syn = 22;
                scaner_put(ch);
            } else {
                syn = 20;
                p--;
            }
            break;
        case '>':                   /* >  >= */
            scaner_put(ch);
            ch = scaner_getc();
            if (ch == '=') {
                syn = 24;
                scaner_put(ch);
            } else {
                syn = 23;
                p--;
            }
            break;
        case ':':                   /* :  := */
            scaner_put(ch);
            ch = scaner_getc();
            if (ch == '=') {
                syn = 18;
                scaner_put(ch);
            } else {
                syn = 17;
                p--;
            }
            break;
        case '+':
            syn = 13;
            scaner_put(ch);
            break;
        case '-':
            syn = 14;
            scaner_put(ch);
            break;
        case '*':
            syn = 15;
            scaner_put(ch);
            break;
        case '/':
            syn = 16;
            scaner_put(ch);
            break;
        case '=':
            syn = 25;
            scaner_put(ch);
            break;
        case ';':
            syn = 26;
            scaner_put(ch);
            break;
        case '(':
            syn = 27;
            scaner_put(ch);
            break;
        case ')':
            syn = 28;
            scaner_put(ch);
            break;
        case '#':
            syn = 0;
            scaner_put(ch);
            break;
        case '\0':
            /*
             * End of the buffer without a '#': report end of input and stay
             * on the NUL so repeated calls cannot walk past the buffer.
             */
            p--;
            syn = 0;
            break;
        default:
            syn = -1;
            break;
        }
    }

    return syn;
}

main()
{
    printf("\nThe significance of the figures:\n"
           "1.figures 1 to 6 said Keyword\n"
           "2.figures 10 and 11 said Other indicators\n"
           "3.figures 13 to 28 said Operators\n");

    p=0;

    printf("\nplease input string: (end with '#')\n");
    do
    {
        ch=getchar();
        prog[p++]=ch;
    }
    while(ch!='#');

    p=0;

    do
    {
        scaner();
        switch(syn)
        {
        case 11:
            printf("(%d,%d)\n",syn,sum);
            break;
        case -1:
            printf("\n ERROR;\n");
            break;
        default:
            printf("(%d,%s)\n",syn,token);
        }
    }
    while(syn!=0);

    getch();
}

阅读┊ 收藏 ┊ 喜欢 ▼ ┊打印┊举报/Report

前一篇：C#报时小程序1.0.0开发

后一篇：编译原理之语法分析（C语言）

新浪BLOG意见反馈留言板　欢迎批评指正