视频
简单的手写,理解原理 + 花时间 = 写出它
核心
- 遍历字符流,不需回溯
- 没用DFA,全程if-else,思路很好理解
- 识别单词的时候分为:1-标识符关键字,2-整数,3-复合运算符,4-单独字符
- 类别码是从 0 开始的连续整数(SYM_NULL=0,SYM_IDENTIFIER=1,SYM_NUMBER=2,…),用enum在头文件里定义了。
- 单独字符的类别码统一定义在ssym数组里(与csym数组按下标一一对应),关键字的类别码统一定义在wsym数组里(与word数组按下标一一对应)。因为它们是一一对应的,用数组查表比较简洁。
- 报错做的很简陋,几乎没有
- 识别到token就直接输出了,万事从简(懒)…理解思想和方法就可再改进😃
- C 库函数 int isalpha( int c ):判断字符是否是字母,int isdigit ( int c ):判断字符是否是数字。当然也可以直接用ASCII码。
- C 库函数 int strcmp(const char *str1, const char *str2) 把 str1 所指向的字符串和 str2 所指向的字符串进行比较,两者相等时返回 0。
- 如果视频看不懂就是我没讲清楚,有点口齿不清…sorry🤧
my.c
#include "my.h"
/*
 * Print the diagnostic for error code n to stdout as "Error <n>: <message>".
 *
 * n indexes the err_msg table. A code outside the table is reported as an
 * unknown code instead of reading past the end of the array.
 */
void error(int n)
{
    const int count = (int)(sizeof err_msg / sizeof err_msg[0]);

    if (n < 0 || n >= count)
    {
        printf("Error %3d: (unknown error code)\n", n);
        return;
    }
    printf("Error %3d: %s\n", n, err_msg[n]);
}
void lexer(FILE *fp)
{
ch = fgetc(fp);
while (ch != EOF)
{
while (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')
{
ch = fgetc(fp);
}
if (isalpha(ch)) // 当前输入为字母,则应该为关键字或标识符
{
char a[MAXIDLEN + 1]; // 当前读取到的单词
int k = 0;
for (; (isalpha(ch) || isdigit(ch)) && k < MAXIDLEN; k++)
{
a[k] = ch;
ch = fgetc(fp);
}
a[k] = '\0'; // 字符数组和字符串的区别就是结尾少了\0,一定要加上!
// 检查是否为关键字
int i = 1;
for (; i <= NRW; i++)
{
if (strcmp(a, word[i]) == 0)
break;
}
if (i <= NRW)
{
sym = wsym[i]; // symbol is a reserved word
}
else
{
sym = SYM_IDENTIFIER; // symbol is an identifier
}
printf("(%d,%s)\n", sym, a);
}
else if (isdigit(ch))
{ // symbol is a number.
sym = SYM_NUMBER;
int k = 0;
int num = 0;
while (isdigit(ch))
{
num = num * 10 + ch - '0';
ch = fgetc(fp);
k++;
}
if (k > MAXNUMLEN)
error(25); // The number is too great.
else
{
printf("(%d,%d)\n", sym, num);
}
}
else if (ch == ':')
{
ch = fgetc(fp);
if (ch == '=')
{
sym = SYM_BECOMES; // :=
ch = fgetc(fp);
printf("(%d,:=)\n", sym);
}
else
{
sym = SYM_NULL; // illegal?
}
}
else if (ch == '>')
{
ch = fgetc(fp);
if (ch == '=')
{
sym = SYM_GEQ; // >=
ch = fgetc(fp);
printf("(%d,>=)\n", sym);
}
else
{
sym = SYM_GTR; // >
printf("(%d,=)\n", sym);
}
}
else if (ch == '<')
{
ch = fgetc(fp);
if (ch == '=')
{
sym = SYM_LEQ; // <=
ch = fgetc(fp);
printf("(%d,<=)\n", sym);
}
else if (ch == '>')
{
sym = SYM_NEQ; // <>
ch = fgetc(fp);
}
else
{
sym = SYM_LES; // <
printf("(%d,<)\n", sym);
}
}
else if (ch == '{')
{ //忽略注释
int end = 1;
while (end)
{
ch = fgetc(fp);
if (ch == '}')
end = 0;
}
ch = fgetc(fp);
}
else
{ // other tokens : '+', '-', '*', '/', '(', ')', '=', ',', '.', ';'
//代码和识别关键字那里类似
int i = 1;
for (; i <= NSYM; i++)
{
if (ch == csym[i])
break;
}
if (i <= NSYM)
{
sym = ssym[i];
printf("(%d,%c)\n", sym, ch);
ch = fgetc(fp);
}
//不应该出现的字符
else
{
printf("Fatal Error: Unknown character.\n");
exit(1);
}
}
}
printf("—————文件读取结束—————");
}
/*
 * Entry point: run the lexer over "source1.txt" in the current directory.
 * Returns 0 on success, or EXIT_FAILURE if the file cannot be opened.
 */
int main(void)
{
    FILE *fp = fopen("source1.txt", "r");
    if (fp == NULL)
    {
        /* Without this check a missing file would hand NULL to fgetc(). */
        perror("source1.txt");
        return EXIT_FAILURE;
    }

    lexer(fp);
    fclose(fp);
    return 0;
}
my.h
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* Table sizes and token length limits used by the lexer. */
#define NRW       11 /* reserved words stored in word[1..NRW] / wsym[1..NRW] */
#define MAXNUMLEN 14 /* most digits a number literal may have */
#define NSYM      10 /* single-character symbols in csym[1..NSYM] / ssym[1..NSYM] */
#define MAXIDLEN  10 /* characters kept for an identifier or keyword */
/* Last character read. Declared int, not char, because it receives the
 * result of fgetc(): an int is needed to tell EOF apart from every byte
 * value and to keep the argument of isalpha()/isdigit() in range. */
int ch;
int sym; // last symbol read
// char line[80]; // input buffer, so the offending line can be shown when reporting an error
/*
 * Characters that form a token on their own. Slot 0 is a placeholder;
 * csym[i] pairs with the symbol code ssym[i] for i in 1..NSYM.
 */
char csym[NSYM + 1] = {
    [0]  = ' ',
    [1]  = '+',
    [2]  = '-',
    [3]  = '*',
    [4]  = '/',
    [5]  = '(',
    [6]  = ')',
    [7]  = '=',
    [8]  = ',',
    [9]  = '.',
    [10] = ';'
};
/*
 * Reserved words. Slot 0 is a placeholder; word[i] pairs with the symbol
 * code wsym[i] for i in 1..NRW.
 */
char *word[NRW + 1] = {
    [0]  = "", /* place holder */
    [1]  = "begin",
    [2]  = "call",
    [3]  = "const",
    [4]  = "do",
    [5]  = "end",
    [6]  = "if",
    [7]  = "odd",
    [8]  = "procedure",
    [9]  = "then",
    [10] = "var",
    [11] = "while"
};
/* Symbol (token category) codes, numbered consecutively from 0. */
enum symtype {
    SYM_NULL,       /*  0 */
    SYM_IDENTIFIER, /*  1 */
    SYM_NUMBER,     /*  2 */
    SYM_PLUS,       /*  3  +  */
    SYM_MINUS,      /*  4  -  */
    SYM_TIMES,      /*  5  *  */
    SYM_SLASH,      /*  6  /  */
    SYM_ODD,        /*  7  odd */
    SYM_EQU,        /*  8  =  */
    SYM_NEQ,        /*  9  <> */
    SYM_LES,        /* 10  <  */
    SYM_LEQ,        /* 11  <= */
    SYM_GTR,        /* 12  >  */
    SYM_GEQ,        /* 13  >= */
    SYM_LPAREN,     /* 14  (  */
    SYM_RPAREN,     /* 15  )  */
    SYM_COMMA,      /* 16  ,  */
    SYM_SEMICOLON,  /* 17  ;  */
    SYM_PERIOD,     /* 18  .  */
    SYM_BECOMES,    /* 19  := */
    SYM_BEGIN,      /* 20 */
    SYM_END,        /* 21 */
    SYM_IF,         /* 22 */
    SYM_THEN,       /* 23 */
    SYM_WHILE,      /* 24 */
    SYM_DO,         /* 25 */
    SYM_CALL,       /* 26 */
    SYM_CONST,      /* 27 */
    SYM_VAR,        /* 28 */
    SYM_PROCEDURE   /* 29 */
};
/* Symbol code of each reserved word: wsym[i] is the code for word[i]. */
int wsym[NRW + 1] = {
    [0]  = SYM_NULL,
    [1]  = SYM_BEGIN,
    [2]  = SYM_CALL,
    [3]  = SYM_CONST,
    [4]  = SYM_DO,
    [5]  = SYM_END,
    [6]  = SYM_IF,
    [7]  = SYM_ODD,
    [8]  = SYM_PROCEDURE,
    [9]  = SYM_THEN,
    [10] = SYM_VAR,
    [11] = SYM_WHILE
};
/* Symbol code of each single-character token: ssym[i] is the code for csym[i]. */
int ssym[NSYM + 1] = {
    [0]  = SYM_NULL,
    [1]  = SYM_PLUS,
    [2]  = SYM_MINUS,
    [3]  = SYM_TIMES,
    [4]  = SYM_SLASH,
    [5]  = SYM_LPAREN,
    [6]  = SYM_RPAREN,
    [7]  = SYM_EQU,
    [8]  = SYM_COMMA,
    [9]  = SYM_PERIOD,
    [10] = SYM_SEMICOLON
};
/*
 * Error messages, indexed by error code. Codes 0 and 26..31 have no text
 * and are kept as empty strings so every slot is a valid C string.
 */
char *err_msg[] = {
    [0]  = "",
    [1]  = "Found ':=' when expecting '='.",
    [2]  = "There must be a number to follow '='.",
    [3]  = "There must be an '=' to follow the identifier.",
    [4]  = "There must be an identifier to follow 'const', 'var', or 'procedure'.",
    [5]  = "Missing ',' or ';'.",
    [6]  = "Incorrect procedure name.",
    [7]  = "Statement expected.",
    [8]  = "Follow the statement is an incorrect symbol.",
    [9]  = "'.' expected.",
    [10] = "';' expected.",
    [11] = "Undeclared identifier.",
    [12] = "Illegal assignment.",
    [13] = "':=' expected.",
    [14] = "There must be an identifier to follow the 'call'.",
    [15] = "A constant or variable can not be called.",
    [16] = "'then' expected.",
    [17] = "';' or 'end' expected.",
    [18] = "'do' expected.",
    [19] = "Incorrect symbol.",
    [20] = "Relative operators expected.",
    [21] = "Procedure identifier can not be in an expression.",
    [22] = "Missing ')'.",
    [23] = "The symbol can not be followed by a factor.",
    [24] = "The symbol can not be as the beginning of an expression.",
    [25] = "The number is too great.",
    [26] = "",
    [27] = "",
    [28] = "",
    [29] = "",
    [30] = "",
    [31] = "",
    [32] = "There are too many levels."
};
source1.txt
const a=10; {constant declaration}
const b=20;
var c; {variable declaration}
procedure p; {procedure declaration}
begin
c:=b+a
end;
begin
call p
end.
版权声明:本文为qq_44850725原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。