C#词法分析器设计报告
C#词法分析器设计报告
同济大学2004级计算机4班
范成
040648
程序界面
开发环境:
Microsoft Visual Studio 2005
程序运行环境:
任何安装了Microsoft .NET Framework 2.0(或2.0以上版本)的计算机。
Microsoft .NET Framework 2.0下载地址:
;pn=0.html
开发语言:
Microsoft C#.NET
设计思想:
根据教材给出的扫描程序框架,自行编写扫描程序的各个方法(Method),并设计程
序界面,为各按钮编写Click事件响应方法,由“SCAN”按钮的Click事件响应方法调用
扫描程序,依次对源程序(来自文件或用户键入)的每个字符进行扫描并识别。
扫描程序的返回值有以下几种类型:
标识符 < @ID, 该标识符在标识符表中的位置 >
保留字 < @RESERVED_WORD_保留字编号, - >
整型常数 < @INT, 该数在整型常数表中的位置 >
浮点型常数 < @FLOAT, 该数在浮点型常数表中的位置 >
字符串 < @STRING, 该字符串在字符串表中的位置 >
(null string) < @STRING_NULL, 该字符串在字符串表中的位置 >
字符 < @CHAR, 该字符在字符表中的位置 >
符号 < @符号助记符, - >
源程序扫描流程图(SCAN按钮按下后):
开始
→ 将搜索指示器指向源程序第一个字符
→ 判断:源程序扫描完毕?
   YES → 结束
   NO → 扫描当前字符,并将搜索指示器向后移动,然后回到“判断”
状态转换图:
字母或数字或下划线 空白
字母或下划线 非字母与数字与下划线 * 1 0 2
非= / * 23 24 = 数字 25 / 26 数字 非数字 * 3 4 非* * / * 27 28 29 ~ - 5 - 非* 30 31 ` - 6 32 = 33 @ + 7 非+ * 34 35 # + 8 36 = 37 $ 9 = 非= * 38 39 % 10 = 40
非= ! * ^ 41 42 11
= 43 ( 12
非& & * 44 45 ) 13 & 46 [ 14 * 非= * 47 48
= ] 49 15
> 非 > * 50 51 { 16 > 52 = } 53 17
< 非 < * 54 55 , 18 < 56 = . 57 19
非” ? 20 ” “ 58 59 : 21 非? ? „ 60 61 62 ; 22
源程序:
using System;
using System.Collections.Generic; using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.IO;
namespace Scanner
{
public partial class Scanner : Form
{
// Builds the form: runs the designer-generated InitializeComponent and then
// selects all text in the input box (rtbInput).
public Scanner()
{
InitializeComponent();
rtbInput.SelectAll();
}
private string[] token; // identifier table
private int tokenIndex = 0; // next free slot in the identifier table
private long[] constant; // integer constant table
private int constantIndex = 0; // next free slot in the integer constant table
private double[] dblConstant; // floating-point constant table
private int dblConstantIndex = 0; // next free slot in the floating-point constant table
private string[] strConst; // string literal table
private int strConstIndex = 0; // next free slot in the string literal table
private char[] charConst; // character literal table
private int charConstIndex = 0; // next free slot in the character literal table
private string input; // source text to be scanned
private int inputIndex = 0; // position in input of the next character to read
private char ch; // most recently read source character
private string strToken; // characters of the token currently being assembled
private bool scanned = false; // set once a scan has produced output
// Reserved-word lookup table. The position of an entry matters: reserve()
// returns the 1-based position of a match as the keyword's code, so entries
// must not be reordered without changing the emitted codes.
private static string[] tokenReserve =
{ "abstract", "do", "in", "protected", "true",
"as", "double", "int", "public", "try",
"base", "else", "interface", "readonly",
"typeof",
"bool", "enum", "internal", "ref", "uint",
"break", "event", "is", "return", "ulong",
"byte", "explicit", "lock", "sbyte",
"unchecked",
"case", "extern", "long", "sealed", "unsafe",
"catch", "false", "namespace", "short", "ushort",
"char", "finally", "new", "sizeof", "using",
"checked", "fixed", "null", "stackalloc", "virtual",
"class", "float", "object", "static", "volatile",
"const", "for", "operator", "string", "void",
"continue", "foreach", "out", "struct", "while",
"decimal", "goto", "override", "switch",
"default", "if", "params", "this",
"delegate", "implicit", "private", "throw"
}; // table of C# reserved keywords (77 entries)
// Reads the character at inputIndex into ch and then advances inputIndex by one.
// No bounds check is made here: callers are expected to test inputIndex against
// input.Length first, otherwise the indexer throws.
private void getChar()
{
ch = input[inputIndex];
inputIndex++;
}
// Appends the current character ch to the end of strToken.
private void concat()
{
strToken += ch;
}
// Returns true when ch is an ASCII letter (a-z or A-Z).
private bool isLetter()
{
    bool isLower = ch >= 'a' && ch <= 'z';
    bool isUpper = ch >= 'A' && ch <= 'Z';
    return isLower || isUpper;
}
// Returns true when ch is an ASCII decimal digit (0-9).
private bool isDigit()
{
    return ch >= '0' && ch <= '9';
}
// Looks strToken up in the reserved-word table.
// Returns the keyword's code (its 1-based position in tokenReserve),
// or 0 when strToken is not a reserved word.
private int reserve()
{
    for (int code = 1; code <= tokenReserve.Length; code++)
    {
        if (tokenReserve[code - 1] == strToken)
        {
            return code;
        }
    }
    return 0;
}
// Moves the search pointer (inputIndex) back by one character so that the
// character just read is seen again by the next getChar().
// NOTE: the original comment also said ch is reset to a blank character, but
// this method does not touch ch; it keeps the last character read.
private void retract()
{
inputIndex--;
}
// Inserts the identifier held in strToken into the identifier table unless it
// is already present. Returns the zero-based position of the identifier in the table.
// Writing past the end of a full table throws IndexOutOfRangeException, as before.
private int insertId()
{
    // Only the first tokenIndex slots hold identifiers; the remaining slots are
    // still null and can never match, so there is no need to visit them.
    for (int i = 0; i < tokenIndex; i++)
    {
        if (token[i] == strToken)
        {
            return i;
        }
    }

    // Not present yet: store it in the next free slot.
    token[tokenIndex] = strToken;
    tokenIndex++;
    return tokenIndex - 1;
}
// Inserts the integer constant spelled by strToken into the integer constant
// table unless it is already present. Returns the zero-based position of the
// constant in the table.
// Throws OverflowException when the digits do not fit in a long, and
// IndexOutOfRangeException when the table is full.
private int insertConst()
{
    // The table stores longs, so parse as long; invariant culture keeps the
    // result independent of the machine's regional settings.
    long parsed = long.Parse(strToken, System.Globalization.CultureInfo.InvariantCulture);

    // Search only the populated prefix. Unused slots hold the default value 0,
    // so searching them would make the literal 0 "match" an empty slot and
    // later share its position with whatever constant is stored there next.
    for (int i = 0; i < constantIndex; i++)
    {
        if (constant[i] == parsed)
        {
            return i;
        }
    }

    // Not present yet: store it in the next free slot.
    constant[constantIndex] = parsed;
    constantIndex++;
    return constantIndex - 1;
}
// Inserts the floating-point constant spelled by strToken into the
// floating-point constant table unless it is already present. Returns the
// zero-based position of the constant in the table.
// Throws IndexOutOfRangeException when the table is full.
private int insertDblConst()
{
    // The scanner only accepts '.' as the decimal separator, so parse with the
    // invariant culture; the current culture may expect ',' instead.
    double parsed = double.Parse(strToken, System.Globalization.CultureInfo.InvariantCulture);

    // Search only the populated prefix. Unused slots hold the default value
    // 0.0 and would otherwise be mistaken for a stored constant 0.0.
    for (int i = 0; i < dblConstantIndex; i++)
    {
        // Exact comparison is intended: two literals are the same table entry
        // only when they parse to the same double.
        if (dblConstant[i] == parsed)
        {
            return i;
        }
    }

    // Not present yet: store it in the next free slot.
    dblConstant[dblConstantIndex] = parsed;
    dblConstantIndex++;
    return dblConstantIndex - 1;
}
// Inserts the string literal held in strToken into the string table unless it
// is already present. Returns the zero-based position of the string in the table.
// Throws IndexOutOfRangeException when the table is full.
private int insertString()
{
    // Only the first strConstIndex slots hold strings; the rest are still null.
    for (int i = 0; i < strConstIndex; i++)
    {
        if (strConst[i] == strToken)
        {
            return i;
        }
    }

    // Not present yet: store it in the next free slot.
    strConst[strConstIndex] = strToken;
    strConstIndex++;
    return strConstIndex - 1;
}
// Inserts the character held in ch (not strToken) into the character table
// unless it is already present. Returns the zero-based position of the
// character in the table.
// Throws IndexOutOfRangeException when the table is full.
private int insertChar()
{
    // Search only the populated prefix. Unused slots hold the default value
    // '\0' and would otherwise be mistaken for a stored '\0' character.
    for (int i = 0; i < charConstIndex; i++)
    {
        if (charConst[i] == ch)
        {
            return i;
        }
    }

    // Not present yet: store it in the next free slot.
    charConst[charConstIndex] = ch;
    charConstIndex++;
    return charConstIndex - 1;
}
// Scans one lexical unit starting at inputIndex and leaves inputIndex just
// past it. The caller must ensure inputIndex < input.Length on entry.
//
// Returns one of:
//   - a description line "lexeme < @KIND, value >" where value is a table
//     position, or "-" for reserved words and symbols;
//   - an "INVALID USE OF ..." message for a malformed literal or comment;
//   - "ch, UNDEFINED, -" for a character the scanner does not know;
//   - "" when only whitespace, a backslash or a comment was consumed.
private string scan()
{
    strToken = "";
    int code, value;
    getChar();
    // Identifiers and reserved words: a letter or underscore followed by
    // letters, digits or underscores.
    if (isLetter() || ch == '_')
    {
        if (input.Length > 1)
        {
            while ((isLetter() || isDigit() || (ch == '_')) && inputIndex < input.Length)
            {
                concat();
                getChar();
            }
            // The loop ends either on a character that is not part of the word
            // (push it back) or because the input ran out, in which case the
            // last character read may still belong to the word.
            if (inputIndex < input.Length) retract();
            else if (inputIndex == input.Length)
            {
                if (isLetter() || isDigit() || (ch == '_')) concat();
                else retract();
            }
            code = reserve();
            if (code == 0)
            {
                value = insertId();
                return (strToken + " < @ID" + ", " + value.ToString() + " >");
            }
            else return (strToken + " < @RESERVED_WORD_" + code.ToString() + ", - >");
        }
        else
        {
            // The whole input is this single character; it is reported at
            // position 0 without going through the identifier table.
            return (ch + " < @ID" + ", 0 >");
        }
    }
    // Integer and floating-point constants.
    else if (isDigit())
    {
        if (input.Length > 1)
        {
            while (isDigit() && inputIndex < input.Length)
            {
                concat();
                getChar();
            }
            // A '.' with more input behind it turns the number into a float.
            if (ch == '.' && inputIndex < input.Length)
            {
                concat();
                getChar();
                while (isDigit() && inputIndex < input.Length)
                {
                    concat();
                    getChar();
                }
                if (inputIndex < input.Length) retract();
                else if (inputIndex == input.Length)
                {
                    if (isDigit()) concat();
                    else retract();
                }
                value = insertDblConst();
                return (strToken + " < @FLOAT" + ", " + value.ToString() + " >");
            }
            if (inputIndex < input.Length) retract();
            else if (inputIndex == input.Length)
            {
                if (isDigit()) concat();
                else retract();
            }
            value = insertConst();
            return (strToken + " < @INT" + ", " + value.ToString() + " >");
        }
        else
        {
            // The whole input is this single digit.
            return (ch + " < @INT, 0 >");
        }
    }
    // Operators: each branch looks one character ahead when more input is
    // available and pushes the character back if it does not extend the operator.
    else if (ch == '=')
    {
        if (inputIndex < input.Length)
        {
            getChar();
            if (ch == '=') return ("== < @EQUAL, - >");
            retract();
            return ("= < @ASSIGN, - >");
        }
        else return ("= < @ASSIGN, - >");
    }
    else if (ch == '!')
    {
        if (inputIndex < input.Length)
        {
            getChar();
            if (ch == '=') return ("!= < @NOT_EQUAL, - >");
            retract();
            return ("! < @NOT, - >");
        }
        else return ("! < @NOT, - >");
    }
    else if (ch == '+')
    {
        if (inputIndex < input.Length)
        {
            getChar();
            if (ch == '+') return ("++ < @SELF_INCREASE, - >");
            else if (ch == '=') return ("+= < @SELF_INCREASE_BY_SIZE, - >");
            retract();
            return ("+ < @PLUS, - >");
        }
        else return ("+ < @PLUS, - >");
    }
    else if (ch == '-')
    {
        if (inputIndex < input.Length)
        {
            getChar();
            if (ch == '-') return ("-- < @SELF_DECREASE, - >");
            else if (ch == '=') return ("-= < @SELF_DECREASE_BY_SIZE, - >");
            else if (ch == '>') return ("-> < @MEMBER_VISIT, - >");
            retract();
            return ("- < @MINUS, - >");
        }
        else return ("- < @MINUS, - >");
    }
    else if (ch == '|')
    {
        if (inputIndex < input.Length)
        {
            getChar();
            if (ch == '|') return ("|| < @OR2, - >");
            else if (ch == '=') return ("|= < @OR_EQUAL, - >");
            retract();
            return ("| < @OR1, - >");
        }
        else return ("| < @OR1, - >");
    }
    else if (ch == '&')
    {
        if (inputIndex < input.Length)
        {
            getChar();
            if (ch == '&') return ("&& < @AND2, - >");
            else if (ch == '=') return ("&= < @AND_EQUAL, - >");
            retract();
            return ("& < @AND1, - >");
        }
        else return ("& < @AND1, - >");
    }
    else if (ch == '>')
    {
        if (inputIndex < input.Length)
        {
            getChar();
            if (ch == '=') return (">= < @GREATER_OR_EQUAL, - >");
            else if (ch == '>') return (">> < @BIT_MOV_R, - >");
            retract();
            return ("> < @GREATER, - >");
        }
        else return ("> < @GREATER, - >");
    }
    else if (ch == '<')
    {
        if (inputIndex < input.Length)
        {
            getChar();
            if (ch == '=') return ("<= < @LESS_OR_EQUAL, - >");
            else if (ch == '<') return ("<< < @BIT_MOV_L, - >");
            retract();
            return ("< < @LESS, - >");
        }
        else return ("< < @LESS, - >");
    }
    // Character literals. This branch is kept statement-for-statement; the
    // number of retract() calls on the error paths decides where scanning
    // resumes, so it is order-sensitive.
    else if (ch == '\'')
    {
        // At least three characters remain after the opening quote, which is
        // enough for an escaped character plus the closing quote.
        if (inputIndex < input.Length - 2)
        {
            getChar();
            if (ch == '\'') return "INVALID USE OF '";
            else if (ch == '\\')
            {
                getChar();
                if (ch == 'n' || ch == 'r' || ch == '\\' || ch == 't' || ch == 'b' || ch == '\'' || ch == '\"') // recognized escape characters
                {
                    getChar();
                    if (ch == '\'') return "\\" + input[inputIndex - 2] + " < @ESC, - >";
                    // NOTE(review): when the closing quote is missing, control
                    // falls through to the plain-character handling below.
                }
                else
                {
                    retract();
                    return "INVALID USE OF '";
                }
            }
            getChar();
            if (ch == '\'')
            {
                // The literal's character sits just before the closing quote.
                ch = input[inputIndex - 2];
                value = insertChar();
                return (input[inputIndex - 2] + " < @CHAR, " + value.ToString() + " >");
            }
            else if (inputIndex == input.Length - 1)
            {
                retract();
                return "INVALID USE OF '";
            }
            else
            {
                retract();
                retract();
                return "INVALID USE OF '";
            }
        }
        // Exactly two characters remain: only a plain character plus the
        // closing quote can fit.
        else if (inputIndex < input.Length - 1)
        {
            getChar();
            if (ch == '\'' || ch == '\\') return "INVALID USE OF '";
            getChar();
            if (ch == '\'')
            {
                ch = input[inputIndex - 2];
                value = insertChar();
                return (input[inputIndex - 2] + " < @CHAR, " + value.ToString() + " >");
            }
            else if (inputIndex == input.Length - 1)
            {
                retract();
                return "INVALID USE OF '";
            }
            else
            {
                retract();
                retract();
                return "INVALID USE OF '";
            }
        }
        else return "INVALID USE OF '";
    }
    // String literals (no escape-sequence handling: the literal ends at the
    // next double quote).
    else if (ch == '\"')
    {
        bool notMatch = true;
        if (inputIndex == input.Length) return ("INVALID USE OF \"");
        else if (input[inputIndex] == '\"')
        {
            // Empty string "": record it and consume the closing quote.
            strToken = "";
            value = insertString();
            if (inputIndex < input.Length) getChar();
            return ("(null string) < @STRING_NULL, " + value.ToString() + " >");
        }
        else
        {
            getChar();
            while (notMatch)
            {
                concat();
                // Input exhausted before a closing quote was found. The
                // original loop condition never became false in this case and
                // spun forever; stop and report the literal as invalid.
                if (inputIndex == input.Length) break;
                getChar();
                if (ch == '\"') notMatch = false;
            }
            if (notMatch) return ("INVALID USE OF \"");
            else
            {
                value = insertString();
                return (strToken + " < @STRING, " + value.ToString() + " >");
            }
        }
    }
    else if (ch == '*')
    {
        if (inputIndex < input.Length)
        {
            getChar();
            if (ch == '=') return ("*= < @SELF_MULTIPLY_BY_SIZE, - >");
            retract();
            return ("* < @MULTIPLY, - >");
        }
        else return ("* < @MULTIPLY, - >");
    }
    else if (ch == '/')
    {
        if (inputIndex < input.Length)
        {
            getChar();
            // Block comment: skip everything up to and including the closing "*/".
            if (ch == '*')
            {
                // Search starts after the opening "/*", so "/*/" is not
                // mistaken for a complete comment.
                int close = input.IndexOf("*/", inputIndex, StringComparison.Ordinal);
                if (close < 0)
                {
                    // Unterminated comment: the rest of the input belongs to
                    // it, so consume it instead of tokenizing comment text.
                    inputIndex = input.Length;
                    return ("INVALID USE OF /*");
                }
                inputIndex = close + 2;
                return ("");
            }
            // Line comment: skip to the end of the line or of the input.
            else if (ch == '/')
            {
                if (inputIndex < input.Length)
                {
                    getChar();
                    while (ch != '\n' && inputIndex < input.Length)
                    {
                        concat();
                        getChar();
                    }
                    if (inputIndex == input.Length)
                    {
                        concat();
                    }
                    return ("");
                }
                else return ("");
            }
            else if (ch == '=')
            {
                return ("/= < @SELF_DIVIDE_BY_SIZE, - >");
            }
            else
            {
                retract();
                return ("/ < @DIVIDE, - >");
            }
        }
        else return ("/ < @DIVIDE, - >");
    }
    else if (ch == '#') return ("# < @SHARP, - >");
    else if (ch == ':')
    {
        if (inputIndex < input.Length)
        {
            getChar();
            if (ch == ':') return (":: < @FIELD, - >");
            retract();
            return (": < @COLON, - >");
        }
        else return (": < @COLON, - >");
    }
    else if (ch == ';') return ("; < @SEMICOLON, - >");
    else if (ch == '~') return ("~ < @COMPLEMENT, - >");
    else if (ch == '`') return ("` < @UPDOT, - >");
    else if (ch == '@') return ("@ < @AT, - >");
    else if (ch == '$') return ("$ < @DOLLAR, - >");
    else if (ch == '%')
    {
        if (inputIndex < input.Length)
        {
            getChar();
            if (ch == '=') return ("%= < @MOD_EQUAL, - >");
            retract();
            return ("% < @MOD, - >");
        }
        else return ("% < @MOD, - >");
    }
    else if (ch == '^')
    {
        if (inputIndex < input.Length)
        {
            getChar();
            if (ch == '=') return ("^= < @XOR_EQUAL, - >");
            retract();
            return ("^ < @XOR, - >");
        }
        else return ("^ < @XOR, - >");
    }
    // A lone '_' never reaches this point: it is handled by the identifier
    // branch at the top, so no separate underscore token is produced.
    else if (ch == '?')
    {
        if (inputIndex < input.Length)
        {
            getChar();
            if (ch == '?') return ("?? < @RETURN_NOT_NULL, - >");
            retract();
            return ("? < @INTERROGATION, - >");
        }
        else return ("? < @INTERROGATION, - >");
    }
    else if (ch == '[') return ("[ < @LSQUARE, - >");
    else if (ch == ']') return ("] < @RSQUARE, - >");
    else if (ch == '{') return ("{ < @LBRACKET, - >");
    else if (ch == '}') return ("} < @RBRACKET, - >");
    else if (ch == '(') return ("( < @LPAR, - >");
    else if (ch == ')') return (") < @RPAR, - >");
    else if (ch == ',') return (", < @COMMA, - >");
    else if (ch == '.') return (". < @DOT, - >");
    // Whitespace, control characters and a stray backslash produce no output.
    else if (ch == ' ') return ("");
    else if (ch == '\n') return ("");
    else if (ch == '\t') return ("");
    else if (ch == '\b') return ("");
    else if (ch == '\r') return ("");
    else if (ch == '\\') return ("");
    else return (ch + ", UNDEFINED, -");
}
// Reset button handler: when either text box has content, asks for
// confirmation and clears both boxes if the user accepts.
private void cmdReset_Click(object sender, EventArgs e)
{
    bool nothingToClear = rtbInput.Text == "" && rtbOutput.Text == "";
    if (nothingToClear)
    {
        return;
    }

    DialogResult answer = MessageBox.Show("CLEAR TEXT?", "TEXT CLEAR CONFIRM", MessageBoxButtons.OKCancel, MessageBoxIcon.Question);
    if (answer == DialogResult.OK)
    {
        rtbInput.Clear();
        rtbOutput.Clear();
    }
}
// SCAN button handler: resets every table and pointer, scans the whole
// content of the input box with scan(), and shows one line per token in the
// output box.
private void cmdStart_Click(object sender, EventArgs e)
{
    rtbOutput.Clear();

    // Reject oversized input before allocating the tables.
    if (rtbInput.Text.Length > 100000)
    {
        rtbOutput.Text = "File too large.";
        return;
    }

    token = new string[10000];
    constant = new long[10000];
    dblConstant = new double[10000];
    charConst = new char[10000];
    strConst = new string[10000];
    tokenIndex = 0;
    constantIndex = 0;
    dblConstantIndex = 0;
    strConstIndex = 0;
    charConstIndex = 0;
    inputIndex = 0;
    input = rtbInput.Text;

    // Collect the output in a StringBuilder; repeated string concatenation is
    // quadratic in the number of tokens.
    StringBuilder result = new StringBuilder();
    try
    {
        while (inputIndex < input.Length)
        {
            string s = scan();
            // scan() returns "" for whitespace and comments; those produce no line.
            if (s != "")
            {
                result.Append(s).Append("\n");
            }
        }
        if (result.Length > 0)
        {
            rtbOutput.Text = result.ToString();
            scanned = true;
        }
    }
    catch (IndexOutOfRangeException)
    {
        // Raised when the scanner reads past the end of the input or a table
        // overflows. Keep the tokens recognized so far and append the error
        // marker, rather than discarding them.
        rtbOutput.Text = result.ToString() + "Invalid input.";
    }
    catch (Exception exception)
    {
        rtbOutput.Text = exception.Message;
    }
}
// Save button handler: once a scan has produced output, lets the user pick a
// file and writes the output box to it via RichTextBox.SaveFile.
// NOTE(review): SaveFile(path) is called without a stream type, which
// presumably writes RTF rather than plain text; confirm that is intended.
private void cmdSave_Click(object sender, EventArgs e)
{
    // Nothing to save until a scan has filled the output box.
    if (!scanned || rtbOutput.Text == "")
    {
        return;
    }

    if (saveFileDialog.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    rtbOutput.SaveFile(saveFileDialog.FileName);
    MessageBox.Show("SAVED.", "SCANNER");
}
// About button handler: shows the author information in a message box.
private void cmdAbout_Click(object sender, EventArgs e)
{
    // The message must stay on one source line: a regular string literal
    // cannot contain a raw line break.
    MessageBox.Show("范成 (KENSHIN)\nTongji University\nDepartment of Computer Science\n2007", "ABOUT", MessageBoxButtons.OK, MessageBoxIcon.Information);
}
// Open button handler: lets the user pick a source file, reads it line by
// line and shows it in the input box with every line terminated by "\n".
// Both text boxes are cleared before the new text is shown.
// I/O exceptions from opening or reading the file are not handled here.
private void cmdOpen_Click(object sender, EventArgs e)
{
    if (openFileDialog.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    // Read the whole file first so the stream is released even when reading
    // fails, and so the text box is assigned once instead of once per line.
    StringBuilder text = new StringBuilder();
    using (FileStream fs = new FileStream(openFileDialog.FileName, FileMode.Open, FileAccess.Read))
    using (StreamReader reader = new StreamReader(fs))
    {
        string line = reader.ReadLine();
        while (line != null)
        {
            text.Append(line).Append("\n");
            line = reader.ReadLine();
        }
    }

    rtbInput.Clear();
    rtbOutput.Clear();
    rtbInput.Text = text.ToString();
}
}
}
C#程序语言子集:
保留关键字:
abstract, do, in, protected, true, as, double, int, public, try, throw, base, else, interface, readonly,
typeof, bool, enum, internal, ref, uint, break, event, is, return, ulong, byte, explicit, lock, sbyte,
unchecked, case, extern, long, sealed, unsafe, catch, false, namespace, short, ushort, char, finally,
new, sizeof, using, checked, fixed, null, stackalloc, virtual, class, float, object, static, volatile,
const, for, operator, string, void, continue, foreach, out, struct, while, decimal, goto, override,
switch, default, if, params, this, delegate, implicit, private
符号:
运算符类别 运算符
算术 + - * / %
逻辑(布尔型和按位) & | ^ ! ~ && || true false
字符串串联 +
递增、递减 ++ --
变换 << >>
关系 == != < > <= >=
赋值 = += -= *= /= %= &= |= ^= <<= >>= ??
成员访问 .
索引 []
转换 ()
委托串联和移除 + -
间接寻址和地址 * -> [] &
注释 // /**/
程序运行截图:
打开源程序文件
读入源程序并显示
扫描源程序并显示扫描结果
清空文本框
保存扫描结果
作者信息