当前位置:网站首页>[compilation principle] lexical analysis design and Implementation
[compilation principle] lexical analysis design and Implementation
2022-07-07 23:24:00 【RemainsHll】
1 overall design
2 Infix expression to suffix expression
Core code
int priority(char ch)
{
if (ch == '*')
{
return 3;
}
if (ch == '&')
{
return 2;
}
if (ch == '|')
{
return 1;
}
if (ch == '(')
{
return 0;
}
}
void insert_behind_n(string& s, int n, char ch)
{
s += '#';
for (int i = s.size() - 1; i > n; i--)
s[i] = s[i - 1];
s[n] = ch;
}
void pre_process(string& s)
{
int i = 0, length = s.size()-1;
while (i < length)
{
int in_all_1 = 0, in_all_2 = 0;
// Pay attention to escaping
if(operators.count(s[i])==0 && s[i]!='\\'){
in_all_1 = 1;
}
if(operators.count(s[i+1])==0 ){
in_all_2 = 1;
}
if (in_all_1 || (s[i] == '*') || (s[i] == ')')){
if (in_all_2 || s[i + 1] == '(')
{
insert_behind_n(s, i + 1, '&');
length++;
}
}
i++;
}
}
string infix_Suffix(string s)
{
pre_process(s);
string str;
stack<char> oper;
for (unsigned int i = 0; i < s.size(); i++)
{
int in_all = 0;
if(s[i] == '\\'){
str += s[i];
i++;
str += s[i];
operands.insert(s[i]);
}
if(operators.count(s[i])==0){
in_all = 1;
operands.insert(s[i]);
}
if (in_all)
{
str += s[i];
}
else
{
if (s[i] == '(')
{
oper.push(s[i]);
}
else if (s[i] == ')')
{
char ch = oper.top();
while (ch != '(')
{
str += ch;
oper.pop();
ch = oper.top();
}
oper.pop();
}
else
{
if (!oper.empty())
{
char ch = oper.top();
while (priority(ch) >= priority(s[i]))
{
str += ch;
oper.pop();
if (oper.empty())
{
break;
}
else ch = oper.top();
}
oper.push(s[i]);
}
else
{
oper.push(s[i]);
}
}
}
}
while (!oper.empty())
{
char ch = oper.top();
oper.pop();
str += ch;
}
//cout<<str<<endl;
return str;
}
3 Suffix expression to NFA
Core code
//DFA State stack , Record the start state and end state
State stateStack[MAX];
int top;
// obtain ID
int stateID = 1;
void CFG_NFA(string cfg){
stateID = 1;
top = 0;
point = 0;
for (int i=0;i<cfg.length();i++){
State a;
State b;
State c;
switch (cfg[i])
{
case '&':
// Take two states from the state stack , And update the start and end status
a = stateStack[top--];
b = stateStack[top--];
c.begin = b.begin;
c.end = a.end;
stateStack[++top] = c;
// Create a new state transition ,a Status encountered # Transferred to the b, take a and b States are connected
NFA nfa0;
nfa0.from = b.end;
nfa0.to = a.begin;
nfa0.c = '#';
nfa[point++] = nfa0;
break;
case '|':
// The rules 2
c.begin = stateID++;
c.end = stateID++;
a = stateStack[top--];
b = stateStack[top--];
stateStack[++top] = c;
NFA nfa1;
nfa1.from = c.begin;
nfa1.to = a.begin;
nfa1.c = '#';
nfa[point++] = nfa1;
NFA nfa2;
nfa2.from = c.begin;
nfa2.to = b.begin;
nfa2.c = '#';
nfa[point++] = nfa2;
NFA nfa3;
nfa3.from = a.end;
nfa3.to = c.end;
nfa3.c = '#';
nfa[point++] = nfa3;
NFA nfa4;
nfa4.from = b.end;
nfa4.to = c.end;
nfa4.c = '#';
nfa[point++] = nfa4;
break;
case '*':
// The rules 3
c.begin = stateID++;
c.end = stateID++;
a = stateStack[top--];
stateStack[++top] = c;
NFA nfa5;
nfa5.from = c.begin;
nfa5.to = c.end;
nfa5.c = '#';
nfa[point++] = nfa5;
NFA nfa6;
nfa6.from = c.begin;
nfa6.to = a.begin;
nfa6.c = '#';
nfa[point++] = nfa6;
NFA nfa7;
nfa7.from = a.end;
nfa7.to = a.begin;
nfa7.c = '#';
nfa[point++] = nfa7;
NFA nfa8;
nfa8.from = a.end;
nfa8.to = c.end;
nfa8.c = '#';
nfa[point++] = nfa8;
break;
case '\\':
// Escape character encountered
i++;
c.begin = stateID++;
c.end = stateID++;
stateStack[++top] = c;
NFA nfa10;
nfa10.from = c.begin;
nfa10.to = c.end;
nfa10.c = cfg[i];
nfa[point++] = nfa10;
break;
default:
// Encountered operand
c.begin = stateID++;
c.end = stateID++;
stateStack[++top] = c;
NFA nfa9;
nfa9.from = c.begin;
nfa9.to = c.end;
nfa9.c = cfg[i];
nfa[point++] = nfa9;
break;
}
}
// Output NFA
// for(int i=0;i<point;i++){
// cout<<nfa[i].from<<" "<<nfa[i].c<<" "<<nfa[i].to<<endl;
// }
}
4NFA turn DFA
Core code
set<int> empty_transfer(int id){
set<int> set1;
set<int> set2;
set1.insert(id);
// Traverse NFA, Add the arrived state set set1 and set2
for(int i=0;i<point;i++){
if(nfa[i].from==id && nfa[i].c == '#'){
set1.insert(nfa[i].to);
set2.insert(nfa[i].to);
}
}
// Traverse the above set2, Continue to seek empty transfer , until set2 It's empty , Got set1 Is the empty transfer of state
while(!set2.empty()){
int temp = *set2.begin();
set2.erase(set2.begin());
for(int i=0;i<point;i++){
if(nfa[i].from==temp && nfa[i].c == '#'){
set2.insert(nfa[i].to);
set1.insert(nfa[i].to);
}
}
}
return set1;
}
set<int> empty_move(int id,char c){
// First seek move(c)
set<int> set1;
for(int i=0;i<point;i++){
if(nfa[i].from==id && nfa[i].c == c){
set1.insert(nfa[i].to);
}
}
// obtain move Then find the empty transfer
set<int> set2;
set2.insert(set1.begin(),set1.end());
set<int>::iterator it1;
for (it1 = set1.begin(); it1 != set1.end(); it1++){
set<int> set3 = empty_transfer(*it1);
set2.insert(set3.begin(),set3.end());
}
return set2;
}
void NFA_DFA(){
DFA* dfa = new DFA();
// Get the start state and end state
State ss = stateStack[top];
int begin = ss.begin;
int end = ss.end;
// Empty transition of initial state
set<int> initial = empty_transfer(begin);
// State set
set< set<int> > states;
set< set<int> > states_temp;
states.insert(initial);
// rename DFA
dfa->dfa_id[initial] = ++dfa->id;
// Traverse the state set
while(!states.empty()){
set<int> temp = *states.begin();
states.erase(states.begin());
set<int>::iterator it1;
set<char>::iterator it2;
// Traverse the operand to find closeuure(move)
for (it2 = operands.begin(); it2 != operands.end(); it2++){
set<int> new_set;
for (it1 = temp.begin(); it1 != temp.end(); it1++){
set<int> set_temp = empty_move(*it1,*it2);
new_set.insert(set_temp.begin(),set_temp.end());
}
if(states_temp.count(new_set) == 0 && !new_set.empty()){
states.insert(new_set);
states_temp.insert(new_set);
dfa->dfa_id[new_set] = ++dfa->id;
}
//cout<<dfa->dfa_id[temp]<<" "<<*it2<<" "<<dfa->dfa_id[new_set]<<endl;
dfa->tran[dfa->dfa_id[temp]][*it2-33] = dfa->dfa_id[new_set];
if(new_set.count(end)){
dfa->vn[dfa->dfa_id[new_set]] = true;
}else{
dfa->vn[dfa->dfa_id[new_set]] = false;
}
}
}
dfas[point_dfa++] = dfa;
}
5 To minimize the DFA
Core code
void minimize_DFA(){
set<set<int> > sets;
DFA* dfa = dfas[point_dfa-1];
// initialization : Distinguish between initial state and final state
int len = dfa->id;
set<int> set1,set2;
for(int i=1;i<=len;i++){
if(dfa->vn[i]){
set1.insert(i);
}else{
set2.insert(i);
}
}
sets.insert(set1);
sets.insert(set2);
set<set<int> > begin_set;
do{
begin_set = sets;
set<set<int> >::iterator it1;
set<int> temp;
// Traverse the state set
for (it1 = begin_set.begin(); it1 != begin_set.end(); it1++){
temp= *it1;
set<char>::iterator it2;
// Traversal operand
for(it2 = operands.begin();it2 != operands.end();it2++){
map<set<int>,set<int> > map_set;
set<int>::iterator it3;
for(it3 = temp.begin();it3 != temp.end();it3++){
int d = dfa->tran[*it3][*it2-33];
// Determine which set it belongs to
//cout<<*it3<<" "<<*it2<<" "<<d<<endl;
set<set<int> >::iterator it4;
int flag = 0;
set<int>temp2,temp3;
for(it4 = begin_set.begin();it4 != begin_set.end();it4++){
temp2 = *it4;
if(temp2.count(d)>0){
temp3 = map_set[temp2];
flag = 1;
}
}
// If the state produces a new set of States , That is, it can be divided , Delete the original status , The state set is stored in map in .
if(flag == 1){
sets.erase(temp);
temp3.insert(*it3);
map_set[temp2] = temp3;
}
}
// take map The set in is inserted into the original set
map<set<int>,set<int> >::iterator it5;
for(it5 = map_set.begin();it5 != map_set.end();it5++){
sets.insert(it5->second);
}
}
}
}while(begin_set != sets);
// Minimize DFA rename .
DFA* min_dfa = new DFA();
set<set<int> >::iterator it6;
set<set<int> > sets_temp;
// Traversal minimization DFA The resulting set
for(it6 = sets.begin();it6 != sets.end();it6++){
set<int> t_set = *it6;
set<char>::iterator it7;
int f = *t_set.begin();
int id=0,to=0;
// newly build DFA state
if(sets_temp.count(t_set) == 0){
sets_temp.insert(t_set);
min_dfa->dfa_id[t_set] = ++min_dfa->id;
}
id = min_dfa->dfa_id[t_set];
// Compile operators , Get the state transition matrix and terminator array
for(it7 = operands.begin();it7 != operands.end();it7++){
to = 0;
int d = dfa->tran[f][*it7 -33];
set<set<int> >::iterator it8;
for(it8 = sets.begin();it8 != sets.end();it8++){
set<int> t_set2 = *it8;
if(t_set2.count(d)>0){
if(sets_temp.count(t_set2) == 0){
min_dfa->dfa_id[t_set2] = ++min_dfa->id;
sets_temp.insert(t_set2);
}
to = min_dfa->dfa_id[t_set2];
if(min_dfa->vn[d]){
min_dfa->vn[to] = true;
}
}
}
min_dfa->tran[id][*it7-33] = d;
}
}
dfas[point_dfa] = min_dfa;
}
6 Recognize words
Core code
int distinguish(string s){
for(int i = 0;i<point_dfa;i++){
int cur = 1,flag = 1;
DFA* dfa = dfas[i];
int to = 0;
// Always transfer , Know the end of the word
for(int j = 0;j<s.length();j++){
to = dfa->tran[cur][s[j]-33];
if(to == 0){
flag = 0;
}else{
cur = to;
}
}
// If words can complete all state transitions , And the state transferred to is the final state , take DFA Of id return .
if(flag ==1 && dfa->vn[to]){
return i;
}
}
return -1;
}
7 test result
边栏推荐
- 欢聚时代一面
- Wechat forum exchange applet system graduation design completion (4) opening report
- Count the top 10 films at the box office and save them in another file
- Wechat forum exchange applet system graduation design (3) background function
- Bit operation
- Matlab SEIR infectious disease model prediction
- Install a new version of idea. Double click it to open it
- 2021ICPC上海 H.Life is a Game Kruskal重构树
- PMP项目管理考试过关口诀-1
- 解决:信息中插入avi格式的视频时,提示“unsupported video format”
猜你喜欢
海内外技术人们“看”音视频技术的未来
Wechat forum exchange applet system graduation design (3) background function
UE4_UE5蓝图command节点的使用(开启关闭屏幕响应-log-发布全屏显示)
Inftnews | web5 vs Web3: the future is a process, not a destination
Explain
USB(十五)2022-04-14
成年人只有一份主业是要付出代价的,被人事劝退后,我哭了一整晚
移动端异构运算技术 - GPU OpenCL 编程(基础篇)
JMeter interface automated test read case, execute and write back result
伸展树(一) - 图文解析与C语言实现
随机推荐
PMP project management exam pass Formula-1
LeeCode -- 6. Z 字形变换
Wechat forum exchange applet system graduation design completion (8) graduation design thesis template
POJ2392 SpaceElevator [DP]
给出一个数组,如 [7864, 284, 347, 7732, 8498],现在需要将数组中的数字拼接起来,返回「最大的可能拼出的数字」
leetcode-520. 检测大写字母-js
JMeter interface automated test read case, execute and write back result
Bit operation
Binary tree
Coreseek: the second step is index building and testing
Count the top 10 films at the box office and save them in another file
RE1 attack and defense world reverse
云原生数据仓库AnalyticDB MySQL版用户手册
FPGA基础篇目录
Mysql索引优化实战二
LDO voltage stabilizing chip - internal block diagram and selection parameters
Quelles sont les similitudes et les différences entre les communautés intelligentes et les villes intelligentes?
Introduction to redis and jedis and redis things
How to generate unique file names
2022第六季完美童模陕西总决赛圆满落幕