当前位置:网站首页>[compilation principle] lexical analysis design and Implementation
[compilation principle] lexical analysis design and Implementation
2022-07-07 23:24:00 【RemainsHll】
1 overall design
2 Infix expression to suffix expression

Core code
int priority(char ch)
{
if (ch == '*')
{
return 3;
}
if (ch == '&')
{
return 2;
}
if (ch == '|')
{
return 1;
}
if (ch == '(')
{
return 0;
}
}
void insert_behind_n(string& s, int n, char ch)
{
s += '#';
for (int i = s.size() - 1; i > n; i--)
s[i] = s[i - 1];
s[n] = ch;
}
void pre_process(string& s)
{
int i = 0, length = s.size()-1;
while (i < length)
{
int in_all_1 = 0, in_all_2 = 0;
// Pay attention to escaping
if(operators.count(s[i])==0 && s[i]!='\\'){
in_all_1 = 1;
}
if(operators.count(s[i+1])==0 ){
in_all_2 = 1;
}
if (in_all_1 || (s[i] == '*') || (s[i] == ')')){
if (in_all_2 || s[i + 1] == '(')
{
insert_behind_n(s, i + 1, '&');
length++;
}
}
i++;
}
}
string infix_Suffix(string s)
{
pre_process(s);
string str;
stack<char> oper;
for (unsigned int i = 0; i < s.size(); i++)
{
int in_all = 0;
if(s[i] == '\\'){
str += s[i];
i++;
str += s[i];
operands.insert(s[i]);
}
if(operators.count(s[i])==0){
in_all = 1;
operands.insert(s[i]);
}
if (in_all)
{
str += s[i];
}
else
{
if (s[i] == '(')
{
oper.push(s[i]);
}
else if (s[i] == ')')
{
char ch = oper.top();
while (ch != '(')
{
str += ch;
oper.pop();
ch = oper.top();
}
oper.pop();
}
else
{
if (!oper.empty())
{
char ch = oper.top();
while (priority(ch) >= priority(s[i]))
{
str += ch;
oper.pop();
if (oper.empty())
{
break;
}
else ch = oper.top();
}
oper.push(s[i]);
}
else
{
oper.push(s[i]);
}
}
}
}
while (!oper.empty())
{
char ch = oper.top();
oper.pop();
str += ch;
}
//cout<<str<<endl;
return str;
}
3 Suffix expression to NFA


Core code
//DFA State stack , Record the start state and end state
State stateStack[MAX];
int top;
// obtain ID
int stateID = 1;
void CFG_NFA(string cfg){
stateID = 1;
top = 0;
point = 0;
for (int i=0;i<cfg.length();i++){
State a;
State b;
State c;
switch (cfg[i])
{
case '&':
// Take two states from the state stack , And update the start and end status
a = stateStack[top--];
b = stateStack[top--];
c.begin = b.begin;
c.end = a.end;
stateStack[++top] = c;
// Create a new state transition ,a Status encountered # Transferred to the b, take a and b States are connected
NFA nfa0;
nfa0.from = b.end;
nfa0.to = a.begin;
nfa0.c = '#';
nfa[point++] = nfa0;
break;
case '|':
// The rules 2
c.begin = stateID++;
c.end = stateID++;
a = stateStack[top--];
b = stateStack[top--];
stateStack[++top] = c;
NFA nfa1;
nfa1.from = c.begin;
nfa1.to = a.begin;
nfa1.c = '#';
nfa[point++] = nfa1;
NFA nfa2;
nfa2.from = c.begin;
nfa2.to = b.begin;
nfa2.c = '#';
nfa[point++] = nfa2;
NFA nfa3;
nfa3.from = a.end;
nfa3.to = c.end;
nfa3.c = '#';
nfa[point++] = nfa3;
NFA nfa4;
nfa4.from = b.end;
nfa4.to = c.end;
nfa4.c = '#';
nfa[point++] = nfa4;
break;
case '*':
// The rules 3
c.begin = stateID++;
c.end = stateID++;
a = stateStack[top--];
stateStack[++top] = c;
NFA nfa5;
nfa5.from = c.begin;
nfa5.to = c.end;
nfa5.c = '#';
nfa[point++] = nfa5;
NFA nfa6;
nfa6.from = c.begin;
nfa6.to = a.begin;
nfa6.c = '#';
nfa[point++] = nfa6;
NFA nfa7;
nfa7.from = a.end;
nfa7.to = a.begin;
nfa7.c = '#';
nfa[point++] = nfa7;
NFA nfa8;
nfa8.from = a.end;
nfa8.to = c.end;
nfa8.c = '#';
nfa[point++] = nfa8;
break;
case '\\':
// Escape character encountered
i++;
c.begin = stateID++;
c.end = stateID++;
stateStack[++top] = c;
NFA nfa10;
nfa10.from = c.begin;
nfa10.to = c.end;
nfa10.c = cfg[i];
nfa[point++] = nfa10;
break;
default:
// Encountered operand
c.begin = stateID++;
c.end = stateID++;
stateStack[++top] = c;
NFA nfa9;
nfa9.from = c.begin;
nfa9.to = c.end;
nfa9.c = cfg[i];
nfa[point++] = nfa9;
break;
}
}
// Output NFA
// for(int i=0;i<point;i++){
// cout<<nfa[i].from<<" "<<nfa[i].c<<" "<<nfa[i].to<<endl;
// }
}
4NFA turn DFA

Core code
set<int> empty_transfer(int id){
set<int> set1;
set<int> set2;
set1.insert(id);
// Traverse NFA, Add the arrived state set set1 and set2
for(int i=0;i<point;i++){
if(nfa[i].from==id && nfa[i].c == '#'){
set1.insert(nfa[i].to);
set2.insert(nfa[i].to);
}
}
// Traverse the above set2, Continue to seek empty transfer , until set2 It's empty , Got set1 Is the empty transfer of state
while(!set2.empty()){
int temp = *set2.begin();
set2.erase(set2.begin());
for(int i=0;i<point;i++){
if(nfa[i].from==temp && nfa[i].c == '#'){
set2.insert(nfa[i].to);
set1.insert(nfa[i].to);
}
}
}
return set1;
}
set<int> empty_move(int id,char c){
// First seek move(c)
set<int> set1;
for(int i=0;i<point;i++){
if(nfa[i].from==id && nfa[i].c == c){
set1.insert(nfa[i].to);
}
}
// obtain move Then find the empty transfer
set<int> set2;
set2.insert(set1.begin(),set1.end());
set<int>::iterator it1;
for (it1 = set1.begin(); it1 != set1.end(); it1++){
set<int> set3 = empty_transfer(*it1);
set2.insert(set3.begin(),set3.end());
}
return set2;
}
void NFA_DFA(){
DFA* dfa = new DFA();
// Get the start state and end state
State ss = stateStack[top];
int begin = ss.begin;
int end = ss.end;
// Empty transition of initial state
set<int> initial = empty_transfer(begin);
// State set
set< set<int> > states;
set< set<int> > states_temp;
states.insert(initial);
// rename DFA
dfa->dfa_id[initial] = ++dfa->id;
// Traverse the state set
while(!states.empty()){
set<int> temp = *states.begin();
states.erase(states.begin());
set<int>::iterator it1;
set<char>::iterator it2;
// Traverse the operand to find closeuure(move)
for (it2 = operands.begin(); it2 != operands.end(); it2++){
set<int> new_set;
for (it1 = temp.begin(); it1 != temp.end(); it1++){
set<int> set_temp = empty_move(*it1,*it2);
new_set.insert(set_temp.begin(),set_temp.end());
}
if(states_temp.count(new_set) == 0 && !new_set.empty()){
states.insert(new_set);
states_temp.insert(new_set);
dfa->dfa_id[new_set] = ++dfa->id;
}
//cout<<dfa->dfa_id[temp]<<" "<<*it2<<" "<<dfa->dfa_id[new_set]<<endl;
dfa->tran[dfa->dfa_id[temp]][*it2-33] = dfa->dfa_id[new_set];
if(new_set.count(end)){
dfa->vn[dfa->dfa_id[new_set]] = true;
}else{
dfa->vn[dfa->dfa_id[new_set]] = false;
}
}
}
dfas[point_dfa++] = dfa;
}
5 To minimize the DFA

Core code
void minimize_DFA(){
set<set<int> > sets;
DFA* dfa = dfas[point_dfa-1];
// initialization : Distinguish between initial state and final state
int len = dfa->id;
set<int> set1,set2;
for(int i=1;i<=len;i++){
if(dfa->vn[i]){
set1.insert(i);
}else{
set2.insert(i);
}
}
sets.insert(set1);
sets.insert(set2);
set<set<int> > begin_set;
do{
begin_set = sets;
set<set<int> >::iterator it1;
set<int> temp;
// Traverse the state set
for (it1 = begin_set.begin(); it1 != begin_set.end(); it1++){
temp= *it1;
set<char>::iterator it2;
// Traversal operand
for(it2 = operands.begin();it2 != operands.end();it2++){
map<set<int>,set<int> > map_set;
set<int>::iterator it3;
for(it3 = temp.begin();it3 != temp.end();it3++){
int d = dfa->tran[*it3][*it2-33];
// Determine which set it belongs to
//cout<<*it3<<" "<<*it2<<" "<<d<<endl;
set<set<int> >::iterator it4;
int flag = 0;
set<int>temp2,temp3;
for(it4 = begin_set.begin();it4 != begin_set.end();it4++){
temp2 = *it4;
if(temp2.count(d)>0){
temp3 = map_set[temp2];
flag = 1;
}
}
// If the state produces a new set of States , That is, it can be divided , Delete the original status , The state set is stored in map in .
if(flag == 1){
sets.erase(temp);
temp3.insert(*it3);
map_set[temp2] = temp3;
}
}
// take map The set in is inserted into the original set
map<set<int>,set<int> >::iterator it5;
for(it5 = map_set.begin();it5 != map_set.end();it5++){
sets.insert(it5->second);
}
}
}
}while(begin_set != sets);
// Minimize DFA rename .
DFA* min_dfa = new DFA();
set<set<int> >::iterator it6;
set<set<int> > sets_temp;
// Traversal minimization DFA The resulting set
for(it6 = sets.begin();it6 != sets.end();it6++){
set<int> t_set = *it6;
set<char>::iterator it7;
int f = *t_set.begin();
int id=0,to=0;
// newly build DFA state
if(sets_temp.count(t_set) == 0){
sets_temp.insert(t_set);
min_dfa->dfa_id[t_set] = ++min_dfa->id;
}
id = min_dfa->dfa_id[t_set];
// Compile operators , Get the state transition matrix and terminator array
for(it7 = operands.begin();it7 != operands.end();it7++){
to = 0;
int d = dfa->tran[f][*it7 -33];
set<set<int> >::iterator it8;
for(it8 = sets.begin();it8 != sets.end();it8++){
set<int> t_set2 = *it8;
if(t_set2.count(d)>0){
if(sets_temp.count(t_set2) == 0){
min_dfa->dfa_id[t_set2] = ++min_dfa->id;
sets_temp.insert(t_set2);
}
to = min_dfa->dfa_id[t_set2];
if(min_dfa->vn[d]){
min_dfa->vn[to] = true;
}
}
}
min_dfa->tran[id][*it7-33] = d;
}
}
dfas[point_dfa] = min_dfa;
}
6 Recognize words

Core code
int distinguish(string s){
for(int i = 0;i<point_dfa;i++){
int cur = 1,flag = 1;
DFA* dfa = dfas[i];
int to = 0;
// Always transfer , Know the end of the word
for(int j = 0;j<s.length();j++){
to = dfa->tran[cur][s[j]-33];
if(to == 0){
flag = 0;
}else{
cur = to;
}
}
// If words can complete all state transitions , And the state transferred to is the final state , take DFA Of id return .
if(flag ==1 && dfa->vn[to]){
return i;
}
}
return -1;
}
7 test result

边栏推荐
- Ros2 topic (03): the difference between ros1 and ros2 [02]
- Gee (III): calculate the correlation coefficient between two bands and the corresponding p value
- windows设置redis开启自动启动
- Happy gathering time
- Binary tree
- UE4_UE5结合罗技手柄(F710)使用记录
- Ros2 topic (03): the difference between ros1 and ros2 [01]
- 2021icpc Shanghai h.life is a game Kruskal reconstruction tree
- Network security sqlmap and DVWA explosion
- POJ2392 SpaceElevator [DP]
猜你喜欢

聊聊支付流程的设计与实现逻辑

2022第六季完美童模陕西总决赛圆满落幕

三问TDM

深入理解Mysql锁与事务隔离级别

Wechat forum exchange applet system graduation design completion (1) development outline

Explain

Technology at home and abroad people "see" the future of audio and video technology

成年人只有一份主业是要付出代价的,被人事劝退后,我哭了一整晚

Explain

Inftnews | the wide application of NFT technology and its existing problems
随机推荐
leetcode-520. 检测大写字母-js
Vulnerability recurrence ----- 49. Apache airflow authentication bypass (cve-2020-17526)
Cloud native data warehouse analyticdb MySQL user manual
Dynamics 365 查找字段过滤
Adrnoid Development Series (XXV): create various types of dialog boxes using alertdialog
How to generate unique file names
欢聚时代一面
Introduction to redis and jedis and redis things
Adults have only one main job, but they have to pay a price. I was persuaded to step back by personnel, and I cried all night
1. Sum of two numbers
智慧社区和智慧城市之间有什么异同
LeeCode -- 6. Zigzag transformation
Ros2 topic (03): the difference between ros1 and ros2 [01]
Wechat forum exchange applet system graduation design completion (6) opening defense ppt
Dynamic agent explanation (July 16, 2020)
Explain
opencv scalar传入三个参数只能显示黑白灰问题解决
Coreseek:第二步建索引及測试
Installing spss25
CXF call reports an error. Could not find conduct initiator for address:
