当前位置:网站首页>[compilation principle] lexical analysis design and Implementation
[compilation principle] lexical analysis design and Implementation
2022-07-07 23:24:00 【RemainsHll】
1 overall design
2 Infix expression to suffix expression
Core code
int priority(char ch)
{
if (ch == '*')
{
return 3;
}
if (ch == '&')
{
return 2;
}
if (ch == '|')
{
return 1;
}
if (ch == '(')
{
return 0;
}
}
void insert_behind_n(string& s, int n, char ch)
{
s += '#';
for (int i = s.size() - 1; i > n; i--)
s[i] = s[i - 1];
s[n] = ch;
}
void pre_process(string& s)
{
int i = 0, length = s.size()-1;
while (i < length)
{
int in_all_1 = 0, in_all_2 = 0;
// Pay attention to escaping
if(operators.count(s[i])==0 && s[i]!='\\'){
in_all_1 = 1;
}
if(operators.count(s[i+1])==0 ){
in_all_2 = 1;
}
if (in_all_1 || (s[i] == '*') || (s[i] == ')')){
if (in_all_2 || s[i + 1] == '(')
{
insert_behind_n(s, i + 1, '&');
length++;
}
}
i++;
}
}
string infix_Suffix(string s)
{
pre_process(s);
string str;
stack<char> oper;
for (unsigned int i = 0; i < s.size(); i++)
{
int in_all = 0;
if(s[i] == '\\'){
str += s[i];
i++;
str += s[i];
operands.insert(s[i]);
}
if(operators.count(s[i])==0){
in_all = 1;
operands.insert(s[i]);
}
if (in_all)
{
str += s[i];
}
else
{
if (s[i] == '(')
{
oper.push(s[i]);
}
else if (s[i] == ')')
{
char ch = oper.top();
while (ch != '(')
{
str += ch;
oper.pop();
ch = oper.top();
}
oper.pop();
}
else
{
if (!oper.empty())
{
char ch = oper.top();
while (priority(ch) >= priority(s[i]))
{
str += ch;
oper.pop();
if (oper.empty())
{
break;
}
else ch = oper.top();
}
oper.push(s[i]);
}
else
{
oper.push(s[i]);
}
}
}
}
while (!oper.empty())
{
char ch = oper.top();
oper.pop();
str += ch;
}
//cout<<str<<endl;
return str;
}
3 Suffix expression to NFA
Core code
//DFA State stack , Record the start state and end state
State stateStack[MAX];
int top;
// obtain ID
int stateID = 1;
void CFG_NFA(string cfg){
stateID = 1;
top = 0;
point = 0;
for (int i=0;i<cfg.length();i++){
State a;
State b;
State c;
switch (cfg[i])
{
case '&':
// Take two states from the state stack , And update the start and end status
a = stateStack[top--];
b = stateStack[top--];
c.begin = b.begin;
c.end = a.end;
stateStack[++top] = c;
// Create a new state transition ,a Status encountered # Transferred to the b, take a and b States are connected
NFA nfa0;
nfa0.from = b.end;
nfa0.to = a.begin;
nfa0.c = '#';
nfa[point++] = nfa0;
break;
case '|':
// The rules 2
c.begin = stateID++;
c.end = stateID++;
a = stateStack[top--];
b = stateStack[top--];
stateStack[++top] = c;
NFA nfa1;
nfa1.from = c.begin;
nfa1.to = a.begin;
nfa1.c = '#';
nfa[point++] = nfa1;
NFA nfa2;
nfa2.from = c.begin;
nfa2.to = b.begin;
nfa2.c = '#';
nfa[point++] = nfa2;
NFA nfa3;
nfa3.from = a.end;
nfa3.to = c.end;
nfa3.c = '#';
nfa[point++] = nfa3;
NFA nfa4;
nfa4.from = b.end;
nfa4.to = c.end;
nfa4.c = '#';
nfa[point++] = nfa4;
break;
case '*':
// The rules 3
c.begin = stateID++;
c.end = stateID++;
a = stateStack[top--];
stateStack[++top] = c;
NFA nfa5;
nfa5.from = c.begin;
nfa5.to = c.end;
nfa5.c = '#';
nfa[point++] = nfa5;
NFA nfa6;
nfa6.from = c.begin;
nfa6.to = a.begin;
nfa6.c = '#';
nfa[point++] = nfa6;
NFA nfa7;
nfa7.from = a.end;
nfa7.to = a.begin;
nfa7.c = '#';
nfa[point++] = nfa7;
NFA nfa8;
nfa8.from = a.end;
nfa8.to = c.end;
nfa8.c = '#';
nfa[point++] = nfa8;
break;
case '\\':
// Escape character encountered
i++;
c.begin = stateID++;
c.end = stateID++;
stateStack[++top] = c;
NFA nfa10;
nfa10.from = c.begin;
nfa10.to = c.end;
nfa10.c = cfg[i];
nfa[point++] = nfa10;
break;
default:
// Encountered operand
c.begin = stateID++;
c.end = stateID++;
stateStack[++top] = c;
NFA nfa9;
nfa9.from = c.begin;
nfa9.to = c.end;
nfa9.c = cfg[i];
nfa[point++] = nfa9;
break;
}
}
// Output NFA
// for(int i=0;i<point;i++){
// cout<<nfa[i].from<<" "<<nfa[i].c<<" "<<nfa[i].to<<endl;
// }
}
4NFA turn DFA
Core code
set<int> empty_transfer(int id){
set<int> set1;
set<int> set2;
set1.insert(id);
// Traverse NFA, Add the arrived state set set1 and set2
for(int i=0;i<point;i++){
if(nfa[i].from==id && nfa[i].c == '#'){
set1.insert(nfa[i].to);
set2.insert(nfa[i].to);
}
}
// Traverse the above set2, Continue to seek empty transfer , until set2 It's empty , Got set1 Is the empty transfer of state
while(!set2.empty()){
int temp = *set2.begin();
set2.erase(set2.begin());
for(int i=0;i<point;i++){
if(nfa[i].from==temp && nfa[i].c == '#'){
set2.insert(nfa[i].to);
set1.insert(nfa[i].to);
}
}
}
return set1;
}
set<int> empty_move(int id,char c){
// First seek move(c)
set<int> set1;
for(int i=0;i<point;i++){
if(nfa[i].from==id && nfa[i].c == c){
set1.insert(nfa[i].to);
}
}
// obtain move Then find the empty transfer
set<int> set2;
set2.insert(set1.begin(),set1.end());
set<int>::iterator it1;
for (it1 = set1.begin(); it1 != set1.end(); it1++){
set<int> set3 = empty_transfer(*it1);
set2.insert(set3.begin(),set3.end());
}
return set2;
}
void NFA_DFA(){
DFA* dfa = new DFA();
// Get the start state and end state
State ss = stateStack[top];
int begin = ss.begin;
int end = ss.end;
// Empty transition of initial state
set<int> initial = empty_transfer(begin);
// State set
set< set<int> > states;
set< set<int> > states_temp;
states.insert(initial);
// rename DFA
dfa->dfa_id[initial] = ++dfa->id;
// Traverse the state set
while(!states.empty()){
set<int> temp = *states.begin();
states.erase(states.begin());
set<int>::iterator it1;
set<char>::iterator it2;
// Traverse the operand to find closeuure(move)
for (it2 = operands.begin(); it2 != operands.end(); it2++){
set<int> new_set;
for (it1 = temp.begin(); it1 != temp.end(); it1++){
set<int> set_temp = empty_move(*it1,*it2);
new_set.insert(set_temp.begin(),set_temp.end());
}
if(states_temp.count(new_set) == 0 && !new_set.empty()){
states.insert(new_set);
states_temp.insert(new_set);
dfa->dfa_id[new_set] = ++dfa->id;
}
//cout<<dfa->dfa_id[temp]<<" "<<*it2<<" "<<dfa->dfa_id[new_set]<<endl;
dfa->tran[dfa->dfa_id[temp]][*it2-33] = dfa->dfa_id[new_set];
if(new_set.count(end)){
dfa->vn[dfa->dfa_id[new_set]] = true;
}else{
dfa->vn[dfa->dfa_id[new_set]] = false;
}
}
}
dfas[point_dfa++] = dfa;
}
5 To minimize the DFA
Core code
void minimize_DFA(){
set<set<int> > sets;
DFA* dfa = dfas[point_dfa-1];
// initialization : Distinguish between initial state and final state
int len = dfa->id;
set<int> set1,set2;
for(int i=1;i<=len;i++){
if(dfa->vn[i]){
set1.insert(i);
}else{
set2.insert(i);
}
}
sets.insert(set1);
sets.insert(set2);
set<set<int> > begin_set;
do{
begin_set = sets;
set<set<int> >::iterator it1;
set<int> temp;
// Traverse the state set
for (it1 = begin_set.begin(); it1 != begin_set.end(); it1++){
temp= *it1;
set<char>::iterator it2;
// Traversal operand
for(it2 = operands.begin();it2 != operands.end();it2++){
map<set<int>,set<int> > map_set;
set<int>::iterator it3;
for(it3 = temp.begin();it3 != temp.end();it3++){
int d = dfa->tran[*it3][*it2-33];
// Determine which set it belongs to
//cout<<*it3<<" "<<*it2<<" "<<d<<endl;
set<set<int> >::iterator it4;
int flag = 0;
set<int>temp2,temp3;
for(it4 = begin_set.begin();it4 != begin_set.end();it4++){
temp2 = *it4;
if(temp2.count(d)>0){
temp3 = map_set[temp2];
flag = 1;
}
}
// If the state produces a new set of States , That is, it can be divided , Delete the original status , The state set is stored in map in .
if(flag == 1){
sets.erase(temp);
temp3.insert(*it3);
map_set[temp2] = temp3;
}
}
// take map The set in is inserted into the original set
map<set<int>,set<int> >::iterator it5;
for(it5 = map_set.begin();it5 != map_set.end();it5++){
sets.insert(it5->second);
}
}
}
}while(begin_set != sets);
// Minimize DFA rename .
DFA* min_dfa = new DFA();
set<set<int> >::iterator it6;
set<set<int> > sets_temp;
// Traversal minimization DFA The resulting set
for(it6 = sets.begin();it6 != sets.end();it6++){
set<int> t_set = *it6;
set<char>::iterator it7;
int f = *t_set.begin();
int id=0,to=0;
// newly build DFA state
if(sets_temp.count(t_set) == 0){
sets_temp.insert(t_set);
min_dfa->dfa_id[t_set] = ++min_dfa->id;
}
id = min_dfa->dfa_id[t_set];
// Compile operators , Get the state transition matrix and terminator array
for(it7 = operands.begin();it7 != operands.end();it7++){
to = 0;
int d = dfa->tran[f][*it7 -33];
set<set<int> >::iterator it8;
for(it8 = sets.begin();it8 != sets.end();it8++){
set<int> t_set2 = *it8;
if(t_set2.count(d)>0){
if(sets_temp.count(t_set2) == 0){
min_dfa->dfa_id[t_set2] = ++min_dfa->id;
sets_temp.insert(t_set2);
}
to = min_dfa->dfa_id[t_set2];
if(min_dfa->vn[d]){
min_dfa->vn[to] = true;
}
}
}
min_dfa->tran[id][*it7-33] = d;
}
}
dfas[point_dfa] = min_dfa;
}
6 Recognize words
Core code
int distinguish(string s){
for(int i = 0;i<point_dfa;i++){
int cur = 1,flag = 1;
DFA* dfa = dfas[i];
int to = 0;
// Always transfer , Know the end of the word
for(int j = 0;j<s.length();j++){
to = dfa->tran[cur][s[j]-33];
if(to == 0){
flag = 0;
}else{
cur = to;
}
}
// If words can complete all state transitions , And the state transferred to is the final state , take DFA Of id return .
if(flag ==1 && dfa->vn[to]){
return i;
}
}
return -1;
}
7 test result
边栏推荐
- PMP项目管理考试过关口诀-1
- Gee (IV): calculate the correlation between two variables (images) and draw a scatter diagram
- CXF call reports an error. Could not find conduct initiator for address:
- 14、 Two methods of database export and import
- Install Fedora under RedHat
- 给出一个数组,如 [7864, 284, 347, 7732, 8498],现在需要将数组中的数字拼接起来,返回「最大的可能拼出的数字」
- 2021icpc Shanghai h.life is a game Kruskal reconstruction tree
- ArcGIS: two methods of attribute fusion of the same field of vector elements
- USB (十七)2022-04-15
- Solve the problem of duplicate request resource paths /o2o/shopadmin/o2o/shopadmin/getproductbyid
猜你喜欢
Spark 离线开发框架设计与实现
Ros2 topic (03): the difference between ros1 and ros2 [02]
Gee (IV): calculate the correlation between two variables (images) and draw a scatter diagram
Installing spss25
Wechat forum exchange applet system graduation design (3) background function
Puce à tension stabilisée LDO - schéma de bloc interne et paramètres de sélection du modèle
js 获取对象的key和value
MATLAB signal processing [Q & A essays · 2]
Inftnews | the wide application of NFT technology and its existing problems
Oracle database backup and recovery
随机推荐
Happy gathering time
LM12丨Rolling Heikin Ashi二重K线滤波器
648. 单词替换
Wechat forum exchange applet system graduation design completion (8) graduation design thesis template
ROS2专题(03):ROS1和ROS2的区别【02】
Why does the market need low code?
智慧社区和智慧城市之间有什么异同
Installing spss25
V-for traversal object
13、 System optimization
Network security - install CentOS
leetcode-520. Detect capital letters -js
欢聚时代一面
MySQL Index Optimization Practice II
Coreseek: the second step is index building and testing
Cloud native data warehouse analyticdb MySQL user manual
Opencv scalar passes in three parameters, which can only be displayed in black, white and gray. Solve the problem
1. Sum of two numbers
php 使用阿里云存储
[microservices SCG] gateway integration Sentinel