当前位置:网站首页>[compilation principle] lexical analysis design and Implementation
[compilation principle] lexical analysis design and Implementation
2022-07-07 23:24:00 【RemainsHll】
1 overall design
2 Infix expression to suffix expression
Core code
int priority(char ch)
{
if (ch == '*')
{
return 3;
}
if (ch == '&')
{
return 2;
}
if (ch == '|')
{
return 1;
}
if (ch == '(')
{
return 0;
}
}
void insert_behind_n(string& s, int n, char ch)
{
s += '#';
for (int i = s.size() - 1; i > n; i--)
s[i] = s[i - 1];
s[n] = ch;
}
void pre_process(string& s)
{
int i = 0, length = s.size()-1;
while (i < length)
{
int in_all_1 = 0, in_all_2 = 0;
// Pay attention to escaping
if(operators.count(s[i])==0 && s[i]!='\\'){
in_all_1 = 1;
}
if(operators.count(s[i+1])==0 ){
in_all_2 = 1;
}
if (in_all_1 || (s[i] == '*') || (s[i] == ')')){
if (in_all_2 || s[i + 1] == '(')
{
insert_behind_n(s, i + 1, '&');
length++;
}
}
i++;
}
}
string infix_Suffix(string s)
{
pre_process(s);
string str;
stack<char> oper;
for (unsigned int i = 0; i < s.size(); i++)
{
int in_all = 0;
if(s[i] == '\\'){
str += s[i];
i++;
str += s[i];
operands.insert(s[i]);
}
if(operators.count(s[i])==0){
in_all = 1;
operands.insert(s[i]);
}
if (in_all)
{
str += s[i];
}
else
{
if (s[i] == '(')
{
oper.push(s[i]);
}
else if (s[i] == ')')
{
char ch = oper.top();
while (ch != '(')
{
str += ch;
oper.pop();
ch = oper.top();
}
oper.pop();
}
else
{
if (!oper.empty())
{
char ch = oper.top();
while (priority(ch) >= priority(s[i]))
{
str += ch;
oper.pop();
if (oper.empty())
{
break;
}
else ch = oper.top();
}
oper.push(s[i]);
}
else
{
oper.push(s[i]);
}
}
}
}
while (!oper.empty())
{
char ch = oper.top();
oper.pop();
str += ch;
}
//cout<<str<<endl;
return str;
}
3 Suffix expression to NFA
Core code
//DFA State stack , Record the start state and end state
State stateStack[MAX];
int top;
// obtain ID
int stateID = 1;
void CFG_NFA(string cfg){
stateID = 1;
top = 0;
point = 0;
for (int i=0;i<cfg.length();i++){
State a;
State b;
State c;
switch (cfg[i])
{
case '&':
// Take two states from the state stack , And update the start and end status
a = stateStack[top--];
b = stateStack[top--];
c.begin = b.begin;
c.end = a.end;
stateStack[++top] = c;
// Create a new state transition ,a Status encountered # Transferred to the b, take a and b States are connected
NFA nfa0;
nfa0.from = b.end;
nfa0.to = a.begin;
nfa0.c = '#';
nfa[point++] = nfa0;
break;
case '|':
// The rules 2
c.begin = stateID++;
c.end = stateID++;
a = stateStack[top--];
b = stateStack[top--];
stateStack[++top] = c;
NFA nfa1;
nfa1.from = c.begin;
nfa1.to = a.begin;
nfa1.c = '#';
nfa[point++] = nfa1;
NFA nfa2;
nfa2.from = c.begin;
nfa2.to = b.begin;
nfa2.c = '#';
nfa[point++] = nfa2;
NFA nfa3;
nfa3.from = a.end;
nfa3.to = c.end;
nfa3.c = '#';
nfa[point++] = nfa3;
NFA nfa4;
nfa4.from = b.end;
nfa4.to = c.end;
nfa4.c = '#';
nfa[point++] = nfa4;
break;
case '*':
// The rules 3
c.begin = stateID++;
c.end = stateID++;
a = stateStack[top--];
stateStack[++top] = c;
NFA nfa5;
nfa5.from = c.begin;
nfa5.to = c.end;
nfa5.c = '#';
nfa[point++] = nfa5;
NFA nfa6;
nfa6.from = c.begin;
nfa6.to = a.begin;
nfa6.c = '#';
nfa[point++] = nfa6;
NFA nfa7;
nfa7.from = a.end;
nfa7.to = a.begin;
nfa7.c = '#';
nfa[point++] = nfa7;
NFA nfa8;
nfa8.from = a.end;
nfa8.to = c.end;
nfa8.c = '#';
nfa[point++] = nfa8;
break;
case '\\':
// Escape character encountered
i++;
c.begin = stateID++;
c.end = stateID++;
stateStack[++top] = c;
NFA nfa10;
nfa10.from = c.begin;
nfa10.to = c.end;
nfa10.c = cfg[i];
nfa[point++] = nfa10;
break;
default:
// Encountered operand
c.begin = stateID++;
c.end = stateID++;
stateStack[++top] = c;
NFA nfa9;
nfa9.from = c.begin;
nfa9.to = c.end;
nfa9.c = cfg[i];
nfa[point++] = nfa9;
break;
}
}
// Output NFA
// for(int i=0;i<point;i++){
// cout<<nfa[i].from<<" "<<nfa[i].c<<" "<<nfa[i].to<<endl;
// }
}
4NFA turn DFA
Core code
set<int> empty_transfer(int id){
set<int> set1;
set<int> set2;
set1.insert(id);
// Traverse NFA, Add the arrived state set set1 and set2
for(int i=0;i<point;i++){
if(nfa[i].from==id && nfa[i].c == '#'){
set1.insert(nfa[i].to);
set2.insert(nfa[i].to);
}
}
// Traverse the above set2, Continue to seek empty transfer , until set2 It's empty , Got set1 Is the empty transfer of state
while(!set2.empty()){
int temp = *set2.begin();
set2.erase(set2.begin());
for(int i=0;i<point;i++){
if(nfa[i].from==temp && nfa[i].c == '#'){
set2.insert(nfa[i].to);
set1.insert(nfa[i].to);
}
}
}
return set1;
}
set<int> empty_move(int id,char c){
// First seek move(c)
set<int> set1;
for(int i=0;i<point;i++){
if(nfa[i].from==id && nfa[i].c == c){
set1.insert(nfa[i].to);
}
}
// obtain move Then find the empty transfer
set<int> set2;
set2.insert(set1.begin(),set1.end());
set<int>::iterator it1;
for (it1 = set1.begin(); it1 != set1.end(); it1++){
set<int> set3 = empty_transfer(*it1);
set2.insert(set3.begin(),set3.end());
}
return set2;
}
void NFA_DFA(){
DFA* dfa = new DFA();
// Get the start state and end state
State ss = stateStack[top];
int begin = ss.begin;
int end = ss.end;
// Empty transition of initial state
set<int> initial = empty_transfer(begin);
// State set
set< set<int> > states;
set< set<int> > states_temp;
states.insert(initial);
// rename DFA
dfa->dfa_id[initial] = ++dfa->id;
// Traverse the state set
while(!states.empty()){
set<int> temp = *states.begin();
states.erase(states.begin());
set<int>::iterator it1;
set<char>::iterator it2;
// Traverse the operand to find closeuure(move)
for (it2 = operands.begin(); it2 != operands.end(); it2++){
set<int> new_set;
for (it1 = temp.begin(); it1 != temp.end(); it1++){
set<int> set_temp = empty_move(*it1,*it2);
new_set.insert(set_temp.begin(),set_temp.end());
}
if(states_temp.count(new_set) == 0 && !new_set.empty()){
states.insert(new_set);
states_temp.insert(new_set);
dfa->dfa_id[new_set] = ++dfa->id;
}
//cout<<dfa->dfa_id[temp]<<" "<<*it2<<" "<<dfa->dfa_id[new_set]<<endl;
dfa->tran[dfa->dfa_id[temp]][*it2-33] = dfa->dfa_id[new_set];
if(new_set.count(end)){
dfa->vn[dfa->dfa_id[new_set]] = true;
}else{
dfa->vn[dfa->dfa_id[new_set]] = false;
}
}
}
dfas[point_dfa++] = dfa;
}
5 To minimize the DFA
Core code
void minimize_DFA(){
set<set<int> > sets;
DFA* dfa = dfas[point_dfa-1];
// initialization : Distinguish between initial state and final state
int len = dfa->id;
set<int> set1,set2;
for(int i=1;i<=len;i++){
if(dfa->vn[i]){
set1.insert(i);
}else{
set2.insert(i);
}
}
sets.insert(set1);
sets.insert(set2);
set<set<int> > begin_set;
do{
begin_set = sets;
set<set<int> >::iterator it1;
set<int> temp;
// Traverse the state set
for (it1 = begin_set.begin(); it1 != begin_set.end(); it1++){
temp= *it1;
set<char>::iterator it2;
// Traversal operand
for(it2 = operands.begin();it2 != operands.end();it2++){
map<set<int>,set<int> > map_set;
set<int>::iterator it3;
for(it3 = temp.begin();it3 != temp.end();it3++){
int d = dfa->tran[*it3][*it2-33];
// Determine which set it belongs to
//cout<<*it3<<" "<<*it2<<" "<<d<<endl;
set<set<int> >::iterator it4;
int flag = 0;
set<int>temp2,temp3;
for(it4 = begin_set.begin();it4 != begin_set.end();it4++){
temp2 = *it4;
if(temp2.count(d)>0){
temp3 = map_set[temp2];
flag = 1;
}
}
// If the state produces a new set of States , That is, it can be divided , Delete the original status , The state set is stored in map in .
if(flag == 1){
sets.erase(temp);
temp3.insert(*it3);
map_set[temp2] = temp3;
}
}
// take map The set in is inserted into the original set
map<set<int>,set<int> >::iterator it5;
for(it5 = map_set.begin();it5 != map_set.end();it5++){
sets.insert(it5->second);
}
}
}
}while(begin_set != sets);
// Minimize DFA rename .
DFA* min_dfa = new DFA();
set<set<int> >::iterator it6;
set<set<int> > sets_temp;
// Traversal minimization DFA The resulting set
for(it6 = sets.begin();it6 != sets.end();it6++){
set<int> t_set = *it6;
set<char>::iterator it7;
int f = *t_set.begin();
int id=0,to=0;
// newly build DFA state
if(sets_temp.count(t_set) == 0){
sets_temp.insert(t_set);
min_dfa->dfa_id[t_set] = ++min_dfa->id;
}
id = min_dfa->dfa_id[t_set];
// Compile operators , Get the state transition matrix and terminator array
for(it7 = operands.begin();it7 != operands.end();it7++){
to = 0;
int d = dfa->tran[f][*it7 -33];
set<set<int> >::iterator it8;
for(it8 = sets.begin();it8 != sets.end();it8++){
set<int> t_set2 = *it8;
if(t_set2.count(d)>0){
if(sets_temp.count(t_set2) == 0){
min_dfa->dfa_id[t_set2] = ++min_dfa->id;
sets_temp.insert(t_set2);
}
to = min_dfa->dfa_id[t_set2];
if(min_dfa->vn[d]){
min_dfa->vn[to] = true;
}
}
}
min_dfa->tran[id][*it7-33] = d;
}
}
dfas[point_dfa] = min_dfa;
}
6 Recognize words
Core code
int distinguish(string s){
for(int i = 0;i<point_dfa;i++){
int cur = 1,flag = 1;
DFA* dfa = dfas[i];
int to = 0;
// Always transfer , Know the end of the word
for(int j = 0;j<s.length();j++){
to = dfa->tran[cur][s[j]-33];
if(to == 0){
flag = 0;
}else{
cur = to;
}
}
// If words can complete all state transitions , And the state transferred to is the final state , take DFA Of id return .
if(flag ==1 && dfa->vn[to]){
return i;
}
}
return -1;
}
7 test result
边栏推荐
- Installing spss25
- The 19th Zhejiang Provincial College Programming Contest VP record + supplementary questions
- Deep understanding of MySQL lock and transaction isolation level
- LDO voltage stabilizing chip - internal block diagram and selection parameters
- Count the top 10 films at the box office and save them in another file
- Wechat forum exchange applet system graduation design completion (6) opening defense ppt
- HDU 4747 Mex「建议收藏」
- USB (十八)2022-04-17
- 海内外技术人们“看”音视频技术的未来
- How to generate unique file names
猜你喜欢
ArcGIS: field assignment_ The attribute table field calculator assigns values to fields based on conditions
LeeCode -- 6. Z 字形变换
13、 System optimization
移动端异构运算技术 - GPU OpenCL 编程(基础篇)
Matlab-SEIR传染病模型预测
Vulnerability recurrence ----- 49. Apache airflow authentication bypass (cve-2020-17526)
【微服务|SCG】gateway整合sentinel
Wechat forum exchange applet system graduation design (3) background function
Unity3D学习笔记5——创建子Mesh
STL标准模板库(Standard Template Library)一周学习总结
随机推荐
违法行为分析1
2021icpc Shanghai h.life is a game Kruskal reconstruction tree
Archlinux install MySQL
Ros2 topic (03): the difference between ros1 and ros2 [01]
高级程序员必知必会,一文详解MySQL主从同步原理,推荐收藏
Entity层、DAO层、Service层、Controller层 先后顺序
Wechat forum exchange applet system graduation design (2) applet function
Opencv scalar passes in three parameters, which can only be displayed in black, white and gray. Solve the problem
ArcGIS: field assignment_ The attribute table field calculator assigns values to fields based on conditions
The 19th Zhejiang Provincial College Programming Contest 2022 f.easyfix chairman tree
系统设计概述
UE4_UE5结合罗技手柄(F710)使用记录
USB (十七)2022-04-15
PCB wiring rules of PCI Express interface
js 获取对象的key和value
windows设置redis开启自动启动
Unity3D学习笔记4——创建Mesh高级接口
STL标准模板库(Standard Template Library)一周学习总结
经纬度PLT文件格式说明
The 19th Zhejiang Provincial Collegiate Programming Contest 2022浙江省赛 F.EasyFix 主席树