当前位置:网站首页>Template: p6114 [template] Lyndon Decomposition & runs (string)

Template: p6114 [template] Lyndon Decomposition & runs (string)

2022-06-21 17:20:00 wind__ whisper

You can't even run .

( flee

Preface

SAM:runs? Then I run 了 .

Than SAM look A more sophisticated algorithm .
The theoretical proof is complicated , But the board is easy to write .
This article will skip a lot of proofs .

Lyndon decompose

Definition
If a string itself is smaller than all its true suffixes , We call such a string Lyndon strand .
If a string is divided w 1 w 2 . . . w k w_1w_2...w_k w1w2...wk Meet all w All are Lyndon strand , And meet the dictionary order w 1 ≥ w 2 ≥ . . . ≥ w k w_1\ge w_2\ge ...\ge w_k w1w2...wk, Is a string Lyndon decompose .

Can prove that , Of any string Lyndon Decomposition is the only .

solve

How to ask ?
Assuming that i position .
At this time, the undetermined decomposition must be as follows w w w . . . w ′ www...w' www...w, w ′ w' w yes w w w A prefix of .
If the corresponding position of the new character and the circular section is the same , Don't deal with it .
If the new character is larger , Merge into one piece .
If the new character is smaller , Split the previous cyclic sections , And then back to w ′ w' w, To continue processing .
Time complexity O ( n ) O(n) O(n).

int i=1,j=2,l=1,res(0);
for(;i<=n;j++){
    
  if(s[j]>s[j-l]) l=j-i+1;
  else if(s[j]<s[j-l]){
    
    while(i+l<=j){
    
		res^=(i+l-1);
		i+=l;		
    }
    j=i;l=1;
  }  
}

Runs

definition:
If a string of a string can be written as p p p . . . p ′ ppp...p' ppp...p In the form of , And it is a conditional extremely long substring , It is called a runs.( cycle p At least twice )
For each of these runs, Its length is just p Of lyndon The string is called its lyndon root .

Every runs There is and only one fundamentally different lyndon root .

Lemma: set up l t i lt_i lti by max ⁡ j , s ( i , j ) by l y n d o n strand \max j,s(i,j) by lyndon strand maxj,s(i,j) by lyndon strand , So for a lyndon root (i,j), stay < 0 , < 1 <_0,<_1 <0,<1 Two opposite comparator definitions , At least one condition is satisfied l t i = j lt_i=j lti=j.

So we can find l t lt lt Array , And then through ( i , l t i ) (i,lt_i) (i,lti) Reverses its corresponding runs( By seeking lcp and lcs It's easy to find out ).

So how to find out l t lt lt Well ?
lyndon Decomposition also has a monotone stack solution , When the top stack dictionary order is smaller than the current element, the top stack block and the current block are merged continuously . It is not difficult to find that the right endpoint of the block obtained at this time corresponds to l t i lt_i lti.

#include<bits/stdc++.h>
using namespace std;
#define ll long long
#define ull unsigned ll
#define debug(...) fprintf(stderr,__VA_ARGS__)
#define ok debug("OK\n")

inline ll read() {
    
  ll x(0),f(1);char c=getchar();
  while(!isdigit(c)) {
    if(c=='-') f=-1;c=getchar();}
  while(isdigit(c)) {
    x=(x<<1)+(x<<3)+c-'0';c=getchar();}
  return x*f;
}
const int N=1e6+100;
const int mod=1e9+7;
const int inf=1e9+100;
const double eps=1e-9;

bool mem1;

bool Flag=0;

inline ll ksm(ll x,ll k,int mod){
    
  ll res(1);
  x%=mod;
  while(k){
    
    if(k&1) res=res*x%mod;
    x=x*x%mod;
    k>>=1;
  }
  return res;
}

int n,m;
char s[N];
const int bas=31;
ull h[N],mi[N];
void init(){
    
  mi[0]=1;
  for(int i=1;i<=n;i++){
    
    mi[i]=mi[i-1]*bas;
    h[i]=h[i-1]*bas+s[i]-'a'+1;
  }
  return;
}
inline ll Hash(int l,int r){
    
  return h[r]-mi[r-l+1]*h[l-1];
}
inline int lcp(int i,int j){
    
  int st=0,ed=n-max(i,j)+1;
  while(st<ed){
    
    int mid=(st+ed+1)>>1;
    if(Hash(i,i+mid-1)==Hash(j,j+mid-1)) st=mid;
    else ed=mid-1;
  }
  return st;
}
inline int lcs(int i,int j){
    
  int st=0,ed=min(i,j);
  while(st<ed){
    
    int mid=(st+ed+1)>>1;
    if(Hash(i-mid+1,i)==Hash(j-mid+1,j)) st=mid;
    else ed=mid-1;
  }
  return st;
}
map<int,int>mp[N];
struct run{
    
  int l,r,p;
  bool operator < (const run &oth)const{
    
    if(l!=oth.l) return l<oth.l;
    else return r<oth.r;
  }
};
vector<run> ans;
inline void ins(int i,int j){
    
  if(j==n) return;
  j++;
  int p=(j-i),pre=i-lcs(i,j)+1,suf=j+lcp(i,j)-1;
  //printf("ins: (%d %d) (%d %d)\n",i,j,pre,suf);
  if(mp[pre][suf]) return;
  if(suf-pre+1>=2*p){
    
    mp[pre][suf]=1;
    ans.emplace_back((run){
    pre,suf,p});
  }
  return;
}
int cmp(int i,int j,int x,int y){
    //i<j?
  int len=lcp(i,j);
  //printf("i=%d j=%d x=%d y=%d len=%d\n",i,j,x,y,len);
  if(len>=min(x,y)){
    
    if(x<y) return 1;
    else if(x==y) return 0;
    else return -1;
  }
  else{
    
    if(s[i+len]<s[j+len]) return 1;
    else if(s[i+len]==s[j+len]) return 0;
    else return -1;
  }
}
int zhan[N],top;//zhan:right pos
void work(){
    
  top=0;
  for(int i=n;i>=1;i--){
    
    int pos=i;
    while(top&&cmp(i,pos+1,pos-i+1,zhan[top]-pos)==1) pos=zhan[top--];
    //if(top) printf(" f=%d\n",cmp(i,pos+1,pos-i+1,zhan[top]-pos));
    zhan[++top]=pos;
    //printf("i=%d pos=%d\n",i,pos);
    ins(i,pos);
  }  
}

bool mem2;
signed main(){
    
#ifndef ONLINE_JUDGE
  freopen("a.in","r",stdin);
  freopen("a.out","w",stdout);
#endif
  scanf("%s",s+1);
  n=strlen(s+1);
  init();
  work();
  for(int i=1;i<=n;i++) s[i]='a'-s[i]+1;
  work();
  sort(ans.begin(),ans.end());
  printf("%d\n",(int)ans.size());
  for(run o:ans) printf("%d %d %d\n",o.l,o.r,o.p);
  return 0;
}
/* */
原网站

版权声明
本文为[wind__ whisper]所创,转载请带上原文链接,感谢
https://yzsam.com/2022/172/202206211335160831.html