/********************************************************************************************************/ /* Backward Nondeterministic Dawg Matching algorithm, a.k.a. BNDM, is a fast string matching algorithm. */ /* To understand BNDM on q-gram (BNDMq), you may first need to understand BNDM. A good source to learn */ /* BNDM is here: */ /* www-igm.univ-mlv.fr/~lecroq/string/bndm.html */ /* To understand BNDMq, read this paper published in SIAM ALENEX'09: */ /* "Tuning BNDM with q-Grams", by Branislav Durian etc. */ /* The following is my implement of BNDM on 1-gram (BNDM1), which allows the matching process to fastly */ /* skip the sub-strings that are unlikely to be the searching pattern. */ /* The asymtotical bound for both BNDM and BNDMq is O(M*N), where M is the length of the pattern and N */ /* is the length of the searching string. In pratical, BNDM and BNDMq can be as fast as O(N/M). */ /* */ /* */ /* -Wangchao Le */ /* Jan. 14, 2009 */ /********************************************************************************************************/ inline char * BNDM1(char * str, int psize, int *B) /*str points to the searching space, psize is the length of pattern, B is the bit mask of pattern*/ { int i,j,d,first,c,m1; m1=psize-1; i=m1; c=(1<<(psize-1)); while(*(str+i)) { if((d=(*(B+*(str+i))))){ j=i; first=i-m1; do{ j--; if(d>=c) { if(j>=first) i=j; else return (str+j+1); } d=(d<<1)&(*(B+*(str+j))); }while(d); } i+=m1+1; } return NULL; } /******************B is the bit mask for the pattern******************************************************/ /******************A simple way to generate B for a character-type pattern is like this*******************/ #define ASIZE 128 /* assume the range is from 0 to 127*/ int B[ASIZE], s=1; memset(B,0,ASIZE*sizeof(int));/*initialize elements of B[] to be 0*/ void initialize_mask(char * pattern, int psize) /*psize is string length of pattern, pattern is a pointer to a pattern string*/ { int i; for(i=psize-1;i>=0;i--) { *(B+*(pattern+i))|=s; s<<=1; } }