<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>Efficient Mining of Frequent Sequence Generators</title>
<link rel="stylesheet" href="iw3c2.css" />
<style type="text/css"> .underlineStyle { text-decoration: underline; } </style>
</head>
<body>
<div class="meta">
<h1 class="title"> Efficient Mining of Frequent Sequence Generators </h1>
<div class="authors">
<div class="author"> <h2 class="author"> Chuancong Gao </h2> <h3 class="affiliation"> Tsinghua University<br /> Beijing, 100084, P.R.China </h3> <h2 class="email"> <a href="mailto:gaocc07@mails.tsinghua.edu.cn">gaocc07@mails.tsinghua.edu.cn</a> </h2> </div>
<div class="author"> <h2 class="author"> Jianyong Wang</h2> <h3 class="affiliation"> Tsinghua University<br /> Beijing, 100084, P.R.China</h3> <h2 class="email"> <a href="mailto:jianyong@tsinghua.edu.cn">jianyong@tsinghua.edu.cn</a></h2> </div>
<div class="author"> <h2 class="author"> Yukai He</h2> <h3 class="affiliation"> Tsinghua University<br /> Beijing, 100084, P.R.China</h3> <h2 class="email"> <a href="mailto:heyk05@mails.tsinghua.edu.cn">heyk05@mails.tsinghua.edu.cn</a></h2> </div>
<div class="author"> <h2 class="author"> Lizhu Zhou</h2> <h3 class="affiliation"> Tsinghua University<br /> Beijing, 100084, P.R.China</h3> <h2 class="email"> <a href="mailto:dcszlz@tsinghua.edu.cn">dcszlz@tsinghua.edu.cn</a></h2> </div>
</div>
<div class="copyright"> <p class="copyright"> Copyright is held by the World Wide Web Conference Committee (IW3C2). Distribution of these papers is limited to classroom use, and personal use by others.<br /> WWW 2008, April 21-25, 2008, Beijing, China.<br /> ACM 978-1-60558-085-2/08/04. </p> </div>
<div class="abstract"> <h1 class="abstract"> ABSTRACT</h1> <p class="abstract"> Sequential pattern mining has raised great interest in the data mining research field in recent years. However, to the best of our knowledge, no existing work studies the problem of frequent sequence generator mining. In this paper we present a novel algorithm, FEAT (abbr. <span class="underlineStyle">F</span>requent s<span class="underlineStyle">E</span>quence gener<span class="underlineStyle">AT</span>or miner), to perform this task. Experimental results show that FEAT is more efficient than traditional sequential pattern mining algorithms while generating a more concise result set, and is very effective for classifying Web product reviews. </p> </div>
<div class="categories"> <h2 class="categories"> Categories &amp; Subject Descriptors</h2> <p class="categories"> H.2.8 [Database Management]: Database applications - Data Mining</p> </div>
<div class="terms"> <h2 class="terms"> General Terms</h2> <p class="terms"> Algorithms, Performance</p> </div>
<div class="keywords"> <h2 class="keywords"> Keywords</h2> <p class="keywords"> Sequence Generators, Sequence, Web Mining</p> </div>
</div>
<h2> <a id="tth_sEc1">1</a> Introduction</h2>
<div class="par"> Sequential pattern mining has raised great interest in the data mining research field in recent years. Various mining methods have been proposed, including sequential pattern mining [<a href="#oldest">1</a>][<a href="#PrefixSpan">5</a>] and closed sequential pattern mining [<a href="#clospan">7</a>][<a href="#BIDE">6</a>].
Sequential pattern mining has also shown its utility for Web data analysis, such as mining Web log data [<a href="#weblog">2</a>] and identifying comparative sentences from Web forum postings and product reviews [<a href="#comparative">3</a>]. However, there is no existing work on mining frequent sequence generators, where a sequence generator is informally defined as one of the minimal subsequences in an equivalence class. Thus, generators have the same ability to describe an equivalence class as the other subsequences of the same equivalence class, and according to the MDL principle [<a href="#whyMDL">4</a>], generators are preferable to all sequential patterns in terms of Web page and product review classification. </div>
<div class="par"> In the rest of this paper, we first give a formal problem formulation and present our solution in Section <a href="#algorithm">2</a>, then report the performance study in Section <a href="#experiment">3</a>. We conclude the study in Section <a href="#conclusion">4</a>. </div>
<h2> <a id="tth_sEc2">2</a> Mining Sequential Generators</h2>
<h3> <a id="tth_sEc2.1">2.1</a> Problem Formulation</h3>
<div class="par"> An <b>input sequence database</b> SDB contains a set of input sequences, where an <b>input sequence</b> is an ordered list of items (each item can appear multiple times in a sequence) and can be denoted by S=e<sub>1</sub>e<sub>2</sub>... e<sub>n</sub>. Given a <b>prefix</b> of sequence S, S<sub>pre</sub>=e<sub>1</sub>e<sub>2</sub>... e<sub>i</sub>, we define the <b>projected sequence</b> of S<sub>pre</sub> w.r.t. S as e<sub>i+1</sub>e<sub>i+2</sub>... e<sub>n</sub>. The complete set of projected sequences of S<sub>pre</sub> w.r.t. each sequence in SDB is called the <b>projected database</b> of S<sub>pre</sub> w.r.t. SDB, denoted by SDB<sub>S<sub>pre</sub></sub>. Given a subsequence S<sub>p</sub>=e<sub>p<sub>1</sub></sub>e<sub>p<sub>2</sub></sub> ... e<sub>p<sub>m</sub></sub>, its <b>support</b> sup(S<sub>p</sub>) is defined as the number of sequences in SDB each of which contains S<sub>p</sub>, which equals |SDB<sub>S<sub>p</sub></sub>|. Given a user-specified minimum support threshold, min_sup, S<sub>p</sub> is said to be <b>frequent</b> if sup(S<sub>p</sub>) &ge; min_sup holds. S<sub>p</sub> is called a <b>sequence generator</b> iff there exists no S<sub>p</sub>' such that S<sub>p</sub>' &sub; S<sub>p</sub> (i.e., S<sub>p</sub>' is a proper subsequence of S<sub>p</sub>) and sup(S<sub>p</sub>)=sup(S<sub>p</sub>'). In addition, given a sequence S=e<sub>1</sub>e<sub>2</sub>... e<sub>n</sub> and an item e', we denote e<sub>1</sub>e<sub>2</sub>... e<sub>i-1</sub>e<sub>i+1</sub>... e<sub>n</sub> by S<sup>(i)</sup>, e<sub>i</sub>e<sub>i+1</sub>... e<sub>j</sub> by S<sub>(i, j)</sub>, and e<sub>1</sub>e<sub>2</sub>...e<sub>n</sub>e' by &lt; S, e' &gt; . </div>
<div class="par"> Given a minimum support threshold min_sup and an input sequence database SDB, the task of <i>frequent sequence generator mining</i> is to mine the complete set of sequence generators that are frequent in database SDB. </div>
<h3> <a id="tth_sEc2.2">2.2</a> Pruning Strategy</h3>
<div class="par"> A naïve approach to mining the set of frequent sequence generators is to first apply a sequential pattern mining algorithm to find the set of frequent subsequences, and then check whether each frequent subsequence is a generator. However, it is inefficient, as it cannot prune the unpromising parts of the search space. </div>
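<div class="par"> To make this baseline concrete, the following minimal Python sketch illustrates the naïve two-step approach just described. The helper names (is_subsequence, support, mine_frequent_subsequences, naive_generators) are ours for illustration only and are not part of any published implementation; sequences are assumed to be tuples of items and min_sup is assumed to be at least 1. </div>
<pre>
# Naive baseline sketch: enumerate all frequent subsequences by brute force,
# then keep only the generators.  Illustrative only; intentionally inefficient.

def is_subsequence(sub, seq):
    """True if `sub` occurs in `seq` as a (not necessarily contiguous) subsequence."""
    it = iter(seq)
    return all(item in it for item in sub)

def support(sub, sdb):
    """sup(sub): the number of input sequences in the database containing `sub`."""
    return sum(1 for seq in sdb if is_subsequence(sub, seq))

def mine_frequent_subsequences(sdb, min_sup):
    """Level-wise brute-force enumeration of all frequent subsequences."""
    items = sorted({e for seq in sdb for e in seq})
    frequent, frontier = [], [()]
    while frontier:
        next_frontier = []
        for prefix in frontier:
            for e in items:
                cand = prefix + (e,)
                if support(cand, sdb) >= min_sup:   # assumes min_sup >= 1
                    frequent.append(cand)
                    next_frontier.append(cand)
        frontier = next_frontier
    return frequent

def naive_generators(sdb, min_sup):
    """Keep a frequent subsequence only if no one-item-deleted subsequence has
    the same support; this check suffices because deleting items can never
    decrease support."""
    result = []
    for s in mine_frequent_subsequences(sdb, min_sup):
        sup_s = support(s, sdb)
        if all(support(s[:i] + s[i + 1:], sdb) != sup_s for i in range(len(s))):
            result.append(s)
    return result

# Example: with sdb = [('a','b','c'), ('a','c'), ('a','b','c')] and min_sup = 2,
# naive_generators returns ('b',) but not ('a','b'), since sup(ab) = sup(b) = 2.
</pre>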
<div class="par"> In this subsection we propose two novel pruning methods, <i>Forward Prune</i> and <i>Backward Prune</i>, which can be integrated with the pattern-growth enumeration framework [<a href="#PrefixSpan">5</a>] to speed up the mining process. We first introduce Theorems 1 and 2, which form the basis of the pruning methods; due to limited space we omit their proofs here. </div>
<div class="theorem"> <a id="equivalence1"></a><b>Theorem 1</b> <em>Given two sequences S<sub>p1</sub> and S<sub>p2</sub>, if S<sub>p1</sub> &sub; S<sub>p2</sub> (i.e., S<sub>p1</sub> is a proper subsequence of S<sub>p2</sub>) and SDB<sub>S<sub>p1</sub></sub> = SDB<sub>S<sub>p2</sub></sub>, then any extension to S<sub>p2</sub> cannot be a generator. <a href="#tthFtNtAAB" id="tthFrefAAB"><sup>1</sup></a></em> </div>
<div class="theorem"> <b>Theorem 2</b> <em><a id="equivalence2"></a>Given a subsequence S<sub>p</sub> = e<sub>1</sub>e<sub>2</sub>... e<sub>n</sub> and an item e', if SDB<sub>S<sub>p</sub></sub> = SDB<sub>S<sub>p</sub><sup>(i)</sup></sub> for some index i (1 &le; i &le; n), then we have that SDB<sub>&lt; S<sub>p</sub>, e' &gt;</sub> = SDB <sub>&lt; S<sub>p</sub><sup>(i)</sup>, e' &gt;</sub> .</em> </div>
<div class="lemma"> <b>Lemma 1</b> <em>(Forward Prune). Given a subsequence S<sub>p</sub>, let S<sub>p</sub><sup>*</sup> = &lt; S<sub>p</sub>, e' &gt; . If sup(S<sub>p</sub>) = sup(S<sub>p</sub><sup>*</sup>) and for every locally frequent item u of S<sub>p</sub><sup>*</sup> we have SDB <sub>&lt; S<sub>p</sub>, u &gt;</sub> = SDB <sub>&lt; S<sub>p</sub><sup>*</sup>, u &gt;</sub> , then S<sub>p</sub><sup>*</sup> can be safely pruned.</em> </div>
<div class="proof"> Easily derived from Theorem 1. </div>
<div class="lemma"> <b>Lemma 2</b> <em>(Backward Prune). Given S<sub>p</sub> = e<sub>1</sub> e<sub>2</sub> ... e<sub>n</sub>, if there exists an index i (i = 1, 2, ..., n-1) and a corresponding index j (j = i + 1, i + 2, ..., n) such that SDB<sub>(S<sub>p</sub>)<sub>(1, j)</sub></sub> = SDB<sub>((S<sub>p</sub>)<sub>(1, j)</sub>)<sup>(i)</sup></sub>, then S<sub>p</sub> can be safely pruned.</em> </div>
<div class="proof"> Easily derived from Theorem 2 and Theorem 1.</div>
<h3> <a id="tth_sEc2.3">2.3</a> Generator Checking Scheme</h3>
<div class="par"> The preceding pruning techniques can be used to prune the unpromising parts of the search space, but they cannot ensure that each mined frequent subsequence S=e<sub>1</sub> e<sub>2</sub> ... e<sub>n</sub> is a generator. We devise a generator checking scheme, shown in Theorem <a href="#checking">3</a>, to perform this task; the check can be done efficiently during the pruning process by testing whether there exists an index i (i=1,2,...,n) such that |SDB<sub>S</sub>| = |SDB<sub>S<sup>(i)</sup></sub>|, since sup(S) = |SDB<sub>S</sub>| holds.</div>
<div class="theorem"> <b>Theorem 3</b> <em><a id="checking"></a>A sequence S = e<sub>1</sub> e<sub>2</sub> ... e<sub>n</sub> is a generator if and only if there exists no index i with 1 &le; i &le; n such that sup(S) = sup(S<sup>(i)</sup>).</em> </div>
<div class="proof"> Easily derived from the definition of a generator and the well-known downward closure property of sequence support.</div>
<h3> <a id="tth_sEc2.4">2.4</a> Algorithm</h3>
<div class="par"> By integrating the preceding pruning methods and generator checking scheme with a traditional pattern-growth framework [<a href="#PrefixSpan">5</a>], we can easily derive the FEAT algorithm as shown in Algorithm 1. </div>
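<div class="par"> Both the pruning conditions above and Algorithm 1 operate on projected databases. The following minimal Python sketch (with hypothetical names; it is an illustration under our own assumptions, not the authors' implementation) shows how the projected database of a prefix can be built from leftmost occurrences, how the equality tests of Theorems 1-2 and Lemmas 1-2 can be expressed, and how the generator check of Theorem <a href="#checking">3</a> reduces to comparing |SDB<sub>S</sub>| with |SDB<sub>S<sup>(i)</sup></sub>|. </div>
<pre>
# Sketch of the primitives behind the pruning lemmas and Theorem 3.
# Sequences are represented as tuples of items; names are illustrative only.

def projected_db(prefix, sdb):
    """SDB_prefix: for every input sequence containing `prefix`, keep the suffix
    that follows the leftmost occurrence of `prefix`, tagged with the sequence id
    so that projected databases can be compared for equality."""
    result = []
    for sid, seq in enumerate(sdb):
        pos = 0
        for item in prefix:
            try:
                pos = seq.index(item, pos) + 1   # earliest match of each item
            except ValueError:
                pos = None                       # prefix does not occur in seq
                break
        if pos is not None:
            result.append((sid, tuple(seq[pos:])))
    return result

def same_projection(p1, p2, sdb):
    """Equality test SDB_p1 = SDB_p2, as used in Theorems 1-2 and Lemmas 1-2."""
    return projected_db(p1, sdb) == projected_db(p2, sdb)

def is_generator(seq, sdb):
    """Theorem 3: S is a generator iff no one-item deletion S^(i) satisfies
    |SDB_S| = |SDB_{S^(i)}|, since sup(S) = |SDB_S|."""
    sup_s = len(projected_db(seq, sdb))
    return all(len(projected_db(seq[:i] + seq[i + 1:], sdb)) != sup_s
               for i in range(len(seq)))
</pre>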
<div class="par"> Given a prefix sequence S<sub>P</sub>, FEAT first finds all its locally frequent items, uses each locally frequent item to grow S<sub>P</sub>, and builds the projected database for the new prefix (lines 2,3,4). It adopts both the <i>forward</i> and <i>backward</i> pruning techniques to prune the unpromising parts of the search space (lines 8,11), and uses the generator checking scheme to decide whether the new prefix is a generator (lines 7,9,11,12). Finally, if the new prefix cannot be pruned, FEAT recursively calls itself with the new prefix as its input (lines 14,15).</div>
<div class="image"> <a id="algorithm"></a> <img src="feat/algorithm.bmp" alt="FEAT Algorithm" /> </div>
<h2> <a id="tth_sEc3">3</a> Performance Evaluation</h2>
<div class="par"> We conducted an extensive performance study to evaluate the FEAT algorithm on a computer with an Intel Core 2 Duo E6550 CPU and 2GB of memory. Due to limited space, we only report the results for some real datasets. The first dataset, <i>Gazelle</i>, is a Web click-stream dataset containing 29,369 sequences of Web page views. The second dataset, <i>ProgramTrace</i>, is a program trace dataset. The third dataset, <i>Office07Review</i>, contains 320 consumer reviews for Office 2007 collected from Amazon.com, of which 240 and 80 reviews are labeled as positive and negative, respectively.</div>
<div class="figure"> <a id="fig:cmp"></a> <div class="image"> <img src="feat/gazelle_time.bmp" alt="Gazelle Figure" /> <p class="caption"> a) Gazelle</p> </div> <div class="image"> <img src="feat/programtrace_time.bmp" alt="ProgramTrace Figure" /> <p class="caption"> b) ProgramTrace</p> </div> <div class="imageText"> Figure 1: Runtime Efficiency Comparison </div> </div>
<div class="par"> Figure <a href="#fig:cmp">1</a> shows the runtime efficiency comparison between FEAT and PrefixSpan, a state-of-the-art algorithm for mining all sequential patterns. Figure <a href="#fig:cmp">1</a> a) demonstrates that FEAT is slightly slower than PrefixSpan when the minimum support threshold is high for the sparse dataset <i>Gazelle</i>; however, with a minimum support threshold of less than 0.026%, FEAT is significantly faster than PrefixSpan. This also validates that our pruning techniques are very effective: without pruning, FEAT would need to generate the same set of sequential patterns as PrefixSpan and perform generator checking to remove the non-generators, and thus would be no faster than PrefixSpan. Figure <a href="#fig:cmp">1</a> b) shows that for the dense dataset <i>ProgramTrace</i>, FEAT is significantly faster than PrefixSpan at any minimum support. For example, PrefixSpan took nearly 200,000 seconds to finish even at a minimum support of 100%, while FEAT took less than 0.02 seconds. </div>
<div class="par"> We used generators and sequential patterns as features to build SVM and Naïve Bayesian classifiers. The results on the <i>Office07Review</i> dataset show that the generator-based and sequential pattern-based models achieve almost the same accuracy. For example, with a minimum support of 2% and a minimum confidence of 75%, both the generator-based and the sequential pattern-based Naïve Bayesian classifiers achieve the same best accuracy of 80.6%. As mining generators is more efficient, the generator-based approach has an edge over the sequential pattern-based approach. </div>
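<div class="par"> As an illustration of how mined generators can serve as classification features, the following minimal Python sketch (our own assumed setup, not the authors' pipeline; the scikit-learn Bernoulli Naïve Bayes model and all variable names are ours, and the support and confidence thresholds used in the paper are not modeled) turns each review, represented as a token sequence, into a binary feature vector with one dimension per generator. </div>
<pre>
# Sketch: generators as binary features for review classification (illustrative).
# Assumes `generators` is a list of item tuples and each review is a token sequence.

import numpy as np
from sklearn.naive_bayes import BernoulliNB

def contains(sub, seq):
    """True if `sub` occurs in `seq` as a (not necessarily contiguous) subsequence."""
    it = iter(seq)
    return all(item in it for item in sub)

def to_features(reviews, generators):
    """One binary column per generator: 1 if the review contains it, else 0."""
    return np.array([[1 if contains(g, review) else 0 for g in generators]
                     for review in reviews])

# Hypothetical usage with pre-split labeled data:
#   X_train = to_features(train_reviews, generators)
#   model = BernoulliNB().fit(X_train, train_labels)
#   predictions = model.predict(to_features(test_reviews, generators))
</pre>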
</div> <h2> <a id="tth_sEc4">4</a><a id="conclusion"></a> Conclusions</h2> <div class="par"> In this paper we study the problem of mining sequence generators, which has not been explored previously to our best knowledge. We proposed two novel pruning methods and an efficient generator checking scheme, and devised a frequent generator mining algorithm, FEAT. An extensive performance study shows that FEAT is more efficient than the state-of-the-art sequential pattern mining algorithm, PrefixSpan, and is very effective for classifying Web product reviews. In future we will further explore its applications in Web page classification and click stream data analysis. </div> <h2> <a id="tth_sEc5">5</a><a id="ack"></a> Acknowledgements</h2> <div class="par"> This work was partly supported by 973 Program under Grant No. 2006CB303103, and Program for New Century Excellent Talents in University under Grant No. NCET-07-0491, State Education Ministry of China. </div> <div class="p"> <!----> </div> <div class="references"> <h1> REFERENCES</h1> <p> <a href="#CITEoldest" id="oldest"></a>[1] R. Agrawal, R. Srikant. Mining Sequential Patterns. ICDE'95.</p> <p> <a href="#CITEweblog" id="weblog"></a>[2] J. Chen, T. Cook. Mining Contiguous Sequential Patterns from Web Logs. WWW'07 (Posters track).</p> <p> <a href="#CITEcomparative" id="comparative"></a>[3] N. Jindal, B. Liu. Identifying Comparative Sentences in Text Documents. SIGIR'06.</p> <p> <a href="#CITEwhyMDL" id="whyMDL"></a>[4] J. Li, et al. Minimum description length principle: Generators are preferable to closed patterns. AAAI'06.</p> <p> <a href="#CITEPrefixSpan" id="PrefixSpan"></a>[5] J. Pei, J. Han, et al. Prefixspan: mining sequential patterns efficciently by prefix-projected pattern growth. ICDE'01.</p> <p> <a href="#CITEBIDE" id="BIDE"></a>[6] J. Wang,J. Han. BIDE:efficient mining of frequent closed sequences. ICDE'04.</p> <p> <a href="#CITEclospan" id="clospan"></a>[7] X. Yan, J. Han, R. Afshar. CloSpan: Mining closed sequential patterns in large datasets. SDM'03.</p> </div> <hr /> <h3> Footnotes:</h3> <div class="par"> <a id="tthFtNtAAB"></a><a href="#tthFrefAAB"><sup>1</sup></a>Note that a similar checking has been adopted in a closed sequential pattern mining algorithm, CloSpan [<a href="#clospan">7</a>]. Here we adapted the technique to the setting of sequence generator mining. </div> </body> </html>