;ņ
r Cc           @   sÖ  d  k  l Z l Z l Z l Z d k l Z d k l Z d k	 l
 Z
 d k l Z d k Z d k Z d   Z d f  d     YZ d	 f  d
     YZ d f  d     YZ d f  d     YZ d   Z d f  d     YZ d f  d     YZ d f  d     YZ d f  d     YZ d f  d     YZ d f  d     YZ d   Z d   Z d   Z d   Z d    Z d! d"  Z  d#   Z! d$   Z" d%   Z# d&   Z$ d'   Z% d(   Z& d) d*  Z' e( d+  Z) e* d, j o	 d- GHn d S(.   (   s   Tokens   FrozenTokens   CharSpanLocations   SubtokenContextPointer(   s   Set(   s   ChunkedTaggedTokenReader(   s   Tree(   s   chktypeNc         C   sq   d   } t |  t  oL t |  i g   } x; |  D]+ } | |  o | i t |   q2 q2 Wn |  S| Sd  S(   Nc         C   s-   t  |  t  o t Sn |  d i d  Sd  S(   Ns   TEXTs   yy(   s
   isinstances   nodes   Trees   Falses
   startswith(   s   node(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   isPunct   s     (	   s   isPuncts
   isinstances   thetrees   Trees   nodes   trees   subs   appends   removePunct(   s   thetrees   isPuncts   subs   tree(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   removePunct   s    	 s
   ChunkScorec           B   s   t  Z d  Z d   Z d   Z d   Z d   Z d   Z d d  Z d   Z	 d	   Z
 d
   Z d   Z d   Z d   Z d   Z d   Z RS(   s]
  
    A utility class for scoring chunk parsers.  C{ChunkScore} can
    evaluate a chunk parser's output, based on a number of statistics
    (precision, recall, f-measure, misssed chunks, incorrect chunks).
    It can also combine the scores from the parsing of multiple texts;
    this makes it signifigantly easier to evaluate a chunk parser that
    operates one sentence at a time.

    Texts are evaluated with the C{score} method.  The results of
    evaluation can be accessed via a number of accessor methods, such
    as C{precision} and C{f_measure}.  A typical use of the
    C{ChunkScore} class is::

        >>> chunkscore = ChunkScore()
        >>> for correct in correct_sentences:
        ...     guess = chunkparser.parse(correct.leaves())
        ...     chunkscore.score(correct, guess)
        >>> print 'F Measure:', chunkscore.f_measure()
        F Measure: 0.823

    @ivar kwargs: Keyword arguments:

        - max_tp_examples: The maximum number actual examples of true
          positives to record.  This affects the C{correct} member
          function: C{correct} will not return more than this number
          of true positive examples.  This does *not* affect any of
          the numerical metrics (precision, recall, or f-measure)

        - max_fp_examples: The maximum number actual examples of false
          positives to record.  This affects the C{incorrect} member
          function and the C{guessed} member function: C{incorrect}
          will not return more than this number of examples, and
          C{guessed} will not return more than this number of true
          positive examples.  This does *not* affect any of the
          numerical metrics (precision, recall, or f-measure)
        
        - max_fn_examples: The maximum number actual examples of false
          negatives to record.  This affects the C{missed} member
          function and the C{correct} member function: C{missed}
          will not return more than this number of examples, and
          C{correct} will not return more than this number of true
          negative examples.  This does *not* affect any of the
          numerical metrics (precision, recall, or f-measure)
        
    @type _tp: C{list} of C{Token}
    @ivar _tp: List of true positives
    @type _fp: C{list} of C{Token}
    @ivar _fp: List of false positives
    @type _fn: C{list} of C{Token}
    @ivar _fn: List of false negatives
    
    @type _tp_num: C{int}
    @ivar _tp_num: Number of true positives
    @type _fp_num: C{int}
    @ivar _fp_num: Number of false positives
    @type _fn_num: C{int}
    @ivar _fn_num: Number of false negatives.
    c         K   s   t    |  _ t    |  _ t    |  _ t    |  _ t    |  _ | i d d  |  _	 | i d d  |  _
 | i d d  |  _ d |  _ d |  _ d |  _ d  S(   Ns   max_tp_examplesid   s   max_fp_exampless   max_fn_examplesi    (   s   Sets   selfs   _corrects   _guesseds   _tps   _fps   _fns   kwargss   gets   _max_tps   _max_fps   _max_fns   _tp_nums   _fp_nums   _fn_num(   s   selfs   kwargs(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   __init__]   s    		c         C   s   t  | i t   Sd  S(   N(   s   tuples   ts   freezes   FrozenToken(   s   selfs   t(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   _childtuplej   s    c         C   s’   t  d | t  p t  t  d | t  p t  d k } t |  } t |  } |  i t	 g  i
 } | D]- } t | t  o | |  i |   qm qm ~  O_ |  i t	 g  i
 } | D]- } t | t  o | |  i |   qĄ qĄ ~  O_ d S(   s  
        Given a correctly chunked text, score another chunked text.
        Merge the results with all previous scorings.  Note that when
        the score() function is used repeatedly, each token I{must}
        have a unique location.  For sentence-at-a-time chunking, it
        is recommended that you use locations like C{@12w@3s} (the
        word at index 12 of the sentence at index 3).
        
        @type correct: chunk structure
        @param correct: The known-correct ("gold standard") chunked
            sentence.
        @type guessed: chunk structure
        @param guessed: The chunked sentence to be scored.
        i   i   N(   s   chktypes   corrects   Trees   AssertionErrors   guesseds   syss   removePuncts   selfs   _corrects   Sets   appends   _[1]s   ts
   isinstances   _childtuples   _guessed(   s   selfs   corrects   guesseds   _[1]s   syss   t(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   scoreo   s     	Sc         C   s©   |  i |  i @|  _ |  i |  i |  _ |  i |  i |  _ t |  i  |  _ t |  i  |  _ t |  i  |  _	 |  i |  i } | d j o d Sn t |  i  | Sd S(   s   
        @return: the overall precision for all texts that have been
            scored by this C{ChunkScore}.
        @rtype: C{float}
        i    N(   s   selfs   _guesseds   _corrects   _tps   _fns   _fps   lens   _tp_nums   _fp_nums   _fn_nums   divs   float(   s   selfs   div(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys	   precision   s      c         C   s©   |  i |  i @|  _ |  i |  i |  _ |  i |  i |  _ t |  i  |  _ t |  i  |  _ t |  i  |  _	 |  i |  i	 } | d j o d Sn t |  i  | Sd S(   s   
        @return: the overall recall for all texts that have been
            scored by this C{ChunkScore}.
        @rtype: C{float}
        i    N(   s   selfs   _guesseds   _corrects   _tps   _fns   _fps   lens   _tp_nums   _fp_nums   _fn_nums   divs   float(   s   selfs   div(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   recall   s      f0.5c         C   sÅ   |  i |  i @|  _ |  i |  i |  _ |  i |  i |  _ t |  i  |  _ t |  i  |  _ t |  i  |  _	 |  i
   } |  i   } | d j p
 | d j o d Sn d | | d | | Sd S(   sĶ  
        @return: the overall F measure for all texts that have been
            scored by this C{ChunkScore}.
        @rtype: C{float}
        
        @param alpha: the relative weighting of precision and recall.
            Larger alpha biases the score towards the precision value,
            while smaller alpha biases the score towards the recall
            value.  C{alpha} should have a value in the range [0,1].
        @type alpha: C{float}
        i    i   N(   s   selfs   _guesseds   _corrects   _tps   _fns   _fps   lens   _tp_nums   _fp_nums   _fn_nums	   precisions   ps   recalls   rs   alpha(   s   selfs   alphas   rs   p(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys	   f_measure­   s     c         C   s   |  i |  i @|  _ |  i |  i |  _ |  i |  i |  _ t |  i  |  _ t |  i  |  _ t |  i  |  _	 t
 |  i  Sd S(   sX  
        @rtype: C{Set} of C{Token}
        @return: the set of chunks which were included in the
            correct chunk structures, but not in the guessed chunk
            structures.  Each chunk is encoded as a single token,
            spanning the chunk.  This encoding makes it easier to
            examine the missed chunks.
        N(   s   selfs   _guesseds   _corrects   _tps   _fns   _fps   lens   _tp_nums   _fp_nums   _fn_nums   list(   s   self(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   missedÅ   s     c         C   s   |  i |  i @|  _ |  i |  i |  _ |  i |  i |  _ t |  i  |  _ t |  i  |  _ t |  i  |  _	 t
 |  i  Sd S(   s[  
        @rtype: C{Set} of C{Token}
        @return: the set of chunks which were included in the
            guessed chunk structures, but not in the correct chunk
            structures.  Each chunk is encoded as a single token,
            spanning the chunk.  This encoding makes it easier to
            examine the incorrect chunks.
        N(   s   selfs   _guesseds   _corrects   _tps   _fns   _fps   lens   _tp_nums   _fp_nums   _fn_nums   list(   s   self(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys	   incorrectÖ   s     c         C   s   t  |  i  Sd S(   s$  
        @rtype: C{Set} of C{Token}
        @return: the set of chunks which were included in the correct
            chunk structures.  Each chunk is encoded as a single token,
            spanning the chunk.  This encoding makes it easier to
            examine the correct chunks.
        N(   s   lists   selfs   _correct(   s   self(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   correctē   s     c         C   s   t  |  i  Sd S(   s$  
        @rtype: C{Set} of C{Token}
        @return: the set of chunks which were included in the guessed
            chunk structures.  Each chunk is encoded as a single token,
            spanning the chunk.  This encoding makes it easier to
            examine the guessed chunks.
        N(   s   lists   selfs   _guessed(   s   self(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   guessedń   s     c         C   s   |  i |  i @|  _ |  i |  i |  _ |  i |  i |  _ t |  i  |  _ t |  i  |  _ t |  i  |  _	 |  i |  i	 Sd  S(   N(
   s   selfs   _guesseds   _corrects   _tps   _fns   _fps   lens   _tp_nums   _fp_nums   _fn_num(   s   self(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   __len__ū   s    c         C   s   d t  |   d Sd S(   sf   
        @rtype: C{String}
        @return: a concise representation of this C{ChunkScoring}.
        s   <ChunkScoring of s    chunks>N(   s   lens   self(   s   self(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   __repr__  s     c         C   s­   |  i |  i @|  _ |  i |  i |  _ |  i |  i |  _ t |  i  |  _ t |  i  |  _ t |  i  |  _	 d d |  i
   d d |  i   d d |  i   d Sd S(   sJ  
        @rtype: C{String}
        @return: a verbose representation of this C{ChunkScoring}.
            This representation includes the precision, recall, and
            f-measure scores.  For other information about the score,
            use the accessor methods (e.g., C{missed()} and
            C{incorrect()}). 
        s   ChunkParser score:
s       Precision: %5.1f%%
id   s       Recall:    %5.1f%%
s       F-Measure: %5.1f%%
N(   s   selfs   _guesseds   _corrects   _tps   _fns   _fps   lens   _tp_nums   _fp_nums   _fn_nums	   precisions   recalls	   f_measure(   s   self(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   __str__  s     c         C   s=   g  i  } | D]$ } t | t  o | |  q q ~ Sd S(   sC   
        @return: The list of tokens contained in C{text}.
        N(   s   appends   _[1]s   texts   toks
   isinstances   AbstractTree(   s   selfs   texts   _[1]s   tok(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   _chunk_toks  s     (   s   __name__s
   __module__s   __doc__s   __init__s   _childtuples   scores	   precisions   recalls	   f_measures   misseds	   incorrects   corrects   guesseds   __len__s   __repr__s   __str__s   _chunk_toks(    (    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys
   ChunkScore!   s   ; 								
	
				s   CorpusReaderc           B   s&   t  Z d   Z e d  Z d   Z RS(   Nc         C   sJ   t  |  } d |  _ | i   |  _ | i   t d d d d  |  _	 d  S(   Ns   MyCorpusReaders
   chunk_nodes   NPs	   SUBTOKENSs   WORDS(
   s   opens   filenames   files   selfs   _names	   readliness   raw_datas   closes   ChunkedTaggedTokenReaders   reader(   s   selfs   filenames   file(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   __init__&  s
    	
c         C   s   t  d t |  i   Sd  S(   Ni   (   s   ranges   lens   selfs   raw_data(   s   selfs   group(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   items-  s    c         O   sF   d |  i | f } |  i | } |  i i | d t d | | | Sd  S(   Ns   %s/%ss   add_locss   source(   s   selfs   _names   items   sources   raw_datas   texts   readers
   read_tokens   Trues   reader_argss   reader_kwargs(   s   selfs   items   reader_argss   reader_kwargss   sources   text(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   read0  s    (   s   __name__s
   __module__s   __init__s   Nones   itemss   read(    (    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   CorpusReader%  s   	s
   CorpusDatac           B   s,   t  Z d d d  Z d   Z e d  Z RS(   Ni    i   c         C   sĖ   d GH| d j o t | i    } n | t | i    j o t | i    } n g  i } t | |  D] } | | i	 |   qn ~ |  _ d GH|  i i |  _ d t t |  i   d |  _ d  S(   Ns   Loading corpus datai’’’’s   Dones   Corpus Data (Sequence with s    items)(   s   lasts   lens   corpusReaders   itemss   starts   appends   _[1]s   ranges   items   reads   selfs   datas   __iter__s   strs   __str__(   s   selfs   corpusReaders   starts   lasts   items   _[1](    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   __init__7  s      <c         C   s^   g  } xM |  i D]B } x9 | d D]- } t | t  o | i | i    q! q! Wq W| Sd  S(   Ns   TREE(	   s   results   selfs   datas   items   nodes
   isinstances   Trees   appends   leaves(   s   selfs   nodes   items   result(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   allNPsA  s    
  c      	   C   sf   d  k  } g  i } |  i   D]> } | | i g  i } | D] } | | d  q: ~   q ~ Sd  S(   Ns   TAG(	   s   strings   appends   _[1]s   selfs   allNPss   nps   joins   _[2]s   item(   s   selfs   INCLUDE_TEXTs   strings   _[1]s   items   nps   _[2](    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   allNPs_as_stringsJ  s    	(   s   __name__s
   __module__s   __init__s   allNPss   Falses   allNPs_as_strings(    (    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys
   CorpusData6  s   
		s   NPsFormatterc           B   s,   t  Z d   Z d   Z d   Z d   Z RS(   Nc         C   sJ   g  } x9 | d D]- } t | t  o | i | i    q q W| Sd  S(   Ns   TREE(   s   results   items   nodes
   isinstances   Trees   appends   leaves(   s   selfs   items   nodes   result(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   getNPsO  s     c         C   s5   g  } x$ | D] } | i |  i |   q W| Sd  S(   N(   s   results
   corpusDatas   items   appends   selfs   getNPs(   s   selfs
   corpusDatas   items   result(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   allNPsV  s
     c         C   s   d  k  } d Sd  S(   Ns    (   s   string(   s   selfs   nps   string(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   as_tags_string\  s    	c         C   s2   g  i  } | D] } | |  i |   q ~ Sd  S(   N(   s   appends   _[1]s	   listOfNPss   nps   selfs   as_tags_string(   s   selfs	   listOfNPss   _[1]s   np(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   as_tags_stringsa  s    (   s   __name__s
   __module__s   getNPss   allNPss   as_tags_strings   as_tags_strings(    (    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   NPsFormatterN  s   			c         C   sM   h  } x< |  D]4 } | i |  o | | c d 7<q d | | <q W| Sd  S(   Ni   (   s   dicts   list_of_stringss   strs   has_key(   s   list_of_stringss   dicts   str(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys
   count_eachd  s     s   MistakesTrackerc           B   s#   t  Z d   Z d   Z d   Z RS(   Nc         C   s   g  |  _ d  S(   N(   s   selfs   mistakes(   s   self(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   __init__o  s    c         C   sB   t  |  t  |  j o% |  i t d | d |  g 7_ n d  S(   Ns   CORRECTs   GUESS(   s   removePuncts   corrects   guesss   selfs   mistakess   Token(   s   selfs   corrects   guess(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   trackq  s    c         C   s   |  i Sd  S(   N(   s   selfs   mistakes(   s   self(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   asListt  s    (   s   __name__s
   __module__s   __init__s   tracks   asList(    (    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   MistakesTrackern  s   		s   ChunkRulesTesterc           B   sG   t  Z d   Z d   Z d   Z d   Z d   Z e d   d  Z RS(   Nc         C   s4   | |  _  t |  _ t   |  _ g  |  _ t |  _ d  S(   N(   s   corpuss   selfs   Nones   matchers   MistakesTrackers   mistakess   historys   score(   s   selfs   corpus(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   __init__x  s
    			c         C   s   | |  _  | Sd  S(   N(   s   treess   self(   s   selfs   trees(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   setLearningData  s    	c         C   s   | |  _  | Sd  S(   N(   s   matchers   self(   s   selfs   matcher(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys
   setMatcher  s    	c         C   s   |  i Sd  S(   N(   s   selfs   matcher(   s   self(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys
   getMatcher  s    c         C   s   |  i Sd  S(   N(   s   selfs   score(   s   self(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   getScore  s    c         C   s
   |  } | S(   N(   s   x(   s   .0s   x(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   <lambda>  s    c         C   sõ   g  |  _ | t j o |  i } n t   } t   |  _ t |  } x |  i
 D] } t i i d  | d i   } | i |  } | |  } | i | d |  |  i i | d |  |  i i t d | d d |   qN W| |  _ |  i Sd  S(   Ns   *s   TREEs   CORRECTs   GUESS(   s   selfs   historys   matchers   Nones
   ChunkScores
   chunkscores   MistakesTrackers   mistakess	   MyChunkers   chunkparsers   corpuss   trees   syss   stdouts   writes   leavess   to_chunks   chunks   chunkeds   fixFuncs   scores   tracks   appends   Token(   s   selfs   matchers   fixFuncs   to_chunks   trees   chunkparsers   chunkeds
   chunkscore(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   evaluate  s"    	 	
 '	(	   s   __name__s
   __module__s   __init__s   setLearningDatas
   setMatchers
   getMatchers   getScores   Nones   evaluate(    (    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   ChunkRulesTesterw  s   					s   Modelc           B   s   t  Z d d d  Z RS(   Ni    i’’’’c         C   sR   t  |  |  _ t |  i | |  |  _ t |  i  |  _	 g  |  _
 t   |  _ d  S(   N(   s   CorpusReaders   corpusFileNames   selfs   readers
   CorpusDatas	   dataFirsts   dataLasts
   corpusDatas   ChunkRulesTesters   testers   ruless   NPsFormatters	   formatter(   s   selfs   corpusFileNames	   dataFirsts   dataLast(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   __init__”  s
    	(   s   __name__s
   __module__s   __init__(    (    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   Model   s   s   DecTreeLeafc           B   sJ   t  Z d d  Z d   Z d   Z d   Z d   Z d   Z d   Z RS(	   Ni    c         C   s(   | |  _  h  |  _ | |  _ t |  _ d  S(   N(   s   tags   selfs   nexts   depths   Falses   rule(   s   selfs   tags   depth(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   __init__Ŗ  s    			c         C   s!   t  | |  i d  |  i | <d  S(   Ni   (   s   DecTreeLeafs   tags   selfs   depths   next(   s   selfs   tag(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   addNext°  s    c         C   s   |  i i |  Sd  S(   N(   s   selfs   nexts   has_keys   tag(   s   selfs   tag(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   hasNext²  s    c         C   s   |  i | Sd  S(   N(   s   selfs   nexts   tag(   s   selfs   tag(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   getNext“  s    c         C   s   |  i Sd  S(   N(   s   selfs   rule(   s   self(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   isRule·  s    c         C   s   | |  _ d  S(   N(   s   bools   selfs   rule(   s   selfs   bool(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   setRule¹  s    c         C   s   |  i Sd  S(   N(   s   selfs   depth(   s   self(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   getDepth¼  s    (	   s   __name__s
   __module__s   __init__s   addNexts   hasNexts   getNexts   isRules   setRules   getDepth(    (    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   DecTreeLeaf©  s   					s   Matcherc           B   s8   t  Z d   Z d   Z d   Z d   Z d d  Z RS(   Nc         C   s/   t  d d  |  _ |  i i t  g  |  _ d  S(   Ns   ROOOOTi    (   s   DecTreeLeafs   selfs   decisionTrees   setRules   Trues   rules(   s   self(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   __init__Ą  s    c         C   s   |  i Sd  S(   N(   s   selfs   rules(   s   self(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   getRulesListÅ  s    c         C   s`   |  i i |  | i d d  } | i d d  } | i d d  } |  i | i d   d  S(   Ns   ><s   |s   <s    s   >(   s   selfs   ruless   appends   rules   replaces   addRules   split(   s   selfs   rule(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys
   addStrRuleĒ  s
    c         C   sj   |  i } xM | D]E } | i |  o | i |  } q | i |  | i |  } q W| i t	  d  S(   N(
   s   selfs   decisionTrees   currents   rules   tags   hasNexts   getNexts   addNexts   setRules   True(   s   selfs   rules   currents   tag(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   addRuleĻ  s    	 i    c         C   s~   |  i } | i   } x^ | | D]R } | i | d  o4 | i	 | d  } | i
   o | i   } qr q  | Sq  W| Sd  S(   Ns   TAG(   s   selfs   decisionTrees   currents   getDepths	   lastMatchs   tokenss   frms   toks   hasNexts   getNexts   isRule(   s   selfs   tokenss   frms   currents   toks	   lastMatch(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   matchŚ  s    	  (   s   __name__s
   __module__s   __init__s   getRulesLists
   addStrRules   addRules   match(    (    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   Matcheræ  s
   				s	   MyChunkerc           B   s   t  Z d   Z d   Z RS(   Nc         C   s   | |  _  d  S(   N(   s   matchers   self(   s   selfs   matcher(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   __init__é  s    c         C   sŖ   t  d g   } t |  } d } x~ | | j  op |  i i | |  } | d j o/ | i
 t  d | | | | !  | | 7} q$ | i
 | |  | d 7} q$ W| Sd  S(   Ns   Si    s   NPi   (   s   Trees   chunkeds   lens   leavess   ends   locs   selfs   matchers   matchs
   lenOfMatchs   append(   s   selfs   leavess   locs
   lenOfMatchs   ends   chunked(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   chunkģ  s     !(   s   __name__s
   __module__s   __init__s   chunk(    (    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys	   MyChunkerč  s   	c         C   sd   t  |  d  i   } d   } t   } x2 | D]* } | |  } | o | i	 |  q. q. W| Sd  S(   Ns   rc         C   sS   |  i d d  \ } } | i d  o d  Sn | d j o | i   Sn d  Sd  S(   Ns   : i   s   #s   chunk(   s   lines   splits   types   rules
   startswiths   strip(   s   lines   types   rule(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   processLine   s     (
   s   opens   filenames	   readliness   liness   processLines   Sets   ruless   lines   rs   add(   s   filenames   ruless   liness   rs   lines   processLine(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   readChunkRulesFromFilež  s    		 c         C   sV   g  i  } |  D] } | |  q ~ } t   } x | D] } | i |  q7 W| Sd  S(   N(   s   appends   _[1]s
   setOfRuless   rs   listOfRuless   Matchers   matchers
   addStrRule(   s
   setOfRuless   matchers   listOfRuless   _[1]s   r(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   makeMatcher  s    '	 c         C   sD   d   } t i g  i } |  D] } | | |   q ~ d  Sd  S(   Nc         C   sI   d } x8 |  D]0 } | | i   d d | i   d d 7} q W| Sd  S(   Ns    i    s   /i   s    (   s   ress   items   tokens   values(   s   items   tokens   res(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   printNicely  s
     .s   
(   s   printNicelys   strings   joins   appends   _[1]s   list_of_itemss   item(   s   list_of_itemss   _[1]s   printNicelys   item(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   getResultsAsTextLines  s    		c         C   s¶   d } | d t |  i    d 7} | d t |  i    d 7} | d t t |  i     d 7} | d t t |  i     d 7} | d t t |  i     d 7} | Sd  S(   Ns    s   Precision:	s   
s   Recall:	s   Missed:	s	   Correct:	s   Incorrect:	(	   s   ress   strs   scores	   precisions   recalls   lens   misseds   corrects	   incorrect(   s   scores   res(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   getScore!  s    $$$c         C   s<   x1 | i d  D]  } |  i | d | d  }  q W|  Sd  S(   Ns   
s    { s    } (   s   guesseds   splits   guesss   misseds   replace(   s   misseds   guesseds   guess(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   annotate+  s     s    c   	      C   s<  |  i } |  i i   } t | i    } t | i    } t | i    } t | i    } t	 d | d  } | i t |   | i   t	 d | d  i |  t	 d | d  i |  t	 d | d  i |  t	 d | d  i |  t	 d | d  i t |   t	 d | d  i t |   d  S(	   Ns   data/score.txts   ws   data/correct.txts   data/incorrect.txts   data/missed.txts   data/guessed.txts   data/mistakes.txts   data/mistakes-brief.txt(   s   testers   scores   mistakess   asLists   getResultsAsTextLiness   corrects	   incorrects   misseds   guesseds   opens   exts   ss   writes   getScores   closes   mistakesAsStringFulls   mistakesAsStringBrief(	   s   testers   exts	   incorrects   scores   ss   mistakess   guesseds   corrects   missed(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   writeResultsToFiles0  s    	
 c            s#    d     d } x|  D]ž }
 t |
 d  }	 | d   |	  d 7} t |
 d  } g  i } |	 D]$ } t
 | t  o | |  qe qe ~ } g  i } | D]1 } t
 | t  o
 | | j o | |  q  q  ~ } | d 7} x& | D] } | d   |  d	 7} qė W| d 7} q W| Sd
 S(   s(    all the bad nps, and the correct sents c            si   d } xX |  D]P } t | t  o | d   |  d 7} q | | d d | d d 7} q W| Sd  S(   Ns    s    [s   ] s   TEXTs   /s   TAGs    (   s   strs   trees   xs
   isinstances   Trees   makestr(   s   trees   strs   x(   s   makestr(    s'   C:\school\tnlp\final\v2\PruneChunker.pys   makestrF  s     "s    s   CORRECTs   + s   
s   GUESSs   - s    { s    } N(   s   makestrs   ress   mistakesLists   mistakes   removePuncts   correctTrees   guessedTrees   appends   _[1]s   trees
   isinstances   Trees
   correctNPss
   badGuessess   wrong(   s   mistakesLists   makestrs   ress   trees
   badGuessess   _[1]s   wrongs
   correctNPss   guessedTrees   correctTrees   mistake(    (   s   makestrs'   C:\school\tnlp\final\v2\PruneChunker.pys   mistakesAsStringBriefD  s      
 ;H
 c            si     d     d } xL |  D]D } d   | d  } d   | d  } | | d | d 7} q W| Sd S(	   s!    show both sents one after other c            si   d } xX |  D]P } t | t  o | d   |  d 7} q | | d d | d d 7} q W| Sd  S(   Ns    s    [s   ] s   TEXTs   /s   TAGs    (   s   strs   trees   xs
   isinstances   Trees   makestr(   s   trees   strs   x(   s   makestr(    s'   C:\school\tnlp\final\v2\PruneChunker.pys   makestr_  s     "s    s   + s   CORRECTs   - s   GUESSs   
N(   s   makestrs   ress   mistakesLists   mistakes   correct_strs   guessed_str(   s   mistakesLists   correct_strs   makestrs   ress   guessed_strs   mistake(    (   s   makestrs'   C:\school\tnlp\final\v2\PruneChunker.pys   mistakesAsStringFull]  s     
 c            sn  h   d   } d   } d        d   }	 d   } x*|  D]"} | | d  } | | d  }
 x« | D]£ } t } |
 } x |
 D] } | | |  o |	 | d  | i |  q | | |  o< | o$ |	 | d	  | i |  t } q| i |  q q W| }
 qm Wx | D] } |	 | d	  qWx- | D]% }  i |  o d
  | <q9q9Wq@ W Sd S(   s8    each rule is given a score a-la Cardice/Pierce article c         C   s\   g  i  } |  D] } | |  q ~ } g  i  } | D] } | |  q5 ~ } | | j Sd  S(   N(   s   appends   _[1]s   np1s   els   l1s   np2s   l2(   s   np1s   np2s   els   _[1]s   l2s   l1(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   equalst  s    ''c         C   sK   x@ |  i   D]2 } x) | i   D] } | | j o t Sq  q  Wq Wt Sd S(   s    returns true if nps overlap N(   s   np1s   leavess   toks   np2s   tok2s   Trues   False(   s   np1s   np2s   tok2s   tok(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   overlapz  s       c         C   s5   d } x$ |  D] } | d | d d 7} q W| Sd  S(   Ns    s   <s   TAGs   >(   s   rules   nps   tok(   s   nps   toks   rule(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   getRule  s
     c            s?     |   }  i |  o d  | <n  | c | 7<d  S(   Ni    (   s   getRules   nps   rules
   ruleScoress   has_keys   num(   s   nps   nums   rule(   s   getRules
   ruleScores(    s'   C:\school\tnlp\final\v2\PruneChunker.pys   addScore  s    c         C   sT   g  } xC |  D]; } t | t  o% | i d j o | i |  qH q q W| Sd  S(   Ns   NP(   s   npss   chunkeds   els
   isinstances   Trees   nodes   append(   s   chunkeds   els   nps(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   nps  s      s   CORRECTs   GUESSi   i’’’’i    N(   s
   ruleScoress   equalss   overlaps   getRules   addScores   npss   historys   events   corrects   guesseds   reals   Trues   firsts   gcopys   guesss   removes   Falses   rulesSets   rs   has_key(   s   historys   rulesSets   reals   getRules   guesss
   ruleScoress   rs   equalss   overlaps   addScores   guesseds   corrects   gcopys   firsts   events   nps(    (   s   getRules
   ruleScoress'   C:\school\tnlp\final\v2\PruneChunker.pys
   scoreRulesq  sF     					   

  c            s     d   } | Sd  S(   Nc            sg   t    }   i   } | i   d    x4 |   D]( } | i |  d G  | Gd G| GHq3 W| Sd  S(   Nc            s     |    | S(   N(   s
   ruleScoress   as   b(   s   as   b(   s
   ruleScores(    s'   C:\school\tnlp\final\v2\PruneChunker.pys   <lambda>¾  s    s   $s   :(	   s   Sets   ress
   ruleScoress   keyss   ruless   sorts   HOW_MUCHs   rs   add(   s
   ruleScoress   ruless   ress   r(   s   HOW_MUCH(   s
   ruleScoress'   C:\school\tnlp\final\v2\PruneChunker.pys	   pruneFunc»  s    	 (   s	   pruneFunc(   s   HOW_MUCHs	   pruneFunc(    (   s   HOW_MUCHs'   C:\school\tnlp\final\v2\PruneChunker.pys   makeIncrementalPruningFuncŗ  s    	c            s     d   } | Sd  S(   Nc            sJ   t    } x6 |  i   D]( } |  |   j  o | i |  q q W| Sd  S(   N(   s   Sets   ress
   ruleScoress   keyss   rs   MINs   add(   s
   ruleScoress   ress   r(   s   MIN(    s'   C:\school\tnlp\final\v2\PruneChunker.pys	   pruneFuncĒ  s    	 (   s	   pruneFunc(   s   MINs	   pruneFunc(    (   s   MINs'   C:\school\tnlp\final\v2\PruneChunker.pys   makeThresholdPruningFuncĘ  s    c   
      C   sÖ   d   } |  i i   } h  } xI | D]A }	 | |	  } | i |  o d | | <n | | c d 7<q% Wg  i	 } | i   D] } | o | d |  q~ q~ ~ } t i | d  } t d d  i |  | Sd  S(   Nc         C   s5   d } x$ |  D] } | d | d d 7} q W| Sd  S(   Ns    s   <s   TAGs   >(   s   strs   lsts   token(   s   lsts   tokens   str(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys
   tagsStringŅ  s
     i    i   s   chunk: s   
s   data/extractedRules.txts   w(   s
   tagsStrings   models
   corpusDatas   allNPss   allNpss
   npsCounters   nps   strs   has_keys   appends   _[1]s   keyss   rs   ruless   strings   joins   rulesStrs   opens   write(
   s   models   _[1]s   rulesStrs
   tagsStrings   ruless
   npsCounters   allNpss   rs   strs   np(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   extractRulesŃ  s    	 <i   c         C   sŻ  | d d d g j o
 d } n d } d } d } d } t } t |  } x| o| } | } | i
   } |  i i t |   d GH|  i i   } | i   } | i   } d G| GHd GHt |  i i |  } d	 GHt |  }	 | | |  8} d
 Gt |  GH| d j o | | j } nE | d j o | | j } n( | d j o |	 t |  d j } n | og | } t i g  i } | D] } | d |  q~ d  }
 t  d d  i! |
  t" |  i d  qM qM W| Sd S(   sć   
   prundeFunction: a function that takes ruleScores and returns a list of rules to remove
   whenToStop: 1 when precision drops
               2 when recall drops
               3 when rules are stable (no rule got pruned)
   i   i   i   i’’’’i    s   Evaluating:s   Prec:s   scoring ruless   pruning bad ruless   New size of ruleset:s   chunk: s   
s   data/ruleset.txt.prunes   ws   .pruneN(#   s
   whenToStops   oldPrecs	   oldRecalls   precs   recalls   Trues   conts   readChunkRulesFromFiles
   RULES_FILEs   rulesSets   copys   oldRuless   models   testers
   setMatchers   makeMatchers   evaluates   scores	   precisions
   scoreRuless   historys
   ruleScoress   lens   rulesCnts   pruneFunctions
   theRuleSets   strings   joins   appends   _[1]s   rs   ruless   opens   writes   writeResultsToFiles(   s   models
   RULES_FILEs   pruneFunctions
   whenToStops   conts
   ruleScoress   rulesSets   precs   scores   rulesCnts   ruless   oldPrecs
   theRuleSets	   oldRecalls   _[1]s   rs   recalls   oldRules(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   doGenRuleSetē  sL      
 	   7c         C   s}   d   } t |  } |  i i t |   d GH| t	 j o |  i i
 t |  } n |  i i
   } t |  i d  | Sd  S(   Nc            s     d       |   Sd S(   sD   after-chunking fixes - get a chunked text and return a fixed versionc            s   t  |  t  os t |  i g   } t |   d j o |  d d d j o d GH|  d Sq x) |  D] } | i   |   qb Wn |  S| Sd  S(   Ni   i    s   TEXTs   źéąs   REMOVED THE EICH!(	   s
   isinstances   thetrees   Trees   nodes   trees   lens   subs   appends
   removeEich(   s   thetrees   subs   tree(   s
   removeEich(    s'   C:\school\tnlp\final\v2\PruneChunker.pys
   removeEich  s    ( N(   s
   removeEichs   chunked(   s   chunkeds
   removeEich(    (   s
   removeEichs'   C:\school\tnlp\final\v2\PruneChunker.pys   afterFixFunc  s     s   Evaluating:s   .results(   s   afterFixFuncs   readChunkRulesFromFiles
   RULES_FILEs   rulesSets   models   testers
   setMatchers   makeMatchers   afterFixs   Falses   evaluates   Nones   scores   writeResultsToFiles(   s   models
   RULES_FILEs   afterFixs   scores   afterFixFuncs   rulesSet(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   doEval  s    	s   __main__s+   Don't run me. I'm not that kind of program.(+   s
   nltk.tokens   Tokens   FrozenTokens   CharSpanLocations   SubtokenContextPointers   setss   Sets   nltk.tokenreader.taggeds   ChunkedTaggedTokenReaders	   nltk.trees   Trees   nltk.chktypes   chktypes   strings   syss   removePuncts
   ChunkScores   CorpusReaders
   CorpusDatas   NPsFormatters
   count_eachs   MistakesTrackers   ChunkRulesTesters   Models   DecTreeLeafs   Matchers	   MyChunkers   readChunkRulesFromFiles   makeMatchers   getResultsAsTextLiness   getScores   annotates   writeResultsToFiless   mistakesAsStringBriefs   mistakesAsStringFulls
   scoreRuless   makeIncrementalPruningFuncs   makeThresholdPruningFuncs   extractRuless   doGenRuleSets   Falses   doEvals   __name__($   s	   MyChunkers   ChunkRulesTesters   getScores   doGenRuleSets   annotates   makeMatchers   NPsFormatters   removePuncts   mistakesAsStringFulls   Matchers   doEvals   ChunkedTaggedTokenReaders   mistakesAsStringBriefs   extractRuless
   scoreRuless   strings   writeResultsToFiless   Trees   syss   Tokens
   ChunkScores   chktypes   readChunkRulesFromFiles   getResultsAsTextLiness   Models   makeThresholdPruningFuncs   Sets   CorpusReaders   MistakesTrackers   DecTreeLeafs   FrozenTokens   makeIncrementalPruningFuncs   CharSpanLocations
   count_eachs   SubtokenContextPointers
   CorpusData(    (    s'   C:\school\tnlp\final\v2\PruneChunker.pys   ?   sF   			’ 	
	)	)				
				I			3%
