import sys

from PruneChunker import *

# Command-line arguments: first the chunk-rules file, then the input file.
try:
   RULES_FILE = sys.argv[1]
   INPUT_FILE = sys.argv[2]
except IndexError:
   # Only a missing argument is a usage error; any other exception should
   # surface instead of being hidden behind the usage text.
   # The message goes to stderr so it never mixes with the chunked output
   # on stdout, and the exit status is non-zero so callers can detect it.
   sys.stderr.write("Usage: textchunker.py rulesFile inputFile\n\n")
   sys.exit(1)

# Not read anywhere in the visible script; kept so the module-level name
# continues to exist.
doAfterFix = False
   
def makestr(tree):
   """Render a chunked tree as one flat, bracketed string.

   Each item of ``tree`` is handled in order:

   * a ``Tree`` instance is rendered recursively and wrapped as
     `` [<rendered subtree>] `` (a space on each side of the brackets);
   * anything else is indexed with the keys ``'TEXT'`` and ``'TAG'`` and
     rendered as ``TEXT/TAG`` followed by one space.

   Returns the concatenation of all rendered items, or an empty string
   when ``tree`` has no items.
   """
   pieces = []
   for node in tree:
      if not isinstance(node, Tree):
         # Leaf token: "text/tag" with a trailing separator space.
         pieces.append(node['TEXT'] + "/" + node['TAG'] + " ")
         continue
      # Nested chunk: recurse and mark its extent with brackets.
      pieces.append(' [' + makestr(node) + '] ')
   return "".join(pieces)
   
# Build the chunker from the rules file named on the command line.
rulesSet = readChunkRulesFromFile(RULES_FILE)
chunkparser = MyChunker(makeMatcher(rulesSet))

# Read all the sentences in the file.
# NOTE(review): the (0, -1) arguments presumably select the whole file --
# confirm against CorpusData.
# The result is named `sentences` rather than `input` so the builtin of that
# name is not shadowed.
sentences = CorpusData(CorpusReader(INPUT_FILE), 0, -1)
for tree in sentences:
   # Chunk the leaves of the entry's 'TREE' value and print one line per entry.
   to_chunk = tree['TREE'].leaves()
   chunked = chunkparser.chunk(to_chunk)
   # A parenthesised single argument prints the same line whether `print`
   # is the Python 2 statement or the Python 3 function.
   print(makestr(chunked))



