#!/usr/local/python/bin/python
"""Renumber the sense lists stored in verb grouping files.

Every ``<lemma>-v.xml`` file found in ``oldgroupingdb`` is parsed, each
``<wn version="...">`` sense-number list whose version equals ``fromver``
is translated through a sense map file, its version attribute is set to
``tover``, and the resulting document is written under the same file name
into ``newgroupingdb``.

The commented-out ``wn17to20map`` setting is kept for reference; only the
2.0 -> 2.1 map is used when the script is run directly.
"""

import os
import re
import sys
from xml.dom.minidom import parse

# global variables
fromver = '2.0'
tover = '2.1'
oldgroupingdb = 'grouping-db-2.0/'
newgroupingdb = 'grouping-db-2.1/'
# wn17to20map = 'sensemap/1.7.1to2.0.verb.poly'
wn20to21map = 'sensemap/2.0to2.1.verb.poly'

# File name suffix that identifies a verb grouping file.
_GROUPING_SUFFIX = '-v.xml'


#################################################################
#
# Update the list of sense numbers for each sense in a grouping
#
# Arguments:
#   mapfile - file of mappings between versions
#   verb    - lemma
#
#################################################################
def updateGrouping(mapfile, verb):
    """Rewrite one verb's grouping file with updated sense numbers.

    Reads ``oldgroupingdb/<verb>-v.xml``.  For every <sense> element, the
    first <wn> element inside its first <mappings> element is examined;
    when its ``version`` attribute equals ``fromver`` the comma-separated
    sense numbers in its text (e.g. ``4,6,7,8,21``) are mapped with
    updateSenseNumList() and the attribute is set to ``tover``.  The
    document is then written to ``newgroupingdb/<verb>-v.xml``.

    Args:
        mapfile: path of the sense map file.
        verb: lemma whose grouping file is processed.
    """
    print('\nlooking at grouping: %s' % verb)

    filename = verb + _GROUPING_SUFFIX
    domdoc = parse(os.path.join(oldgroupingdb, filename))

    # Loaded on first use so the map file is read at most once per verb,
    # and not at all when no sense list needs updating.
    sensemap = None

    for sense in domdoc.getElementsByTagName("sense"):
        mappings = sense.getElementsByTagName("mappings")
        if not mappings:
            continue
        wns = mappings[0].getElementsByTagName("wn")
        if not wns:
            continue
        wn = wns[0]
        if wn.getAttribute('version') != fromver:
            continue

        # update version attribute
        wn.setAttribute('version', tover)

        # An empty <wn/> has no sense numbers to translate.
        if not wn.childNodes:
            continue

        text = wn.childNodes[0]
        print('updating grouping: %s %s' % (verb, text.data))
        if sensemap is None:
            sensemap = getSenseNumMap(mapfile, verb)
        # update text between <wn>...</wn> tags
        text.data = updateSenseNumList(mapfile, verb, text.data, sensemap)

    # write the updates
    with open(os.path.join(newgroupingdb, filename), 'w') as outfile:
        domdoc.writexml(outfile)


#################################################################
#
# Get <old sense number> -> <new sense number> dict. for a verb
#
#################################################################
def getSenseNumMap(mapfile, verb):
    """Return a dict mapping old sense numbers to new ones for ``verb``.

    Only lines that start with a number, one whitespace character and then
    ``<verb>%2`` are used (2 = POS verb in sense keys), e.g.::

        100 come%2:30:02::;00274019;7 come%2:30:02::;00331936;7

    The sense number is the last ``;``-separated part of the second field
    (old) and of the third field (new).  Both are kept as strings.

    Args:
        mapfile: path of the sense map file.
        verb: lemma to look up.

    Returns:
        dict of old sense number -> new sense number.

    Raises:
        SystemExit: when ``mapfile`` cannot be opened.
    """
    try:
        infile = open(mapfile, "r")
    except IOError:
        # Report the path: no file object exists when open() fails.
        print("there's been an IO exception! %s" % mapfile)
        sys.exit(1)

    # match begin. of line + confidence + verb + '%2'
    # The lemma is escaped so that characters such as '.' match literally.
    rexpr = re.compile(r'^\d+\s' + re.escape(verb) + '%2')

    sensemap = {}
    with infile:
        for line in infile:
            if not rexpr.search(line):
                continue
            elements = line.split()
            # A line without a target field carries no mapping.
            if len(elements) < 3:
                continue
            # sense number is the last element
            oldsensenum = elements[1].split(';')[-1]
            newsensenum = elements[2].split(';')[-1]
            sensemap[oldsensenum] = newsensenum
    return sensemap


#################################################################
#
# Take a list of sense numbers <n>,<n>,... and update it
#
#################################################################
def updateSenseNumList(mapfile, verb, senselist, sensemap=None):
    """Translate a comma-separated list of sense numbers.

    Numbers without an entry in the map are reported on standard output
    and left out of the result.  Whitespace around a number is ignored
    and empty items are skipped.

    Args:
        mapfile: path of the sense map file; read only when ``sensemap``
            is not given.
        verb: lemma the sense numbers belong to.
        senselist: string such as ``"4,6,7,8,21"``.
        sensemap: optional mapping already obtained from getSenseNumMap(),
            so callers can avoid re-reading the map file.

    Returns:
        The mapped sense numbers joined with ``,``.
    """
    if sensemap is None:
        sensemap = getSenseNumMap(mapfile, verb)

    newsensenums = []
    for item in senselist.split(','):
        oldsensenum = item.strip()
        if not oldsensenum:
            continue
        if oldsensenum in sensemap:
            newsensenums.append(sensemap[oldsensenum])
        else:
            print('NO SUCH SENSE: %s' % oldsensenum)
    return ','.join(newsensenums)


#################################################################
#
# Main function
#
#################################################################
def updateGroupings(mapfile):
    """Run updateGrouping() for every grouping file in ``oldgroupingdb``.

    A grouping file is a regular file whose name ends in ``-v.xml``; the
    lemma is everything before that suffix, so hyphenated lemmas are kept
    whole.

    Args:
        mapfile: path of the sense map file.
    """
    for name in sorted(os.listdir(oldgroupingdb)):
        if not name.endswith(_GROUPING_SUFFIX):
            continue
        if not os.path.isfile(os.path.join(oldgroupingdb, name)):
            continue
        updateGrouping(mapfile, name[:-len(_GROUPING_SUFFIX)])


#################################################################
#
# Script entry point
#
#################################################################
if __name__ == "__main__":
    updateGroupings(wn20to21map)