#!/usr/local/python/bin/python
import re, sys, os
from xml.dom.minidom import parse
# global variables
# Value of the 'version' attribute on <wn> elements that gets updated
# (fromver) and the value it is replaced with (tover).
fromver = '2.0'
tover = '2.1'
# Directories the grouping XML files are read from (old) and written to (new).
oldgroupingdb = 'grouping-db-2.0/'
newgroupingdb = 'grouping-db-2.1/'
# Sense map file passed to updateGroupings() when run as a script.
# The 1.7.1 -> 2.0 map below is disabled.
# wn17to20map = 'sensemap/1.7.1to2.0.verb.poly'
wn20to21map = 'sensemap/2.0to2.1.verb.poly'
#################################################################
#
# Update the list of sense numbers for each sense in a grouping
#
# Arguments:
# (base version - not an argument: taken from the global fromver
#  and mapped to the global tover;
#  first, run with 1.7.1 to map to 2.0
#  next, run with 2.0 to update to 2.1)
#
# mapfile - file of mappings between versions
# verb - lemma
#
#################################################################
def updateGrouping(mapfile, verb):
    """Update the sense number lists in one verb's grouping file.

    Parses <oldgroupingdb>/<verb>-v.xml. For every <sense> element whose
    <mappings>/<wn> element has version == fromver, the version attribute
    is set to tover and the element's text (a comma-separated sense
    number list, e.g. 4,6,7,8,21) is replaced by the result of
    updateSenseNumList(). The whole document is then written to
    <newgroupingdb>/<verb>-v.xml.

    Arguments:
    mapfile - file of mappings between versions
    verb    - lemma
    """
    print('\nlooking at grouping: %s' % verb)
    filename = verb + '-v.xml'
    # get doc object
    domdoc = parse(os.path.join(oldgroupingdb, filename))
    # look at each sense and modify the text between the
    # <wn> ... </wn> tags, e.g. 4,6,7,8,21
    # also update the version attribute
    for sense in domdoc.getElementsByTagName("sense"):
        # only the first <mappings> tag of a sense is used;
        # a sense without one has nothing to update
        mappings = sense.getElementsByTagName("mappings")
        if not mappings:
            continue
        # only the first <wn> tag of the <mappings> tag is used
        wns = mappings[0].getElementsByTagName("wn")
        if not wns:
            continue
        wn = wns[0]
        if wn.getAttribute('version') != fromver:
            continue
        # update version attribute
        wn.setAttribute('version', tover)
        # an empty <wn></wn> has no child node: there is no sense list
        # to map, so only the version attribute changes
        if not wn.childNodes:
            continue
        textnode = wn.childNodes[0]
        print('updating grouping: %s %s' % (verb, textnode.data))
        # update text between the <wn> ... </wn> tags
        textnode.data = updateSenseNumList(mapfile, verb, textnode.data)
    # write the updates; 'with' closes the file even if writexml() fails
    with open(os.path.join(newgroupingdb, filename), 'w') as f:
        domdoc.writexml(f)
#################################################################
#
# Get old sense number -> new sense number dict. for a verb
#
#################################################################
def getSenseNumMap(mapfile, verb):
    """Return the old sense number -> new sense number dict. for a verb.

    Reads mapfile line by line and keeps the lines that start with a
    confidence value followed by a sense key of the given verb, e.g.

        100 come%2:30:02::;00274019;7 come%2:30:02::;00331936;7

    The sense number is the last ';'-separated element of the second
    (old) and third (new) whitespace-separated fields.

    Arguments:
    mapfile - file of mappings between versions
    verb    - lemma

    Returns a dict of sense number strings, empty if no line matches.
    Prints a message and exits with status 1 if mapfile cannot be opened.
    """
    try:
        infile = open(mapfile, "r")
    except IOError:
        # report the path: 'infile' is not bound when open() fails
        print("there's been an IO exception! %s" % mapfile)
        sys.exit(1)
    # match begin. of line + confidence + verb + '%2'
    # (2 = POS verb in sense keys); the verb is escaped so that regex
    # metacharacters in a lemma are matched literally
    rexpr = re.compile(r'^\d+\s' + re.escape(verb) + '%2')
    sensemap = {}
    with infile:
        for line in infile:
            if not rexpr.search(line):
                continue
            elements = line.split()
            # need confidence, old sense and new sense; skip lines
            # that have no new sense field
            if len(elements) < 3:
                continue
            # sense number is the last element
            oldsensenum = elements[1].split(';')[-1]
            newsensenum = elements[2].split(';')[-1]
            sensemap[oldsensenum] = newsensenum
    return sensemap
#################################################################
#
# Take a comma-separated list of sense numbers n1,n2,... and update it
#
#################################################################
def updateSenseNumList(mapfile, verb, senselist):
    """Map a comma-separated list of sense numbers to the new version.

    Each sense number in senselist is looked up in the dict. returned by
    getSenseNumMap(mapfile, verb). Numbers with no mapping are reported
    with a 'NO SUCH SENSE' message and left out of the result.
    Whitespace around a number is ignored and empty entries are skipped.

    Arguments:
    mapfile   - file of mappings between versions
    verb      - lemma
    senselist - old sense numbers, e.g. 4,6,7,8,21

    Returns the new sense numbers joined with ','.
    """
    # get the mapping
    sensemap = getSenseNumMap(mapfile, verb)
    newsensenums = []
    for token in senselist.split(','):
        # tolerate '4, 6' style lists; the map keys have no whitespace
        oldsensenum = token.strip()
        if not oldsensenum:
            # empty list or stray comma: not a sense number
            continue
        if oldsensenum in sensemap:
            newsensenums.append(sensemap[oldsensenum])
        else:
            print('NO SUCH SENSE: %s' % oldsensenum)
    return ','.join(newsensenums)
#################################################################
#
# Main function
#
#################################################################
def updateGroupings(mapfile):
    """Update every verb grouping file found in oldgroupingdb.

    A grouping file is a regular file named <verb>-v.xml. The lemma is
    everything before that suffix, so hyphenated lemmas are kept whole,
    and files that merely contain '-v' somewhere in their name are
    ignored. Files are processed in sorted order.

    Arguments:
    mapfile - file of mappings between versions
    """
    suffix = '-v.xml'
    for filename in sorted(os.listdir(oldgroupingdb)):
        if not filename.endswith(suffix):
            continue
        if not os.path.isfile(os.path.join(oldgroupingdb, filename)):
            continue
        # strip the suffix rather than splitting on '-', which would
        # cut a hyphenated lemma at its first hyphen
        verb = filename[:-len(suffix)]
        updateGrouping(mapfile, verb)
#################################################################
#
# Script entry point
#
#################################################################
# When run as a script, update all groupings using the 2.0 -> 2.1 sense map.
if __name__ == "__main__":
    updateGroupings(mapfile=wn20to21map)