### msg.py - Message System for POO and other projects
###   6 Mar 1997 - Amit Patel
###  25 Mar 1997 - added changes required to run in restricted exec mode
###  26 Mar 1997 - changed genders from objects to dictionaries
###   2 Aug 1997 - Joe Strout: added handling of "n't" on verb end

######################################################################
# Module Documentation
#   The module is divided into Utilities, Gender Objects, and Messages
#   Utilities are used internally but might also be a useful library
#   Gender Objects are used internally but can be extended or replaced
#	 by custom gender objects
#   The Message section handles processing of a message (with %-codes)
#	 into a string

######################################################################
# Message System documentation:
#
# The message system is a way to combine a _message_, which is a
#   specification of information you want, with a list of objects
#   and/or strings that provide the information.  If you are familiar
#   with C's printf or Python's '%' operator, the idea is similar here:
#   a format string (message) indicates what information to display and
#   how to display it, and the list of objects (or values) provides the
#   information to display.  Like C and Python formatting, this system
#   may seem complex at first.  Looking at some examples (at the end
#   of this documentation) may help.
#
# A message is a string with special codes prefixed by '%'.
#   Following the '%' is either another '%', in which case a single '%'
#   is used, or an object selector followed by an information selector.
#
# An object selector is a digit 0-9 or letter a-z/A-Z optionally followed
#   by a sequence of l, L, or c modifiers.  The digit represents which
#   object is being selected.  The modifiers extract other objects from the
#   previously selected object.
#	 l - the object's location
#	 L - the object's outermost location (usually a $place)
#	 c - the contents of the object
#   Examples of object selectors are:
#	 3 - object 3
#	 al - the location of the object marked 'a'
#	 ALc - the contents of the outermost location of object 'A'
#   (Note: object selectors are case sensitive)
#   Some of this will be more clear with the examples presented later.
#
#   An information selector is one of the following:
#   (the following are 'relative' selectors)
#	 s		  - subject pronoun (he,she,it,they)
#	 o		  - object pronoun (him,her,it,them)
#	 r		  - reflexive pronoun (himself,herself,itself,themselves)
#	 p		  - possessive adjective (his,her,its,their)
#	 q		  - possessive pronoun (his,hers,its,theirs)
#	 n		  - name of object (apple,banana,carrots)
#	 i		  - name plus indirect article (an apple,a banana,some carrots)
#	 d		  - name plus direct article (the apple,the banana,the carrots)
#	 'n		 - name as possessive (apple's,banana's,carrots')
#	 'i		 - name+indir. as poss. (an apple's,a banana's,some carrots')
#	 'd		 - name+dir. as poss. (the apple's,the banana's,the carrots')
#	 :(verbs)   - conjugates singular 'verbs' for the object (verbs,verb)
#   (the following are 'absolute' selectors)
#	 #		  - object number (#313)
#	 .(prop)	- finds the 'prop' property on the object
#	 '(noun)	- changes singular 'noun' to match plural flag (noun,nouns)
#	 x{a1 a2..} - special extension specific to each object
#   In addition, all of the codes are case sensitive.  If any code except
#	 'x' is capitalized, the resulting information will be capitalized.
#   The difference between an absolute and a relative selector is that
#	 relative selectors depend on your point of view.  If you are the
#	 object being referred to, you may see something different for a
#	 relative selector than a third party.  (For example 's' would turn
#	 into 'you' or 'him' depending on who you are.)
#   Examples of information selectors are:
#	 s - the subject pronoun for the object
#	 :(runs) - the verb 'runs' conjugated appropriately for the object
#	 N - the capitalized name of the object
#	 .(name) - the 'name' property on the object (usually equivalent to n)
#
# The message system also requires a set of object specifiers which provide
#   information.  An object specifier (objspec) can be one of:
#	 obj - a single POO or Python object
#	 str - a string (noun)
#	 (count,obj) or (count,str) - count indicates the number of objects
#	 [objspec,...] - a list of object specifiers
# Examples of object specifiers are:
#	 Joe - a single object, Joe
#	 (3,'duck') - 3 ducks
#	 [(1,'apple'),'banana',(3,'carrot')] - an apple, a banana, and 3 carrots
#
# Here are some examples of putting object selectors, information selectors,
# and object specifiers together:
#	 '%1N %1:(looks) at %1r.'   with   {1:Joe}
#		=> 'Joe looks at himself.'
#	 '%AN %A:(looks) at %Ar.'   with   {'A':[Joe,Guido]}
#		=> 'Joe and Guido look at themselves.'
#	 '%1I spontaneously %1:(combusts)!'  with  {1:Joe}
#		=> 'Joe spontaneously combusts!'
#	 '%1I spontaneously %1:(combusts)!'  with  {1:[(3,'carrot'),'apple']}
#		=> '3 carrots and an apple spontaneously combust!'

######################################################################
# Imports
#   I know, I know, this is supposed to be bad for your health
from string import *
# End of imports

######################################################################
# Utilities
#   The following utility functions and values are adapted from JHCore

class English:
	# Small collection of English-language helpers: joining strings into
	# a comma/and list, tables of a/an and singular/plural exceptions,
	# and adding or removing a trailing -s.

	def to_list(self,strings,andstr=' and ',sep=', ',
					 finalsep=',',nothing='nothing'):
		# Join a sequence of strings into an English list:
		#   []			-> nothing
		#   ['a']		 -> 'a'
		#   ['a','b']	 -> 'a' + andstr + 'b'
		#   ['a','b','c'] -> 'a' + sep + 'b' + finalsep + andstr + 'c'
		count = len(strings)
		if count == 0: return nothing
		if count == 1: return strings[0]
		if count == 2: return strings[0]+andstr+strings[1]
		return sep.join(strings[:-1]) + finalsep + andstr + strings[-1]

	# Letters that normally take 'an'
	vowel_list = ('a','e','i','o','u')
	# Prefixes that start with a vowel letter but take 'a'
	vowel_exceptions = ("usu", "uke", "uvu", "use", "UPI", "unit", "univ",
						"unic", "uniq", "unix", "eur", "uu", "ubiq",
						"union", "one", "once", "uti")
	# Prefixes that start with a consonant letter but take 'an'
	nonvowel_exceptions = ("honor", "honest", "heir")
	
	# Irregular verbs: singular form -> plural form
	verb_exceptions = {"has": "have",
					   "is": "are",
					   "was": "were",
					   "can": "can"}
	
	# Irregular nouns: singular form -> plural form
	noun_exceptions = {"child": "children",
					   "deer": "deer",
					   "moose": "moose",
					   "sheep": "sheep",
					   "goose": "geese",
					   "louse": "lice",
					   "ox": "oxen",
					   "mouse": "mice"}
	
	def add_s(self,s):
		# Return s with a regular English -s ending added
		# (cat -> cats, box -> boxes, fly -> flies, potato -> potatoes).
		if len(s) < 2: return s+'s'
		last,before = s[-1],s[-2]
		# consonant + y: the y is replaced by 'ies', not kept
		if last == 'y' and before not in 'aeiou': return s[:-1]+'ies'
		if last == 'o' and before not in 'aeiouy': return s+'es'
		if last in ('s','x'): return s+'es'
		if s[-2:] in ('ch','sh'): return s+'es'
		return s+'s'

	def remove_s(self,s):
		# Return s with a regular English -s ending removed
		# (looks -> look, watches -> watch, goes -> go, flies -> fly).
		# Strings of three characters or fewer, and strings that do not
		# end in 's', are returned unchanged.
		if len(s) <= 3 or s[-1] != 's': return s
		if s[-2] != 'e': return s[:-1]
		# From here on s ends in 'es'
		if s[-3] == 'h' and s[-4] in ('c','s'): return s[:-2]
		if s[-3] in ('o','x'): return s[:-2]
		if s[-3] == 's' and s[-4] not in 'aeiouy': return s[:-2]
		if s[-3] == 'i':
			# A four-letter '-ies' form comes from an '-ie' word
			# (dies -> die, lies -> lie), not from consonant + y
			if len(s) == 4: return s[:-1]
			return s[:-3]+'y'
		return s[:-1]

english = English()
# End of utility section

######################################################################
# Genders
#   A gender value contains the necessary information for msg substitutions.
#	 s,o,r,p,q are pronouns and plural is a flag indicating whether this
#	 gender is plural.  Note that gender values don't necessarily
#	 represent real genders, but could indicate special genders like
#	 2nd person or 1st person.
#   A gender value is a dictionary with mappings for s,o,r,p,q,plural.
#
#   builtin_genders maps 'm' to male, 'f' to female, 'n' to neuter,
#	 'e' to either, 't' to plural(they), and 'y' to you.

# This function sets up the standard genders
def _setup_genders():
	# Build the table of standard genders.
	# Returns a dictionary mapping a one-char key to a gender value,
	# where each gender value is itself a dictionary with the entries
	# s,o,r,p,q (pronoun strings) and plural (0 or 1).
	fields = ('s','o','r','p','q','plural')
	table = (
		# These are the common genders
		('m', ('he','him','himself','his','his',0)),
		('f', ('she','her','herself','her','hers',0)),
		('n', ('it','it','itself','its','its',0)),
		('e', ('s/he','him/her','himself/herself','his/her','his/hers',0)),
		# These are special genders
		('t', ('they','them','themselves','their','theirs',1)),
		('y', ('you','you','yourself','your','yours',1)),
	)
	genders = {}
	for key,values in table:
		entry = {}
		for i in range(len(fields)):
			entry[fields[i]] = values[i]
		genders[key] = entry
	return genders

# Standard genders are in a dictionary mapping one-char strings to genders
builtin_genders = _setup_genders()

# GenderedObject gives us objects that have a gender
class GenderedObject:
	# Minimal object usable in messages: it simply stores the three
	# attributes given to the constructor.
	#   name	 - stored as self.name
	#   gender   - stored as self.gender
	#   articles - stored as self.articles
	def __init__(self,name,gender,articles):
		self.name, self.gender, self.articles = name, gender, articles

# You is a special object: gender key 'y' and empty custom articles
You = GenderedObject(name='you',gender='y',articles=('',''))

# End of gender section

######################################################################
# Message Substitution
#   The Msg class handles parsing and substitution of messages

# The internal codes, after parsing, are tuples:
#   {s,o,r,p,q},cap - subject,object,reflexive pronouns, and
#					 possessive adjective,possessive pronoun
#					 for the object; cap is 1 if it should be
#					 capitalized
#   {n,d,i},cap	 - name, direct article+name, indirect
#					 article+name; cap is 1 if it should be capped
#   '{n,d,i},cap	- object's name (+article if d or i) as possessive
#   #			   - the object's number
#   :,verbs		 - verbs (singular) conjugated for the object
#   .,prop		  - the prop property of the object
#   ',noun		  - noun is singular or plural depending on obj's gender
#   l,code		  - use the location of the object instead of the object
#   L,code		  - use the outermost location of the object instead
#   c,code		  - use the contents of the object instead of the object
#   x,text		  - special

class Msg:
	# First the parsing functions
	
	def parse_part(self,text):
		# Returns (one-letter-code,data,...), remaining-text
		if not text: raise 'Parse'
		code,text = text[:1],text[1:]
		if find('sonidprq',lower(code)) >= 0:
			cap =  (lower(code)!=code)
			return (lower(code),cap),text
		if code=='#':
			return (code,None),text
		if find(":.'",code) >= 0 and text[:1] == '(':
			i = find(text,')')
			if i < 0: raise 'Parse','could not find )'
			return (code,text[1:i]),text[i+1:]
		if find("lLc",code) >= 0:
			result,text = self.parse_part(text)
			return (code,result),text
		if code == "'" and find('nid',lower(text[:1])) >= 0:
			cap = (lower(text[:1]) != text[:1])
			return (code+lower(text[:1]),cap),text[1:]
		if code == 'x' and text[:1] == '{':
			i = find(text,'}')
			if i < 0: raise 'Parse','could not find }'
			return (code,split(text[1:i])),text[i+1:]
		return None,code+text
	
	def parse_selector(self,text):
		# Return [partial parsed list], remaining-text
		t,rest = text[:1],text[1:]
		if (t >= '0' and t <= '9' or
			t >= 'a' and t <= 'z' or t >= 'A' and t <= 'Z'):
			try: which = atoi(t)
			except: which = t
			result,text = self.parse_part(rest)
			if result==None: return ['%'],t+text
			else: return [(which,result)], text
		if t == '%':
			return ['%'], rest
		else:
			return ['%'], text

	# A parsed list is a list of strings or tuples,
	# where tuples are parsed %codes
	def parse(self,text):
		if type(text) == type([]): return text  # already parsed
		i = find(text,'%')
		result = []
		while i >= 0:
			result.append(text[:i])
			r,text = self.parse_selector(text[i+1:])
			for x in r: result.append(x)
			i = find(text,'%')
		if text: result.append(text)
		return result

   
######################################################################
	# The substitution section
	
	def add_default_article(self,name,code,plural):
		if code=='d': return 'the '+name
		if code=='i':
			if plural: return 'some '+name

			# Find the first alphanumeric
			okay = letters+digits
			i = 0
			while i < len(name) and ( find(okay,name[i]) == -1 ):
				i = i+1
			# Now use that first useful char to determine a or an
			use_an = ( name[i:i+1] in english.vowel_list or
					   name[i:i+1] == '8' )
			for v in english.vowel_exceptions:
				if name[i:i+len(v)] == v: use_an = 0
			for v in english.nonvowel_exceptions:
				if name[i:i+len(v)] == v: use_an = 1
			if use_an: return 'an '+name
			else: return 'a '+name
		return name

	def canonical_objspec(self,objspec):
		# Returns a list of tuples rather than a more free-form objspec
		# The remainder of the functions expect a canonical objspec rather
		# than the free-form one. 
		if type(objspec) == type( () ): return [objspec]
		if type(objspec) == type([]):
			r = []
			for x in objspec:
				if type(x) != type( () ): r = r + self.canonical_objspec(x)
				else: r.append(x)
			return r
		return [ (None,objspec) ]

	def get_gender(self,gender,prop):
		if type(gender) == type(''):
			try: gender = builtin_genders[gender]
			except: gender = builtin_genders['e']
		try:
			return gender[prop]
		except:
			return builtin_genders['e'][prop]
	
	def is_plural(self,objspec):
		# Returns 1 if objspec represents a plural object or set of objects
		if type(objspec) == type([]):
			if len(objspec) >= 2: return 1
			if len(objspec) == 1: return self.is_plural(objspec[0])
			else: return 0
		if type(objspec) == type( () ):
			return ( objspec[0] >= 2 ) or self.is_plural(objspec[1])
		if type(objspec) == type( '' ):
			return 0
		else:
			return self.get_gender(objspec.gender,'plural')

	def get_name(self,objspec,code):
		# Build the name phrase for each object in objspec and join
		# the phrases into an English list.
		#   objspec - canonical object specification list of (count,obj)
		#   code	- one of n,i,d,'n,'i,'d; a leading ' requests the
		#			 possessive form
		# An object's 'articles' attribute (default 'normal') picks the
		# article: 'unique' turns i into d, 'proper' drops the article,
		# and a tuple supplies custom (indirect,direct) article strings.
		possessive = (code[0] == "'")
		code = code[-1]
		names = []
		for count,obj in objspec:
			counted = (count != None)
			# Determine the name and article type of the object
			articles = 'normal'
			if type(obj) == type(''):
				# Strings are nouns; pluralize for any count but 1
				name = self.get_noun(obj,(counted and count != 1))
			else:
				try: name = obj.name
				except: name = '#<no-name>'
				try: articles = obj.articles
				except: pass
			# A count goes in front of the name and replaces the
			# default indirect article
			if counted:
				name = repr(count) + ' ' + name
				if articles == 'normal': articles = ('','the')
			# The article type may override the requested code
			want = code
			if articles == 'unique' and want == 'i': want = 'd'
			if articles == 'proper': want = 'n'
			if type(articles) == type( () ):
				# Custom article type
				if want == 'i': article = articles[0]
				elif want == 'd': article = articles[1]
				else: article = ''
				if article: name = article + ' ' + name
			else:
				# Other article type
				name = self.add_default_article(name,want,
												self.is_plural(obj))
			# Handle possessives
			if possessive:
				if self.is_plural( (count,obj) ): name = name+"'"
				else: name = name+"'s"
			names.append(name)

		return english.to_list(names)

	def get_pronoun(self,objspec,code):
		# Return the pronoun for objspec.
		#   objspec - canonical object specification list, or a single
		#			 (count,obj) tuple, object or string
		#   code	- which pronoun: one of s,o,r,p,q
		# An empty list gets the neuter pronoun.  Several objects get
		# 'they' pronouns, or 'you' pronouns if You is among them.
		if objspec == []: return builtin_genders['n'][code]

		# Collapse 
		if type(objspec) == type([]):
			if len(objspec) > 1:
				for count,x in objspec:
					if x == You: return builtin_genders['y'][code]
				return builtin_genders['t'][code]
			else: objspec = objspec[0]
		if type(objspec) == type( () ):
			if objspec[0] != None and objspec[0] > 1:
				# Several of one thing are 'they', unless the object is
				# already plural by itself, in which case its own gender
				# is used below.  The test is made on the object alone:
				# the (count,obj) tuple is always plural when count > 1,
				# so testing the tuple would never get here.
				if not self.is_plural(objspec[1]):
					return builtin_genders['t'][code]
			objspec = objspec[1]

		if type(objspec) == type(''):
			return builtin_genders['n'][code]
		else:
			return self.get_gender(objspec.gender,code)

	def get_objnum(self,objspec):
		# Return the object numbers of everything in objspec (a
		# canonical list of (count,obj)) as an English list.
		# Strings have no number and are shown as '#<string ...>';
		# other objects are shown as '#' plus their id().
		# This will have to be changed for POO
		results = []
		for count,x in objspec:
			if type(x) == type(''): results.append( '#<string '+repr(x)+'>' )
			# id() of the object itself, not of the enclosing list,
			# so that different objects get different numbers
			else: results.append( '#'+repr(id(x)) )
		return english.to_list(results)
	
	def get_property(self,objspec,property):
		# Fetch the attribute named by property from every object in
		# objspec (a canonical list of (count,obj)) and return the
		# values as an English list.  Non-string values are shown by
		# their repr; a missing attribute shows as '#<not-found>' and
		# any other failure as '#<error>'.
		found = []
		for count,obj in objspec:
			try:
				value = getattr(obj,property)
				if type(value) != type(''):
					value = repr(value)
			except AttributeError:
				value = '#<not-found>'
			except:
				value = '#<error>'
			found.append(value)
		return english.to_list(found)
	
	def get_verb(self,verbname,plural):
		# Handle exceptions first
		i = find(verbname,'/')
		if i >= 0:
			if plural: return verbname[i+1:]
			else: return verbname[:i]
			
		# Conjugate a verb
		if plural:
			if verbname[-3:] == "n't":
				return self.get_verb(verbname[:-3],plural) + "n't"
			if verbname in english.verb_exceptions.keys():
				return english.verb_exceptions[verbname]
			elif verbname[-2:] == "'s":
				# Dunno why this rule is on JHM
				return verbname[:-2] + "'ve"
			else:
				return english.remove_s(verbname)
		return verbname

	def get_noun(self,nounname,plural):
		# Handle exceptions first
		i = find(nounname,'/')
		if i >= 0:
			if plural: return nounname[i+1:]
			else: return nounname[:i]
			
		# Decline a noun
		if plural:
			if nounname in english.noun_exceptions.keys():
				return noun_exceptions[nounname]
			else:
				return english.add_s(nounname)
		return nounname

	def get_special(self,objspec,args):
		# Call a function on all the objects
		if not args: return '#<invalid x{} specifier>'
		results = []
		v,args = 'sub_'+args[0],tuple(args[1:])
		for count,x in objspec:
			try:
				s = getattr(x,v)
				s = apply(s,args)
			except AttributeError:  s = '#<not-found>'
			except:  s = '#<error>'
			results.append(s)
		return english.to_list(results)
		
	def get_part(self,objspec,part,absolute_only=0):
		# A message is a list of (string or parsed part)
		# This function turns a part into a string, or
		# if absolute_only is set, it might return None to indicate
		# that the part is relative and not absolute
		c,options = part

		# First, try the absolute selectors
		if c == '.': return self.get_property(objspec,options)
		if c == '#': return self.get_objnum(objspec)

		if c == "'": return self.get_noun(options,self.is_plural(objspec))
		if c == 'x': return self.get_special(objspec,options)

		if c in ('l','L','c'):
			results = []
			for count,obj in objspec:
				if type(obj) != type(''):
					try:
						if c == 'l': add = [obj.location]
						if c == 'L': add = [obj.outer_location()]
						if c == 'c': add = obj.contents
					except AttributeError:
						add = []
					except:
						return '#<error>'
					for a in add:
						if a not in results: results.append(a)
			return self.get_part(self.canonical_objspec(results),
								 options,absolute_only)

		# Now try the relative selectors
		if absolute_only: return None
		if c in ('n','i','d',"'n","'i","'d"):
			name = self.get_name(objspec,c)
			if options: name = capitalize(name)
			return name
		if c in ('s','o','r','p','q'):
			name = self.get_pronoun(objspec,c)
			if options: name = capitalize(name)
			return name
		if c == ':': return self.get_verb(options,self.is_plural(objspec))
		return '#<unknown code:'+`c`+'>'

	def lookup_in_objlist(self,objlist,key):
		try: obj = objlist[key]
		except:
			try: obj = objlist[lower(key)]
			except: obj = []
		return obj
	
	def canonical_objlist(self,objlist):
		canonical = {}
		for k in objlist.keys():
			canonical[k] = self.canonical_objspec(objlist[k])
		return canonical

	def sub_parties(self,objlist,msg,parties):
		# Handle message substitution for many points of view
		# First we preprocess the message to eliminate absolute refs
		# (This is an optimization step and isn't strictly necessary)		
		objlist = self.canonical_objlist(objlist)
		msg = self.parse(msg)
		for i in range(len(msg)):
			m = msg[i]
			if type(m) != type(''):			
				v = self.get_part(self.lookup_in_objlist(objlist,m[0]),m[1],1)
				if v: msg[i] = v
		# Now msg has all absolute references processed
		results = []
		for p in parties:
			# Make a copy of the objlist and replace p with magic `you' object
			def subst_obj(objlist,p=p):
				temp = objlist[:]
				for i in range(len(temp)):
					if temp[i][1] == p: temp[i] = (None,You)
				return temp
			rel_objlist = {}
			for key in objlist.keys():
				rel_objlist[key] = subst_obj(objlist[key])
			results.append(self.sub(rel_objlist,msg))
		return results,self.sub(objlist,msg)
		
	def sub(self,objlist,msg):
		# Handle message substitution for one point of view
		s = ''
		objlist = self.canonical_objlist(objlist)
		for m in self.parse(msg):
			if type(m) == type(''): s = s+m
			else:
				objspec = self.lookup_in_objlist(objlist,m[0])
				s = s+self.get_part(objspec,m[1])
		return s

# End of message substitution 

######################################################################
# Testing code

def test():
	msg = Msg()
	Stephen = GenderedObject('Stephen','m','proper')
	Joe = GenderedObject('Joe','m','proper')
	Guido = GenderedObject('Guido','m','proper')
	Amit = GenderedObject('Amit','m','proper')
	Museum = GenderedObject('museum','n','unique')
	Boat = GenderedObject('boat','n','normal')
	Hut = GenderedObject('hut','n','normal')
	Joe.location = Museum
	Stephen.location = Boat
	Guido.location = Museum
	Amit.location = Hut
	ale = GenderedObject('ale','n','normal')
	ducks = GenderedObject('ducks','t','normal')
	ducks.location = Boat

	m1 = "While standing in %1ld, %1I %1:(looks) at %1r (%1#) oddly."
	m2 = "%1'i %1'(hand) %1:(looks) bloody.  %1P %1'(glove) %1:(are) not %1q."
	m3 = "%1I %1:(hits) %2i (%2#) with %1p %1'(hand)."
	m4 = "In %1li, %1i %1:(raises) %1p %1'(eyebrow) and %1:(peers) at %2i suspiciously."
	v1 = {1:[ducks,(3,'soldier'),'fooble'],2:ale}
	print '1:',msg.sub({1:[Joe,Guido]},m1)
	print '2:',msg.sub({1:[Guido,Joe]},m2)
	print '3:',msg.sub(v1,m3)
	print '4:',msg.sub({1:Joe,2:ducks},m3)
	print '   Test 5 gives the message processed for Amit and Stephen, '
	print '   and also a generic message for the rest of the world:'
	print '5:',msg.sub_parties({1:[Stephen,ducks],2:Amit},m4,[Amit,Stephen])
	raw_input()
		
#test()
class Gender:
	# Every instance just carries the attribute foo, set to 42.
	# NOTE(review): nothing in the code visible here uses this class;
	# it looks like leftover experimental code -- confirm before removing.
	def __init__(self):
		self.foo = 42

# Possible object selector:
#	x{...} 
# Unfinished object specifiers:
#	Instead of (num,string), it should be possible to have (num,s1,s2,s0)
#	where s1 is the normal singular string, s2 is the plural string, and
#	s0 is the string to use when the count is 0.
# Also unfinished:
#	Error checking to check for invalid objspecs, invalid property refs,...
# Possible addition
#	Randomness (useful for making messages a little more interesting)
# Also needs to be tested a lot more.