% File:		parser
% Author:	Peter Ross
% Updated:	6 June 1985
% Purpose:	simple general purpose active chart parser

% This is a (very) simple general purpose chart parser.
% There is separate user documentation in parser.doc.

% There are three important data structures to know about:
% Edge:
%	edge(Category, Found, Needs, StartVertex, EndVertex)
%		Category is the category as on the LHS of a rule.
%		Found is what has already been accounted for, of the
%		    RHS of that rule. At the start it is just []. It
%		    is ordered so that the most recently found item is
%		    first. To help analyse the chart after the parseing,
%		    the items are of the form
%			Category = VertexNumber
%		    showing the vertex number where that found category
%		    started.
%		Needs is what has not yet been accounted for, of the
%		    RHS of that rule. At the start it is everything.
%		Startvertex and EndVertex show where the edge is.
%	NOTES: [1] Found and Needs don't get changed. New edges	with updated 
%		   Found (bigger) and Needs (smaller) get added.
%	       [2] when applying the Fundamental Rule, assume that the new 
%		   edge goes farther right than the active edge that gave 
%		   birth to it, as opposed to farther left.
%	       [3] a reminder: an edge is ACTIVE is Needs is non-empty. 
%		   Otherwise it is INACTIVE.
%	       [4] Found lists are really just bureaucracy useful when the
%		   parsing is done.
% Chart:
%	ActiveEdgeList + InactiveEdgeList
%		The two types of edges are kept in separate lists for
%		convenience. Only edges which have been processed already
%		(so that they have triggered all the new edges they can)
%		get onto the chart.
% Agenda:
%	CandidateList - Hole
%		This is a difference list (a list with a hole at the end, so
%		it's as cheap to add items at the end, when working
%		breadth-first, as to add items to the front, when working
%		depth-first). The items are all of the form
%			ActiveEdge+InactiveEdge
%		and the fundamental rule is in due course applied to each
%		such pair of edges. In fact, the way the code works, the
%		rule is guaranteed to succeed, although the user could modify
%		the test of candidacy, and the fundamental rule, so that it
%		did not always work. As things stand, we could apply the rule
%		before the item ever gets onto the agenda, but that would
%		tend to hide the algorithm even more, and cut down the general
%		flexibility. The speed loss is pretty trivial.

% The Fundamental Rule is:
%	Find a case of an ACTIVE edge meeting, at its EndVertex, the
%	StartVertex of an INACTIVE edge, such that the category of the
%	INACTIVE edge is what is first needed for the ACTIVE edge to
%	'grow'. Construct a new edge from these two:
%		- its category will be that of the ACTIVE edge.
%		- the Found list is the Found list of the ACTIVE edge
%		  but with the category of the INACTIVE edge added.
%		- the Needs list is the tail of the Needs list of the
%		  ACTIVE edge.
%		- the edge spans both the old edges.
% You could always modify this rule, e.g. for plan recognition purposes allow
% there to be a gap between the end of the active edge and the start of the
% inactive edge.

% Certain decisions are needed. Does parsing start with the most global
% category and proceed downward ("top-down") or with the minimal chart
% built from the raw data and the input, and proceed upward ("bottom-up")?
% Either way, there will be an agenda of candidates for applying the
% fundamental rule. When the first candidate on the agenda is processed,
% more candidates will arise from that. Should they go on the front of
% the agenda ("depth-first") or the end ("breadth-first"), or should the
% user be allowed to reshuffle the agenda as he likes. The code does not
% currently cater for this last choice, and would need a bit of hacking
% to make it do so. Chief point is that currently the agenda is a difference
% list, so it is cheap to add things to either end, but is no better than
% an ordinary list if you want to start adding things to the middle.
% (Note for future hackers: how about keeping the agenda as a tree, with
% the user's sorting relation defined as the tree ordering relation? More
% costly than the simple scheme here, but about equally good for any sensible
% (i.e. non-global) ordering rule...)


% ================== START OF THE CODE ================== 

% ======== TOP LEVEL ========

% parse/4: the TOP-LEVEL goal of all this lot. Use make_chart/4 if the
%	rules have already been inverted.

parse(Tag, WordList, MaxVertex, Chart) :-
	invert_rules(Tag),
	make_chart(Tag, WordList, MaxVertex, Chart).

% invert_rules/1: takes a tag. Looks at each rule, adds clauses of the form
%		upward_rule(Tag, Category, [Parent=[Category|Rest], ...])
%	and
%		downward_rule(Tag, Category, ListOfExpansions)
%	purely for "speed" later on. The point is that the system
%	want to find, in bottom-up search, all rules with a given
%	category as the first item on the RHS (upward_rule/3 gives
%	this) or, in top-down search, all rules with a given LHS
%	(downward_rule/3 does this). This "rule inversion" should
%	be done once only, not once per parse, since all the necessary
%	information is contained in the rule/3 clauses.
%	Yes, it is a bit cumbersome, and sorry about those failure-driven
%	loops.

invert_rules(Tag) :-
	abolish(upward_rule,3),
	rule(Tag, _, [Category|_]),
	not(upward_rule(Tag,Category,_)),
	setof(Parent=[Category|Rest],
	      rule(Tag,Parent,[Category|Rest]),
	      List),
	assert(upward_rule(Tag, Category, List)),
	fail.
invert_rules(Tag) :-
	abolish(downward_rule,3),
	rule(Tag, Category,_),
	not(downward_rule(Tag,Category,_)),
	setof(RHS,
	      rule(Tag,Category,RHS),
	      List),
	assert(downward_rule(Tag,Category,List)),
	fail.
invert_rules(Tag) :-
	( watching(Tag) ->
		write('inverse rules created for tag '), 
		write(Tag), nl
	; true
	).

% make_chart/4: given tag, a WordList, produce the maximum vertex number and 
%	a final chart. The approach is one that reflects my undersatnding
%	of what ought to be happening in a simple chart parser, namely:
%	    (a) pick up the strategy and policy:
%		strategies: bu = bottom up, namely try rule expansions
%				 triggered by inactive edge creation
%			    td = top down, namely try rule expansions
%				 triggered by active edge creation
%		policies:   df = depth first, namely add new candidates
%				 to front of agenda lists
%			    bf = breadth first, namely add new candidates
%				 to back of agenda lists
%	    (b) grow an initial chart using all the words and all the lexical 
%		info to get the lowest level details. Any active edges will
%		be added according to the strategy, i.e. if bottom-up then
%		each inactive edge will trigger rule expansion upward and
%		cause some active edges to be added. If top-down, only
%		one active edge will initially be added, but this will
%		trigger the addition of some more active edges. To make
%		life easy for the initialisation routines, and to help
%		whoever looks at the chart afterward to spot what the
%		top-level category was, there is an assumed ersatz rule
%		of the form
%			user -> top_level_category.
%		Thus you can look for the edge
%			edge(user,[],[Top],0,0)
%		to spot the topmost category. This will be useful when I
%		get round to adding rule tags, when there will be many top
%		categories, but only one such edge per chart - so you can
%		deduce the tag backwards. The penalty is, of course, that
%		you shouldn't have a category called 'user'. If you really
%		want to, you will need to have a predicate
%			ersatz_category(Tag, ErsatzCategoryName)
%		and then the system will use that name instead.
%	    (c) grow the initial agenda (strategy-dependent)
%	    (d) call chart/5 to run the main loop and check for termination.

make_chart(T, WordList, MaxVertex, FinalChart) :-
        strategy(T,S),    % choices: bu or td, validated higher up.
        policy(T,P),      % choices: df or bf, validated higher up.
	( watching(T) ->
		prompt(_, 'monitor:')
	; true
	),
        initial_setup(T,S,P, WordList, 0, MaxVertex, 
		      []+[], InitialChart, 
		      Var-Var,InitialAgenda),
        chart(T,S,P, InitialChart, InitialAgenda, FinalChart).
        
% chart/6: the main loop (with monitoring hook). Given tag, strategy, policy,
%	the current chart and agenda, work out the final chart.
%	This encapsulates the basic control algorithm of a chart parser,
%	namely:
%		- get the first entry of the agenda. This is a pair of
%		  edges to which the fundamental rule applies.
%		- apply the fundamental rule to get a new edge.
%		- add this edge to the chart. This includes the job of
%		  finding any inactive edges with which it will
%		  eventually combine at a later cycle. Add items to the
%		  agenda for each such case (at the back if breadth-first,
%		  at the front if depth-first).
%		  Also, if we are working top-down, then adding an active
%		  edge will recursively trigger the addition of further
%		  active 'embryo' edges according to the rule clauses.
%		  If we are working bottom-up, this triggering is done
%		  when inactive edges are added.

chart(_,_,_, Chart, Ag-_, Chart) :-
	var(Ag),	% This means the agenda is at last empty.
	!.
chart(T,S,P, Chart, [AEdge+IEdge|Rest]-Var, FinalChart) :-
	apply_fr(AEdge,IEdge,NewEdge),
	( active(NewEdge) ->
	    add_active_edge(T,S,P,NewEdge,Chart,NewChart,Rest-Var,NewAgenda)
	; add_inactive_edge(T,S,P,NewEdge,Chart,NewChart,Rest-Var,NewAgenda)
	),
	monitor(T,S,P,Chart,[AEdge+IEdge|Rest]-Var,NewChart,NewAgenda),
	chart(T,S,P,NewChart,NewAgenda,FinalChart).


% ============ SUBSIDIARY PREDICATES ============

% ======== INITIALISING STUFF ========

% initial_setup/9: given tag, strategy, policy, word list, min vertex,
%	return number giving the maximum vertex number, and from a seed chart
%	(typically []+[] if not re-starting) create an initial chart and
%	from a seed agenda (typically Var-Var if not-restarting) create
%	an initial agenda.

initial_setup(T,S,P, WordList, MinVertex, MaxVertex, 
	      SeedChart, InitialChart, 
	      SeedAgenda, InitialAgenda) :-
	words_to_edges(T, WordList, MinVertex, MaxVertex, EdgeList),
	add_inactive_list(T,S,P,EdgeList,SeedChart,TempChart,
			  SeedAgenda,TempAgenda),
	initial_category(T, C),
	(ersatz_category(T, EC)
	; EC = user
	),
	!,
	add_active_edge(T,S,P,edge(EC,[],[C],MinVertex,MinVertex),
			TempChart,InitialChart,
			TempAgenda,InitialAgenda).

% words_to_edges/5: given tag, word list, min vertex number, return maximum 
%	vertex number (for later use in inspecting final chart) and list of 
%	inactive edges derived from lexical data about each word.

words_to_edges(T, WordList, MinVertex, MaxVertex, EdgeList) :-
	words_to_edges(T, WordList, MinVertex, MaxVertex, [], EdgeList).

words_to_edges(_, [], N, N, Answer, Answer).
words_to_edges(T, [W|More], N, MaxVertex, List, Answer) :-
	!,
	( lexical(T,W,Categories) ->
		true
	; write('Word '),
	  write(W),
	  write(' has no entry in the lexicon for tag '),
	  write(T),
	  write(' - skipped it'),
	  nl,
	  Categories = []
	),
	N1 is N+1,
	cats_to_edges(Categories,W,N,N1,List,NewList),
	words_to_edges(T,More,N1,MaxVertex,NewList,Answer).

cats_to_edges([],_,_,_,List,List).
cats_to_edges([C|More],W,N,N1,List,Answer) :-
	cats_to_edges(More,W,N,N1,[edge(C,[word(W)=N],[],N,N1)|List],Answer).


% add_inactive_list/8: given tag, strategy, policy, list of inactive edges, 
%	old chart, get new chart, given old agenda, get new agenda. This is 
%	done by adding each inactive edge in turn.

add_inactive_list(T,S,P,[E|More],Chart,NewChart,Agenda,NewAgenda) :-
	!,
	add_inactive_edge(T,S,P,E,Chart,MidChart,Agenda,MidAgenda),
	add_inactive_list(T,S,P,More,MidChart,NewChart,MidAgenda,NewAgenda).
add_inactive_list(_,_,_,_,Chart,Chart,Agenda,Agenda).

% ======== SUBSIDIARY PREDICATES FOR THE MAIN PART ========

% add_active_edge/8: arguments are tag, strategy, policy, edge (active), old
%	chart, resulting new chart, old agenda, resulting new agenda.
%	Much depends on the strategy. If it is top-down (td), then whenever
%	an active edge is added and it is possible to add new embryo edges,
%	then add them - each will recursively add more embryo active edges.
%	If the strategy is bottom-up, new embryo edges are not sought.
%	Either way, if an active edge is added, then all pairings with
%	inactive edges that the fundamental rule might apply to are added
%	to the agenda.

add_active_edge(_,td,_,Edge,OldA+OldI,OldA+OldI,OldAg-OldV,OldAg-OldV) :-
	member(Edge,OldA),	% Do nothing, if the edge has been
	!.			% processed in the past.
add_active_edge(T,td,P,Edge,OldA+OldI,NewA+OldI,OldAg-OldV,NewAg-NewV) :-
	Edge = edge(_,_,[N|_],_,EV),
	downward_edge_list(T,N,EV,EdgeList),
	!,			% Aha ... there are relevant rules!
	add_active_configs(P,Edge,OldI,OldAg-OldV,MidAg-MidV),
	add_active_list(T,td,P,EdgeList,[Edge|OldA]+OldI,NewA+OldI,MidAg-MidV,
			NewAg-NewV).
add_active_edge(_,_,P,Edge,OldA+OldI,[Edge|OldA]+OldI,OldAg-OldV,NewAg-NewV) :-
	add_active_configs(P,Edge,OldI,OldAg-OldV,NewAg-NewV).

%add_active_configs/5: given policy, new edge, list of inactive edges, old
%	agenda, then creates a new agenda by adding all possible
%	configurations to the agenda and returning the new agenda.

add_active_configs(df,
		   ActiveEdge,
		   [InactiveEdge|MoreIs],
		   OldAg-OldV,
		   NewAg-OldV) :-
	candidate(ActiveEdge,InactiveEdge),
	!,
	MidAg = [ActiveEdge+InactiveEdge|OldAg],
	add_active_configs(df,
		   	   ActiveEdge,
		   	   MoreIs,
		   	   MidAg-OldV,
		   	   NewAg-OldV).
add_active_configs(bf,
		   ActiveEdge,
		   [InactiveEdge|MoreIs],
		   OldAg-OldV,
		   OldAg-NewV) :-
	candidate(ActiveEdge,InactiveEdge),
	!,
	OldV = [ActiveEdge+InactiveEdge|MidV],
	add_active_configs(bf,
		   	   ActiveEdge,
		   	   MoreIs,
		   	   OldAg-MidV,
		   	   OldAg-NewV).
add_active_configs(P,
		   ActiveEdge,
		   [_|MoreIs],
		   OldAg-OldV,
		   NewAg-NewV) :-
	add_active_configs(P,
		   	   ActiveEdge,
		   	   MoreIs,
		   	   OldAg-OldV,
		   	   NewAg-NewV).
add_active_configs(_,_,[],Ag-V,Ag-V).

% add_inactive_edge/8: arguments are tag, strategy, policy, edge (inactive), 
%	old chart, resulting new chart, old agenda, resulting new agenda.
%	Much depends on the strategy. If it is bottom-up (bu), then whenever
%	an inactive edge is added and it is possible to add new embryo edges,
%	then add them - these will be active, of course.
%	If the strategy is top-down, new embryo edges are not sought.
%	Either way, if an inactive edge is added, then all pairings with
%	active edges that the fundamental rule might apply to are added
%	to the agenda.

add_inactive_edge(_,bu,_,Edge,A+OldI,A+OldI,OldAg-OldV,OldAg-OldV) :-
	member(Edge,OldI),	% Do nothing, if the edge has
	!.			% been processed before.
add_inactive_edge(T,bu,P,Edge,A+OldI,NewA+[Edge|OldI],OldAg-OldV,NewAg-NewV) :-
	Edge = edge(Cat,_,[],SV,_),
	upward_edge_list(T,Cat,SV,EdgeList),
	!,			% Aha ... there are relevant rules!
	add_inactive_configs(P,Edge,A,OldAg-OldV,MidAg-MidV),
	add_active_list(T,td,P,EdgeList,A+[Edge|OldI],NewA+[Edge|OldI],
			MidAg-MidV, NewAg-NewV).
add_inactive_edge(_,_,P,Edge,A+OldI,A+[Edge|OldI],OldAg-OldV,NewAg-NewV) :-
	add_inactive_configs(P,Edge,A,OldAg-OldV,NewAg-NewV).

%add_inactive_configs/5: given policy, new edge, list of active edges, old
%	agenda, then creates a new agenda by adding all possible
%	configurations to the agenda and returning the new agenda.

add_inactive_configs(df,
		   InactiveEdge,
		   [ActiveEdge|MoreAs],
		   OldAg-OldV,
		   NewAg-OldV) :-
	candidate(ActiveEdge,InactiveEdge),
	!,
	MidAg = [ActiveEdge+InactiveEdge|OldAg],
	add_inactive_configs(df,
		   	   InactiveEdge,
		   	   MoreAs,
		   	   MidAg-OldV,
		   	   NewAg-OldV).
add_inactive_configs(bf,
		   InactiveEdge,
		   [ActiveEdge|MoreAs],
		   OldAg-OldV,
		   OldAg-NewV) :-
	candidate(ActiveEdge,InactiveEdge),
	!,
	OldV = [ActiveEdge+InactiveEdge|MidV],
	add_inactive_configs(bf,
		   	   InactiveEdge,
		   	   MoreAs,
		   	   OldAg-MidV,
		   	   OldAg-NewV).
add_inactive_configs(P,
		   InactiveEdge,
		   [_|MoreAs],
		   OldAg-OldV,
		   NewAg-NewV) :-
	add_inactive_configs(P,
		   	   InactiveEdge,
		   	   MoreAs,
		   	   OldAg-OldV,
		   	   NewAg-NewV).
add_inactive_configs(_,_,[],Ag-V,Ag-V).

% add_active_list/8: like add_active_edge/8, but works through a list
%	of active edges.

add_active_list(T,S,P,[Edge|Rest],OldA+I,NewA+I,OldAg-OldV,NewAg-NewV) :-
	!,
	add_active_edge(T,S,P,Edge,OldA+I,MidA+I,OldAg-OldV,MidAg-MidV),
	add_active_list(T,S,P,Rest,MidA+I,NewA+I,MidAg-MidV,NewAg-NewV).
add_active_list(_,_,_,[],A+I,A+I,Ag-V,Ag-V).

% downward_edge_list/4: given a tag, a category and a vertex, make up a list of
%	all the embryo edges extractable from the downward_rule/3 for that
%	category.

downward_edge_list(T,Cat,Vertex,EdgeList) :-
	downward_rule(T,Cat,RHSlist),
	rhs_to_edge_list(Cat,Vertex,RHSlist,EdgeList).

rhs_to_edge_list(Cat,V,[RHS|More],[edge(Cat,[],RHS,V,V)|Rest]) :-
	!,
	rhs_to_edge_list(Cat,V,More,Rest).
rhs_to_edge_list(_,_,[],[]).

% upward_edge_list/4: given a tag, a category and a vertex, make up a list of
%	all the embryo edges extractable from the upward_rule/3 for that
%	category.

upward_edge_list(T,Cat,Vertex,EdgeList) :-
	upward_rule(T,Cat,RuleList),
	rule_to_edge_list(RuleList,Vertex,EdgeList).

rule_to_edge_list([Parent=RHS|More],V,[edge(Parent,[],RHS,V,V)|Rest]) :-
	!,
	rule_to_edge_list(More,V,Rest).
rule_to_edge_list([],_,[]).


% ======== USER-REDEFINABLE PREDICATES ========

% NOTE: do NOT change the format of an edge, it is explicitly used in
%	several other places in the code. These definitions are grouped
%	here for convenience. Together they define the essence of the
%	fundamental rule.

% active/1: succeeds if its argument is an active edge. In the system, an
%	edge is inactive if it is not active.

active(edge(_,_,[_|_],_,_)).

% candidate/2: takes two edges, succeeds if they are candidates for
%	application of the fundamental rule. In normal chart parsing,
%	this test is so simple it is silly to have it wrapped up in a
%	separate predicate like this. However, having it separate makes it
%	easy to change. Note that the first edge must be active and the
%	second edge must be inactive. This is dictated by the places where
%	this predicate is used.

candidate(edge(_,_,[N|_],_,V), edge(N,_,_,V,_)).

% apply_fr/3: applies the fundamental rule to a given active and a given
%	inactive edge. Returns a new edge.

apply_fr(edge(C,F,[N|Rest],SV,MV),
	 edge(N,_,_,MV,EV),
	 edge(C,[N=MV|F],Rest,SV,EV)).


% ============ MONITORING TOOLS ============

% monitor/7: hook for the user to watch what is going on. The
%	user must have turned on 'watching' by using watch/1
%	(converse nowatch/1) first. He can define user_mon/7
%	for himself: the arguments are
%		- tag to identify the rule set
%		- strategy
%		- policy
%		- old chart
%		- old agenda
%		- new chart
%		- new agenda
%	All are instantiated already. user_mon/7 might, for
%	example, show changes between old and new, just show the
%	old versions, or be sophisticated and ask the user what
%	he wants to see.
%
%	If user_mon/7 fails, and watching is turned on, the user will
%	get the default scheme - the old chart and agenda will be
%	written on the output, and the monitor will wait for the user
%	to type <cr> before continuing.

monitor(T,S,P,OC,OA,NC,NA) :- 
	watching(T),
	user_mon(T,S,P,OC,OA,NC,NA),
	!.
monitor(T,_,_,_,_,NC,NA) :- 
	watching(T),
	write('Chart:  '),write(NC),nl,nl,
	write('Agenda: '),write(NA),nl,nl,
	skip(10),
	!.
monitor(_,_,_,_,_,_,_).

watch(T) :-
	( watching(T)
	; assert(watching(T))
	).
nowatch(T) :-
	( retract(watching(T))
	; true
	).

print_chart(A+I) :-
	sort(A,SortedA),
	sort(I,SortedI),
	print_sorted_chart(SortedA+SortedI).

print_sorted_chart([A|MoreA]+[I|MoreI]) :-
	write('  Active edges: '),
	write(A), nl,
	print_list(MoreA,16),
	write('Inactive edges: '),
	write(I), nl,
	print_list(MoreI,16).

print_list([],_).
print_list([Item|Rest],N) :-
	tab(N), write(Item), nl,
	print_list(Rest,N).

% a simple test rig:

test(T) :-
	( upward_rule(T,_,_)
	; downward_rule(T,_,_)
	; write('Inverting rules for tag '), write(T), nl,
	  invert_rules(T),
	  write('...done the inversion'), nl
	),
	!,
	prompt(_, 'Word list: '),
	read(L),
	L = [_|_],
	!,
	make_chart(T,L,_,C),
	nl,
	print_chart(C).
test(_) :-
	write('That was not a list'), nl.