% spcompat.pl

% Extra code needed by Sicstus, for functionality from Quintus library
% not available in Sicstus one. This file is created by David Carter
% (d.m.carter@bigfoot.com); code is written by David Carter and (earlier)
% by Ralph Becket.

% This has been tested under Sicstus 3.7.1.

%%% ======================================================================
%%% Implementations of predicates with no simple equivalents in SICStus 3#7.
%%% ======================================================================

% can_open_file exists in Quintus library(file). We implement just the
% behaviour for Mode=read, because that's all that's used by OAA, and 
% because the Quintus documentation points out the following:
%  "Under operating systems which do not support version numbers (as
%   UNIX does not), file_exists/2 could fail (because there is no
%   such FileName) and can_open_file/2 could succeed (because you are
%   allowed to create one).  Conversely, file_exists/2 could succeed
%   (because there is such a FileName) and can_open_file/2 fail
%   (because you have so many files open that you cannot open
%   another)."
% We assume the "too many open files" condition won't arise.

can_open_file(_FileName, Mode, _) :-
  \+(Mode==read),
  !,
  format(user_error,
"~N* WARNING: can_open_file called with Mode=~w (not implemented under Sicstus\n"),
  fail.
% Succeed if can open FileName in this Mode.
can_open_file(FileName, read, _) :-
  file_exists(FileName, read),
  !.
% No, fail after printing a warning if Quiet = warn.
can_open_file(FileName, read, warn) :-
  format(user_error,
         "~N* could not open ~w in mode read~n",
	 [FileName]),
  fail.

% ------------------------------------------------------------------------

% Emulation of Quintus library(ctr).

ctr_is(Ctr, Old) :-
  ctr(Ctr, Old1),
  !,
  Old=Old1.
ctr_is(_,0).

ctr_set(Ctr, N) :-
  retractall(ctr(Ctr, _)),
  assert(ctr(Ctr, N)).

ctr_inc(Ctr, N, Old) :-
  ctr(Ctr, Old),
  retractall(ctr(Ctr, _)),
  M is Old + N,
  assert(ctr(Ctr, M)).

ctr_inc(Ctr) :-
  ctr_inc(Ctr, 1, _Old).

ctr_inc(Ctr, N) :-
  ctr_inc(Ctr, N, _Old).

% ------------------------------------------------------------------------

% Partial emulation of library(tcp) ...

:- dynamic tcp_connectionid_stream/2.
:- dynamic ctr/2.
:- dynamic tcp_watch_user/0.
:- dynamic tcp_listener/1.
:- dynamic tcp_scheduled_wakeups/1.

% Just a stub to warn of non-existence:
tcp_trace(off, OnOff) :-
  OnOff == on,
  !,
  format(user_error,
	 "~NWarning: tcp_trace/2 has no equivalent in SICStus Prolog 3.7~n",
	 []).
tcp_trace(_,_).

% ctr 14 is used to generate socket #'s like what we get from the QP
% tcp lib.
% :- ctr_set(14, 1).
% Not sure if the above works, so do this:
ctr(14, 1).

% Connect to a server that is waiting at port Port on host Host. We return a
% socket #, ConnectionID. 

tcp_connect(Address, ConnectionID) :-
  tcp_connect(Address, ConnectionID, _).

% This predicate returns a Stream which we can read and write.

tcp_connect(address(Port, Host), ConnectionID, Stream) :-
  socket('AF_INET', Socket),
  socket_connect(Socket, 'AF_INET'(Host, Port), Stream),
  socket_buffering(Stream, read, _Old, unbuf),
  % Socket is an int, but since we don't know that it always will be,
  % let's play it safe and generate our own.
  ctr_inc(14, 1, ConnectionID),
  set_tcp_connection_name(ConnectionID, Stream).

set_tcp_connection_name(ConnectionID, Stream) :-
  retractall(tcp_connectionid_stream(ConnectionID, _)),
  assert(tcp_connectionid_stream(ConnectionID, Stream)).

% Succeed on the current connection name, or return such a name.

tcp_connected(ConnectionID) :-
  tcp_connectionid_stream(ConnectionID, _).

% Send a Term down the stream corresponding to the named connection.
% NB: in Sicstus, this is done with simple portray_clause, i.e. we can't
% emulate the Quintus term-compression trickery. This is why an OAA Sicstus
% process will post its language as 'sicstus', not as 'prolog', because
% the latter will attract compressed terms.
%
% This probably isn't called by any Sicstus code, and probably shouldn't be,
% because it doesn't escape single quotes the right way.
% Use com:com_SendData.  (DLM, 6/2000)

tcp_send(ConnectionID, Term) :-
  debug_format("~N% tcp_send(~q, ~q)~n", [ConnectionID, term(Term)]),
  tcp_connectionid_stream(ConnectionID, Stream),
  portray_clause(Stream, term(Term)),
  flush_output(Stream).

% Comment out the first clause to get debug_format/2 to behave like
% format/2.
debug_format(_,_) :-
  !.
debug_format(Template,Args) :-
  format(Template,Args),
  current_output(CO),
  flush_output(CO).

% These are simple under Sicstus. Just look up the stream (bidirectional)
% for the named connection.

tcp_input_stream(ConnectionID, Stream) :-
  tcp_connectionid_stream(ConnectionID, Stream).

tcp_output_stream(ConnectionID, Stream) :-
  tcp_connectionid_stream(ConnectionID, Stream).

flush_all_streams :-
  tcp_connectionid_stream(_ConnectionID, Stream),
  flush_output(Stream),
  fail.
flush_all_streams.

% This is Ralph Becket's Sicstus emulation of the Quintus
% tcp_select/[1,2], using the Sicstus socket_select/5 predicate in
% library(sockets). I've cleaned it up a bit (shortening predicate
% bodies, etc) and have also added an emulation of the Quintus
% "wakeup" code, whereby timers can be set with
% tcp_schedule_wakeup(When,Value) which will later cause tcp_select to
% return with wakeup(Value). The abbreviation "SW" in variable names
% here stands for "ScheduledWakeup". DMC.

% Default timeout is 0, which (under Quintus) means no timeout.
tcp_select(X) :-
  tcp_select(0, X).

tcp_select(QPTimeout, X) :-
%    flush_all_streams,
    debug_format("~N% Call: tcp_select(~q, ~q)...~n", [QPTimeout,X]),
  % Convert to Sicstus timeout representation.
  sicstus_timeout_structure(QPTimeout,SPTimeout),
  % If there's a wakeup due before our timeout, use the wakeup time instead.
  adjust_timeout_from_wakeup(SPTimeout,SWTimeval,SWTerm,
                             SPTimeoutToUse),
  % Find all the sockets we are listening on (as server)
  findall(Listener, tcp_listener(Listener), Listeners),
  % Find all the streams we are connected to (as client)
  find_all_connected_streams(Streams),
    debug_format("call: socket_select(~q, _, ~q, ~q, _)~n",
                 [Listeners, SPTimeout, Streams]),
  % See the Sicstus documentation for what socket_select does.
  socket_select(Listeners, NewStreams, SPTimeoutToUse, Streams, ReadStreams),
    debug_format("exit: socket_select(~q, ~q, ~q, ~q, ~q)~n",
                 [Listeners, NewStreams, SPTimeout, Streams, ReadStreams]),
  % Pick through what socket_select has given us and return X1 as an
  % initial return value.
  tcp_select_return_value(NewStreams,ReadStreams,SPTimeout,X1),
  % If X1 was a timeout, see if it's because we've been using an adjusted
  % timeout (for a wakeup). If so, return the wakeup term. Otherwise,
  % return X1.
  adjust_return_value_from_wakeup(X1,SWTimeval,SWTerm,X),
    debug_format("~N% Exit: tcp_select(~q, ~q)...~n", [QPTimeout, X]).

% If we got a timeout from socket_select, and SWTimeval did get set by
% adjust_timeout_from_wakeup (i.e. the timeout was really a wakeup, then
% return the wakeup value. Otherwise, pass back the value from 
% tcp_select_return_value unchanged.

adjust_return_value_from_wakeup(In,SWTimeval,SWTerm,wakeup(SWTerm)) :-
  In == timeout,
  nonvar(SWTimeval),
  !,
  tcp_cancel_wakeup(SWTimeval,SWTerm).
adjust_return_value_from_wakeup(Value,_,_,Value).

% Here's where the clever wakeup stuff is done. We look at the next scheduled
% wakeup, if any, and see if it's before the Timeout provided as arg1. If so,
% we return TimeoutToUse as a more imminent timeout value, and set timeval
% and term to return as args 2 and 3 as well. If we don't find a wakeup, or
% if we do but it's after the timeout, we just return the timeout as we got it.

adjust_timeout_from_wakeup(Timeout,SWTimeval,SWTerm,
                           TimeoutToUse) :-
  (tcp_scheduled_wakeup(SWTimeval,SWTerm) -> true;
   fail),
  tcp_now(Now),
  (Timeout=off; % any wakeup is more imminent that 'off'
   Timeout=Secs:Micros,
     TimeoutSecs is Secs+Micros/1000000,
     tcp_time_plus(Now,TimeoutSecs,TimeoutTimeval),
     TimeoutTimeval @> SWTimeval),
  !, % scheduled wakeup will happen earlier than Timeout we were given
  tcp_time_plus(Now,TimeoutToUse1,SWTimeval),
  sicstus_timeout_structure(TimeoutToUse1,TimeoutToUse).
adjust_timeout_from_wakeup(Timeout,_,_,Timeout).

% A Quintus timeout structure is just a number (of seconds to wait). Sicstus
% uses 'off' for no timeout, and a structure of the form Seconds:Microseconds
% for other amounts of time.

sicstus_timeout_structure(0,Timeout) :-
  !,
  Timeout=off.
sicstus_timeout_structure(Secs,Secs:0) :-
  integer(Secs),
  !.
sicstus_timeout_structure(Secs1,Secs:Micros) :-
  number(Secs1),
  !,
  Micros1 is Secs1*1000000,
  Secs is Micros1 // 1000000,
  Micros is integer(Micros1 - (Secs*1000000)).
% Assume already converted:
sicstus_timeout_structure(Timeout,Timeout).

find_all_connected_streams(Streams) :-
  findall(Stream, tcp_connectionid_stream(_, Stream), Streams0),
  (tcp_watch_user ->
     Streams = [user_input | Streams0];
   Streams = Streams0).

% We got a new stream. Register it and return the KS ID.
tcp_select_return_value(NewStreams,_ReadStreams,_Timeout,X) :-
  member(NewStream, NewStreams),
  tcp_register_new_stream(NewStream, KSID),
  !,
  X = connected(KSID).
% No read-streams: must be a timeout.
tcp_select_return_value(_,[],_,X) :-
  !,
  X = timeout.
% We have either a proper term to read, or some user input.
tcp_select_return_value(_,ReadStreams,_,X) :-
  member(ReadStream, ReadStreams),
  (  tcp_connectionid_stream(ConnectionID, ReadStream),
     debug_format("call: tcp_select(_); reading ~q stream ~q~n",
                  [ConnectionID, ReadStream]),
     read(ReadStream, Input),
     ( Input == end_of_file -> 
	 tcp_shutdown(ConnectionID),
         debug_format("exit: tcp_select(~q)~n", [end_of_file(ConnectionID)]),
         X = end_of_file(ConnectionID)
     | Input = term(Event) ->
         debug_format("exit: tcp_select(~q)~n", [term(ConnectionID, Event)]),
         X = term(ConnectionID, Event)
       % Not sure if this case occurs (DLM):
     | otherwise ->
         debug_format("exit: tcp_select(~q)~n", [term(ConnectionID, Input)]),
         X = term(ConnectionID, Input)
     )
   ;
   % ReadStream = user_input,
   X = user_input).
% Otherwise, try again.
% Ralph says: "I *think* this is the correct semantics for
% tcp_select/2, but the Quintus manual isn't very explicit (i.e. can
% tcp_select/2 ever fail?  If not, what are the timeout semantics?)"
tcp_select_return_value(_,_,Timeout,X) :-
  tcp_select(Timeout, X).

tcp_register_new_stream(Stream, KSID) :-
  next_number(KSID),
  \+ tcp_connectionid_stream(KSID, _),
  !,
  socket_buffering(Stream, read, _Old, unbuf),
  assert(tcp_connectionid_stream(KSID, Stream)).

next_number(1).
next_number(N) :-
  next_number(M),
  N is M + 1.

% Start up as a server on the given port (and host, which should be our
% hostname). This both returns and asserts a Socket number; at the time of
% writing, the OAA code ignores the returned one and relies on the asserted
% one.

tcp_listen_at_port(Port,Host,Socket) :-
  socket('AF_INET', Socket),
  socket_bind(Socket, 'AF_INET'(Host, Port)),
  socket_listen(Socket, 5),	% Max no. of pending connection req's.
  assert(tcp_listener(Socket)).

% This should only ever be called on one Socket, so allow for the argument
% being variable:

tcp_destroy_listener(Socket) :-
  (var(Socket) -> tcp_listener(Socket); true),
  socket_close(Socket).

tcp_watch_user(Old, New) :-
  (tcp_watch_user -> Old = on; Old = off),
  retractall(tcp_watch_user),
  (New = on -> assert(tcp_watch_user); true).

% Shut down the specified connection(s).
tcp_shutdown(ConnectionID) :-
  retract(tcp_connectionid_stream(ConnectionID, Stream)),
  !,
  close(Stream).
% At least shout if no associated stream...
tcp_shutdown(ConnectionID) :-
  ground(ConnectionID),
  !,
  format('~NWARNING: tcp_shutdown/1: ~w has no associated stream.~n',[ConnectionID]).
tcp_shutdown(_).

% I hope this is sufficiently drastic...DMC
tcp_reset :-
  tcp_shutdown(_),
  retractall(tcp_listener(_)),
  tcp_cancel_wakeups.

% Simulate some of the timing predicates...these work with "timevals", which
% are structures of the form timeval(Seconds,Microseconds). Which is a bit
% of a pain, but there we are.

tcp_now(timeval(Secs,Micros)) :-
  statistics(walltime,[Millis|_]),
  Secs is Millis // 1000,
  Micros is 1000*(Millis-1000*Secs).

tcp_time_plus(TV1,Delta,TV2) :-
  ground(TV1),
  ground(Delta),
  !,
  TV1=timeval(Secs1,Micros1),
  Micros1a is Micros1+integer(0.5+Delta*1000000),
  Secs2 is Secs1 + Micros1a // 1000000,
  Micros2 is Micros1a - 1000000*(Secs2-Secs1),
  TV2=timeval(Secs2,Micros2).
tcp_time_plus(TV1,Delta,TV2) :-
  ground(TV2),
  ground(Delta),
  !,
  TV2=timeval(Secs2,Micros2),
  Micros2a is Micros2-integer(0.5+Delta*1000000),
  (Micros2a >= 0 -> 
     TV1=timeval(Secs2,Micros2a);
   Secs1 is Secs2 + (Micros2a+1) // 1000000 -1,
     Micros1 is Micros2a + 1000000*(Secs2-Secs1),
     TV1=timeval(Secs1,Micros1)).
tcp_time_plus(TV1,Delta,TV2) :-
  ground(TV1),
  ground(TV2),
  !,
  TV1=timeval(Secs1,Micros1),
  TV2=timeval(Secs2,Micros2),
  Delta is Secs2-Secs1 + (Micros2-Micros1)/1000000.
% This should probably raise another exception...
tcp_time_plus(TV1,Delta,TV2) :-
  raise_exception(bad_arguments(tcp_time_plus(TV1,Delta,TV2))).

% We maintain the wakeups in a sorted list so tcp_scheduled_wakeup
% (and tcp_select) are relatively efficient. The price is that
% scheduling and cancelling wakeups is a bit less efficient, but
% we assume checking will be more frequent than setting.

tcp_schedule_wakeup(Timeval,Term) :-
  (retract(tcp_scheduled_wakeups(Existing)) ->
     sort([wakeup(Timeval,Term)|Existing],New);
   New=[wakeup(Timeval,Term)]),
  assert(tcp_scheduled_wakeups(New)).

tcp_scheduled_wakeup(Timeval,Term) :-
  tcp_scheduled_wakeups(List),
  member(wakeup(Timeval,Term),List).

tcp_cancel_wakeup(Timeval,Term) :-
  tcp_scheduled_wakeups(Existing),
  append(Pre,[wakeup(Timeval,Term)|Post],Existing),
  !,
  retract(tcp_scheduled_wakeups(_)),
  append(Pre,Post,New),
  assert(tcp_scheduled_wakeups(New)).

tcp_cancel_wakeups :-
  retractall(tcp_scheduled_wakeups(_)).

% This can be used to test wakeups are working. A call such as
%   tcp_test_wakeup_mechanism(2.8,11.2).
% sets a wakeup for 2.8 seconds time, then calls tcp_select with a
% timeout of 11.2 seconds. X should get returned as wakeup(foo), not
% timeout, after 2.8 seconds. If the numbers are reversed, we should
% get a timeout instead.

tcp_test_wakeup_mechanism(WakeupSecs,TimeoutSecs) :-
  (bad_timeout_value(WakeupSecs);
   bad_timeout_value(TimeoutSecs)),
  !.
tcp_test_wakeup_mechanism(WakeupSecs,TimeoutSecs) :-
  format('~N~nTesting Sicstus TCP wakeup mechanism emulator.~n',[]),
  format('~NWakeup time is ~w, and timeout time is ~w.~n',
         [WakeupSecs,TimeoutSecs]),
  (TimeoutSecs = 0 ->
     format('~NA timeout time of 0 seconds means no timeout.~n',[]);
   true),
  (WakeupSecs > TimeoutSecs, TimeoutSecs > 0 -> 
     format('~NHmm...we expect a timeout, as it precedes the wakeup.~n',[]),
     MinSecs=TimeoutSecs,
     Expect=timeout;
   MinSecs=WakeupSecs,
     format('~NHmm...we expect a wakeup, as it precedes the timeout.~n',[]),
     Expect=wakeup(foo)),
  format('Setting the wakeup...~n',[]),
  tcp_cancel_wakeups,
  tcp_now(Now),
  tcp_time_plus(Now,WakeupSecs,Then),
  tcp_schedule_wakeup(Then,foo),
  format('~NCalling tcp_select/2, which should return "~w".~n',[Expect]),
  format('Please wait ~w seconds...~n',[MinSecs]),
  tcp_select(TimeoutSecs,X),
  format('~Ntcp_select/2 returned "~w".~n',[X]),
  (X == Expect -> format('~NGood, that''s what I expected.~n',[]);
   format('That''s bad. It should have returned "~w"~n',[Expect])),
  !.
tcp_test_wakeup_mechanism(_,_) :-
  format('~NOh, dear. This predicate really shouldn''t fail.~n',[]),
  fail.

% Check the times offered are sensible...

bad_timeout_value(Val) :-
  \+((number(Val),Val>=0)),
  !,
  format('~NBad value "~w": it should be a positive number (of seconds).',[Val]).
bad_timeout_value(Val) :-
  Val > 500,
  !,
  format('~NBad value "~w": do you really want to wait for ~w seconds?~n',
         [Val,Val]).

% ------------------------------------------------------------------------

% Part of Quintus library(strings)...

concat(X, Y, Z) :-
  atom_chars(X, XCs),
  atom_chars(Y, YCs),
  append(XCs, YCs, ZCs),
  atom_chars(Z, ZCs).

:- dynamic gensym_ctr/1.
gensym_ctr(0).

gensym(Prefix, Sym) :-
  gensym_ctr(N),
  M is N + 1,
  retractall(gensym_ctr(_)),
  assert(gensym_ctr(M)),
  number_chars(M, NChars),
  atom_chars(Prefix, PChars),
  append(PChars, NChars, Chars),
  atom_chars(Sym, Chars).

% @@Needs to be completed!
span_trim(X, X).

% ------------------------------------------------------------------------

% Part of Quintus library(system)...

unix(argv(Args)) :-
  prolog_flag(argv,Args).

% ------------------------------------------------------------------------

% Simulate the effect of Quintus load_files(File,[all_dynamic(true)]).

load_all_dynamic(File) :-
  open(File,read,S),
  repeat,
  read(S,Term),
  (Term == end_of_file;
   Term = (:- multifile(_)) -> fail;
   Term = (:- dynamic(_)) -> fail;
   Term = (:-(Goal)) -> call(Goal),fail;
   assertz(Term),fail),
  !.

% ------------------------------------------------------------------------

% Emulate ask_oneof/3 in Quintus library ask.pl.

ask_oneof(Prompt,Constants,Answer) :-
  current_output(CO),
  format('~N~w: ',[Prompt]),
  flush_output(CO),
  read_line_of_chars(Chars),
  findall(Constant,
          (member(Constant,Constants),
           atom_chars(Constant,CChars),
           append(Chars,_,CChars)),
          Matches),
  (Matches=[Answer] -> true;
   ask_oneof(Prompt,Constants,Answer)).

read_line_of_chars(Chars) :-
  get0(C),
  (C = 10 -> Chars=[];
   C < 0 -> Chars=[];
   Chars=[C|Rest],
     read_line_of_chars(Rest)).

ask_number(Prompt,Default,Answer) :-
    current_output(CO),
  format('~N~w [~w]: ',[Prompt,Default]),
  flush_output(CO),
  read_line_of_chars(Chars),
  (number_chars(Answer,Chars) -> true;
   ask_number(Prompt,Default,Answer)).

% ------------------------------------------------------------------------

% Emulate part of Quintus library(sets)...

union([List],List) :-
  !.
union([List|Lists],Union) :-
  union(Lists,Union1),
  union(List,Union1,Union).

union([],Set,Set).
union([E|Set1],Set2,[E|Set12]) :-
  \+(memberchk(E,Set2)),
  !,
  union(Set1,Set2,Set12).
union([_|Set1],Set2,Set12) :-
  union(Set1,Set2,Set12).

%# EXTRA STUFF FOR OAA V2

tcp_inet_addr(Host,IPNum) :-
  hostname_address(Host,IPNum).

%# NOTE: there is an incompatibility here. now/1 under Quintus returns
%# the number of seconds since January 1st 1970. But this is only used
%# to provide the third argument of oaa_data_ref/3, and that argument
%# never seems to be used. So I'm providing an increasing function of time
%# as a lazy way out. DMC.

now(When) :-
  statistics(walltime,[When|_]).  

samsort(Pred,InList,OutList) :-
  functor(Call,Pred,2),
  samsort(InList,Call,OutList,[]).

samsort([],_,End,End) :- !.
samsort(InList,Call,OutList,End) :-
  samsort_middle_member(InList,X,Rest),
  samsort_split(X,Rest,Call,Small,Big),
  samsort(Small,Call,OutList,[X|V]),
  samsort(Big,Call,V,End).

samsort_middle_member(InList,Mem,Rest) :-
  length(InList,N),
  N2 is 1 + (N // 2),
  samsort_nth_member(InList,N2,Mem,Rest).

% Return the nth member of a list, and what follows it in the list.

samsort_nth_member([H|T],1,H,T).
samsort_nth_member([H|T],N,Ans,[H|Rest]) :-
  N > 1,
  N1 is N-1,
  samsort_nth_member(T,N1,Ans,Rest).

samsort_split(_,[],_,[],[]).
samsort_split(X,[Y|Tail],Call,Small,[Y|Big]) :-
  \+((arg(1,Call,Y),
      arg(2,Call,X),
      call(Call))),
  !,
  samsort_split(X,Tail,Call,Small,Big).
samsort_split(X,[Y|Tail],Call,[Y|Small],Big) :-
  samsort_split(X,Tail,Call,Small,Big).

tcp_listen_at_port(Port,Host) :-
  on_exception(_,
               tcp_listen_at_port(Port,Host,_),
               fail).

can_open_file(File,Mode) :-
  can_open_file(File,Mode,fail).

% Sicstus really has the better naming decision here...

at_end_of_file(Stream) :-
  at_end_of_stream(Stream).

% append/2 is defined in Quintus library(lists).

append([],_) :-
  !,
  fail.
append([List],List) :-
  !.
append([List|Lists],Ans) :-
  append(List,Rest,Ans),
  append(Lists,Rest).


% Borrowed from Quintus documentation:

concat_atom(Constants, Atom) :-
        concat_chars(Constants, "", Chars),
        atom_chars(Atom, Chars).

concat_chars([], _, []).
concat_chars([Constant|Constants], Sep, Chars0) :-
        name(Constant, Name),
        lists:append(Sep,Chars1,Tail),
        lists:append(Name, Tail, Chars0),
        concat_chars(Constants, Sep, Chars1).

concat_atom(Atoms,Sep,Atom) :-
  atom_chars(Sep,SChars),
  concat_chars(Atoms,SChars,Chars),
  atom_chars(Atom,Chars).

%-------------------------------------------------------------------
% Emulate Quintus library read_sent.  The following code, a precursor
% of read_sent, is freely available.

%   File   : /usr/lib/prolog/read_sent
%   Author : R.A.O'Keefe
%   Updated: 11 November 1983
%   Purpose: to provide a flexible input facility
%   Needs  : memberchk from utils.

/*  The main predicates provided exported by this file are
	read_until(+Delimiters, -Answer)
	read_line(-String)
	trim_blanks(+RawString, -Trimmed)
	read_sentence(-ListOfTokens).
*/

:- public
	case_shift/2,
	chars_to_words/2,
	is_digit/1,
	is_endfile/1,
	is_layout/1,
	is_letter/1,
	is_lower/1,
	is_newline/1,
	is_paren/2,
	is_period/1,
	is_punct/1,
	is_upper/1,
	read_line/1,
	read_sent/1,
	read_until/2,
	trim_blanks/2.

:- mode
	case_shift(+, -),
	chars_to_atom(-, ?, ?),
	chars_to_integer(+, -, ?, ?),
	chars_to_string(+, -, ?, ?),
	chars_to_words(+, -),
	chars_to_words(-, ?, ?),
	chars_to_word(-, ?, ?),
	is_digit(+),
	is_endfile(?),
	is_layout(+),
	is_letter(+),
	is_lower(+),
	is_newline(?),
	is_paren(?, ?),
	is_period(+),
	is_punct(+),
	is_upper(+),
	read_line(-),
	read_sent(-),
	read_until(+, -),
	read_until(+, +, -),
	trim_blanks(+, -),
	trim_blanks_rest_word(+, -),
	trim_blanks_next_word(+, -).



/*  read_until(Delimiters, Answer)
    reads characters from the current input until  a  character  in  the
    Delimiters  string  is  read.  The characters are accumulated in the
    Answer string, and include the closing  delimiter.   Prolog  returns
    end-of-file as ^Z (26)  regardless of the user's assignment (e.g. if
    you use ^D as end of file, Prolog still returns ^Z).  The end of the
    file is always a delimiter.
*/
read_until(Delimiters, [Char|Rest]) :-
	get0(Char),
	read_until(Char, Delimiters, Rest).


read_until(Char, Delimiters, []) :-
	memberchk(Char, [26|Delimiters]), !.
read_until(_, Delimiters, Rest) :-
	read_until(Delimiters, Rest).



%   The following predicates define useful character classes.

is_newline(31).				% the Dec-10 line terminator.
					% UNIX systems: use 10.

is_endfile(26).				% the file terminator ^Z
					% UNIX systems: *still* use 26.

is_layout(Char) :-
	Char =< " ".			% includes tab, newline, ^S, &c

is_lower(Char) :-
	Char >= "a", Char =< "z".	% lower case letter

is_upper(Char) :-
	Char >= "A", Char =< "Z".	% upper case letter

is_letter(Char) :-
	is_lower(Char) | is_upper(Char).

is_digit(Char) :-
	Char >= "0", Char =< "9".	% decimal digit

is_period(Char) :-
	memberchk(Char, ".!?").		% sentence terminator

is_punct(Char) :-
	memberchk(Char, ",;:").		% other punctuation mark

is_paren(Left,Right) :-			% brackets
	memberchk([Left,Right], ["()","[]","{}","<>"]).


/*  trim_blanks(RawInput, Cleaned)
    removes leading and trailing layout characters  from  RawInput,  and
    replaces  internal  groups  of  layout  characters by single spaces.
    Thus trim_blanks(<|TAB TAB a SP ^M ^E b ^Z|>, "a b") would be true.
*/
trim_blanks([Char|Chars], Cleaned) :-
	is_layout(Char), !,
	trim_blanks(Chars, Cleaned).
trim_blanks([Char|Chars], [Char|Cleaned]) :- !,
	trim_blanks_rest_word(Chars, Cleaned).
trim_blanks([], []).


trim_blanks_rest_word([Char|Chars], Cleaned) :-
	is_layout(Char), !,
	trim_blanks_next_word(Chars, Cleaned).
trim_blanks_rest_word([Char|Chars], [Char|Cleaned]) :- !,
	trim_blanks_rest_word(Chars, Cleaned).
trim_blanks_rest_word([], []).


trim_blanks_next_word([Char|Chars], Cleaned) :-
	is_layout(Char), !,
	trim_blanks_next_word(Chars, Cleaned).
trim_blanks_next_word([Char|Chars], [32,Char|Cleaned]) :- !,
	trim_blanks_rest_word(Chars, Cleaned).
trim_blanks_next_word([], []).



/*  chars_to_words(Chars, Words)
    parses a list of characters (read by read_until) into a list of
    tokens, where a token is
	'X' for X a period or other punctuation mark, e.g. ';'
	atom(X) for X a sequence of letters, e.g. atom(the)
	integer(X) for X a sequence of digits, e.g. integer(12)
	apost for '
	aposts for 's
	string(X) for X "..sequence of any.."
    Thus the string "the "Z-80" is on card 12." would be parsed as
	[atom(the),string('Z-80'),atom(is),atom(on),atom(card),
	 integer(12),'.'].
    It is up to the sentence parser to decide what to do with these.
    Note that the final period, if any, is retained.  The parser may
    need it.
*/
chars_to_words(Chars, Words) :-
	chars_to_words(Words, Chars, []).


chars_to_words([Word|Words]) -->
	chars_to_word(Word), !,
	chars_to_words(Words).
chars_to_words([]) --> [].


chars_to_word(Word) -->
	[Char], {is_layout(Char)}, !,
	chars_to_word(Word).
chars_to_word(atom(Word)) -->
	[Char], {is_letter(Char)}, !,
	chars_to_atom(Chars),
	{case_shift([Char|Chars], Name)},
	{name(Word, Name)}.
chars_to_word(integer(Word)) -->
	[Char], {is_digit(Char)}, !,
	{Init is Char-"0"},		% integer value of char
	chars_to_integer(Init, Word).
chars_to_word(aposts) -->
	"'s", !.
chars_to_word(apost) -->
	"'", !.
chars_to_word(string(Word)) -->
	[Quote], {Quote is """"}, !,	% NB Quote is an integer (34)
	chars_to_string(Quote, String),
	{name(Word, String)}.
chars_to_word(Punct) -->
	[Char],
	{name(Punct, [Char])}.


/*  chars_to_atom(Tail)
    reads the remaining characters of a word.  Case conversion  is  left
    to  another  routine.   In this application, a word may only contain
    letters but they may be in either case.  If you want to parse French
    you will have to decide what to do about accents.  I suggest putting
    them after the vowel, and adding a clause
	chars_to_atom([Vowel,Accent|Chars]) -->
		[Vowel],	{accentable_vowel(Vowel)},
		[Accent],	{accent_for(Vowel, Accent)},
		!.
	with the obvious definitions of accentable_vowel and accent_for.
    Note that the Ascii characters ' ` ^ are officially  designated  the
    "accent acute", "accent grave", and "circumflex".  But this file was
    originally written for an English parser and there was no problem.
*/
chars_to_atom([Char|Chars]) -->
	[Char], {is_letter(Char)}, !,
	chars_to_atom(Chars).
chars_to_atom([]) --> [].


/*  case_shift(Mixed, Lower)
    converts all the upper case letters in Mixed to lower  case.   Other
    characters (not necessarily letters!) are left alone.  If you decide
    to accept other characters in words only chars_to_atom has to alter.
*/
case_shift([Upper|Mixed], [Letter|Lower]) :-
	is_upper(Upper),
	Letter is Upper-"A"+"a", !,
	case_shift(Mixed, Lower).
case_shift([Other|Mixed], [Other|Lower]) :-
	case_shift(Mixed, Lower).
case_shift([], []).


/*  chars_to_integer(Init, Final)
    reads the remaining characters of an integer which starts  as  Init.
    NB:  this  parser  does  not  know about negative numbers or radices
    other than 10, as it was written for PDP-11 Prolog.
*/
chars_to_integer(Init, Final) -->
	[Char], {is_digit(Char)}, !,
	{Next is Init*10-"0"+Char},
	chars_to_integer(Next, Final).
chars_to_integer(Final, Final) --> [].


/*  chars_to_string(Quote, String)
    reads the rest of a string which was opened by  a  Quote  character.
    The  string is expected to end with a Quote as well.  If there isn't
    a matching Quote, the attempt to parse the  string  will  FAIL,  and
    thus the whole parse will FAIL.  I would prefer to give some sort of
    error  message and try to recover but that is application dependent.
    Two adjacent Quotes are taken as one, as they are in Prolog itself.
*/
chars_to_string(Quote, [Quote|String]) -->
	[Quote,Quote], !,
	chars_to_string(Quote, String).
chars_to_string(Quote, []) -->
	[Quote], !.
chars_to_string(Quote, [Char|String]) -->
	[Char], !,
	chars_to_string(Quote, String).


/*  read_line(Chars)
    reads characters up to the next newline or the end of the file, and
    returns them in a list, including the newline or end of file.  When
    you want multiple spaces crushed out, and the newline dropped, that
    is most of the time, call trim_blanks on the result.
*/
read_line(Chars) :-
	is_newline(NL),
	read_until([NL], Chars).


/*  read_sent(Words)
    reads characters up to the next period, which may be  several  lines
    distant from the start, skips to the end of that line, and turns the
    result  into  a  list of tokens.  It can happen that the sentence is
    not well formed, if say there is an unmatched double quote.  In that
    case all the characters will still be read, but chars_to_words  will
    fail  and  so  read_sent  will fail.  read_sent will NOT try to read
    another sentence.
*/
read_sent(Words) :-
	read_until("!?.", Chars),
	is_newline(NL),
	read_until([NL], _),		% skip to end of line
	!,
	chars_to_words(Chars, Words),
	!.


subtract([], _, []).
subtract([Element|Residue], Set, Difference) :-
	memberchk(Element, Set), !,
	subtract(Residue, Set, Difference).
subtract([Element|Residue], Set, [Element|Difference]) :-
	subtract(Residue, Set, Difference).
