!-------------------------------!
!             WORD              !
!  Copyright 1985   A. R. Deas  !
!-------------------------------!

{---------------------------------------------------------------------------}
{ WORD infile outfile [-a] [-b] [-c] [-d] [-dict fname] [-f] [-h] [-l] [-m] }
{                     [-n] [-p] [-r] [-s] [-t] [-u] [-w]                    }
{                                                                           }
{ FUNCTION                                                                  }
{    Produce word lists: vocabularies, lists of spelling errors,            }
{    word frequency lists, word counts, list multiple words.                }
{                                                                           }
{ OPTIONS                                                                   }
{     default        same as [-a]                                           }
{    -a              shorthand for [-c -m -n -u -w]                         }
{    -b              What is the biggest (longest) word                     }
{    -c              Check spelling                                         }
{    -d              include Digits                                         }
{    -f              produce vocabulary list, sorted by frequency           }
{    -h              print help info                                        }
{    -l              produce vocabulary list, sorted Lexically              }
{    -m              flag error when multiple instances of same word occur  }
{    -n              for each spelling error, give me help with the spelling}
{    -p              produce Pure word lists - no frequency info            }
{    -r              print vocabulary lists in Reverse order                }
{    -s              disregard `@' instead of seeing it as a special alpha  }
{    -t              consider `¬' to be a char instead disregarding it      }
{    -w              print Word count and variety of words                  }
{    -u              check word Usage and help me with my writing style     }
{    -dict dictname  use dictionary in file dictname instead of minidict    }
{---------------------------------------------------------------------------}

{---------------------------------------------------------------------------}
{ How it works:                                                             }
{   Interpret command line                                                  }
{   if  no options then stop                                                }
{   hash all words in the infile into the WORD TABLE                        }
{   MARK - remember where heap is                                           }
{   if help wanted then call help                                           }
{   if spelling wanted                                                      }
{      produce a sort tree in lex order                                     }
{      open dictionary file                                                 }
{      cut links from tree to hash table where you find a word              }
{      close dictionary                                                     }
{      print out sort tree                                                  }
{      RELEASE                                                              }
{   fi                                                                      }
{   if word count wanted then produce word count                            }
{   if lexical sort of words wanted then sort and print table and RELEASE   }
{   if frequency sort of words wanted then sort and print table and RELEASE }
{---------------------------------------------------------------------------}
 

!%include "UTILS:util.imp"   {for unadulterated STOI and ITOS}


{QUICK STORAGE TABLE}

! One vocabulary entry: the word text, the number of times it has been seen,
! and a link to the next entry chained on the same hash bucket.
recordformat word rec(string (31) word, integer occurances,
                       record (word rec) name next)
! Despite its name this is the highest bucket index (4095), not the number of
! buckets.  hash addr also ANDs the raw hash with it, so it doubles as a mask.
constantinteger  hash table size = 16_0FFF {4095}
! Bucket heads, indexed 0 to hash table size.  init sets every head to NIL.
ownrecord (word rec) name array  word table(0:hash table size)


{SORTING TREE}

! Sort tree node.  word points at a vocabulary entry, lson and rson are the
! two subtrees, and brother chains extra nodes behind this one (the frequency
! sort uses it for words that have the same count).
recordformat  node(record (word rec) name word,
                    record (node) name lson, rson, brother)
! Root of the current sort tree.  init sets it to NIL.
ownrecord (node) name sort tree

{Things needed to flag multiple words}
! Last word handed to create vocabulary.  get symbol overwrites it with a
! full stop whenever it reads a punctuation character.
ownstring (31) prev word = ""
! Current input line, counted from 1 and bumped by get symbol on each newline.
owninteger     line number = 1
! Number of words returned while in a sentence was 1.
owninteger     num words in sentences = 0
! Number of sentence terminators seen directly after a word by next word.
owninteger     sentence count = 0
! Count of sentences whose first two words are a reference word followed by
! a word accepted by have adverb (see create vocabulary).
owninteger     recursive reference = 0
! Position of the latest word within its sentence, maintained by next word.
owninteger     words from start of sentence = 1
! 1 while the latest word did not end a sentence, 0 when it did.
owninteger     in a sentence = 1
! 1 when the latest word was followed by a sentence terminator, otherwise 0.
owninteger     just finished a sentence = 0

{CLI THINGS}

! One bit per command line switch.  handle ORs these into options, and the
! rest of the program tests them with (options & X option).
constantinteger  A option  = 2_100000000000000,
                  B option  = 2_010000000000000,
                  C option  = 2_001000000000000,
                  D option  = 2_000100000000000,
                  F option  = 2_000010000000000,
                  H option  = 2_000001000000000,
                  L option  = 2_000000100000000,
                  M option  = 2_000000010000000,
                  N option  = 2_000000001000000,
                  P option  = 2_000000000100000,
                  R option  = 2_000000000010000,
                  S option  = 2_000000000001000,
                  T option  = 2_000000000000100,
                  U option  = 2_000000000000010,
                  W option  = 2_000000000000001
! Set of switches in force, filled in by interpret command line.
owninteger  options = 0
! Dictionary file name.  Replaced by -dict or by a third file name argument.
ownstring (31) dictfile = "STYLE:DICT"
! Input and output file names.  An empty name leaves the stream unchanged.
ownstring (31) inf=""
ownstring (31) outf=""

{INTERPRET COMMAND LINE}

! Parses the command line into the globals inf, outf, dictfile and options,
! applies the defaults, and opens and selects the named input and output
! files on stream 1.
routine  interpret command line

!   define param("Source file",inf,pam major+pam infile)
!   define param("Destination file",outf,pam major+pam outfile)
!   define param("Dictionary file",dictfile,0)
!   define boolean params(",,,,,,,,,,,,,,,,,A,B,C,D,F,H,L,M,N,P,R,S,T,U,W",
!                         options,0)
!   process parameters(cliparam)

own string (255) Prog name = "Style"

! Returns the command tail as one string.  The text is fetched through SWI 16
! (presumably the operating system call that returns the command line, to be
! confirmed) and copied byte by byte up to the first code of 13 or below.
! Everything before the first space is stored in Prog name and the remainder
! is the result.  With no space at all, the whole text becomes Prog name and
! the result is empty.
string(255)fn cliparam
constinteger R0 = 0
integer strad,i
string(255) s
! Registers 0 to 3 are saved around the call, and R0 is stored into strad
! as the address of the text.
  *STMFD_12!,<0,1,2,3>
  *SWI_16
  *STR_R0,Strad
  *LDMFD_12!,<0,1,2,3>
  s = ""
  cycle
    i = byteinteger(STRAD)
    if i <= 13 start
      Prog name = s and s = "" unless s -> Prog name.(" ").s
      result=s
    finish
! The jam transfer below silently truncates text longer than 255 characters.
    s <- s.tostring(i)
    strad=strad+1
  repeat
end

! Deals with one blank delimited token.  A token that does not start with a
! minus sign is the next file name: first the input, then the output, then
! the dictionary, and any further one is reported as spurious.  Otherwise the
! token must be -help or a minus sign plus one letter, in either case.
routine handle(string(255) s)
own integer pos state = 0
integer ch
switch which(1:4), option(0:255)
  return if s=""
  if CHARNO(s, 1) # '-' start
! pos state counts the file names seen so far and is held at 3 by which(4).
    pos state = pos state+1
    -> which(pos state)
    which(1): inf = s; return
    which(2): outf = s; return
    which(3): dictfile = s; return
    which(4): pos state = pos state-1
              print string("*** Spurious parameter '".s."'".nl)
              return
  else
    if s = "-help" or s = "-Help" or s = "-HELP" start
      Options = Options ! H Option
    elseif LENGTH(s) # 2
      print string("*** Spurious option '".s."'".nl)
      return
    else
! Jump on the option letter.  Letters without a label land on the default.
      ch = CHARNO(s, 2)
      -> option(ch)
      option('A'): option('a'): Options = Options ! A Option; return
      option('B'): option('b'): Options = Options ! B Option; return
      option('C'): option('c'): Options = Options ! C Option; return
      option('D'): option('d'): Options = Options ! D Option; return
      option('F'): option('f'): Options = Options ! F Option; return
      option('H'): option('h'): Options = Options ! H Option; return
      option('L'): option('l'): Options = Options ! L Option; return
      option('M'): option('m'): Options = Options ! M Option; return
      option('N'): option('n'): Options = Options ! N Option; return
      option('P'): option('p'): Options = Options ! P Option; return
      option('R'): option('r'): Options = Options ! R Option; return
      option('S'): option('s'): Options = Options ! S Option; return
      option('T'): option('t'): Options = Options ! T Option; return
      option('U'): option('u'): Options = Options ! U Option; return
      option('W'): option('w'): Options = Options ! W Option; return
      OPTION(*):
        print string("*** Spurious option '".s."'".nl)
        return
    finish
  finish
end

! Removes and returns the first blank delimited token of s.  When s holds no
! space the whole of s is returned and s is left empty.
string(255)fn first(string(255)name s)
string(255) left
  if s -> left.(" ").s then result = left
  left = s; s = ""
  result = left
end

string(255) s, one, left, right

   options = 0

! Fetch the command tail and squeeze every run of spaces down to one space.
   s = cliparam
   s = left." ".right while s -> left.("  ").right
! Pull out -dict and the file name after it before the single letter scan.
! The switch is only recognised when a space follows it.
   if s -> left.("-dict ").right            -
     or s -> left.("-DICT ").right          -
     or s -> left.("-Dict ").right start
     dictfile = first(right); s = left.right
   finish

! Hand every remaining token to handle, the last one included.
   while s -> one.(" ").s cycle
     handle(one)
   repeat
   handle(s)

! Defaults: nothing at all given means help, a file but no switch means -a,
! and -a itself expands to -c -m -n -u -w.
   options = H Option if Options=0 and inf=""
   options = A Option if Options=0
   options = options ! M Option ! C Option ! W Option if (options & A option)#0
   options = options ! N Option ! U Option if (options & A Option)#0

   if inf # "" start
     Open Input(1, inf)
     Select Input(1)
   finish
   if outf # "" start
     Open output(1, outf)
     Select Output(1)
   finish
end  {of interpret command line}


{LEXICAL TABLES}

  ! Character classes stored in lexical type.  newline char is not a class:
  ! it is the character code 10, which get symbol tests in order to count
  ! input lines.
  constantbyteinteger  wordchar     = 0,
                        blankspace   = 1,
                        illegal char = 2,
                        noise word   = 3 {ignore words containing one of these},
                        bad alpha    = 4,
                        punctuation  = 5,
                        newline char = 10
   ! Class of every character code.  Upper case letters are bad alpha here,
   ! but get symbol folds them to lower case before next word looks them up.
   ! NL counts as blank space and every other control code is illegal char.
   ! init rewrites some entries according to the -t, -s and -d switches.
   ownbyteinteger array  lexical type (0:255) =
         illegal char        {NUL}, illegal char        {SOH},
         illegal char        {STX}, illegal char        {ETX},
         illegal char        {EOT}, illegal char        {ENQ},
         illegal char        {ACK}, illegal char        {BEL},
         illegal char        {BS},  illegal char        {HT},
         blankspace          {NL},  illegal char        {VT},
         illegal char        {FF},  illegal char        {CR},
         illegal char        {SO},  illegal char        {SI},
         illegal char        {DLE}, illegal char        {DC1},
         illegal char        {DC2}, illegal char        {DC3},
         illegal char        {DC4}, illegal char        {NAK},
         illegal char        {SYN}, illegal char        {ETB},
         illegal char        {CAN}, illegal char        {EM},
         illegal char        {SUB}, illegal char        {ESC},
         illegal char        {FS},  illegal char        {GS},
         illegal char        {RS},  illegal char        {US},
         blank space         { },   punctuation         {!},
         punctuation         {"},   blank space         {#},
         blank space         {$},   blank space         {%},
         punctuation         {&},   punctuation         {'},
         punctuation         {(},   punctuation         {)},
         blank space         {*},   blank space         {+},
         punctuation         {,},   blank space         {-},
         punctuation         {.},   noise word          {/},
         noise word          {0},   noise word          {1},
         noise word          {2},   noise word          {3},
         noise word          {4},   noise word          {5},
         noise word          {6},   noise word          {7},
         noise word          {8},   noise word          {9},
         punctuation         {:},   punctuation         {;},
         blank space         {<},   blank space         {=},
         blank space         {>},   punctuation         {?},
         word char           {@},   bad alpha           {A},
         bad alpha           {B},   bad alpha           {C},
         bad alpha           {D},   bad alpha           {E},
         bad alpha           {F},   bad alpha           {G},
         bad alpha           {H},   bad alpha           {I},
         bad alpha           {J},   bad alpha           {K},
         bad alpha           {L},   bad alpha           {M},
         bad alpha           {N},   bad alpha           {O},
         bad alpha           {P},   bad alpha           {Q},
         bad alpha           {R},   bad alpha           {S},
         bad alpha           {T},   bad alpha           {U},
         bad alpha           {V},   bad alpha           {W},
         bad alpha           {X},   bad alpha           {Y},
         bad alpha           {Z},   noise word          {[},
         noise word {for LATEX ¬},  blank space         {]},
         blank space         {^},   blank space         {_},
         punctuation         {`},   word char           {a},
         word char           {b},   word char           {c},
         word char           {d},   word char           {e},
         word char           {f},   word char           {g},
         word char           {h},   word char           {i},
         word char           {j},   word char           {k},
         word char           {l},   word char           {m},
         word char           {n},   word char           {o},
         word char           {p},   word char           {q},
         word char           {r},   word char           {s},
         word char           {t},   word char           {u},
         word char           {v},   word char           {w},
         word char           {x},   word char           {y},
         word char           {z},   punctuation         {curly bra},
         blank space         {|},   blank space         {curly ket},
         blank space         {~},   illegal char        {DEL},
         illegal char(*)
   ! Tally per character code.  get symbol bumps it for punctuation only.
   owninteger array punctuation count(0:255) = 0(*)

{INIT}

routine init
   {Reset the word store and adjust the lexical classes to suit the}
   {-t, -s and -d switches.  Must run after the command line is parsed.}
   integer slot, digit

   {start with an empty sort tree and an empty hash table}
   sort tree ==  NIL
   for slot=0,1,hash table size  cycle
      word table(slot) ==  NIL
   repeat

   {-t given: the LATEX ctrl char and two brackets become blank space}
   unless  (options & T option)=0  start
      lexical type('¬') =  blank space
      lexical type('(') =  blank space
      lexical type('{') =  blank space
   finish

   {-s given: the scribe ctrl char now marks its whole word as noise}
   lexical type('@') =  noise word  unless  (options & S option)=0

   {-d given: digits are accepted as ordinary word characters}
   unless  (options & D option)=0  start
      for digit='0',1,'9'  cycle
         lexical type(digit) =  word char
      repeat
   finish
end  {of init}


{HELP}

routine  help
   {Print the usage summary on the current output stream.  The program}
   {stops afterwards when help was the only thing asked for.}
   {The synopsis now lists -n and the option table now lists -p, both of}
   {which the command line parser accepts.}

   {Print one line of the help text followed by a newline}
   routine  emit(string (255) text)
      Print String(text)
      Newline
   end  {of emit}

   emit(c
"{---------------------------------------------------------------------------}")
   emit(c
"{ WORD infile outfile [-a] [-b] [-c] [-d] [-dict fname] [-f] [-h] [-l] [-m] }")
   emit(c
"{                     [-n] [-p] [-r] [-s] [-t] [-u] [-w]                    }")
   emit(c
"{                                                                           }")
   emit(c
"{ FUNCTION                                                                  }")
   emit(c
"{    Produce word lists: vocabularies, lists of spelling errors,            }")
   emit(c
"{    word frequency lists, word counts, list multiple words.                }")
   emit(c
"{    Option -t is in support of TEX and LATEX                               }")
   emit(c
"{                                                                           }")
   emit(c
"{ OPTIONS                                                                   }")
   emit(c
"{     default        same as [-a]                                           }")
   emit(c
"{    -a              shorthand for [-c -m -n -u -w]                         }")
   emit(c
"{    -b              What is the biggest (longest) word                     }")
   emit(c
"{    -c              Check spelling                                         }")
   emit(c
"{    -d              include Digits                                         }")
   emit(c
"{    -f              produce vocabulary list, sorted by frequency           }")
   emit(c
"{    -h              print help info                                        }")
   emit(c
"{    -l              produce vocabulary list, sorted Lexically              }")
   emit(c
"{    -m              flag error when multiple instances of same word occur  }")
   emit(c
"{    -n              for each spelling error, give me help with the spelling}")
   emit(c
"{    -p              produce Pure word lists - no frequency info            }")
   emit(c
"{    -r              print vocabulary lists in Reverse order                }")
   emit(c
"{    -s              disregard `@' instead of seeing it as a special alpha  }")
   emit(c
"{    -t              consider `¬' to be a char instead disregarding it      }")
   emit(c
"{    -u              check word Usage and help me with my writing style     }")
   emit(c
"{    -w              print Word count and variety of words                  }")
   emit(c
"{    -dict dictname  use dictionary in file <dictname> instead of default   }")
   emit(c
"{                    (Default dict is STYLE:DICT)                           }")
   emit(c
"{---------------------------------------------------------------------------}")

   {nothing else was requested, so there is no more work to do}
   if Options = H Option then stop
end  {of help}


{NEXT WORD}

! Returns the next word from the current input stream, folded to lower case
! and cut to at most 31 characters.  A token that contains a noise character
! (see lexical type) is skipped whole.  As side effects the function keeps
! line number, punctuation count, prev word, sentence count and the other
! sentence bookkeeping variables up to date.
! NOTE(review): get symbol also consumes the character that ends the word
! being returned.  When that character is punctuation, prev word has already
! been reset to a full stop by the time create vocabulary compares the
! returned word against it, and create vocabulary then overwrites prev word
! with the word itself.  So punctuation directly after a word appears to mask
! the checks on that word rather than on the word that follows.  Confirm
! whether this is intended.
string (31) function  next word

   {GET SYMBOL}
   ! Reads one character, counts newlines and punctuation, resets prev word
   ! on punctuation, and returns the character with A to Z folded to a to z.
   integer function  get symbol
      integer ch
      Read Symbol(ch)
      line number =  line number + 1 if ch = newline char
      if lexical type(ch) = punctuation  start
         punctuation count(ch) = punctuation count(ch) + 1
         prev word = "."
      finish
      if  'A'  <=  ch  <= 'Z'  start
         {convert to lower case}
         result =  ch - 'A' + 'a'
      else
         result =  ch
      finish
   end  {of get symbol}

   integer ch
   ! Set when the previous word was followed by a sentence terminator, so
   ! that the word read by this call is counted as the first of a sentence.
   ownbyteinteger ready to start a new sentence = 0
   string (31) word

   {MAIN OF NEXT WORD}

   word = ""
   ch =  get symbol

   ! Skip everything up to the first word or noise character.
   while lexical type(ch) # word char and c
          lexical type(ch) # noise word cycle
      ch =  get symbol
   repeat

   {check that have no noise words}
   if  lexical type(ch) = noise word  start
      {skip to end of word}
      ! Only a blank space character ends the skip, punctuation does not.
      while  lexical type(ch) # blank space  cycle
         ch =  get symbol
      repeat
      result  =  next word
   finish

   {read word}
   ! Collection stops at 31 characters.  The rest of a longer word is thrown
   ! away by the loop just before the final result.
   while lexical type(ch) = word char and LENGTH(word) < 31 cycle
      word =  word . Tostring(ch)
      ch =  get symbol
   repeat

   {watch for startf a new sentence}
   if  ready to start a new sentence = 1  start
       words from start of sentence =  1 {the first word in a new sentence}
       ready to start a new sentence = 0
       in a sentence =  1
   else
       words from start of sentence =  words from start of sentence + 1
   finish

   {Keep score of how many sentences parsed}
   ! Only the character directly after the word is tested here.
   if  ch = '.'  or  ch = '!'  or  ch = '?' or ch = ':'  start
      sentence count =  sentence count + 1
      ready to start a new sentence =  1
   finish

   {watch for end of a sentence}
   if  ready to start a new sentence = 1  start
       just finished a sentence =  1
       in a sentence =  0
   else
       just finished a sentence =  0
   finish

   {check that have no noise words}
   ! A noise character glued to the end of the word discards the word, but
   ! the sentence bookkeeping above has already been updated for it.
   if  lexical type(ch) = noise word  start
      {skip to end of word}
      while  lexical type(ch) # blank space  cycle
         ch =  get symbol
      repeat
      result  =  next word
   else
      while  lexical type(ch) = word char  cycle
         ch =  get symbol
      repeat
      result =  word
   finish
end  {of next word}


{HASHING ADDR}

integerfunction hash addr(string (31) name)
   {Bucket index for name: shift the running value left one bit, fold in}
   {the next character with exclusive or, then mask down to table range}
   integer mix, posn
   mix =  0
   for posn = 1,1,LENGTH (name) cycle
      mix =  (mix<<1) !! CHARNO(name,posn)
   repeat
   result =  mix & hash table size
end  {of  hash code generated from string}


{ENDS WITH S}
predicate ends with s(string (31) s)
   {plurality test: true when the last character of s is a lower case s}
   {an empty string has no last character, so answer without indexing it}
   false  if  s = ""
   true  if  CHARNO(s,LENGTH(s)) = 's'
   false
end {of ends with s}

{HAVE ARTICLE}
predicate  have article(string (31) s)
   {true for the articles this program knows about}
   true  if  s = "the"
   true  if  s = "a"
   false
end  {of have article}

{HAVE CONJUGATION}
predicate  have conjugation(string (31) s)
   {true when s is one of the joining words checked for doubling up}
   true  if  s = "and" or s = "but" or s = "yet"
   true  if  s = "while" or s = "though"
   false
end  {of have conjugation}

{HAVE PREPOSITION}
predicate  have preposition(string (31) s)
   {true when s is one of the listed prepositions, grouped here by length}
   true  if  s = "to" or s = "on" or s = "by" or s = "at" or s = "in"
   true  if  s = "from" or s = "over" or s = "with" or c
              s = "near" or s = "next"
   true  if  s = "along" or s = "after" or s = "before" or s = "without"
   false
end  {of have preposition}

{HAVE PRONOUN}
predicate  have pronoun(string (31) s)
   {true when s is one of the listed pronouns}
   {the old list ended with near, which is a preposition and is still}
   {accepted by have preposition, so it has been dropped from here}
   true  if  s = "i"        or c
               s = "you"      or c
               s = "she"      or c
               s = "he"       or c
               s = "we"       or c
               s = "they"     or c
               s = "who"      or c
               s = "whom"     or c
               s = "them"     or c
               s = "which"
   false
end  {of have pronoun}

{HAVE REFERENCE}
predicate  have reference(string (31) s)
   {true when s is a word that points back at something already mentioned}
   true  if  s = "it"
   true  if  s = "this" or s = "that"
   true  if  s = "these" or s = "those"
   false
end  {of have reference}

{HAVE ADVERB}
predicate  have adverb(string (31) s)
   {true for the listed helper verbs (is, are, was and so on), whatever}
   {the routine name suggests}
   true  if  s = "is" or s = "are" or s = "was" or s = "were"
   true  if  s = "can" or s = "may" or s = "should"
   false
end  {of have adverb}


{CREATE VOCABULARY}

routine  create vocabulary
   {Read words for ever, run the -m and -u checks on each one against the}
   {word before it, and count it in the hash table.  The loop has no exit}
   {of its own: presumably it ends when next word runs out of input and}
   {the resulting event is caught by the caller (to be confirmed).}
   record (word rec) name word holder
   integer hash posn
   string (31) word

   {Print a usage warning prefixed with the current line number}
   routine  complain(string (255) text)
      Print String("--- Line ".ITOS(line number,0).text)
      Newline
   end  {of complain}

   {Print a follow-up line: advice or a book reference}
   routine  advise(string (255) text)
      Print String(text)
      Newline
   end  {of advise}

   cycle
      word =  next word
      if  in a sentence = 1  start
         num words in sentences =  num words in sentences + 1
      finish

      {-m: the same word twice running, but not across a sentence start}
      if  (options & M option)#0  and c
         word = prev word  and words from start of sentence > 1 and c
         (Length(word) > 1 or word = "a") start
         Print String("Line ".ITOS(line number,0).": Duplicate of """.c
            word."""")
         Newline
      finish

      {-u: usage and style checks on the pair (prev word, word)}
      if  (options & U option)#0  start
         if  words from start of sentence = 2  and c
              have reference(prev word) and         c
              have adverb(word)  start
            recursive reference =  recursive reference + 1
         finish
         if words from start of sentence = 1 and word = "such" start
            complain(": Use of recursive SUCH is legal jargon.")
            advise(c
"          Recursive SUCH is a nasty construct. It makes your writing awkward.")
         finish
         if  (word = "inevitably" or word = "necessarily")  and c
            prev word = "must"  start
            complain(": Redundant MUST.  Using MUST here is very sloppy.")
            advise(c
 "         See  page 56 of Gower's ""Plain Words"" (1973 ed)")
         finish
         if  words from start of sentence > 1 and c
              word = "incidentally"  start
            complain(c
            ": Intrusive INCIDENTALLY.  It intrudes too much into the sentence")
            advise(c
 "         See  page 56 of Gower's ""Plain Words"" (1973 ed)")
         finish
         if  (word = "incidentally"  or c
               word = "definitely"    or c
               word = "actually"      or c
               word = "specific"      or c
               word = "particularly") and c
               have reference(prev word)  start
            complain(c
            ": Using """.word.""" here intrudes into your sentence. Remove it.")
            advise(c
 "         See  page 56 of Gower's ""Plain Words"" (1973 ed)")
         finish
!         %if  (word = "case") %and have article(prev word)  %start
!            Print String("--- Line ".ITOS(line number,0).%c
!            ": Using CASE here is almost certainly wrong.")
!            Newline
!            Print String(%c
! "         See  page 58 of Gower's ""Plain Words"" (1973 ed)");
!            Newline
!         %finish
         if  (word = "instance") and prev word = "first" start
            complain(": Bad cliche.  Remove the word INSTANCE and restructure.")
            advise(c
 "         See  page 59 of Gower's ""Plain Words"" (1973 ed)")
         finish
         if  (word = "concerned") and have adverb(prev word) start
            complain(c
            ": Bad cliche.  Variant of AS FAR AS or SOMETHING .. IS CONCERNED.")
            advise(c
 "         See  page 59 of Gower's ""Plain Words"" (1973 ed)")
         finish
         if  (word = "time") and prev word = "such" start
            complain(": Bad cliche.  Variant of UNTIL SUCH TIME.  You mean WHEN.")
            advise(c
 "         See  page 60 of Gower's ""Plain Words"" (1973 ed)")
         finish
         if prev word = "signal" and word = "out" start
            complain(": Misprint.  SIGNAL OUT instead of SINGLE OUT")
         finish
         {sentence-initial FARTHER: test the word itself.  The old test on}
         {prev word fired on the word after a sentence ending in farther}
         if word = "farther" and words from start of sentence = 1 start
            complain(": You use FARTHER when you mean FURTHER.")
            advise(c
 "         See  page 189 of Fowler's ""Modern English Usage"" (1983 ed)")
         finish
!         %if prev word = "if" %and word = "and"  %start
!               {could be IF AND only if}
!               Print String("--- Line ".ITOS(line number,0).%c
!               ": Parrot.  Parrot.  (IF AND is a parrot's expression)")
!               Newline
!               Print String(%c
! "         See  page 264 of Fowler's ""Modern English Usage"" (1983 ed)");
!               Newline
!         %finish
         if prev word = "and" and word = "but"  start
            complain(": AND BUT is an especially bad conjugation.")
         elseif  have conjugation(prev word) and have conjugation(word)
            complain(c
            ": Redundant conjugation.  """.prev word.""" followed by """.c
               word."""")
         finish
         if prev word = "form" and have article(word)  start
            complain(c
               ": Hickup in parse caused by verb FORM followed by ".c
                  "the article """.word."""")
            advise(c
 "         Please check that you mean FORM and not FROM.")
         finish
         if  Length(prev word) >= 7 and c
              Substring(prev word,1,7) = "infring" and c
              Length(word) >= 2 and Substring(word,1,2) = "up"  start
            complain(": You have infringed the meaning of the word INFRINGE.")
            advise(c
 "         You probably mean ENCROACH or TRESPASS instead")
            advise(c
 "         See  page 283 of Fowler's ""Modern English Usage"" (1983 ed)")
         finish
         if  Length(word) >= 5 and Substring(word,1,5) = "infer" and c
              word # "inference" start
            if  have preposition(prev word) or have pronoun(prev word)  start
               complain(": You use INFER when you mean IMPLY.")
               advise(c
 "         See  page 282 of Fowler's ""Modern English Usage"" (1983 ed)")
            finish
         finish
         if (Length(word)>=5 and Substring(word,1,5) = "impli") or c
            word = "imply" start
            if  prev word = "can" or c
                 prev word = "he" or c
                 prev word = "article" or c
                 prev word = "may" start
               complain(": You use IMPLY when you mean INFER.")
               advise(c
 "         See  page 282 of Fowler's ""Modern English Usage"" (1983 ed)")
            finish
         finish
      finish

      {count the word: walk its bucket chain looking for an existing entry}
      hash posn =  hash addr(word)
      word holder ==  word table(hash posn)
      while  word holder ## NIL  and  word holder_word # word cycle
         word holder ==  word holder_next
      repeat
      if  word holder == NIL  start
         {first sighting: push a fresh entry on the front of the chain}
         {(for an empty bucket the old head is NIL, so next becomes NIL)}
         word holder ==  New(word holder)
         word holder_word =  word
         word holder_occurances =  1
         word holder_next ==  word table(hash posn)
         word table(hash posn) ==  word holder
      else
         word holder_occurances =  word holder_occurances + 1
      finish
      prev word =  word
   repeat
end  {of create vocabulary}



{SORT WORD TABLE LEXICALLY}

routine sort word table lexically
   { Transfer hash table to a binary tree ordered by word text. }
   { The nodes are added to whatever sort tree already holds, so the }
   { caller is expected to start from an empty tree. }

   {Initialise a freshly allocated node as a leaf holding wd}
   routine  make leaf(record (node) name leaf, record (word rec) name wd)
      leaf =  0
      leaf_rson == NIL; leaf_lson == NIL; leaf_brother == NIL
      leaf_word ==  wd
   end  {of make leaf}

   {Hang wd below nd: larger words go right, smaller go left.  An equal}
   {word is ignored, which cannot arise while table entries are unique.}
   routine  add to tree(record (node) name nd,record (word rec) name wd)
      record (node) name nn
      if wd_word > nd_word_word  start
         if  nd_rson == NIL  start
            nn ==  New(nn)
            make leaf(nn, wd)
            nd_rson ==  nn
         else
            add to tree(nd_rson,wd)
         finish
      elseif wd_word < nd_word_word
         if  nd_lson == NIL  start
            nn ==  New(nn)
            make leaf(nn, wd)
            nd_lson ==  nn
         else
            add to tree(nd_lson,wd)
         finish
      finish
   end {of add to tree}

   record (word rec) name  mw
   integer i

   {visit every entry of every bucket chain}
   for  i=0,1,hash table size  cycle
       mw ==  word table(i)
       while  mw ## NIL  cycle
          if  sort tree == NIL  start
             {the very first word becomes the root}
             sort tree ==  New(sort tree)
             make leaf(sort tree, mw)
          else
             add to tree(sort tree, mw)
          finish
          mw ==  mw_next
       repeat
   repeat
end  {of sort word table lexically}


{SORT WORD TABLE BY FREQUENCY}

routine sort word table by frequency
   { Transfer hash table to a binary tree ordered by occurrence count. }
   { Words with the same count share one tree position and are chained }
   { through the brother link. }

   {Initialise a freshly allocated node as a leaf holding wd}
   routine  make leaf(record (node) name leaf, record (word rec) name wd)
      leaf =  0
      leaf_rson == NIL; leaf_lson == NIL; leaf_brother == NIL
      leaf_word ==  wd
   end  {of make leaf}

   {Hang wd below nd: higher counts go right, lower counts go left}
   routine  add to tree(record (node) name nd,record (word rec) name wd)
      record (node) name nn
      if wd_occurances > nd_word_occurances  start
         if  nd_rson == NIL  start
            nn ==  New(nn)
            make leaf(nn, wd)
            nd_rson ==  nn
         else
            add to tree(nd_rson,wd)
         finish
      elseif wd_occurances < nd_word_occurances
         if  nd_lson == NIL  start
            nn ==  New(nn)
            make leaf(nn, wd)
            nd_lson ==  nn
         else
            add to tree(nd_lson,wd)
         finish
      else
         {same count: chain the newcomer directly behind nd}
         nn ==  New(nn)
         make leaf(nn, wd)
         nn_brother ==  nd_brother
         nd_brother ==  nn
      finish
   end {of add to tree}

   record (word rec) name  mw
   integer i

   {visit every entry of every bucket chain}
   for  i=0,1,hash table size  cycle
       mw ==  word table(i)
       while  mw ## NIL  cycle
          if  sort tree == NIL  start
             {the very first word becomes the root}
             sort tree ==  New(sort tree)
             make leaf(sort tree, mw)
          else
             add to tree(sort tree,mw)
          finish
          mw ==  mw_next
       repeat
   repeat
end  {of sort word table by frequency}


{PRINT OUT SORT TREE}

routine  print node(record (node) name nd)
   { In-order walk of the subtree at nd: left son first, then the word }
   { held at nd, then its brother chain, and finally the right son.    }
   { Unless the P option is set the occurrence count is printed after  }
   { the word, following a run of padding spaces.                      }
   integer pad
   if  nd ## NIL  start
      print node(nd_lson)
      Print String(nd_word_word)
      if  (options & P option)=0  start
         {one space for each position from the word length up to 40}
         for pad=LENGTH(nd_word_word),1,40  cycle
            Print String(" ")
         repeat
         Print String(ITOS(nd_word_occurances,7))
      finish
      Newline
      print node(nd_brother)
      print node(nd_rson)
   finish
end  {of print node}


{PRINT OUT SORT TREE IN REVERSE}

routine  print node in reverse(record (node) name nd)
   { Mirror image of print node: right son first, then the brother     }
   { chain, then the word held at nd, and finally the left son.        }
   { Unless the P option is set the occurrence count is printed after  }
   { the word, following a run of padding spaces.                      }
   integer pad
   if  nd ## NIL  start
      print node in reverse(nd_rson)
      print node in reverse(nd_brother)
      Print String(nd_word_word)
      if  (options & P option)=0  start
         {one space for each position from the word length up to 40}
         for pad=LENGTH(nd_word_word),1,40  cycle
            Print String(" ")
         repeat
         Print String(ITOS(nd_word_occurances,7))
      finish
      Newline
      print node in reverse(nd_lson)
   finish
end  {of print node in reverse}


{NUMBER OF WORDS}

integerfunction  number of words
   { Sum of the occurrence counts of every word record in the hash     }
   { table, that is the total number of words that were counted.       }
   record (word rec) name  item
   integer slot,tally

   tally = 0
   for slot=0,1,hash table size  cycle
      {an empty bucket simply gives a chain of length zero}
      item ==  word table(slot)
      while  item ## NIL  cycle
         tally =  tally + item_occurances
         item ==  item_next
      repeat
   repeat
   result =  tally
end  {of number of words}


{AVERAGE WORD LENGTH}

realfunction  average word length
   { Mean number of characters per counted word: each word in the hash }
   { table contributes its length once for every occurrence.           }
   { Gives 0 when the table holds no words at all, rather than         }
   { dividing by zero.                                                 }
   record (word rec) name  mw
   integer i,num words,num chars

   num words = 0; num chars = 0
   for i=0,1,hash table size  cycle
      mw ==  word table(i)
      while  mw ## NIL  cycle
         num words =  num words + mw_occurances
         num chars =  num chars + (LENGTH(mw_word)*mw_occurances)
         mw ==  mw_next
      repeat
   repeat
   {nothing counted - avoid the division below}
   result =  0  if  num words = 0
   result =  num chars / num words
end  {of average word length}


{WORD COUNT}

routine  WORD COUNT
   { Print two totals taken from the hash table: the number of words   }
   { counted (sum of all occurrence counts) and the number of distinct }
   { word records.                                                     }
   record (word rec) name  item
   integer slot,tally,different

   tally = 0
   different = 0
   for slot=0,1,hash table size  cycle
      {an empty bucket simply gives a chain of length zero}
      item ==  word table(slot)
      while  item ## NIL  cycle
         different =  different + 1
         tally =  tally + item_occurances
         item ==  item_next
      repeat
   repeat
   Print String("Word count = ".ITOS(tally,0))
   Newline
   Print String("Number of different words = ".ITOS(different,0))
   Newline
end  {of word count}


{DO SPELLING CHECK}

routine  do spelling check {from sort tree}
   { Walk sort tree in order and compare each word with the words      }
   { delivered one at a time by next word.  A tree word that the       }
   { dictionary stream steps past without matching is reported as      }
   { misspelt.  With the N option the report also shows the dictionary }
   { words on either side of the gap.                                  }
   { Both the tree and the dictionary are presumably in the same       }
   { ascending order - the comparison below relies on it.              }
   { NOTE(review): when event 9 arrives while the dictionary is being  }
   { read, the handler reports only the word being checked at that     }
   { moment and returns, so the rest of the tree is never examined -   }
   { confirm that this is intended.                                    }

   string (31) my word
   owninteger  printed heading = 0

   {go down to}
   { In-order visit of the subtree at nd: left son, the node itself,   }
   { then the right son.  Brother links are not followed.              }
   routine  go down to(record (node) name nd)
      ownstring (31) dictionary word =  ""
      integer i
      return  if  nd == NIL
      go down to(nd_lson)
      my word =  nd_word_word
      {pull dictionary words until one is not less than my word}
      while  dictionary word < my word  cycle
         prev word = dictionary word
         dictionary word =  next word
      repeat
      if  dictionary word # my word start
         {Have spelling error}
         if  printed heading = 0  start
            {The banner is left out of pure lists, but the flag is set }
            {either way so that the closing no errors message is not   }
            {printed after errors have been listed.                    }
            if  (options & P option)=0  start
               Print String("MISSPELT WORDS:")
               Newline
            finish
            printed heading =  1
         finish
         Print string(my word)
         if  (options & N option)# 0  start
            for i=LENGTH(my word),1,34  cycle
               Print String(" ")
            repeat
            Print String("(".prev word.", ".dictionary word.")")
         finish
         Newline
      finish

      go down to(nd_rson)
   end  {of go down to}

   integer i

   onevent 3,9 start
      if event_event = 3  start
         {report the failure on output stream 0 and give up}
         Select Output(0)
         Print String("Ether or file system error:")
         Print String(itos(event_extra,0)."_".c
                      itos(event_sub,0)." ".event_message)
         Newline
         stop
      else
         {event 9 - report the word in hand and abandon the walk}
         Print string(my word)
         if  (options & N option)# 0  start
            for i=LENGTH(my word),1,34  cycle
               Print String(" ")
            repeat
            Print String("(".prev word.")")
         finish
         Newline
         return
      finish
   finish

   prev word = "-"
   go down to(sort tree)
   if  printed heading = 0  start
      Print String("No spelling errors found."); Newline
   finish
end  {do spelling check}


{NUMBER OF}

integerfunction  number of(string (31) s)
   { Occurrence count recorded for the word s, or 0 when s is not in   }
   { the hash table.  Only the bucket chosen by hash addr is searched. }
   record (word rec) name  item
   item ==  word table(hash addr(s))
   while  item ## NIL  cycle
      {first record spelt exactly like s settles the answer}
      result =  item_occurances  if  item_word = s
      item ==  item_next
   repeat
   {ran off the end of the chain without a match}
   result =  0
end  {of  number of}


{OPEN DICTIONARY FILE - helper for the main program, which starts at begin below}
predicate Open DictFile
  { Try to open the file named by dictfile as input stream 1.         }
  { Succeeds when Open Input returns normally.  Fails, instead of     }
  { letting the event propagate, when event 3 or 9 is signalled       }
  { during the open.                                                  }
  on event 3,9 start
    false
  finish
  Open Input(1,dictfile)
  true
end

begin

   { Main program.                                                     }
   { The straight-line code at the bottom parses the command line,     }
   { initialises and calls create vocabulary.  Every report is         }
   { produced inside the event handler below: event 3 prints an error  }
   { and stops, while the other trapped event (9) closes the input and }
   { then works through the requested options in the order             }
   { U, L, F, B, W, C before closing the output and stopping.          }
   { Event 9 is presumably signalled when create vocabulary reaches    }
   { the end of the input - confirm against create vocabulary.         }

   record (word rec) name mw
   string (31) longest word
   integer i, longest
   real average number of words per sentence, mean word length

   onevent 3,9 start
      if event_event = 3  start
         Select Output(0)
         Print String("Ether or file system error:")
         Print String(itos(event_extra,0)."_".c
                      itos(event_sub,0)." ".event_message)
         Newline
         stop
      else
         Close Input
         in a sentence = 0
         words from start of sentence = 0
         just finished a sentence = 0
         if  (options & U option)#0  start
            if  (options & P option)=0  start
               Print String("WORD USAGE AND STYLISTIC ANALYSIS")
               Newline
            finish
            if  sentence count < 10  start
               Print String("Not enough sentences to give usage statistics")
               Newline
            else
               {Reading age}
               average number of words per sentence =  c
                   num words in sentences / sentence count
               Print String("--- ".c
                  ITOS(sentence count,0)." sentences parsed,".c
                  " with an average of ")
               Print(average number of words per sentence,0,2)
               Print String(" words per sentence")
               Newline
               if  average number of words per sentence > 35  start
                  Print String(c
"         Your sentences are too long.  Try to make some of them shorter.")
                  Newline
               finish
               {scan the whole table once only and reuse the figure below}
               mean word length =  average word length
               if  mean word length > 6  start
                  Print String("--- You use far too many long words.")
                  Newline
                  Print String(c
"         Your average word length is ")
                  Print(mean word length,0,2)
                  Print String(" - try to get it nearer 5")
                  Newline
               elseif  mean word length > 5.5
                  Print String("--- You use quite a lot of long words.")
                  Newline
                  Print String(c
"         Your average word length is ")
                  Print(mean word length,0,2)
                  Print String(" - try to get it nearer 4.9")
                  Newline
               else
                  Print String(c
"--- You use a good mix of word length, with an average of ")
                  Print(mean word length,0,2)
                  Print String(" letters per word.")
                  Newline
               finish
               {checks on individual words follow, each using number of}
               if  number of ("such") > 1 and c
                    number of("such") > sentence count*7/100 start
                  Print String("--- ")
                  Print(number of("such")*100/sentence count,0,2)
                  Print String("% of your sentences use SUCH.")
                  Newline
                  Print String(c
"         Only legal documents should have this many SUCHs.")
                  Newline
               finish
               if  (FLOAT(number of("that"))/FLOAT(sentence count)) > 0.40  start
                  Print String("--- ")
                  Print(number of("that")*100/sentence count,0,2)
                  Print String("% of your sentences use THAT.")
                  Newline
                  Print String(c
"         You railroad your readers.  Try to reduce frequency of THATs to 25%")
                  Newline
               finish
               if  number of("basically") > 1  start
                  Print String("--- You use BASICALLY too often.".c
                     "  It is redundant each time.")
                  Newline
                  Print String(c
"         You should use this word only rarely.")
                  Newline
               finish
               if  number of("we") = 0 and number of("i") = 0 and c
                    number of("you") = 0  start
                  Print String(c
                  "--- Your written work is dying from abstractitis");  Newline
                  Print String(c
"         See page 5 of Fowler's ""Modern English Usage"" (1973 ed)")
                  Newline
               finish
               if  number of("which") >= number of("that")  and c
                    number of("which") > 1                   start
                  Print String(c
                  "--- You sometimes use WHICH instead THAT - ".c
                     " they are not interchangeable.");  Newline
                  Print String(c
"         See page 142-145 of Gower's ""The Complete Plain Words"" (1973 ed)")
                  Newline
                  Print String(c
"         Also page 696-706 of Fowler's ""Modern English Usage"" (1983 ed)")
                  Newline
               finish
               if  number of("ingeminate") > 0 start
                  Print String(c
"--- You use the word INGEMINATE.  I bet you don't know what it means.")
                  Newline
                  Print String(c
"         See page 284 of Fowler's ""Modern English Usage"" (1973 ed)")
                  Newline
               finish
               if  number of("gender") > 0 start
                  Print String(c
                      "--- Use of GENDER is probably a blunder.")
                  Newline
                  Print String(c
"         See page 220 of Fowler's ""Modern English Usage"" (1973 ed)")
                  Newline
               finish
               if  number of("feasible") > 0 start
                  Print String(c
"--- Use POSSIBLE instead of FEASIBLE.  FEASIBLE is pretentious.")
                  Newline
                  Print String(c
"         See page 191 of Fowler's ""Modern English Usage"" (1973 ed)")
                  Newline
               finish
               if  number of("firstly") > 0 and c
                  number of("secondly") = 0 start
                  Print String(c
"--- You use FIRSTLY without using SECONDLY.  Clumsy.")
                  Newline
               finish
               if  recursive reference > sentence count/20  and c
                    recursive reference > 2                  start
                  Print String("--- You use ".ITOS(recursive reference,0).c
                     " recursive references (THIS, THAT, THESE etc).")
                  Newline
                  Print String(c
"         This means that ")
                  Print(recursive reference*100/sentence count,0,2)
                  Print String("% of your sentences start with a ")
                  Newline
                  Print String(c
"         reference to a previous sentence, making your writing hard to")
                  Newline
                  Print String(c
"         follow.  Your readers must work too hard unravelling sentences.")
                  Newline
               finish
               if  number of("utilize") > 0 or c
                    number of("utilized") > 0 or c
                    number of("utilizes") > 0 start
                  Print String(c
"--- You use UTILIZE.  It is a pretentious word.  What's wrong with USE ?")
                  Newline
               finish
               if  number of("obligate") > 0 or c
                    number of("obligated") > 0 start
                  Print String(c
"--- You use OBLIGATE.  It is a clumsy word.  What's wrong with OBLIGE ?")
                  Newline
               finish
               if  number of("averse") > 0  start
                  Print String(c
"--- You use AVERSE.  Check that it is not a misprint of ADVERSE")
                  Newline
               finish
               if  number of("casual") > 0  start
                  Print String(c
"--- You use CASUAL.  Check that it is not a misprint of CAUSAL")
                  Newline
               finish
               if  (number of("dependant") > 0 or c
                     number of("dependants") > 0 ) and c
                     number of("social") = 0 and c
                     number of("family") = 0 and c
                     number of("mother") = 0 and c
                     number of("father") = 0 and c
                     number of("parents") = 0 start
                  Print String(c
"--- You use DEPENDANT when you almost certainly mean DEPENDENT.")
                  Newline
               finish
               if  number of("deprecate") > 0  start
                  Print String(c
"--- You use DEPRECATE.  Check that it is not a misprint of DEPRECIATE")
                  Newline
               finish
               if  number of("methodology") > 0  or c
                    number of ("methodologies") > 0 start
                  Print String(c
"--- You use METHODOLOGY.  What is wrong with METHOD, STYLE or TECHNIQUE ?")
                  Newline
                  Print String(c
"         These plain alternatives have done well for hundreds of years")
                  Newline
                  Print String(c
"         so why should you want to use this vulgar import?  Are not plain")
                  Newline
                  Print String(c
"         words good enough for you?")
                  Newline
               finish
               if  number of("realty") > 0  start
                  Print String(c
"--- You use REALTY.  Check that it is not a misprint of REALITY")
                  Newline
               finish
               if  number of("uniformed") > 0  start
                  Print String(c
"--- You use UNIFORMED.  Check that it is not a misprint of UN-INFORMED")
                  Newline
               finish
            finish
         finish
         if  (options & L option)#0  start
!           Mark
            Sort word table lexically
            if  (options & P option)=0  start
               Print String(c
                  "LEXICALLY ORDERED VOCABULARY")
               Newline

               Print String(c
                  "Word                                    Occurances")
               Newline
            finish
            if  (options & R option)#0  start
               Print node in reverse(sort tree)
            else
               Print node(sort tree)
            finish
!           Release
            {forget the tree so that a later sort starts from empty}
            sort tree ==  NIL
         finish
         if  (options & F option)#0  start
!           Mark
            Sort word table by frequency
            if  (options & P option)=0  start
               Print String("VOCABULARY ORDERED BY FREQUENCY")
               Newline
               Print String(c
                  "Word                                    Occurances")
               Newline
            finish
            if  (options & R option)#0  start
               Print node in reverse(sort tree)
            else
               Print node(sort tree)
            finish
!           Release
            {forget the tree so that a later sort starts from empty}
            sort tree ==  NIL
         finish
         if  (options & B option)#0  start
            {first word of the greatest length met in table order wins}
            longest word =  ""
            longest      =  0
            for i=0,1,hash table size  cycle
               if  word table(i) ## NIL  start
                  mw ==  word table(i)
                  while  mw ## NIL  cycle
                     if longest < LENGTH(mw_word)  start
                        longest word =  mw_word
                        longest =  LENGTH(longest word)
                     finish
                     mw ==  mw_next
                  repeat
               finish
            repeat
            Print String("Longest word = ".longest word.c
               " (".ITOS(longest,0).")")
            Newline
         finish
         if  (options & W option)#0  start
            word count
         finish
         if  (options & C option)#0  start
            {the spelling check walks a lexically sorted tree}
            Sort word table lexically
            Close Input
            if Open Dictfile then Select Input(1) and Do spelling check
         finish
         Close Output
         stop
      finish
   finish
 
   interpret command line
   stop if  options = 0
   help  if  (options & H option) # 0
   init
   create vocabulary

endofprogram