program dvistats;
const
    max_inp = 2;
    max_words = 16384;
    max_abbrev = 256;
    max_abbrev_files = 4;
    max_forbid = 32;
    label x1000, x2000, read_one_line;
type str255 = string[255];
     stw = string[32];
     fname = string[127];
     stn = string[3];
var language_name, dict_dir, abbrev_name, dict_name, dict_gname, forbid_name : str255;
    nb_abbrev_files, nb_inputs, blpos, ptpos : integer;
    abbrev_names : array [1..max_abbrev_files] of fname;
    dict_file, input_file, output_file : text;
    work_word, option, save_word, in_line, in_word, in_dict : str255;
    ordk, l, k, lp, x, xp, nb_words, nb_abbrevs, nb_forbids : integer;
    initial : word;
    verbose, opened, desordre : boolean;
    abbrev : array[1..max_abbrev] of stw;
    forbid : array[1..max_forbid] of stw;
    begin_page, end_page : integer;
    file_names : array [1..max_inp] of fname;
    old_first_char, first_char : char;

    quick, first_page_no, last_page_no : integer;

    tab_words : array [1..max_words] of stw;
    ord_words : array [1..max_words] of integer;
    fir_words : array [1..max_words] of integer;
    las_words : array [1..max_words] of integer;

function val_int(x : stw) : integer;
var l, m, kfig : integer;
begin
  kfig:=0;
  for l:=1 to length(x) do
    begin
      m:=ord(x[l])-ord('0');
      if(m >= 0) and (m < 10) then kfig:=10*kfig+m;
    end
  ;
  val_int:=kfig;
end;

procedure lower_case(var u : str255);
var xx : char;
    v : str255;
begin
 v:=u;
 for k:=1 to length(u) do
  begin
    xx:=u[k];
    if(ord(xx)>=ord('A')) and (ord(xx)<=ord('Z')) then
      xx:=chr(ord(xx)+ord('a')-ord('A'))
    ;
    if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    else if xx='' then xx:=''
    ;
    u[k]:=xx;
  end
 ; (* if u<> v then writeln(u,'<>',v); *)
end;

(* ======================= main routine ======================== *)

begin
  if paramcount=0 then
    begin
      writeln('DVI spell checking');
      writeln('Usage: dvistats [options] <input file> [options] <output file> [options]');
      writeln('Options: -d<directory> : root directory for dictionnaries');
      writeln('         -l<language>  : root subdirectory for dictionnaries');
      writeln('         -f<name>      : generic name for dictionnaries');
      writeln('         -a<abbrev>    : dictionnary of abbreviations');
      writeln('         -x<forbid>    : dictionnary of forbidden forms');
      writeln('         -v            : verbose');
      writeln('         -b<n>         : ignore words of page number < n');
      writeln('         -e<p>         : ignore words of page number > p');
      halt;
    end
  ;

  language_name:=''; abbrev_name:=''; dict_dir:=''; dict_gname:='';
  forbid_name:=''; verbose:=FALSE; begin_page:=0; end_page:=30000;
  
  nb_inputs:=0; nb_abbrevs:=0; nb_forbids:=0; nb_abbrev_files:=0;

  for k:=1 to max_inp do file_names[k]:='';
  
  for k:=1 to paramcount do
    begin
      option:=paramstr(k);
      if option[1]='-' then
        begin
          if(upcase(option[2])='L') then language_name:=copy(option,3,999);
          if(upcase(option[2])='D') then dict_dir:=copy(option,3,999);
          if(upcase(option[2])='A') then
            begin
              if nb_abbrev_files >= max_abbrev_files then
                begin
                  writeln('More than ',max_abbrev_files,' abbreviation files, abort.');
                  halt;
                end
              ;
              nb_abbrev_files:=nb_abbrev_files+1;
              abbrev_names[nb_abbrev_files]:=copy(option,3,999);
            end
          ;
          if(upcase(option[2])='X') then forbid_name:=copy(option,3,999);
          if(upcase(option[2])='F') then dict_gname:=copy(option,3,999);
          if(upcase(option[2])='V') then verbose:=TRUE;
          if(upcase(option[2])='B') then begin_page:=val_int(copy(option,3,999));
          if(upcase(option[2])='E') then end_page:=val_int(copy(option,3,999));
        end
      else 
        begin
          if nb_inputs >= max_inp then
            begin writeln('More than ',max_inp,' i/o files. Abort.');
              halt;
            end
          ;
          nb_inputs:=nb_inputs+1;
          file_names[nb_inputs]:=option;
        end
      ; {end if option[1]}
    end
  ; {end do}

  writeln('input file:',file_names[1]);
  if file_names[2] = ''
  then
    begin ptpos:=pos('.',file_names[1])-1;
      if ptpos <= 0 then ptpos := length(file_names[1]);
      file_names[2]:=copy(file_names[1],1,ptpos)+'.wls'
    end
  ;
  writeln('output file:',file_names[2]);

(* read abbreviations *)
  for k:= 1 to nb_abbrev_files do
    begin abbrev_name:=abbrev_names[k];
      if (dict_dir <> '') and (abbrev_name <> '') then
        if abbrev_name[1]='\' then
        else if abbrev_name[2]=':' then
        else if copy(abbrev_name,1,2)='.\' then
          abbrev_name:=copy(abbrev_name,3,999)
        else abbrev_name:=dict_dir+'\'+language_name+'\'+abbrev_name
        ; {end if abbrev_name}
      if abbrev_name <> '' then
        begin
          writeln('abbreviation name:',abbrev_name);
         {$I-}
          assign(input_file,abbrev_name);
          reset(input_file);
          if(ioresult <> 0) then
            begin writeln('File ''',abbrev_name,''' cannot be opened for reading.');
              halt;
            end
          ;
          {$I+}
          while not eof(input_file) do
            begin
              readln(input_file,in_line);
              while in_line[length(in_line)]=' '
              do in_line:=copy(in_line,1,length(in_line)-1)
              ;
              if in_line <> '' then
                begin
                  if nb_abbrevs >= max_abbrev then
                    begin
                      writeln('More than ',max_abbrev,' abbreviations. Abort!'); halt;
                    end
                  ;
                  nb_abbrevs:=nb_abbrevs+1; abbrev[nb_abbrevs]:=in_line;
                end
              ; (* end if in_line <> *)
            end
          ;
          close(input_file);
        end
      ; (* end if abbrev_name <> '' *)
    end
  ; (* end for k:=1 to nb_abbrev_names *)
         
  writeln('Found ',nb_abbrevs,' abbreviations');

  if verbose then
    begin
      for k:=1 to nb_abbrevs do writeln('''',abbrev[k],'''');
    end;
  ;

(* read forbidden words *)
  {$I-}
  if (dict_dir <> '') and (forbid_name <> '') then
    if forbid_name[1]='\' then
    else if forbid_name[2]=':' then
    else forbid_name:=dict_dir+'\'+language_name+'\'+forbid_name
    ; {end if forbid_name}
  if forbid_name <> '' then
    begin
      writeln('forbidden file name:',forbid_name);
      assign(input_file,forbid_name);
      reset(input_file);
      nb_forbids:=0;
      if(ioresult <> 0) then
        begin writeln('File ''',forbid_name,''' cannot be opened for reading.');
        end
      else
        begin
          while not eof(input_file) do
            begin
              readln(input_file,in_line);
              while in_line[length(in_line)]=' '
              do in_line:=copy(in_line,1,length(in_line)-1)
              ;
              if in_line <> '' then
                begin
                  if nb_forbids >= max_forbid then
                    begin
                      writeln('More than ',max_forbid,' forbidden words. Abort!'); halt;
                    end
                  ;
                  nb_forbids:=nb_forbids+1; forbid[nb_forbids]:=in_line;
                end
              ;
            end
          ;
          writeln('Found ',nb_forbids,' forbidden words');
          close(input_file);
        end
      ;
    end
  ;
  writeln('First page to be treated: ',begin_page);
  writeln('Last page to be treated: ',end_page);
(* opening true input text *) 

  assign(input_file,file_names[1]);
  reset(input_file);
  if ioresult <> 0 then
    begin
      writeln('Unable to open ''',file_names[1],''' for input.'); halt;
    end
  ;

  assign(output_file,file_names[2]);
  rewrite(output_file);

  if ioresult <> 0 then
    begin
      writeln('Unable to open ''',file_names[2],''' for output.'); halt;
    end
  ;

  first_page_no:=-9999;
  last_page_no:=0;

  nb_words:=0;
  save_word:='';

  while not eof(input_file) do
    begin
      readln(input_file,in_line);
      while in_line[length(in_line)]=' ' do in_line:=copy(in_line,1,length(in_line)-1);
      ptpos:=pos(':',in_line);
      if ptpos>0 then
        begin
          if first_page_no = -9999 then
            first_page_no:=val_int(copy(in_line,1,ptpos-1));
          last_page_no:=val_int(copy(in_line,1,ptpos-1));
          if last_page_no < begin_page then goto read_one_line;
          if last_page_no > end_page then goto read_one_line;
          in_line:=copy(in_line,ptpos+1,9999);
        end
      else
        goto read_one_line;
      ;

      while length(in_line)>0 do
        begin
          blpos:=pos(' ',in_line);
          if blpos>0 then
            begin
              in_word:=copy(in_line,1,blpos-1);
              in_line:=copy(in_line,blpos+1,9999);
            end
          else
            begin
              in_word:=in_line; in_line:='';
            end
          ;

(* Eliminate special chars at beginning *)
          if(in_word = '-') then in_word:='';
          if(in_word = '.') then in_word:='';
          initial:=ord(in_word[1]);
          if(initial < ord(' ')) then
            begin
              if verbose then
                 writeln('>',in_word,':',initial,'<',ord(' '),'!'); 
              in_word:=copy(in_word,2,9999);
            end
          ;
          initial:=ord(in_word[1]);
(* Eliminate figures *)
          if (initial >= ord('0')) and (initial <= ord('9'))
          then in_word:='';
(* Eliminate single initials like J. *)
          if(length(in_word)=2) and (in_word[2]='.') then in_word:='';
(* Eliminate final "..." *)
          k:=pos('...',in_word);
          if k>0 then in_word:=copy(in_word,1,k-1);
(* Eliminate abbreviations *)
          for k:=1 to nb_abbrevs do
            begin
              l:=length(in_word);
              if(in_word = abbrev[k]) or (in_word = abbrev[k]+'.') then
                begin if verbose then
                  writeln('Abbrev. ''',in_word,''' found, erased.');
                  in_word:='';
                end
              ;
            end
          ;

          k:=length(in_word);
          if (in_word[k]='.') and (pos('.',in_word)=k) then
            in_word:=copy(in_word,1,k-1);

(* Eliminate hypenated words, save them for next word *)
          if(in_word <> '') then
            begin
              if save_word <> '' then
              (* concatenate with previous word if hyphenated *)
                begin in_word:=save_word+in_word; save_word:='';
                end
              ;
              k:=length(in_word);
              if (in_word[k] = '-') and (in_line='') then
                begin
                  save_word:=in_word; in_word:='';
                end
              ;
            end
          ;
(* store in table *)          
          lower_case(in_word);
          if(in_word <> '') then
            begin
              for k:=1 to nb_words do
                begin
                  if in_word = tab_words[k] then
                    begin
                      las_words[k]:=last_page_no;
                      goto x1000;
                    end
                  ; {end if in_word }
                end
              ; {end for}

              if nb_words = max_words then
                begin
                  writeln('Too many different words (',max_words,'):',in_word);
                  writeln('Only partial list examined.');
                end
              else if nb_words < max_words then
                begin
                  nb_words:=nb_words+1;
                  tab_words[nb_words]:=in_word;
                  fir_words[nb_words]:=last_page_no;
                  las_words[nb_words]:=last_page_no;
                end
              ;
              x1000: ;
            end
          ;
        end
      ;
    read_one_line:
    end
  ; {end while}
  close(input_file);   (* end reading DVI *)

  writeln('first page:',first_page_no);
  writeln('last page:',last_page_no);

(* starting sorting the words *)
  writeln('Sorting ',nb_words,' words');

  desordre:=TRUE;
  for k:=1 to nb_words do ord_words[k]:=k; (* initialize sorting *)

  quick:= nb_words div 2;
  if quick=0 then quick:=1;
  while (desordre or (quick>1)) do
    begin desordre:=FALSE;
      for l:=1 to nb_words-quick do
        begin
          x:=ord_words[l]; xp:=ord_words[l+quick];
          if tab_words[x]>tab_words[xp] then
            begin desordre:=TRUE;
              ord_words[l+quick]:=x; ord_words[l]:=xp;
            end
          ; {end if tab_words}
        end
      ; {end for}
      if (not desordre) then
        begin
          if (quick>1) then
            begin quick:=quick div 2; if quick=0 then quick:=1;
              (*  writeln ('quick=',quick); *)
              desordre:=TRUE;
            end
          ;
        end
      ;
    end
  ; {end while desordre}

(*  for k:=1 to nb_words do
    begin ordk:=ord_words[k];
      if tab_words[ordk] <> '' then
      writeln(k,':',fir_words[ordk],'->',las_words[ordk],
        '''',tab_words[ordk],''''); 
    end
  ;
*)
  
(* Now searching the dictionnaries *)
  opened:=FALSE; first_char:='@' ; old_first_char:=' '; (* nothing *)
  
  for k:=1 to nb_words do
    begin ordk:=ord_words[k]; work_word:=tab_words[ordk];
      (* seeking word in forbidden list *)
      for l:=1 to nb_forbids do
        begin
          if(in_word = forbid[l]) then
            begin writeln('Forbidden word: ''',in_word,''', kept.');
              goto x2000;
            end
          ;
        end
      ;
      (* seeking work_word in dictionnary *)
      first_char:=work_word[1];
      initial:=ord(first_char);
      if (initial>ord('z')) or (initial<ord('a')) then
        first_char:='0';

      if old_first_char <> first_char then
        begin (* new first letter *)
          if opened then close(dict_file); (* close if previously opened file *)
          dict_name:=first_char+dict_gname;
          if dict_dir <> '' then
            dict_name:=dict_dir+'\'+language_name+'\'+dict_name;

          writeln('Opening dictionary: ''',dict_name,'''');
          assign(dict_file,dict_name); opened:=TRUE;
          reset(dict_file);
          if ioresult <> 0 then
            begin
              writeln('Unable to open ''',dict_name,''' for input.'); halt;
            end
          ;
          old_first_char:=first_char;
(*          readln(dict_file,in_dict);  *)
(*          reset(dict_file);     *)
          in_dict:=' ';
        end
      ;
          if in_dict=work_word then
            begin tab_words[ordk]:='';
              if verbose then writeln('Found ''',in_dict,''', erased.');
              goto x2000;
            end
          ;
      while (not eof(dict_file)) and (in_dict < work_word) do
        begin
          readln(dict_file, in_dict);
          if in_dict=work_word then
            begin tab_words[ordk]:='';
              if verbose then writeln('Found ''',in_dict,''', erased.');
              goto x2000;
            end
          ;
        end
      ;
      x2000: ;

    end
  ; close(dict_file);


(* Printing the final list *)

  for k:=1 to nb_words do
    begin ordk:=ord_words[k];
      if tab_words[ordk] <> '' then
      writeln(output_file,tab_words[ordk],' at pages ',fir_words[ordk],
        ' --- ', las_words[ordk]);
(*      writeln(k,':',fir_words[ordk],'->',las_words[ordk],'''',tab_words[ordk],'''');  *)
    end
  ;
  close(output_file);
end.


