/* 
   Copyright (C) 1991-1995 Eberhard Mattes 

   dvisprep.c - part of dvisprep.
   dvisprep compiles a dvispell parameter file into the binary format
   read by dvispell.
   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any
   later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  
*/

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <fcntl.h>
#include <getopt.h>    
#include "dvispell.h"
#include "ext.h"


/* Number of entries in input_stack; start_input() refuses to open
   another file once this nesting limit is reached. */
#define INPUT_STACK_SIZE  8

/* Token types stored in `token' by get_token(). */
#define TOK_ERROR       0       /* Number prefix (' or ") without digits */
#define TOK_EOF         1       /* End of the current input file */
#define TOK_LPAR        2       /* `(' */
#define TOK_RPAR        3       /* `)' */
#define TOK_STRING      4       /* Name; string pool index is in `number' */
#define TOK_NUMBER      5       /* Character code; value is in `number' */

/* One open input file (the top-level file or an include file). */
struct input_level
{
  const char *fname;            /* File name, used in diagnostics */
  FILE *f;                      /* Stream being read */
  long line_no;                 /* Current line number, starting at 1 */
};

/* Encoding table of a font or layout: maps each character code to a
   list of symbol names (string pool indices). */
struct encoding
{
  long pos;                     /* File offset once written; 0 = not yet */
  unsigned *symbols[CHAR_CODES]; /* Per code: symbol list, NULL if undefined */
  unsigned lengths[CHAR_CODES]; /* Per code: number of symbols in the list */
};

/* Named font or layout table (element of font_head / layout_head).
   Several fonts defined by one `font' form share a single encoding. */
struct font_or_layout
{
  struct font_or_layout *next;  /* Next list element */
  struct encoding *encoding;    /* Encoding table, possibly shared */
  unsigned name;                /* Table name (string pool index) */
};

/* Named output table.  `table' holds `count' pairs of unsigned values:
   table[2*i] is a symbol name and table[2*i+1] is the string pool
   index of the character codes to emit for that symbol. */
struct output
{
  struct output *next;          /* Next list element */
  unsigned *table;              /* Pairs (symbol, codes); see above */
  long pos;                     /* File offset, set by write_output() */
  unsigned name;                /* Table name (string pool index) */
  unsigned count;               /* Number of pairs in use */
  unsigned alloc;               /* Number of pairs allocated */
};

/* One conversion rule: a sequence of input symbols and the sequence of
   output symbols which replaces it. */
struct conv
{
  struct conv *next;            /* Next rule of the same conversion table */
  unsigned *in_table;           /* Input symbols (string pool indices) */
  unsigned *out_table;          /* Output symbols (string pool indices) */
  unsigned in_count;            /* Number of input symbols */
  unsigned out_count;           /* Number of output symbols */
};

/* Named conversion table: a list of conversion rules. */
struct conversion
{
  struct conversion *next;      /* Next table, in order of definition */
  struct conv *head;            /* First rule of this table */
  long pos;                     /* File offset of the DFA written for it */
  unsigned name;                /* Table name (string pool index) */
};

/* Entry of prod_table: a sequence of symbols registered with
   add_prod() (dfa_word() registers the output side of a rule). */
struct prod
{
  unsigned *table;              /* The symbols; not owned by prod_table */
  unsigned count;               /* Number of symbols */
};

/* One transition leaving a DFA state. */
struct transition
{
  unsigned input;               /* Input symbol */
  unsigned output;              /* New state number */
};

/* One DFA state.  dfa_word() creates one state per distinct prefix of
   the input sequences; `accept_table' holds that prefix. */
struct state
{
  unsigned *accept_table;       /* Input symbols leading to this state */
  struct transition *transition_table; /* Outgoing transitions */
  unsigned accept_count;        /* Number of entries of accept_table */
  unsigned transition_count;    /* Number of transitions in use */
  unsigned transition_alloc;    /* Number of transitions allocated */
  unsigned def_output;          /* Index into prod_table, or NIL16 */
};

/* Transition including its source state, as used by write_dfa() for
   building the hashed transition table. */
struct htrans
{
  unsigned state;               /* Source state number */
  unsigned input;               /* Input symbol */
  unsigned output;              /* New state number */
};


/* Options.  `debugging' makes compute_dfa() list the automaton to
   list_file; `verbose' >= 1 makes compute() report each conversion
   table.  NOTE(review): `logo' is not referenced in this part of the
   file -- presumably it controls printing of the banner; confirm. */
static char debugging = FALSE;
static int  verbose = 0;
static int  logo = TRUE;

/* File names.  out_fname is used in the write error message;
   inp_fname is not referenced in this part of the file. */
static char inp_fname[FNAME_SIZE];
static char out_fname[FNAME_SIZE];

static FILE *out_file;          /* Binary output file */
static FILE *list_file;         /* Receives the debugging listing */

/* Input files: cur_input is the file being read, input_stack saves
   the enclosing files while an include file is being read. */
static struct input_level  input_stack[INPUT_STACK_SIZE];
static struct input_level  cur_input;
static int  input_sp;

/* Current token, set by get_token(): type, value (character code or
   string pool index) and text.  `str' is also the buffer returned by
   get_str(). */
static unsigned  token;
static unsigned  number;
static char  str[256];

/* String pool, see stringn().  Each distinct byte string is stored
   once; strings are referred to by their index.  The stored bytes are
   not null-terminated, the length is kept in str_length. */

#define STR_HASH_SIZE 1009      /* Number of hash chains */

static byte **  str_pool;       /* Bytes of each string */
static size_t *  str_length;    /* Length of each string */
static unsigned *  str_next;    /* Next string of the same hash chain */
static unsigned  str_pool_size; /* Number of entries allocated */
static unsigned  str_pool_used; /* Number of entries in use */
static unsigned  str_hash[STR_HASH_SIZE]; /* Chain heads; NIL16 ends a chain */

/* Lists of the tables defined in the parameter file.  New elements
   are inserted at the head of these lists. */
static struct font_or_layout *  layout_head;
static struct font_or_layout *  font_head;
static struct output *  output_head;

/* States of the DFA currently being built by compute_dfa(). */
static struct state *  state_table;
static unsigned  state_count;
static unsigned  state_alloc;

/* Symbol sequences registered with add_prod() for the current DFA. */
static struct prod *  prod_table;
static unsigned  prod_count;
static unsigned  prod_alloc;

/* List of conversion tables, in order of definition.  conversion_add
   points to the pointer to be set when appending the next table. */
static struct conversion *  conversion_head;
static struct conversion **  conversion_add;

/* Symbols collected from all `letters' forms. */
static unsigned *  letter_table;
static unsigned  letter_count;
static unsigned  letter_alloc;

/* Keywords.  parse() compares the first six against the `number' of
   the keyword token, so these hold string pool indices.
   NOTE(review): the values are assigned outside this part of the
   file, and s_newline ... s_nonletter are not referenced here. */
static unsigned  s_include;
static unsigned  s_output;
static unsigned  s_layout;
static unsigned  s_font;
static unsigned  s_letters;
static unsigned  s_conversion;
static unsigned  s_newline;
static unsigned  s_newpage;
static unsigned  s_wordspace;
static unsigned  s_start;
static unsigned  s_end;
static unsigned  s_nonletter;




/* Forward declaration: parse() and parse_include() call each other. */
static void parse (const char *fname);


/* Print the program banner (name, purpose, copyright and license
   notice) to stderr. */

static void banner (void)
{
  static const char text[] =
    "GNU dvisprep 1.0b \n"
    "compiles a dvispell parameter file into binary format for dvispell\n"
    "Copyright (c) 1995 by Eberhard Mattes\n"
    "GNU dvispell comes with ABSOLUTELY NO WARRANTY\n"
    "This is free software. You may redistribute copies of dvispell\n"
    "under the terms of the GNU General Public License.\n"
    "For more information about these matters, see the file named COPYING.\n\n";

  fputs (text, stderr);
}


/* Print a fatal error message to stderr and terminate the program.
   RC is the exit code; FMT and the remaining arguments are formatted
   as by printf().  The message is prefixed with the program name and
   followed by a newline.  This function does not return. */

static void error (int rc, const char *fmt, ...)
{
  va_list args;

  fputs ("dvisprep: ", stderr);
  va_start (args, fmt);
  vfprintf (stderr, fmt, args);
  va_end (args);
  fputc ('\n', stderr);
  exit (rc);
}

/* Report a failed internal consistency check and terminate with exit
   code EXIT_INT.  N identifies the check which failed. */

static void internal (int n)
{
  error (EXIT_INT, "Internal error, case %d", n);
}


/* Report memory exhaustion and terminate with exit code EXIT_MEM. */

static void out_of_mem (void)
{
  error (EXIT_MEM, "Out of memory");
}


/* Allocate N bytes.  If no memory is available, the program is
   terminated by out_of_mem(), therefore the return value is never
   NULL.

   Note: This program requires xmalloc (0) != NULL.  As malloc (0) is
   allowed to return NULL even if memory is available, a request for
   zero bytes is turned into a request for one byte. */

static void *xmalloc (size_t n)
{
  void *p;

  p = malloc (n != 0 ? n : 1);
  if (p == NULL)
    out_of_mem ();
  return (p);
}


/* Change the size of the block P to N bytes; P may be NULL, in which
   case a new block is allocated.  If no memory is available, the
   program is terminated by out_of_mem(), therefore the return value
   is never NULL.

   realloc (p, 0) may free the block and return NULL, which would be
   mistaken for memory exhaustion; a request for zero bytes is
   therefore turned into a request for one byte. */

static void *xrealloc (void *p, size_t n)
{
  p = realloc (p, n != 0 ? n : 1);
  if (p == NULL)
    out_of_mem ();
  return (p);
}


/* Release the block P.  P may be NULL; free() is defined to do
   nothing for a null pointer, so no explicit check is required. */

static void xfree (void *p)
{
  free (p);
}


static void syntax (const char *fmt, ...)
{
  va_list arg_ptr;

  fprintf (stderr, "%s:%ld: ", cur_input.fname, cur_input.line_no);
  va_start (arg_ptr, fmt);
  vfprintf (stderr, fmt, arg_ptr);
  va_end (arg_ptr);
  fprintf (stderr, "\n");
  exit (EXIT_SYNTAX);
}


/* Report the syntax error "WHAT expected" for the current input
   position.  Like syntax(), this function does not return. */

static void expected (const char *what)
{
  syntax ("%s expected", what);
}


/* Copy the string SRC to the buffer DST of SIZE bytes.  At most
   SIZE-1 characters are copied; the remainder of the buffer, including
   the last byte, is filled with null characters.  Nothing is done if
   SIZE is zero. */

static void strmov (char *dst, const char *src, unsigned size)
{
  unsigned copied;

  if (size == 0)
    return;
  copied = 0;
  while (copied < size - 1 && src[copied] != 0)
    {
      dst[copied] = src[copied];
      ++copied;
    }
  memset (dst + copied, 0, size - copied);
}


/* Return the string pool index of the LEN bytes at SRC.  If the pool
   already contains an identical byte string, its index is returned;
   otherwise a copy of the bytes is added to the pool (which is
   enlarged in steps of 64 entries) and to the hash chain.  LEN may be
   zero.  The bytes are stored without a terminating null character. */

static unsigned stringn (const byte *src, size_t len)
{
  unsigned h;
  unsigned i;
  size_t k;
  byte *p;

  h = 0;
  for (k = 0; k < len; ++k)
    h = (h << 2) ^ src[k];
  h %= STR_HASH_SIZE;

  /* Look for an existing entry in the hash chain. */

  for (i = str_hash[h]; i != NIL16; i = str_next[i])
    if (str_length[i] == len && memcmp (str_pool[i], src, len) == 0)
      return (i);

  /* Not found: append a new entry and make it the head of the chain. */

  if (str_pool_used >= str_pool_size)
    {
      str_pool_size += 64;
      RESIZE (str_pool, str_pool_size);
      RESIZE (str_length, str_pool_size);
      RESIZE (str_next, str_pool_size);
    }
  p = xmalloc (len);            /* Same element type as str_pool */
  memcpy (p, src, len);
  str_pool[str_pool_used] = p;
  str_length[str_pool_used] = len;
  str_next[str_pool_used] = str_hash[h];
  str_hash[h] = str_pool_used;
  return (str_pool_used++);
}


/* Return the string pool index of the null-terminated string SRC
   (the terminating null character is not stored), see stringn(). */

static unsigned string (const byte *src)
{
  /* strlen() takes a `const char *'; convert explicitly in case `byte'
     is not plain `char'. */
  return (stringn (src, strlen ((const char *)src)));
}



/* Return string number I of the string pool as null-terminated
   string.  The result is built in the global buffer `str' and is
   overwritten by the next call to get_str() or get_token().  Pool
   entries which do not fit into the buffer (they can be created by
   parse_codes()) are truncated instead of overflowing the buffer. */

static const char *get_str (unsigned i)
{
  size_t len;

  len = str_length[i];
  if (len > sizeof (str) - 1)
    len = sizeof (str) - 1;
  memcpy (str, str_pool[i], len);
  str[len] = 0;
  return (str);
}


/* Write the COUNT symbols of TAB to the listing file, formatted as
   "(<a>, <b>, ...)".  An empty table is shown as "END". */

static void list_table (const unsigned *tab, unsigned count)
{
  const char *prefix;
  unsigned k;

  if (count == 0)
    {
      fputs ("END", list_file);
      return;
    }

  /* The first symbol is preceded by the opening parenthesis, all the
     other ones by a separator. */

  prefix = "(";
  for (k = 0; k < count; ++k)
    {
      fprintf (list_file, "%s<%s>", prefix, get_str (tab[k]));
      prefix = ", ";
    }
  fputc (')', list_file);
}


/* Return TRUE if the first COUNT elements of P1 and P2 are equal,
   FALSE otherwise.  Two empty tables are equal. */

static int equal_table (const unsigned *p1, const unsigned *p2, unsigned count)
{
  while (count-- != 0)
    {
      if (*p1++ != *p2++)
        return (FALSE);
    }
  return (TRUE);
}


/* Reset all the tables filled by the parser: the lists of layout,
   font, output and conversion tables and the table of letters. */

static void start_parse (void)
{
  layout_head = NULL;
  font_head = NULL;
  output_head = NULL;
  conversion_head = NULL;
  conversion_add = &conversion_head; /* Append to the empty list */
  letter_table = NULL;
  letter_count = 0;
  letter_alloc = 0;
}


static void start_input (const char *fname)
{
  struct input_level new_input;

  if (input_sp + 1 >= INPUT_STACK_SIZE)
    syntax ("Include files nested too deeply");
  if (input_sp == -1 && strcmp (fname, "-") == 0)
    new_input.f = stdin;
  else
    {
      new_input.f = fopen (fname, "r");
      if (new_input.f == NULL)
        error (EXIT_FILE, "Cannot open input file `%s'", fname);
    }
  new_input.fname = fname;
  new_input.line_no = 1;
  if (input_sp >= 0)
    input_stack[input_sp] = cur_input;
  ++input_sp;
  cur_input = new_input;
}


/* Close the current input file and resume the enclosing input file,
   if there is one.  The program is terminated if a read error has
   occurred on the file being closed. */

static void end_input (void)
{
  FILE *finished = cur_input.f;

  if (ferror (finished))
    error (EXIT_FILE, "Error reading data file %s", cur_input.fname);
  fclose (finished);
  if (--input_sp >= 0)
    cur_input = input_stack[input_sp];
}


/* Read the next token from the current input file.  The token type
   (TOK_*) is stored to `token'.  For TOK_NUMBER, `number' is set to
   the value; for TOK_STRING, `str' is set to the text and `number' to
   the string pool index of the text.

   Blanks, tabs and newlines are skipped, as are comments, which
   start with `%' and extend to the end of the line.  Malformed input
   is reported with syntax(), which terminates the program. */

static void get_token (void)
{
  int c, i, base;
  static char digits[] = "0123456789abcdefABCDEF";
  char *p;

next:
  c = fgetc (cur_input.f);
  switch (c)
    {
    case EOF:
      token = TOK_EOF;
      break;
    case '\n':
      ++cur_input.line_no;
      goto next;
    case ' ':
    case '\t':
      goto next;
    case '%':
      /* Comment: skip the rest of the line. */
      for (;;)
        {
          c = fgetc (cur_input.f);
          if (c == EOF || c == '\n')
            break;
        }
      if (c == '\n')
        ++cur_input.line_no;
      if (c == EOF)
        token = TOK_EOF;
      else
        goto next;
      break;
    case '(':
      token = TOK_LPAR;
      break;
    case ')':
      token = TOK_RPAR;
      break;
    case '<':
      /* Name enclosed in angle brackets: <...>.  It may contain
         blanks (a tab is turned into a blank) but must end on the
         same line. */
      token = TOK_STRING;
      i = 0;
      for (;;)
        {
          c = fgetc (cur_input.f);
          if (c == '\t')
            c = ' ';
          if (c == EOF || c == '\n')
            syntax ("String not terminated");
          if (c == '>')
            break;
          if (!isascii (c) || !isprint (c))
            syntax ("Invalid string character");
          if (i >= sizeof (str)-1)
            syntax ("String too long");
          str[i++] = (char)c;
        }
      str[i] = 0;
      number = string (str);
      break;
    case '`':
      /* Backquote followed by a printable ASCII character: the value
         is the code of that character. */
      token = TOK_NUMBER;
      c = fgetc (cur_input.f);
      if (c == EOF || c == '\n' || !isascii (c) || !isprint (c))
        syntax ("Invalid quoted character");
      number = (unsigned)c;
      break;
    case '\'':
    case '"':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      /* Number: ' introduces an octal number, " introduces a
         hexadecimal number, a digit starts a decimal number.  The
         token remains TOK_ERROR if no digit follows the prefix. */
      token = TOK_ERROR;
      switch (c)
        {
        case '\'':
          base = 8;
          c = fgetc (cur_input.f);
          break;
        case '"':
          base = 16;
          c = fgetc (cur_input.f);
          break;
        default:
          base = 10;
          break;
        }
      number = 0;
      for (;;)
        {
          if (c == EOF)
            break;
          p = strchr (digits, (char)c);
          if (p == NULL)
            {
              /* Not a digit: the character belongs to the next token. */
              ungetc (c, cur_input.f);
              break;
            }
          token = TOK_NUMBER;
          i = p - digits;
          if (i >= 16) i -= 6;  /* Map `A'...`F' to 10...15 */
          if (i >= base)
            syntax ("Invalid digit");
          number = number * base + i;
          if (number > CHAR_CODES-1)
            syntax ("Number out of range");
          c = fgetc (cur_input.f);
        }
      break;
    default:
      /* Bare name: extends up to (not including) the next blank, tab,
         newline, `)' or the end of the file. */
      token = TOK_STRING;
      i = 0;
      for (;;)
        {
          if (!isascii (c) || !isprint (c))
            syntax ("Invalid string character");
          if (i >= sizeof (str)-1)
            syntax ("String too long");
          str[i++] = (char)c;
          c = fgetc (cur_input.f);
          if (c == EOF || c == ' ' || c == '\t' || c == '\n' || c == ')')
            {
              if (c != EOF)
                ungetc (c, cur_input.f);
              break;
            }
        }
      str[i] = 0;
      number = string (str);
      break;
    }
}


/* Parse a single character code or a parenthesized list of character
   codes, starting with the current token.  On return, the current
   token is the one following the code or list.  If EMPTY is zero, at
   least one character code is required; otherwise the list may be
   empty or missing.  The codes are put into the string pool as a
   byte string, the string pool index is returned. */

static unsigned parse_codes (int empty)
{
  unsigned char buf[256];
  unsigned used;

  used = 0;
  switch (token)
    {
    case TOK_NUMBER:
      buf[used++] = (unsigned char)number;
      get_token ();
      break;

    case TOK_LPAR:
      get_token ();
      if (!empty && token != TOK_NUMBER)
        expected ("Character code");
      while (token != TOK_RPAR)
        {
          if (token != TOK_NUMBER)
            expected ("Character code or `)'");
          if (used >= sizeof (buf))
            syntax ("Too many character codes");
          buf[used++] = (unsigned char)number;
          get_token ();
        }
      get_token ();             /* Skip `)' */
      break;

    default:
      if (!empty)
        expected ("Character code or `('");
      break;
    }
  return (stringn (buf, used));
}


/* Parse a single name or a parenthesized list of names, starting with
   the current token.  On return, the current token is the one
   following the name or list.  If EMPTY is zero, at least one name is
   required; otherwise the list may be empty or missing.  TYPE
   describes the kind of name for error messages.  A newly allocated
   table holding the string pool indices of the names is stored to
   *PTAB, the number of names (at most 256) is stored to *PCOUNT. */

static void parse_names (int empty, const char *type, unsigned **ptab,
                         unsigned *pcount)
{
  unsigned buf[256];
  unsigned *result;
  unsigned used, k;

  used = 0;
  switch (token)
    {
    case TOK_STRING:
      buf[used++] = number;
      get_token ();
      break;

    case TOK_LPAR:
      get_token ();
      if (!empty && token != TOK_STRING)
        syntax ("%s name expected", type);
      while (token != TOK_RPAR)
        {
          if (token != TOK_STRING)
            syntax ("%s name or `)' expected", type);
          if (used >= 256)
            syntax ("List too long");
          buf[used++] = number;
          get_token ();
        }
      get_token ();             /* Skip `)' */
      break;

    default:
      if (!empty)
        syntax ("%s name or `(' expected", type);
      break;
    }

  ALLOCARRAY (result, used);
  for (k = 0; k < used; ++k)
    result[k] = buf[k];
  *ptab = result;
  *pcount = used;
}


/* Parse the remainder of an `include' form: a list of file names,
   terminated by `)'.  Each file is parsed completely before the next
   token of the current file is read. */

static void parse_include (void)
{
  char fname[sizeof (str)];

  for (;;)
    {
      get_token ();
      if (token == TOK_RPAR)
        break;
      if (token != TOK_STRING)
        expected ("Include file name or `)'");

      /* The file name is kept (by reference) as the name of the input
         file while the file is being parsed.  It must not live in
         `str', which is overwritten by every token read from the
         include file; otherwise error messages would show the text of
         the most recent token instead of the file name. */

      strcpy (fname, str);
      parse (fname);
    }
}


/* Add the pair (FROM, TO) to the output table P.  If the table
   already contains an entry for FROM, that entry is changed to TO.
   The table is enlarged in steps of 16 entries. */

static void add_output (struct output *p, unsigned from, unsigned to)
{
  unsigned *entry;
  unsigned slot;

  for (slot = 0; slot < p->count; ++slot)
    {
      entry = &p->table[2*slot];
      if (entry[0] == from)
        {
          entry[1] = to;
          return;
        }
    }

  if (p->count >= p->alloc)
    {
      p->alloc += 16;
      RESIZE (p->table, 2 * p->alloc);
    }
  entry = &p->table[2 * p->count];
  entry[0] = from;
  entry[1] = to;
  ++p->count;
}


/* Parse the remainder of an `output' form: the name of the new output
   table, followed by any number of entries and the closing `)'.  An
   entry is either `(symbol codes)', which defines the character codes
   for a symbol, or the name of a previously defined output table, all
   entries of which are copied.  Later entries override earlier ones.
   The new table is inserted at the head of the list of output
   tables. */

static void parse_output (void)
{
  struct output *tbl, *scan;
  unsigned sym, codes, k;

  get_token ();
  if (token != TOK_STRING)
    expected ("Output table name");
  for (scan = output_head; scan != NULL; scan = scan->next)
    if (scan->name == number)
      syntax ("Redefinition of output table");

  ALLOC (tbl);
  tbl->name = number;
  tbl->next = NULL;
  tbl->table = NULL;
  tbl->count = 0;
  tbl->alloc = 0;

  for (;;)
    {
      get_token ();
      if (token == TOK_RPAR)
        {
          tbl->next = output_head;
          output_head = tbl;
          return;
        }
      else if (token == TOK_LPAR)
        {
          get_token ();
          if (token != TOK_STRING)
            expected ("Character name");
          sym = number;
          get_token ();
          codes = parse_codes (TRUE);
          if (token != TOK_RPAR)
            expected ("`)'");
          add_output (tbl, sym, codes);
        }
      else if (token == TOK_STRING)
        {
          scan = output_head;
          while (scan != NULL && scan->name != number)
            scan = scan->next;
          if (scan == NULL)
            syntax ("Output table `%s' not found", str);
          for (k = 0; k < scan->count; ++k)
            add_output (tbl, scan->table[2*k+0], scan->table[2*k+1]);
        }
      else
        expected ("`(' or `)' or output table name");
    }
}


/* Parse the remainder of a `layout' form (IS_FONT is zero) or of a
   `font' form (IS_FONT is non-zero).  A layout table has exactly one
   name, a font table has a (possibly empty) list of names; all the
   fonts of the list share one encoding table.  The name is followed
   by any number of entries and the closing `)'.  An entry is either
   `(code symbols)', which defines the symbols for a character code,
   or the name of a previously defined layout table, all defined
   entries of which are copied.  Later entries override earlier
   ones. */

static void parse_layout (int is_font)
{
  unsigned name, *names;
  unsigned i, name_count;
  struct font_or_layout *f;
  struct encoding *tab;

  get_token ();
  name_count = 0; name = 0; names = NULL;
  if (is_font)
    {
      parse_names (TRUE, "Font table", &names, &name_count);
      for (i = 0; i < name_count; ++i)
        {
          name = names[i];
          for (f = font_head; f != NULL; f = f->next)
            if (f->name == name)
              syntax ("Redefinition of font table");
        }
    }
  else
    {
      if (token != TOK_STRING)
        expected ("Layout table name");
      name = number;
      for (f = layout_head; f != NULL; f = f->next)
        if (f->name == name)
          syntax ("Redefinition of layout table");
      get_token ();
    }
  ALLOC (tab);
  tab->pos = 0;                 /* Not yet written to the output file */
  for (i = 0; i < CHAR_CODES; ++i)
    {
      tab->lengths[i] = 0;
      tab->symbols[i] = NULL;
    }
  for (;;)
    {
      switch (token)
        {
        case TOK_RPAR:
          if (is_font)
            {
              for (i = 0; i < name_count; ++i)
                {
                  ALLOC (f);
                  f->next = font_head;
                  f->name = names[i];
                  f->encoding = tab;
                  font_head = f;
                }
              /* The names have been copied into the list elements,
                 the temporary table is no longer required. */
              xfree (names);
            }
          else
            {
              ALLOC (f);
              f->next = layout_head;
              f->name = name;
              f->encoding = tab;
              layout_head = f;
            }
          return;
        case TOK_LPAR:
          get_token ();
          if (token != TOK_NUMBER)
            expected ("Character code");
          i = number;
          get_token ();
          parse_names (TRUE, "Character", &tab->symbols[i], &tab->lengths[i]);
          if (token != TOK_RPAR)
            expected ("`)'");
          break;
        case TOK_STRING:
          for (f = layout_head; f != NULL; f = f->next)
            if (f->name == number)
              break;
          if (f == NULL)
            syntax ("Layout table `%s' not found", str);
          /* Copy the defined entries; the symbol tables are shared
             with the table copied from. */
          for (i = 0; i < CHAR_CODES; ++i)
            if (f->encoding->symbols[i] != NULL)
              {
                tab->lengths[i] = f->encoding->lengths[i];
                tab->symbols[i] = f->encoding->symbols[i];
              }
          break;
        default:
          expected ("`(' or `)' or layout table name");
        }
      get_token ();
    }
}


/* Append a copy of the rule SRC to the conversion table DST unless
   DST already contains a rule with identical input and output
   sequences.  Only the structure is copied; the input and output
   tables of SRC are shared with the copy. */

static void add_conv (struct conversion *dst, const struct conv *src)
{
  struct conv **link;
  struct conv *node;

  for (link = &dst->head; *link != NULL; link = &(*link)->next)
    {
      node = *link;
      if (node->in_count == src->in_count
          && node->out_count == src->out_count
          && equal_table (node->in_table, src->in_table, src->in_count)
          && equal_table (node->out_table, src->out_table, src->out_count))
        return;
    }

  /* LINK now points to the pointer terminating the list. */

  ALLOC (node);
  *node = *src;
  node->next = NULL;
  *link = node;
}


static void parse_conversion (void)
{
  struct conversion *p, *q;
  struct conv tmp, *copy;

  get_token ();
  if (token != TOK_STRING)
    expected ("Conversion table name");
  for (p = conversion_head; p != NULL; p = p->next)
    if (p->name == number)
      syntax ("Redefinition of conversion table");
  ALLOC (p);
  p->next = NULL;
  p->name = number;
  p->head = NULL;

  get_token ();
  for (;;)
    {
      switch (token)
        {
        case TOK_RPAR:
          p->next = NULL;
          *conversion_add = p;
          conversion_add = &p->next;
          return;

        case TOK_LPAR:
          get_token ();
          parse_names (FALSE, "Character", &tmp.in_table, &tmp.in_count);
          parse_names (TRUE, "Character", &tmp.out_table, &tmp.out_count);
          if (token != TOK_RPAR)
            expected ("`)'");
          add_conv (p, &tmp);
          break;

        case TOK_STRING:
          for (q = conversion_head; q != NULL; q = q->next)
            if (q->name == number)
              break;
          if (q == NULL)
            syntax ("Conversion table `%s' not found", str);
          for (copy = q->head; copy != NULL; copy = copy->next)
            add_conv (p, copy);
          break;

        default:
          expected ("`(' or `)' or conversion table name");
        }
      get_token ();
    }
}


/* Parse the remainder of a `letters' form: a list of symbol names,
   terminated by `)'.  The symbols are appended to the table of
   letters, which is enlarged in steps of 64 entries. */

static void parse_letters (void)
{
  for (get_token (); token != TOK_RPAR; get_token ())
    {
      if (token != TOK_STRING)
        syntax ("Character name or `)' expected");
      if (letter_count >= letter_alloc)
        {
          letter_alloc += 64;
          RESIZE (letter_table, letter_alloc);
        }
      letter_table[letter_count] = number;
      ++letter_count;
    }
}


/* Parse the parameter file FNAME.  The file consists of a sequence of
   forms, each of which starts with `(' and a keyword; the remainder
   of a form is handled by the function for that keyword.  Include
   files are processed by calling this function recursively. */

static void parse (const char *fname)
{
  unsigned keyword;

  start_input (fname);
  for (get_token (); token != TOK_EOF; get_token ())
    {
      if (token != TOK_LPAR)
        expected ("`('");
      get_token ();
      if (token != TOK_STRING)
        expected ("Keyword");
      keyword = number;
      if (keyword == s_include)
        parse_include ();
      else if (keyword == s_output)
        parse_output ();
      else if (keyword == s_layout)
        parse_layout (FALSE);
      else if (keyword == s_font)
        parse_layout (TRUE);
      else if (keyword == s_letters)
        parse_letters ();
      else if (keyword == s_conversion)
        parse_conversion ();
      else
        syntax ("Unknown keyword");
    }
  end_input ();
}


/* Write the N bytes at SRC to the output file.  The program is
   terminated if not all the bytes can be written. */

static void write_bytes (const void *src, size_t n)
{
  size_t written;

  written = fwrite (src, 1, n, out_file);
  if (written != n)
    error (EXIT_FILE, "Cannot write to output file `%s'", out_fname);
}


/* Write the low-order 16 bits of N to the output file, most
   significant byte first. */

static void write_u16 (unsigned n)
{
  unsigned char bytes[2];

  bytes[1] = (unsigned char)(n & 0xff);
  bytes[0] = (unsigned char)((n >> 8) & 0xff);
  write_bytes (bytes, sizeof (bytes));
}


/* Write the 32-bit number N to the output file, most significant byte
   first. */

static void write_u32 (u32 n)
{
  unsigned char bytes[4];
  int k;

  /* Fill the buffer starting with the least significant byte. */

  for (k = 3; k >= 0; --k)
    {
      bytes[k] = (unsigned char)n;
      n >>= 8;
    }
  write_bytes (bytes, sizeof (bytes));
}


/* Write null bytes to the output file until the file position is a
   multiple of 4. */

static void write_pad (void)
{
  static const char zeros[3] = {0, 0, 0};
  size_t misalign;

  misalign = (size_t)ftell (out_file) & 3;
  if (misalign != 0)
    write_bytes (zeros, 4 - misalign);
}


/* Write the DFA described by state_table and prod_table to the output
   file and return the file position at which it starts.  All values
   are written as 16-bit numbers, in this order:

     - number of states, number of productions, total number of
       symbols of all productions, total number of transitions
     - for each production, the offset of its first symbol, followed
       by the total number of symbols
     - for each state, the offset of its first transition, followed by
       the total number of transitions
     - the symbols of all the productions
     - the default output of each state
     - the transitions (input, new state), ordered by state
     - the transitions (state, input, new state), ordered by hash
       value, followed by the hash table (see below) */

static long write_dfa (void)
{
  long pos0;
  unsigned prod_size, trans_count;
  unsigned i, j, k, h;
  struct htrans *htrans_table;
  unsigned *index;
  static unsigned hash_count[DFA_HASH_SIZE];
  static unsigned hash_offset[DFA_HASH_SIZE+1];
  static unsigned hash_ptr[DFA_HASH_SIZE];

  pos0 = ftell (out_file);

  /* Compute and write the totals. */

  prod_size = 0; trans_count = 0;
  for (i = 0; i < state_count; ++i)
    trans_count += state_table[i].transition_count;
  for (i = 0; i < prod_count; ++i)
    prod_size += prod_table[i].count;
  write_u16 (state_count);
  write_u16 (prod_count);
  write_u16 (prod_size);
  write_u16 (trans_count);

  /* Write the offset tables.  Each table has an additional entry at
     its end which holds the total. */

  prod_size = 0;
  for (i = 0; i < prod_count; ++i)
    {
      write_u16 (prod_size);
      prod_size += prod_table[i].count;
    }
  write_u16 (prod_size);
  trans_count = 0;
  for (i = 0; i < state_count; ++i)
    {
      write_u16 (trans_count);
      trans_count += state_table[i].transition_count;
    }
  write_u16 (trans_count);

  /* Write the productions, the default outputs and the transitions. */

  for (i = 0; i < prod_count; ++i)
    for (j = 0; j < prod_table[i].count; ++j)
      write_u16 (prod_table[i].table[j]);
  for (i = 0; i < state_count; ++i)
    write_u16 (state_table[i].def_output);
  for (i = 0; i < state_count; ++i)
    for (j = 0; j < state_table[i].transition_count; ++j)
      {
        write_u16 (state_table[i].transition_table[j].input);
        write_u16 (state_table[i].transition_table[j].output);
      }

  /* Create hashed transition table.  hash_offset[h] holds the offset
     into the hashed transition table at which entries for transitions
     of equivalence class h start.  The sequence of entries for that
     equivalence class ends at offset hash_offset[h+1].

     First, build a table containing all the transitions. */

  ALLOCARRAY (htrans_table, trans_count);

  k = 0;
  for (i = 0; i < state_count; ++i)
    for (j = 0; j < state_table[i].transition_count; ++j)
      {
        htrans_table[k].state = i;
        htrans_table[k].input = state_table[i].transition_table[j].input;
        htrans_table[k].output = state_table[i].transition_table[j].output;
        ++k;
      }
  if (k != trans_count) internal (90);

  /* Compute the sizes of the equivalence classes. */

  for (i = 0; i < DFA_HASH_SIZE; ++i)
    hash_count[i] = 0;
  for (i = 0; i < trans_count; ++i)
    {
      h = DFA_HASH (htrans_table[i].state, htrans_table[i].input);
      ++hash_count[h];
    }

  /* Now compute the offsets for the equivalence classes.  Don't
     forget to set the last entry, which is used for indicating the
     end of the last class. */

  k = 0;
  for (i = 0; i < DFA_HASH_SIZE; ++i)
    {
      hash_offset[i] = k;
      k += hash_count[i];
    }
  hash_offset[DFA_HASH_SIZE] = k;

  /* Build a temporary index table (pointing into htrans_table) which
     is partitioned into equivalence classes, according to the
     hash_offset table (we copy it to hash_ptr, as we'll need
     hash_offset later). */

  ALLOCARRAY (index, trans_count);
  for (i = 0; i < DFA_HASH_SIZE; ++i)
    hash_ptr[i] = hash_offset[i];
  for (i = 0; i < trans_count; ++i)
    {
      h = DFA_HASH (htrans_table[i].state, htrans_table[i].input);
      j = hash_ptr[h]++;
      index[j] = i;
    }

  /* Write the hashed transition table (sorted by equivalence class)
     and the hash table to the binary file. */

  for (i = 0; i < trans_count; ++i)
    {
      j = index[i];
      write_u16 (htrans_table[j].state);
      write_u16 (htrans_table[j].input);
      write_u16 (htrans_table[j].output);
    }
  for (i = 0; i <= DFA_HASH_SIZE; ++i)
    write_u16 (hash_offset[i]);

  xfree (index); xfree (htrans_table);
  return (pos0);
}


/* Write one line describing the transition TR of state ST to the
   listing file: the symbols accepted so far (or "END" if there are
   none), followed by the input symbol and the number of the new
   state.  If the input symbol is NIL16, "END" is shown as input and
   the output of the transition is shown as production. */

static void list_dfa_trans (const struct state *st,
                            const struct transition *tr)
{
  if (st->accept_count == 0)
    fprintf (list_file, "END");
  else
    list_table (st->accept_table, st->accept_count);
  fputs (" & ", list_file);
  if (tr->input == NIL16)
    {
      fputs ("END -> ", list_file);
      list_table (prod_table[tr->output].table, prod_table[tr->output].count);
    }
  else
    {
      /* The state number is unsigned, therefore %u is used. */
      fprintf (list_file, "<%s> -> %u", get_str (tr->input), tr->output);
    }
  fputc ('\n', list_file);
}


static unsigned new_state (void)
{
  if (state_count >= state_alloc)
    {
      state_alloc += 16;
      RESIZE (state_table, state_alloc);
    }
  state_table[state_count].accept_count = 0;
  state_table[state_count].accept_table = NULL;
  state_table[state_count].transition_count = 0;
  state_table[state_count].transition_alloc = 0;
  state_table[state_count].transition_table = NULL;
  state_table[state_count].def_output = NIL16;
  return (state_count++);
}


static void add_transition (unsigned from, unsigned input, unsigned output)
{
  struct state *st;
  unsigned i;

  st = &state_table[from];
  for (i = 0; i < st->transition_count; ++i)
    {
      if (st->transition_table[i].input == input
          && st->transition_table[i].output == output)
        return;
    }
  if (st->transition_count >= st->transition_alloc)
    {
      st->transition_alloc += 32;
      RESIZE (st->transition_table, st->transition_alloc);
    }
  i = st->transition_count++;
  st->transition_table[i].input = input;
  st->transition_table[i].output = output;
}


static unsigned add_prod (unsigned *table, unsigned count)
{
  unsigned i;

  for (i = 0; i < prod_count; ++i)
    if (prod_table[i].count == count
        && equal_table (prod_table[i].table, table, count))
      return (i);
  if (prod_count >= prod_alloc)
    {
      prod_alloc += 32;
      RESIZE (prod_table, prod_alloc);
    }
  prod_table[prod_count].count = count;
  prod_table[prod_count].table = table;
  return (prod_count++);
}


/* Add the conversion rule P to the DFA.  Starting at the start state
   (state 0), there is one state for each prefix of the input sequence
   of P; states for prefixes already known are reused.  The output
   sequence of P becomes the default output of the state reached by
   the complete input sequence. */

static void dfa_word (const struct conv *p)
{
  unsigned from, to, len;
  unsigned s, k;

  from = 0;                     /* Start state */
  to = NIL16;
  for (len = 1; len <= p->in_count; ++len)
    {
      /* Look for the state which accepts the first LEN symbols. */

      s = 0;
      while (s < state_count
             && !(state_table[s].accept_count == len
                  && equal_table (state_table[s].accept_table,
                                  p->in_table, len)))
        ++s;

      if (s >= state_count)
        {
          /* There is no such state, create it. */

          s = new_state ();
          state_table[s].accept_count = len;
          ALLOCARRAY (state_table[s].accept_table, len);
          for (k = 0; k < len; ++k)
            state_table[s].accept_table[k] = p->in_table[k];
        }
      to = s;
      add_transition (from, p->in_table[len-1], to);
      from = to;
    }
  if (to == NIL16) internal (21);
  state_table[to].def_output = add_prod (p->out_table, p->out_count);
}


/* Build the DFA for the list of conversion rules starting at P, write
   it to the output file and return the file position at which it
   starts.  If `debugging' is set, the transitions of all the states
   are listed to the listing file.  The state and production tables
   are released before returning. */

static long compute_dfa (const struct conv *p)
{
  unsigned i, j;
  long pos0;

  state_count = 0; state_alloc = 0; state_table = NULL;
  prod_count = 0; prod_alloc = 0; prod_table = NULL;
  new_state ();                 /* Create start state */
  while (p != NULL)
    {
      dfa_word (p);
      p = p->next;
    }
  if (debugging)
    {
      for (i = 0; i < state_count; ++i)
        {
          /* The state number is unsigned, therefore %u is used. */
          fprintf (list_file, "State %u:\n", i);
          for (j = 0; j < state_table[i].transition_count; ++j)
            list_dfa_trans (&state_table[i],
                            &state_table[i].transition_table[j]);
          fputc ('\n', list_file);
        }
    }
  pos0 = write_dfa ();
  for (i = 0; i < state_count; ++i)
    {
      xfree (state_table[i].accept_table);
      xfree (state_table[i].transition_table);
    }
  xfree (state_table);
  /* Do NOT free prod_table[i].table!  These tables are the output
     tables of the conversion rules, see dfa_word(). */
  xfree (prod_table);
  return (pos0);
}


/* File positions of the sections of the output file.  The values are
   set while the sections are being written and are stored in the file
   header by write_header(). */
struct
{
  long pos_str;                 /* String pool, see write_strings() */
  long pos_conv;                /* Conversion tables, see compute() */
  long pos_font;                /* Font tables */
  long pos_layout;              /* Layout tables */
  long pos_output;              /* Output tables, see write_outputs() */
  long pos_letter;              /* Letters, see write_letters() */
} mem_header;


static void write_header (void)
{
  static char header[] = {'d', 's', 'b', 1};

  write_bytes (header, 4);
  write_u32 ((u32)mem_header.pos_str);
  write_u32 ((u32)mem_header.pos_conv);
  write_u32 ((u32)mem_header.pos_font);
  write_u32 ((u32)mem_header.pos_layout);
  write_u32 ((u32)mem_header.pos_output);
  write_u32 ((u32)mem_header.pos_letter);
}


/* Write the string pool to the output file and record its position
   in mem_header.pos_str.  The section consists of the number of
   strings, the total number of bytes, the offset of each string plus
   a final entry holding the total (all as 16-bit numbers), and the
   bytes of all the strings, padded to a multiple of 4 bytes. */

static void write_strings (void)
{
  unsigned total, offset, k;

  mem_header.pos_str = ftell (out_file);

  total = 0;
  for (k = 0; k < str_pool_used; ++k)
    total += str_length[k];
  write_u16 (str_pool_used);
  write_u16 (total);

  /* Offset table; the entry for k == str_pool_used is the total. */

  offset = 0;
  for (k = 0; k <= str_pool_used; ++k)
    {
      write_u16 (offset);
      if (k < str_pool_used)
        offset += str_length[k];
    }

  for (k = 0; k < str_pool_used; ++k)
    write_bytes (str_pool[k], str_length[k]);
  write_pad ();
}


/* Write the encoding table P to the end of the output file and store
   the file position to P->pos.  Nothing is done if P->pos is already
   non-zero, that is, if a table shared by several fonts has already
   been written.

   The table consists of the number of character codes (CHARS, up to
   and including the last code which has symbols), the total number of
   symbols, CHARS+1 offsets into the list of symbols (the last one
   equals the total), and the list of symbols, padded to a multiple of
   4 bytes.  All values are 16-bit numbers. */

static void write_encoding (struct encoding *p)
{
  unsigned i, j, chars, size;
  long pos1;
  unsigned offset[CHAR_CODES];

  if (p->pos != 0)
    return;
  p->pos = ftell (out_file);

  /* Drop the trailing character codes which don't have symbols. */

  for (chars = CHAR_CODES; chars != 0; --chars)
    if (p->lengths[chars-1] != 0)
      break;
  write_u16 (chars);

  /* Reserve space for the total and the offsets; the values are not
     yet known, they are filled in below. */

  pos1 = ftell (out_file);
  write_u16 (0);
  for (i = 0; i <= chars; ++i)
    write_u16 (0);

  /* Write the symbols, recording the offset for each character code. */

  size = 0;
  for (i = 0; i < chars; ++i)
    {
      offset[i] = size;
      for (j = 0; j < p->lengths[i]; ++j)
        {
          write_u16 (p->symbols[i][j]);
          ++size;
        }
    }
  write_pad ();

  /* Go back and fill in the total and the offsets, then return to the
     end of the file. */

  fseek (out_file, pos1, SEEK_SET);
  write_u16 (size);
  for (i = 0; i < chars; ++i)
    write_u16 (offset[i]);
  write_u16 (size);
  fseek (out_file, 0L, SEEK_END);
}


static long write_layouts (struct font_or_layout *head)
{
  unsigned count, i;
  long pos0, pos1;
  struct font_or_layout *p;

  pos0 = ftell (out_file);
  count = 0;
  for (p = head; p != NULL; p = p->next)
    ++count;
  write_u16 (count);
  for (p = head; p != NULL; p = p->next)
    write_u16 (p->name);
  pos1 = ftell (out_file);
  for (i = 0; i < count; ++i)
    write_u32 (0);
  write_pad ();
  for (p = head; p != NULL; p = p->next)
    write_encoding (p->encoding);
  fseek (out_file, pos1, SEEK_SET);
  for (p = head; p != NULL; p = p->next)
    write_u32 ((u32)p->encoding->pos);
  fseek (out_file, 0L, SEEK_END);
  return (pos0);
}


/* Write one output table and store the position in P->pos.  The
   table consists of the number of entries, the entries (symbol and
   string pool index of the character codes) sorted by the hash value
   of the symbol, and the hash table of OUTPUT_HASH_SIZE+1 offsets.
   All values are 16-bit numbers. */

/* Number of equivalence classes of the hash table of an output table. */

#define OUTPUT_HASH_SIZE 257

static void write_output (struct output *p)
{
  unsigned i, j, h, offset, *index;
  static unsigned hash_count[OUTPUT_HASH_SIZE];
  static unsigned hash_offset[OUTPUT_HASH_SIZE+1];
  static unsigned hash_ptr[OUTPUT_HASH_SIZE];

  /* Build hash table for output table.  hash_offset[h] holds the
     offset into the output table at which entries for symbols in
     equivalence class h start.  The sequence of entries for that
     equivalence class ends at offset hash_offset[h+1].

     First, compute the sizes of the equivalence classes.  The hash
     function is a simple modulus operation. */

  for (i = 0; i < OUTPUT_HASH_SIZE; ++i)
    hash_count[i] = 0;
  for (i = 0; i < p->count; ++i)
    {
      h = p->table[2*i+0] % OUTPUT_HASH_SIZE;
      ++hash_count[h];
    }

  /* Now compute the offsets for the equivalence classes.  Don't
     forget to set the last entry, which is used for indicating the
     end of the last class. */

  offset = 0;
  for (i = 0; i < OUTPUT_HASH_SIZE; ++i)
    {
      hash_offset[i] = offset;
      offset += hash_count[i];
    }
  hash_offset[OUTPUT_HASH_SIZE] = offset;

  /* Build a temporary index table (pointing into p->table) which is
     partitioned into equivalence classes, according to the
     hash_offset table (we copy it to hash_ptr, as we'll need
     hash_offset later). */

  ALLOCARRAY (index, p->count);
  for (i = 0; i < OUTPUT_HASH_SIZE; ++i)
    hash_ptr[i] = hash_offset[i];
  for (i = 0; i < p->count; ++i)
    {
      h = p->table[2*i+0] % OUTPUT_HASH_SIZE;
      j = hash_ptr[h]++;
      index[j] = i;
    }

  /* Write the output table (sorted by equivalence class) and the hash
     table to the binary file. */

  p->pos = ftell (out_file);
  write_u16 (p->count);
  for (i = 0; i < p->count; ++i)
    {
      j = index[i];
      write_u16 (p->table[2*j+0]);
      write_u16 (p->table[2*j+1]);
    }
  for (i = 0; i <= OUTPUT_HASH_SIZE; ++i)
    write_u16 (hash_offset[i]);
  xfree (index);
}


static void write_outputs (void)
{
  unsigned count;
  struct output *p;

  mem_header.pos_output = ftell (out_file);
  count = 0;
  write_u16 (0);
  for (p = output_head; p != NULL; p = p->next)
    {
      ++count;
      write_u16 (0);
      write_u32 (0);
    }
  write_pad ();
  for (p = output_head; p != NULL; p = p->next)
    write_output (p);
  fseek (out_file, mem_header.pos_output, SEEK_SET);
  write_u16 (count);
  for (p = output_head; p != NULL; p = p->next)
    write_u16 (p->name);
  for (p = output_head; p != NULL; p = p->next)
    write_u32 ((u32)p->pos);
  fseek (out_file, 0L, SEEK_END);
}


/* Write the letter table to the binary file: letter_count, followed
   by the letter_count entries of letter_table.  The file position of
   the table is stored in mem_header.pos_letter. */

static void write_letters (void)
{
  unsigned k = 0;

  mem_header.pos_letter = ftell (out_file);
  write_u16 (letter_count);
  while (k < letter_count)
    {
      write_u16 (letter_table[k]);
      ++k;
    }
}


static void compute (void)
{
  struct conversion *p;
  unsigned count;

  write_header ();
  write_strings ();
  mem_header.pos_font = write_layouts (font_head);
  mem_header.pos_layout = write_layouts (layout_head);
  write_outputs ();
  write_letters ();

  mem_header.pos_conv = ftell (out_file);
  write_u16 (0);
  for (p = conversion_head; p != NULL; p = p->next)
    {
      write_u16 (0);
      write_u32 (0);
    }
  write_pad ();

  count = 0;
  for (p = conversion_head; p != NULL; p = p->next)
    {
      ++count;
      if (verbose >= 1)
        fprintf (stderr, "Computing <%s>...\n", get_str (p->name));
      p->pos = compute_dfa (p->head);
    }

  fseek (out_file, mem_header.pos_conv, SEEK_SET);
  write_u16 (count);
  for (p = conversion_head; p != NULL; p = p->next)
    write_u16 (p->name);
  for (p = conversion_head; p != NULL; p = p->next)
    write_u32 ((u32)p->pos);

  fseek (out_file, 0L, SEEK_SET);
  write_header ();
}


/* Call banner(), write a summary of the command line syntax and of
   the options to stderr, and terminate the program with exit code
   EXIT_ARG.  This function does not return.

   Every option listed here is handled by get_args(); keep the two in
   step when adding options. */

static void usage (void)
{
  banner ();
  fputs ("\nUsage: dvisprep [-q] <input_file> [<output_file>]\n", stderr);
  fputs ("\nOptions:\n", stderr);
  fputs ("  -h, --help                        Give this help\n", stderr);
  fputs ("  -V, --version                     Display version number\n", stderr);
  fputs ("  -q, --quiet                       Don't display logo\n", stderr);
  fputs ("  -v, --verbose                     Increase verbosity\n", stderr);
  exit (EXIT_ARG);
}


/* Parse the command line and open the output file.

   Options:
     -h, --help      call usage(), which terminates the program
     -V, --version   show the banner and exit with status 0
     -q, --quiet     clear the logo flag (no banner)
     -v, --verbose   increment verbose
     -D              set debugging

   An unrecognized option is reported on stderr, followed by the usage
   message.

   After the options, one or two file names are expected: the input
   file and, optionally, the output file.  If the output file is
   omitted, its name is derived from the input file name by removing
   the extension.  In either case fndefext() is called with "dsb" on
   the output file name.

   On return, inp_fname and out_fname are set, out_file is open for
   binary writing, and list_file is stdout.  All failures terminate
   the program through usage() or error(). */

static void get_args (int argc, char *argv[])
{
  int quit = FALSE;
  int c;
  int option_index = 0;

  static const struct option long_options[] =
  {
    {"help",    no_argument, NULL, 'h'},
    {"version", no_argument, NULL, 'V'},
    {"quiet",   no_argument, NULL, 'q'},
    {"verbose", no_argument, NULL, 'v'},
    {NULL, 0, NULL, 0}
  };

  opterr = FALSE;               /* We print our own error messages */
  optind = 0;
  while (!quit
         && (c = getopt_long (argc, argv, "VhqDv",
                              long_options, &option_index)) != -1)
    {
      switch (c)
        {
        case 'V':
          logo = TRUE;
          quit = TRUE;
          break;
        case 'h':
          usage ();             /* Does not return */
          break;
        case 'q':
          logo = FALSE;
          break;
        case 'v':
          ++verbose;
          break;
        case 'D':
          debugging = TRUE;
          break;
        default:
          /* For an unrecognized short option, optopt holds the option
             character.  For an unrecognized (or ambiguous) long
             option, optopt is 0 and the offending word is the
             argument getopt_long() has just stepped over. */
          if (optopt != 0)
            fprintf (stderr, "Unknown option -%c\n", optopt);
          else if (optind > 0 && optind <= argc)
            fprintf (stderr, "Unknown option %s\n", argv[optind-1]);
          else
            fputs ("Unknown option\n", stderr);
          usage ();             /* Does not return */
          break;
        }
    }
  if (logo)
    banner ();
  if (quit)
    exit (0);

  if (argc - optind < 1 || argc - optind > 2)
    usage ();

  /* The input file must be readable, unless its name is "-"
     (presumably standard input -- the name is passed on to parse()
     unchanged). */

  strmov (inp_fname, argv[optind++], sizeof (inp_fname));
  if (strcmp (inp_fname, "-") != 0 && access (inp_fname, R_OK) != 0)
    error (EXIT_FILE, "Cannot open input file `%s'", inp_fname);

  /* Four bytes of out_fname are held back when copying, presumably
     to leave room for the extension added by fndefext() below. */

  if (optind < argc)
    strmov (out_fname, argv[optind++], sizeof (out_fname) - 4);
  else
    {
      strmov (out_fname, inp_fname, sizeof (out_fname) - 4);
      fnremext (out_fname);
    }
  if (strcmp (out_fname, "-") == 0)
    error (EXIT_ARG, "Output file name is missing");
  fndefext (out_fname, "dsb");
  out_file = fopen (out_fname, "wb");
  if (out_file == NULL)
    error (EXIT_FILE, "Cannot open output file `%s'", out_fname);
  list_file = stdout;
}


/* Initialize the global state used before parsing: mark the input
   stack as empty, reset the string pool and its hash table, and enter
   the predefined keywords into the string pool.

   The string() calls must stay in this order; the number after each
   call is the original author's note, presumably the index string()
   assigns to that keyword. */

static void start (void)
{
  unsigned slot;

  input_sp = -1;

  /* Empty string pool, empty hash chains. */

  str_pool = NULL;
  str_length = NULL;
  str_next = NULL;
  str_pool_used = 0;
  str_pool_size = 0;
  slot = 0;
  while (slot < STR_HASH_SIZE)
    {
      str_hash[slot] = NIL16;
      ++slot;
    }

  /* Predefined keywords. */

  s_include    = string ("include");    /* 0 */
  s_output     = string ("output");     /* 1 */
  s_layout     = string ("layout");     /* 2 */
  s_font       = string ("font");       /* 3 */
  s_letters    = string ("letters");    /* 4 */
  s_conversion = string ("conversion"); /* 5 */
  s_newpage    = string ("NEWPAGE");    /* 6 */
  s_newline    = string ("NEWLINE");    /* 7 */
  s_wordspace  = string ("WORDSPACE");  /* 8 */
  s_start      = string ("START");      /* 9 */
  s_end        = string ("END");        /* 10 */
  s_nonletter  = string ("NONLETTER");  /* 11 */
}


/* Program entry point: process the command line, initialize, parse
   the input file named on the command line, then compute the
   automatons and write the binary output file.  Returns EXIT_OK;
   the functions called here terminate the program themselves on
   command line or file errors. */

int main (int argc, char *argv[])
{
  /* Command line first: it sets inp_fname and opens the output
     file. */
  get_args (argc, argv);

  /* Set up global state and the parser. */
  start ();
  start_parse ();

  /* Read the parameter file. */
  if (verbose >= 1)
    {
      fputs ("Parsing...\n", stderr);
    }
  parse (inp_fname);

  /* Build the automatons and write everything out. */
  compute ();

  return EXIT_OK;
}
