unit MakeDawg;

{ This module creates a Directed Acyclic Word Graph and writes it to a file
  that is compatible with the Hasbro Scrabble CDROM game. The code is not
  pretty, but it does the job.

  To make the DAWG, we first store the words in a tree. Next, we start at
  the leaf nodes of the tree and start combining references to identical
  branches. The end result is that words with common endings share the same
  branch. }

interface
uses
    SysUtils;

type
  PDawgNode = ^DawgNode;

  DawgNode = Record
             Letter: Char;
             IsEndOfWord: Boolean;
             Children: PDawgNode;
             Next: PDawgNode;
             Prev: PDawgNode;
             NumChildren: Integer;
             ChildDepth: Integer;
             Offset: Integer;
             Written: Boolean;
             Redirect: PDawgNode;
             RedirNext: PDawgNode;
             Visiting: Boolean;
  End;

  DawgNodeArray = Array of PDawgNode;

  ByteFile = File Of Byte;

  StatusCallback = Procedure(StatusMessage: String);
  ProgressCallback = Procedure(CompletionPct: Integer);

var
   NumNodes, NumVisited, NumWords: Integer;
   InFile: TextFile;
   OutFile: ByteFile;
   Line: String;
   TheRoot: PDawgNode;
   Offset: Integer;
   i,j: Integer;
   RedirNodes: Array[0..15, 0..26] of PDawgNode;
   NumWritten: Integer;
   b: Byte;
   progCallback: ProgressCallback;

procedure AddWord(Word: String; Offset: Integer; var Root: PDawgNode);
procedure WalkTree(StartWord: String; CurrNode: PDawgNode);
procedure ComputeOffsets(InStartNode: PDawgNode; Depth: Integer);
procedure WriteNodeEntry(var OutFile: ByteFile; Node: PDawgNode);
procedure WriteNode(var OutFile: ByteFile; InStartNode: PDawgNode;
          Depth: Integer);
procedure UpdateCounts(StartNode: PDawgNode);
function NodesAreEquivalent(Node1, Node2: PDawgNode) : Boolean;
procedure CreateDawg(inFileName: String; outFileName: String;
        Status: StatusCallback; PCallback: ProgressCallback);

implementation

{ AddWord adds a word to the tree }

procedure AddWord(Word: String; Offset: Integer; var Root: PDawgNode);
var
   PrevNode: PDawgNode;
   CurrNode: PDawgNode;
   NewNode: PDawgNode;

begin
     PrevNode := nil;

{ If this is a new section of the tree, create a new node }
     if Root = nil then
     begin
          NEW(Root);
          Root^.Letter := Word[Offset];
          Root^.IsEndOfWord := false;
          Root^.Children := nil;
          Root^.Next := nil;
          Root^.Prev := nil;
          Root^.Offset := -1;
          Root^.Written := False;
          Root^.Redirect := Nil;
          Root^.Visiting := False;
          CurrNode := Root;

          NumNodes := NumNodes + 1;
     end
     else
     begin
          CurrNode := Root;

          { Search down the current level of the tree and see if the current
            letter in the word is already in the tree at this section. }

          while (CurrNode <> nil) do
          begin
               if CurrNode^.Letter = Word[Offset] then break;

               { If this letter isn't in this part of the tree, add
                 a new node for it. }
               if Word[Offset] < CurrNode^.Letter then
               begin
                    New(NewNode);
                    NewNode^.Letter := Word[Offset];
                    NewNode^.IsEndOfWord := false;
                    NewNode^.Children := nil;
                    NewNode^.Next := CurrNode;
                    NewNode^.Offset := -1;
                    NewNode^.Written := False;
                    NewNode^.Redirect := Nil;
                    NewNode^.Visiting := False;

                    NewNode^.Prev := PrevNode;

                    if PrevNode = Nil then
                    begin
                         Root := NewNode;
                    end
                    else
                    begin
                         PrevNode^.Next := NewNode;
                    end;
                    break;
               end;

               PrevNode := CurrNode;
               CurrNode := CurrNode^.Next;
          end;

          { If we got through the search, the new node belongs at the end }

          if CurrNode = Nil then
          begin
               New(CurrNode);
               CurrNode^.Letter := Word[Offset];
               CurrNode^.IsEndOfWord := false;
               CurrNode^.Children := nil;
               CurrNode^.Next := nil;
               CurrNode^.Prev := PrevNode;
               CurrNode^.Offset := -1;
               CurrNode^.Written := False;
               CurrNode^.Redirect := Nil;
               CurrNode^.Visiting := False;

               PrevNode^.Next := CurrNode;

               NumNodes := NumNodes + 1;
          end;
     end;

     if Offset = Length(Word) then
     begin
          CurrNode^.IsEndOfWord := true;
     end
     else
     begin
          AddWord(Word, Offset+1, CurrNode^.Children);
     end;
end;

{ Walk tree goes through the DAWG and prints out the contents }
procedure WalkTree(StartWord: String; CurrNode: PDawgNode);
begin
     while CurrNode <> Nil do
     begin
          {if CurrNode^.Redirect <> Nil then
          begin
             CurrNode := CurrNode^.Redirect;
          end;}

          if CurrNode.IsEndOfWord then
          begin
               Writeln(StartWord+CurrNode^.Letter);
          end;

          if (CurrNode^.Children <> Nil) and
             (CurrNode^.Children^.Redirect <> Nil) then
             WalkTree(StartWord+CurrNode^.Letter, CurrNode^.Children^.Redirect)
          else
             WalkTree(StartWord+CurrNode^.Letter, CurrNode^.Children);

          CurrNode := CurrNode^.Next;
     end;
end;

{ ComputeOffsets figures out where in the tree file each node will
  be written. The computations need to be done before nodes can be
  written because some nodes may contain a forward reference. }

procedure ComputeOffsets(InStartNode: PDawgNode; Depth: Integer);
var
   CurrNode: PDawgNode;
   StartNode: PDawgNode;

begin

     if InStartNode = Nil then
     begin
          Exit;
     end;

     if InStartNode^.Offset >= 0 then
     begin
          Exit;
     end;

     NumVisited := NumVisited + 1;
     progCallback((NumVisited * 100) div NumNodes);

     StartNode := InStartNode;

     while StartNode^.Prev <> Nil do
     begin
          StartNode := StartNode^.Prev;
     end;

     if Depth > 0 then
     begin
          CurrNode := StartNode;

          while CurrNode <> Nil do
          begin
               CurrNode^.Offset := Offset;
               Offset := Offset + 1;
               CurrNode := CurrNode^.Next;
          end;
     end;

     CurrNode := StartNode;

     while CurrNode <> Nil do
     begin
          if (CurrNode^.Children <> Nil) and
             (CurrNode^.Children^.Redirect <> Nil) then
          begin
               ComputeOffsets(CurrNode^.Children^.Redirect, Depth+1);
          end
          else
          begin
               ComputeOffsets(CurrNode^.Children, Depth+1);
          end;

          CurrNode := CurrNode^.Next;
     end;

     if Depth = 0 then
     begin
          CurrNode := StartNode;

          while CurrNode <> Nil do
          begin
               CurrNode^.Offset := Offset;
               Offset := Offset + 1;
               CurrNode := CurrNode^.Next;
          end;
     end;
end;

{ WriteNodeEntry writes a node to the file }

procedure WriteNodeEntry(var OutFile: ByteFile; Node: PDawgNode);
var
   OutValue: Integer;
   b1, b2, b3, b4: Byte;
begin
     if Node^.Children <> Nil then
     begin
          if Node^.Children^.Redirect <> Nil then
          begin
               OutValue := Node^.Children^.Redirect^.Offset * 1024;
          end
          else
          begin
               OutValue := Node^.Children^.Offset * 1024;
          end
     end
     else
     begin
          OutValue := 0;
     end;

     OutValue := OutValue + Ord(Node^.Letter);

     if Node^.IsEndOfWord then
     begin
          OutValue := OutValue + 256;
     end;

     if Node^.Next = Nil then
     begin
          OutValue := OutValue + 512;
     end;

     b1 := (OutValue shr 24) and 255;
     b2 := (OutValue shr 16) and 255;
     b3 := (OutValue shr 8) and 255;
     b4 := OutValue and 255;

     Write(OutFile, b1);
     Write(OutFile, b2);
     Write(OutFile, b3);
     Write(OutFile, b4);
end;

{ WriteNode traverses the tree and calls WriteNodeEntry to write each node }

procedure WriteNode(var OutFile: ByteFile; InStartNode: PDawgNode;
          Depth: Integer);
var
   CurrNode: PDawgNode;
   StartNode: PDawgNode;

begin

     if InStartNode = Nil then
     begin
          Exit;
     end;

     if InStartNode^.Written then
     begin
          Exit;
     end;

     NumVisited := NumVisited + 1;
     progCallback((NumVisited * 100) div NumNodes);

     StartNode := InStartNode;

     while StartNode^.Prev <> Nil do
     begin
          StartNode := StartNode^.Prev;
     end;

     if Depth > 0 then
     begin
          CurrNode := StartNode;

          while CurrNode <> Nil do
          begin
               WriteNodeEntry(OutFile, CurrNode);
               CurrNode^.Written := True;
               CurrNode := CurrNode^.Next;
          end;
     end;

     CurrNode := StartNode;

     while CurrNode <> Nil do
     begin
          if (CurrNode^.Children <> Nil) and
             (CurrNode^.Children^.Redirect <> Nil) then
          begin
               WriteNode(OutFile, CurrNode^.Children^.Redirect, Depth+1);
          end
          else
          begin
               WriteNode(OutFile, CurrNode^.Children, Depth+1);
          end;

          CurrNode := CurrNode^.Next;
     end;

     if Depth = 0 then
     begin
          CurrNode := StartNode;

          while CurrNode <> Nil do
          begin
               WriteNodeEntry(OutFile, CurrNode);
               CurrNode^.Written := True;
               CurrNode := CurrNode^.Next;
          end;
     end;
end;

{ UpdateCounts computes the number of children and maximum depths for each
  tree node. }
procedure UpdateCounts(StartNode: PDawgNode);
var
   CurrChild: PDawgNode;
   CurrNode: PDawgNode;

begin
     CurrNode := StartNode;

     while CurrNode <> Nil do
     begin
          CurrNode^.NumChildren := 0;
          CurrNode^.ChildDepth := 0;

          NumVisited := NumVisited + 1;
          progCallback((NumVisited * 100) div NumNodes);

          UpdateCounts(CurrNode^.Children);

          CurrChild := CurrNode^.Children;

          while CurrChild <> Nil do
          begin
               CurrNode^.NumChildren := CurrNode^.NumChildren +
                                     CurrChild^.NumChildren;
               if CurrChild^.ChildDepth > CurrNode^.ChildDepth then
               begin
                    CurrNode^.ChildDepth := CurrChild^.ChildDepth;
               end;

               CurrChild := CurrChild^.Next;
          end;

          CurrNode^.ChildDepth := CurrNode^.ChildDepth + 1;
          CurrNode^.RedirNext := RedirNodes[CurrNode^.ChildDepth,
                              Ord(CurrNode^.Letter) - Ord('a')];
          RedirNodes[CurrNode^.ChildDepth, Ord(CurrNode^.Letter) - Ord('a')]
                                           := CurrNode;

          CurrNode := CurrNode^.Next;
     end;
end;

{ NodesAreEquivalend returns true if two nodes are equivalent in depth,
  number of children, and node values. }
function NodesAreEquivalent(Node1, Node2: PDawgNode) : Boolean;
begin
     NodesAreEquivalent := True;

     if Node1 = Node2 then
     begin;
        Exit;
     end;

     if (Node1 = Nil) or (Node2 = Nil) then
     begin
          NodesAreEquivalent := False;
          Exit;
     end;

     if (Node1^.Letter <> Node2^.Letter) or
        (Node1^.IsEndOfWord <> Node2^.IsEndOfWord) or
        (Node1^.NumChildren <> Node2^.NumChildren) or
        (Node1^.ChildDepth <> Node2^.ChildDepth) then
     begin
          NodesAreEquivalent := False;
          Exit;
     end;

     if not NodesAreEquivalent(Node1^.Children, Node2^.Children) then
     begin
          NodesAreEquivalent := False;
          Exit;
     end;

     if not NodesAreEquivalent(Node1^.Next, Node2^.Next) then
     begin
          NodesAreEquivalent := False;
          Exit;
     end;
end;

{ CombineNodes loops through all the nodes and searches for identical ones.
  When it finds an equivalent node, it sets the redirect pointer in the
  other node. The redirect keep the program from trying to change all
  references to the redirected node. When the nodes are written to a file,
  a node with a redirect is not written. }
  
procedure CombineNodes;
var
   CurrNode, CurrCompareNode : PDawgNode;
   i, j: Integer;

begin
     for i := 0 to 15 do
         for j := 0 to 26 do
     begin
          ProgCallback((100 * ((i * 27) + j)) div 432);

          CurrNode := RedirNodes[i,j];

          while CurrNode <> Nil do
          begin
               if CurrNode^.Redirect <> Nil then
               begin
                    CurrNode := CurrNode^.RedirNext;
                    Continue;
               end;

               CurrCompareNode := CurrNode^.RedirNext;

               while CurrCompareNode <> Nil do
               begin
                    if CurrCompareNode^.Redirect <> Nil then
                    begin
                         CurrCompareNode := CurrCompareNode^.RedirNext;
                         Continue;
                    end;


                    if NodesAreEquivalent(CurrNode, CurrCompareNode) then
                    begin
                         CurrCompareNode^.Redirect := CurrNode;
                    end;

                    CurrCompareNode := CurrCompareNode^.RedirNext;
               end;

               CurrNode := CurrNode^.RedirNext;
          end;
     end;
end;

procedure CreateDawg(inFileName: String; outFileName: String;
        Status: StatusCallback; PCallback: ProgressCallback);
var
        i,j: Integer;

begin
     TheRoot := nil;
     NumNodes := 0;
     NumWords := 0;
     ProgCallback := PCallback;

     AssignFile(InFile, inFileName);
     Reset(InFile);

     while not eof(InFile) do
     begin
          Readln(InFile, Line);
          if (NumNodes mod 100) = 0 then
          begin
                Status(Line);
          end;
          AddWord(LowerCase(Line), 1, TheRoot);
          NumWords := NumWords + 1;
     end;

     CloseFile(InFile);

     for i := 0 to 15 do
         for j := 0 to 26 do
           RedirNodes[i, j] := Nil;

     Status('Updating counts');
     NumVisited := 0;
     ProgCallback(0);

     UpdateCounts(TheRoot);

     Status('Combining nodes');
     CombineNodes;

     Offset := 1;

     Status('Writing dictionary');

     AssignFile(OutFile, outFileName);
     Rewrite(OutFile);

     b := 0;
     Write(OutFile, b);
     Write(OutFile, b);
     b := 3;
     Write(OutFile, b);
     b := 0;
     Write(OutFile, b);

     Status('Computing offsets');
     NumVisited := 0;
     ProgCallback(0);
     ComputeOffsets(TheRoot, 0);
     Status('Writing nodes');
     NumVisited := 0;
     ProgCallback(0);
     WriteNode(OutFile, TheRoot, 0);
     ProgCallback(-1);

     CloseFile(OutFile);
     Status('Conversion complete.');
end;

end.
