John B. Matthews, M.D.

Return home.


Jumble.

Algorithm 1: Using Hashed_Maps & Ordered_Sets
Collisions: Using Ordered_Maps
Algorithm 2: Using Permutations


Jumble is an Ada program designed to unscramble jumbled words. You can use it to solve word puzzles; or, when making your own puzzles, you can check that a jumbled word doesn't unscramble to more than one word. You can build Jumble from the command line with gnatmake:
gnatmake jumble
To unscramble the word "aelpt", for example, enter this command:
jumble aelpt
The result should look something like this:
leapt: leapt palet patel pelta petal plate pleat tepal 
You can include more than one word on the command line. If you don't supply a word, the program prints all entries with more than one permutation. You may need to change the value of File_Name if your dictionary is located elsewhere on the system. A similar implementation in Java may be found here.

The two versions of Jumble shown below implement the algorithms discussed here.

Algorithm 1: The first version constructs a hashed map of dictionary entries. Each unique key is comprised of the sorted characters of a dictionary word; the matching element is an ordered set of words each having a different permutation of the same characters. Building such a map is slightly more difficult, but permutations of arbitrarily large words can then be found in constant time. By iterating over the map, you can find words with a high number of permutations, e.g. "caret" and "ester".

--------------------------------------------------------
--
-- Jumble: Unscramble jumbled words.
--
-- John B. Matthews, M.D., 10-Jun-2009
--
-- Distribution: per GPL
--
--------------------------------------------------------
with Ada.Characters.Handling;
with Ada.Command_Line;
with Ada.Containers.Generic_Array_Sort;
with Ada.Containers.Hashed_Maps;
with Ada.Containers.Ordered_Sets;
with Ada.Strings.Bounded;
with Ada.Strings.Bounded.Hash;
with Ada.Text_IO;
with Ada.Integer_Text_IO;

procedure Jumble is

   Max_Word  : constant Positive := 24;
   Max_Count : constant Ada.Containers.Count_Type := 250_000;
   File_Name : constant String := "/usr/share/dict/words";
   
   package ACH renames Ada.Characters.Handling;
   package ACL renames Ada.Command_Line;
   package ASB is new Ada.Strings.Bounded.Generic_Bounded_Length(Max_Word);
      use type ASB.Bounded_String;

   procedure Sort is new Ada.Containers.Generic_Array_Sort(
      Index_Type   => Positive,
      Element_Type => Character,
      Array_Type   => String);

   package ACOS is new Ada.Containers.Ordered_Sets(
      Element_Type => ASB.Bounded_String);
   type Word_Set is access ACOS.Set;

   function Hash is new Ada.Strings.Bounded.Hash(ASB);
      use type Ada.Containers.Hash_Type;
   package ACHM is new Ada.Containers.Hashed_Maps(
      Key_Type => ASB.Bounded_String,
      Element_Type => Word_Set,
      Hash => Hash,
      Equivalent_Keys => "=");

   Word_Map : ACHM.Map;

   -- Sort the characters of Str returning a lower case Key
   function Sort(Str : String) return ASB.Bounded_String is
      Key : String(1 .. Str'Length) := ACH.To_Lower(Str);
   begin
      Sort(Key);
      return ASB.To_Bounded_String(Key);
   end Sort;

   -- Read dictionary words up to Max_Word in length
   -- Map character-sorted words to permutatively equivalent words
   procedure Read_Dictionary(Word_Map : in out ACHM.Map) is
      Dict_File : Ada.Text_IO.File_Type;
      Line : String (1 .. 32);
      Last : Natural;
      Word : ASB.Bounded_String;
      Sorted : ASB.Bounded_String;
      Position : ACHM.Cursor;
      Inserted : Boolean;
      Set : Word_Set;
   begin
      Ada.Text_IO.Open(Dict_File, Ada.Text_IO.In_File, File_Name);
      while not Ada.Text_IO.End_Of_File(Dict_File) loop
         Ada.Text_IO.Get_Line (Dict_File, Line, Last);
         if Last <= Max_Word then
            Word := ASB.To_Bounded_String(Line(1 .. Last));
            Sorted := Sort(Line(1 .. Last));
            Word_Map.Insert(Sorted, null, Position, Inserted);
            if Inserted then
               Set := new ACOS.Set;
               Set.Insert(Word);
               Word_Map.Replace_Element(Position, Set);
            else
               Set := ACHM.Element(Position);
               Set.Insert(Word);
            end if;
         end if;
      end loop;
      Ada.Text_IO.Close(Dict_File);
   end Read_Dictionary;

   -- Print each word in a set.
   procedure Print_Word(Position : in ACOS.Cursor) is
      Item : ASB.Bounded_String := ACOS.Element(Position);
   begin
      Ada.Text_IO.Put(ASB.To_String(Item) & " ");
   end Print_Word;

   -- Find Str in Word_Map; print permutatively equivalent words.
   procedure Show_Results(Word_Map : ACHM.Map; Str : in String) is
      Word : ASB.Bounded_String := Sort(Str);
      Set : Word_Set;
   begin
      Ada.Text_IO.Put(Str & ": ");
      if Word_Map.Contains(Word) then
         Set := Word_Map.Element(Word);
         Set.Iterate(Print_Word'Access);
         Ada.Text_IO.New_Line;
      else
         Ada.Text_IO.Put_Line("no match.");
      end if;
   end Show_Results;

   -- Print each entry having more than one word in its set.
   procedure Print_All(Position : in ACHM.Cursor) is
      Set : Word_Set := ACHM.Element(Position);
      Length : Natural := Natural(Set.Length);
   begin
      if Length > 1 then
         Ada.Integer_Text_IO.Put(Length, 0);
         Ada.Text_IO.Put(" ");
         Set.Iterate(Print_Word'Access);
         Ada.Text_IO.New_Line;
      end if;
   end Print_All;

begin
   Word_Map.Reserve_Capacity(Max_Count);
   Read_Dictionary(Word_Map);
   Ada.Text_IO.Put_Line("Checking" &
      Natural'Image(Natural(Word_Map.Length)) & " entries.");
   if ACL.Argument_Count = 0 then
      Word_Map.Iterate(Print_All'Access);
   else
      for i in 1 .. ACL.Argument_Count loop
         if ACL.Argument(i)'Length <= Max_Word then
            Show_Results(Word_Map, ACL.Argument(i));
         end if;
      end loop;
   end if;
end Jumble;
Collisions: Here is a program that examines collisions in a hashed table. In the program's output, the row labels represent occupancy classes. Row zero is unused table entries; row one is table entries occupied by a single word; row two is table entries occupied by two words; etc. Next to each count is the percentage of words hashed to that occupancy class. In the example shown, 53.8% of the 234,936 words have unique hashes; in the worst case, eight words hash to the same table entry. See also Hash Table.
./collisions
/usr/share/dict/words
Word count: 234936
Table size: 393241
Load factor: 59.74%
 0: 218461 (0.00%)
 1: 126393 (53.80%)
 2: 38524 (32.80%)
 3: 8228 (10.51%)
 4: 1400 (2.38%)
 5: 203 (0.43%)
 6: 29 (0.07%)
 7: 2 (0.01%)
 8: 1 (0.00%)


------------------------------------------------------------------
--
--  Collisions
--  Count collisions that would occur in an instance of
--  Ada.Containers.Hashed_Maps using Ada.Strings.Bounded.Hash
--
--  Author: John B. Matthews
--  Last Modified: 4-Sep-2013
--
------------------------------------------------------------------

pragma Warnings("I"); -- allow internal unit, Prime_Numbers
with Ada.Containers;
with Ada.Containers.Ordered_Maps;
with Ada.Containers.Prime_Numbers;
with Ada.Containers.Vectors;
with Ada.Float_Text_IO;
with Ada.Strings.Bounded.Hash;
with Ada.Text_IO;

procedure Collisions is

   Max_Word  : constant Positive := 26;
   File_Name : constant String := "/usr/share/dict/words";

   package ASB is new Ada.Strings.Bounded.Generic_Bounded_Length(Max_Word);
      use type ASB.Bounded_String;
   package Container is new Ada.Containers.Vectors (NaturalASB.Bounded_String);
   function Hash is new Ada.Strings.Bounded.Hash(ASB);
      use type Ada.Containers.Hash_Type;
   package ACOM is new Ada.Containers.Ordered_Maps(
      Key_Type => Natural,
      Element_Type => Natural);
   type Table_Array is array(Natural range <>) of Natural;
   type Table_Array_Ptr is access Table_Array;

   Word_List : Container.Vector;
   Word_Count : Ada.Containers.Count_Type;
   Table : Table_Array_Ptr;
   Counts : ACOM.Map;

   procedure Show_Counts(Position : in ACOM.Cursoris
      Key : Natural := ACOM.Key(Position);
      Element : Natural := ACOM.Element(Position);
      Percent : Float := Float (Key * Element) / Float(Word_Count);
   begin
      Ada.Text_IO.Put(Natural'Image(Key) & ":");
      Ada.Text_IO.Put(Natural'Image(Element) & " (");
      Ada.Float_Text_IO.Put(Percent * 100.0120);
      Ada.Text_IO.Put_Line("%)");
   end Show_Counts;

begin
   Ada.Text_IO.Put_Line(File_Name);
   -- Read the dictionary into Word_List
   declare
      Dict_File : Ada.Text_IO.File_Type;
      Line : String (1 .. Max_Word);
      Last : Natural;
      Word : ASB.Bounded_String;
   begin
      Ada.Text_IO.Open(Dict_FileAda.Text_IO.In_FileFile_Name);
      while not Ada.Text_IO.End_Of_File(Dict_Fileloop
         Ada.Text_IO.Get_Line (Dict_FileLineLast);
         if Last <= Max_Word then
            Word := ASB.To_Bounded_String(Line(1 .. Last));
            Word_List.Append(Word);
         end if;
      end loop;
      Ada.Text_IO.Close(Dict_File);
   end;
   Word_Count := Word_List.Length;
   -- Hash Word_List elements into Table
   declare
      Last : Ada.Containers.Hash_Type :=
         Ada.Containers.Prime_Numbers.To_Prime(Word_Count) - 1;
      Index : Natural;
      Load_Factor : Float;
   begin
      Table := new Table_Array(0 .. Natural(Last));
      Table.all := (others => 0);
      for I in Word_List.First_Index .. Word_List.Last_Index loop
         Index := Natural(Hash(Word_List.Element(I)) mod Table'Length);
         Table(Index) := Table(Index) + 1;
      end loop;
      Ada.Text_IO.Put_Line("Word count:" & Natural'Image(Natural(Word_Count)));
      Ada.Text_IO.Put_Line("Table size:" & Natural'Image(Table'Length));
      Load_Factor := Float(Word_Count) / Float(Table'Length);
      Ada.Text_IO.Put("Load factor: ");
      Ada.Float_Text_IO.Put(Load_Factor * 100.0120);
      Ada.Text_IO.Put_Line("%");
   end;
   -- Tally results
   declare
      Position : ACOM.Cursor;
      Inserted : Boolean;
      Value : Natural;
   begin
      for I in Table'Range loop
         Counts.Insert(Table(I), 1PositionInserted);
         if not Inserted then
            Value := ACOM.Element(Position) + 1;
            Counts.Replace(Table(I), Value);
         end if;
      end loop;
      Counts.Iterate(Show_Counts'Access);
   end;
end Collisions;
Algorithm 2: The second version of Jumble reads the dictionary into memory in the procedure Read_Dictionary, storing entries in the variable List. Also, you may need to increase the value of Max_List if your dictionary is especially large. Read_Dictionary only reads words that are Max_Word characters or less in length. Max_Word is set to 10, since a 10 character string has 10! = 3,628,800 permutations. Longer words take factorially longer to process.

Next, Permute_String finds each of the possible permutations of the word and calls Look_Up to do a binary search of the dictionary list. If the word is found, Permute_String stores it in Found.

Finally, the list of Found words is sorted (so we can skip duplicates) and printed.

No more excuses; get puzzling!

--------------------------------------------------------
--
-- Jumble: Unscramble jumbled words.
--
-- John B. Matthews, M.D., 08/01/2003
--
-- Distribution: per GPL
--
--------------------------------------------------------
with Ada.Characters.Handling;
with Ada.Command_Line;
with Ada.Strings.Bounded;
with Ada.Text_IO;
with GNAT.Bubble_Sort_G;

procedure Jumble is

Max_Word  : constant Positive := 10;
Max_Found : constant Positive := 32;
Max_List  : constant Positive := 250_000;
File_Name : constant String := "/usr/share/dict/words";

package Words is new Ada.Strings.Bounded.Generic_Bounded_Length(Max_Word);
  use type Words.Bounded_String;
type Words_Array is array (Natural range <>) of Words.Bounded_String;
type Words_Ptr is access Words_Array;

List       : Words_Ptr := new Words_Array (1 .. Max_List);
Last_Word  : Natural; -- Last word read into List.
Found      : Words_Array (0 .. Max_Found);
Last_Found : Natural; -- Last word in Found.

  -- Read the dictionary for words up to Max_Word in length
  procedure Read_Dictionary (List : Words_Ptr; Count : out Natural) is
    Dict_File : Ada.Text_IO.File_Type;
    Line : String (1 .. 30);
    Last : Natural;
  begin
    Count := 0;
    Ada.Text_IO.Open(Dict_File, Ada.Text_IO.In_File, File_Name);
    while not Ada.Text_IO.End_Of_File(Dict_File) loop
      Ada.Text_IO.Get_Line (Dict_File, Line, Last);
      if Last <= Max_Word then
        Count := Count + 1;
        Line(1 .. Last) := Ada.Characters.Handling.To_Lower(Line(1 .. Last));
        List(Count) := Words.To_Bounded_String(Line(1 .. Last));
      end if;
    end loop;
    Ada.Text_IO.Close(Dict_File);
  end Read_Dictionary;

  -- Binary search of List for Word; return position or 0 if not found.
  function Look_Up (List : Words_Ptr; Word : Words.Bounded_String) return Natural is
    Lower  : Natural := List'First;
    Middle : Natural;
    Upper  : Natural := Last_Word;
    Found  : Boolean;
  begin
    loop
      Middle := (Lower + Upper) / 2;
      Found := Word = List(Middle);
      if Word > List(Middle) then
        Lower := Middle + 1;
      else
        Upper := Middle - 1;
      end if;
      exit when Found or Lower > Upper;
    end loop;
    if Found then
      return Middle;
    else
      return 0;
    end if;
  end Look_Up;

  -- Recursively permute Str starting at position K.
  procedure Permute_String (Str : in String; K : Positive := 1) is
    S : String(Str'Range) := Str;
    N : Positive := S'Length;
    B : Words.Bounded_String;
    temp : Character;
  begin
    if K = N then
      B := Words.To_Bounded_String(S);
      if Look_Up(List, B) > 0 then -- Save in Found
        Last_Found := Last_Found + 1;
        Found(Last_Found) := B;
      end if;
    else
      for i in K .. N loop
        temp := S(i);
        S(i) := S(K);
        s(K) := temp;
        Permute_String(S, K + 1);
      end loop;
    end if;
  end Permute_String;
  
  -- Used by generic sort
  procedure Assign (From, To : Natural) is
  begin
    Found(To) := Found(From);
  end Assign;

  -- Used by generic sort
  function Compare (Left, Right : Natural) return Boolean is
  begin
    return Found(Left) < Found(Right);
  end Compare;

  -- Bubble sort is fine for this short list
  package Found_Sort is new GNAT.Bubble_Sort_G(Assign, Compare);

  -- Sort and display results, skipping duplicates
  procedure Show_Results (Str : in String) is
    Last_Out: Words.Bounded_String := Words.To_Bounded_String("");
  begin
    if Last_Found = 0 then
      Ada.Text_IO.Put_Line(Str);
    else
      Found_Sort.Sort(Last_Found);
      for i in 1 .. Last_Found loop
        if Found(i) /= Last_Out then
          Ada.Text_IO.Put_Line(Words.To_String(Found(i)));
          Last_Out := Found(i);
        end if;
      end loop;
    end if;
  end Show_Results;

begin
  Read_Dictionary(List, Last_Word);
  Ada.Text_IO.Put_Line("Checking" & Natural'Image(Last_Word) & " entries.");
  for i in 1 .. Ada.Command_Line.Argument_Count loop
    if Ada.Command_Line.Argument(i)'Length > Max_Word then
      Ada.Text_IO.Put_Line(Ada.Command_Line.Argument(i) & " is too long!");
    else
      Last_Found := 0;
      Permute_String (Ada.Command_Line.Argument(i));
      Show_Results (Ada.Command_Line.Argument(i));
    end if;
  end loop;
end Jumble;

Copyright 1999, 2003, 2009 John B. Matthews
Distribution permitted under the terms of the GPL: http://www.gnu.org/copyleft/gpl.html.
Last updated 3-Sep-2013
Return home.