/* This file is part of cardwords (c) 1998 1999 Tobias Peters see file COPYING for the copyright terms. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ // cardwords_charcombination.hh #ifndef CARDWORDS_CHARCOMBINATION_HH #define CARDWORDS_CHARCOMBINATION_HH /* The purpose of class CardWords_CharCombination: When it comes to search for possible words to lay with a fixed length of characters, we have a certain pool of N characters from which to choose and we know how many of them we need (Z). We then want to iterate over all possible combinations of Z characters chosen from those N characters. The class defined in this file will iterate through all possible combinations for us. When created (constructor), we have to tell it about the pool of possible characters (this is done in the form of a const CardWords_String &). It is not necessary to tell how long the combinations should be, the CardWords_CharCombinations object will calculate all combinations from length 1 to pool.length(). This is different from the previous implementation and saves recalculation for every single table position. After the creation there are CardWords_CharCombination[i].size() combinations with length (i+1). They are not CardWords_Strings, but only hash_keys of type hashing_type, because thats all we need. they are accessible through CardWords_CharCombinations[i][j] whith 0 <= j <= CardWords_CharCombination[i].size(). If there are many wildcards (many may also be 2 or even 1), it may be that calculating possible combinations is an unnecessary overhead. Consider there are 5,000 - 10,000 words with length 5,6,7, contained in a hash table in as many buckets or less. Consider there are 2 Wildcards, each of the wildcard can take N meanings. They can form \sum_{n=1}^N{n} or N(N+1)/2 different combinations. For a german charset (N=30) that's 465 combinations only for the wildcards. Then, for a 5-letter-word, we also need 3 from the remaining 5 non-wildcard cards. That's another 10 combinations. Together we have about 4,000 combinations to check. That's not far from the total number of words we have in the dictionary at all. It might be better, not to calculate combinations (and store them!), but to search every word in the dictionary and test if it fits. At the moment, I am uncertain where to place the border, at 2 or 3 wildcards, or even 1? A generic aproach seems to be the best to me now. Nothing of this is implemented yet. */ #include "cardwords_string.hh" #include #include #include template class CardWords_CharCombination : public vector< vector< hashing_type > > { public: // This constructor is for Cards without wildcards: CardWords_CharCombination ( const CardWords_String & possibleChars); // This constructor is for Cards with wildcards: CardWords_CharCombination ( const CardWords_String & possibleChars, // w/o wildcards size_t no_of_wildcards); private: // combinate is for initialisation and directly called from the constructor. // It is a recursive (and cascading) function and needs to know about how // deep the recursion has gone already. This is done in this way: // // combinate gets a param size_t length_so_far. That's the length of the // combination that the calling function generated. It is at the same time // the index of the parent class vector where this instance has to store its // calculated combinations of length length_so_far+1. Another param is // size_t next_usable_index. This is the index of the first CardWords_Char // in sorted_pool that this instance of combinate is allowed to use. void combinate (const hashing_type & combination_so_far, size_t length_so_far, size_t next_usable_index, const CardWords_String & sorted_pool); // the number of available chars: const size_t noAvail; } ; template inline CardWords_CharCombination::CardWords_CharCombination ( const CardWords_String & pool) : vector< vector< hashing_type > >(pool.length()), noAvail(pool.length()) { assert (noAvail > 0); size_t i; size_t len_over_i = 1; // holds the binominal coeficient noAvail over (i+1) for (i = 0; i < noAvail; i++) { len_over_i *= noAvail - i; len_over_i /= i + 1; (*this)[i].reserve(len_over_i); } CardWords_String sorted_pool = pool; sorted_pool.sort(); hashing_type combination_so_far; combinate (combination_so_far, 0U, 0U, sorted_pool); } // combinate is for initialisation and directly called from the constructor. // It is a recursive (and cascading) function and needs to know about how // deep the recursion has gone already. This is done in this way: // // combinate gets a param size_t length_so_far. That's the length of the // combination that the calling function generated. It is at the same time the // index of the parent class vector where this instance has to store its // calculated combinations of length length_so_far+1. Another param is // size_t next_usable_index. This is the index of the first CardWords_Char in // sorted_pool that this instance of combinate is allowed to use. template inline void CardWords_CharCombination:: combinate (const hashing_type & combination_so_far, size_t length_so_far, size_t next_usable_index, const CardWords_String & sorted_pool) { CardWords_Char used_char = CardWords_Char::blanko; // blankos are not used // in sorted_pool. // This way we asure that the // if-condition will take the // else-branch first. size_t & vector_index = length_so_far; // as mentioned above, these two // semantically different sizes // always have the same value in // this function, because a // combination with length k+1 will // be stored in (*this)[k]. for (size_t pool_index = next_usable_index; pool_index < noAvail; // number of available chars. ++pool_index) { if (sorted_pool[pool_index] == used_char) { // This means sorted_pool[pool_index] == sorted_pool[pool_index-1]. This // would not lead to a new combination. continue; } else { // combination_so_far + sorted_pool[pool_index] gives a new, valid // combination. Store it: used_char = sorted_pool[pool_index]; (*this)[vector_index].push_back(combination_so_far); (*this)[vector_index].back() += used_char; combinate ((*this)[vector_index].back(), length_so_far + 1, pool_index + 1, sorted_pool); } } } template inline CardWords_CharCombination::CardWords_CharCombination ( const CardWords_String & pool, size_t no_of_wildcards) : vector< vector< hashing_type > >(pool.length() + no_of_wildcards), noAvail(pool.length() + no_of_wildcards) { // If there are non-wildcards, store all combinations of the non_wildcards // here: CardWords_CharCombination * no_wilds = 0; if (pool.length() > 0) { // Create the charcomnbination for the non-wildcards no_wilds = new CardWords_CharCombination(pool); if (no_wilds == 0) { throw CardWords_XLowResources (gettext("Out of memory in constructor "\ "for CardWords_CharCombination.") ); } } { // A new block, it can free a lot of memory when left // Generate the first (no_of_wildcards) entrys: size_t wildcard_index; size_t prevComb_index; size_t no_of_prevComb; CardWords_Char meaning; vector vect[2]; size_t prevComb_vect = 0; size_t act_vect = 1; hashing_type hash_key; // Previous combination was the empty string: vect[prevComb_vect].push_back(CardWords_String()); for ((wildcard_index = 0),(act_vect = 1),(prevComb_vect = 0); wildcard_index < no_of_wildcards; (++wildcard_index),(prevComb_vect=act_vect),(act_vect=1-act_vect)) { no_of_prevComb = vect[prevComb_vect].size(); vect[act_vect].erase(vect[act_vect].begin(), vect[act_vect].end()); for (prevComb_index = 0; prevComb_index < no_of_prevComb; ++prevComb_index) { // Set the start value for the following forloop: // either the successor of CardWords_Char::blanko or the last char of // the actual previous combination. if (wildcard_index == 0) { meaning = CardWords_Char::blanko; ++meaning; } else { meaning = vect [prevComb_vect] [prevComb_index] [wildcard_index-1]; } hash_key = vect[prevComb_vect] [prevComb_index]; for(; meaning != CardWords_Char::blanko; ++meaning) { hash_key += meaning; (*this)[wildcard_index].push_back(hash_key); vect[act_vect].push_back(vect[prevComb_vect] [prevComb_index]); vect[act_vect].back() += meaning; hash_key -= meaning; } } } // We have now created all possible combinations consisting only of // wildcards. // We do not need those vectors any more: } // Now Add the non-wildcards: if (no_wilds != 0) { if (no_of_wildcards == 0) { *(vector< vector< hashing_type > >*)this = *no_wilds; } else { set hash_value_set; // A set for uniquifying the hash // values. set::const_iterator set_iter; hashing_type hash_key; size_t wildcomb_count; size_t wildcomb_index; size_t nonwild_count; size_t nonwild_index; size_t nonwildcomb_count; size_t nonwildcomb_index; wildcomb_count = (*this)[no_of_wildcards - 1].size(); nonwild_count = pool.length(); for (nonwild_index = 0; nonwild_index < nonwild_count; ++nonwild_index) { nonwildcomb_count = (*no_wilds)[nonwild_index].size(); for (nonwildcomb_index = 0; nonwildcomb_index < nonwildcomb_count; ++nonwildcomb_index) { for (wildcomb_index = 0; wildcomb_index < wildcomb_count; ++wildcomb_index) { hash_key = (*this)[no_of_wildcards - 1][wildcomb_index]; hash_key += (*no_wilds)[nonwild_index][nonwildcomb_index]; hash_value_set.insert(hash_key); } } // copy the contents of the set in the proper vector: for (set_iter = hash_value_set.begin(); set_iter != hash_value_set.end(); ++set_iter) { (*this)[no_of_wildcards + nonwild_index].push_back(*set_iter); } hash_value_set.erase(hash_value_set.begin(), hash_value_set.end()); } } delete no_wilds; } } #endif