sourcecode

Wednesday, November 14, 2012

Word evolution

/**26 Answers
Given a source string and a destination string write a program to display 
sequence of strings to travel from source to destination. Rules for traversing:
1. You can only change one character at a time 
2. Any resulting word has to be a valid word from dictionary
Example: Given source word CAT and destination word DOG , one of the valid 
sequence would be 
CAT -> COT -> DOT -> DOG
Another valid sequence can be 
CAT -> COT - > COG -> DOG

One character can change at one time and every resulting word has be a valid 
word from dictionary

- Dee on November 06, 2012 in United States | Report Duplicate 
*/

/**This is a graph problem. We define "neighbor" as the words off by only 
one letter.
First step, find all the neighbors of the source word. Then find the neighbors
of the neighbors... and so on, until one of them is the destination word.
*/

/**The function below has memory leak... pretty bad that sometime it eats up
1G memory...

The dictionary I used was from Ubuntu 12.04 LTS, /usr/share/dict/words
*/
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <sstream>
#include <memory>
#include <set>
#include <deque>
using namespace std;
set<string> dictParser(string fileName = "words.txt"){
  ifstream dictFile(fileName);
  set<string> result;
  if (!dictFile.good()) return result;

  dictFile.seekg (0, ios::end);
  size_t const fileLength = dictFile.tellg();
  dictFile.seekg (0, ios::beg);

  char* readInBuffer = new char[fileLength];
  dictFile.read(readInBuffer, fileLength);
  stringstream allWords(readInBuffer);
  delete[] readInBuffer;
  dictFile.close();

  string word;
  while(!allWords.eof()){
    allWords >> word;
    result.insert(word);
  }
  return result;
}

class Node{
public:
  Node(string name):word(name),parent(0){}
  string word;
  vector<Node* > children;
  Node* parent;
};

bool off_by_one(string const& s1, string const& s2){
  if (s1.length() != s2.length()) return false;
  int counter = 0;
  for(int ii = 0; ii < s1.length(); ++ii){
    if (s1[ii] != s2[ii]) ++counter;
  }
  return (1 == counter);
}
vector<string> mutationPath(string const& source, string& const dest, 
  set<string>& dict){

  vector<string> result;
  Node* sp = new Node(source);
  dict.erase(source);
  deque<Node* > frontier;
  frontier.push_back(sp);
  while(!frontier.empty()){
    Node* node = frontier.front(); frontier.pop_front();
    string const word = node->word;
    for (auto it = dict.begin(); it != dict.end();){
      string const neighbor(*it);
      if (neighbor.size() != dest.size()){dict.erase(it++); continue;}
      if (off_by_one(word, neighbor)){
        if (dest == neighbor){
          result.push_back(neighbor);
          while(node){
            result.push_back(node->word);
            node = node->parent;
          }
          return vector<string>(result.rbegin(),result.rend());
        }
        dict.erase(it++);
        auto it_n = new Node(neighbor);
        node->children.push_back(it_n);
        node->children.back()->parent = node;
        frontier.push_back(it_n);
      }else ++it;
    }//for it
  }//while not empty
  return result;
  //--------------------------
}

int main(){
  set<string> dict(dictParser());//it is sorted
  cout<<dict.size()<<endl;

  string sourceWord("cat");
  string destWord("dog");

  vector<string> path(mutationPath(sourceWord, destWord, dict));
  for(int ii = 0; ii < path.size(); ++ii){
    cout << path[ii] <<endl;
  }
  return 0;

}
 
And here is an improved version using smart pointers that avoids memory leak:
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <sstream>
#include <memory>
#include <set>
#include <deque>
using namespace std;
set<string> dictParser(string fileName = "words.txt"){
  ifstream dictFile(fileName);
  set<string> result;
  if (!dictFile.good()) return result;

  dictFile.seekg (0, ios::end);
  size_t const fileLength = static_cast<size_t>(dictFile.tellg());
  dictFile.seekg (0, ios::beg);

  shared_ptr<char> readInBuffer(new char[fileLength]);
  dictFile.read(readInBuffer.get(), fileLength);
  stringstream allWords(readInBuffer.get());
  //delete[] readInBuffer;
  dictFile.close();

  string word;
  while(!allWords.eof()){
    allWords >> word;
    result.insert(word);
  }
  return result;
}

class Node{
public:
  Node(string name):word(name){}
  string word;
  vector<shared_ptr<Node> > children;
  weak_ptr<Node> parent;
};

template <typename T>
int edit_distance(T const& s1, T const& s2){
  int const s1s = s1.size(), s2s = s2.size();
  vector<vector<int>> matrix(s1s+1, vector<int>(s2s+1));
  //s2 horizontal, s1 vertical
  for(int ii = 0; ii <= s1s; ++ii) matrix[ii][0] = ii;
  for(int ii = 0; ii <= s2s; ++ii) matrix[0][ii] = ii;
  for(int ii = 1; ii <= s1s; ++ii){
    for(int jj = 1; jj <= s2s; ++jj){//scan from left to right
      //line by line
      matrix[ii][jj] = min(min(matrix[ii-1][jj], matrix[ii][jj-1])+1,
        matrix[ii-1][jj-1]+static_cast<int>(s1[ii-1] != s2[jj-1]));
    }//for jj
  }//for ii
  return matrix[s1s][s2s];
}

bool off_by_one(string const& s1, string const& s2){
  if (s1.length() != s2.length()) return false;
  int counter = 0;
  for(unsigned int ii = 0; ii < s1.length(); ++ii){
    if (s1[ii] != s2[ii]) ++counter;
  }
  return (1 == counter);
}
vector<string> mutationPath(string const& source, string const& dest, 
  set<string>& dict){

  vector<string> result;
  shared_ptr<Node> sp(new Node(source));
  dict.erase(source);
  deque<shared_ptr<Node> > frontier;
  frontier.push_back(sp);
  while(!frontier.empty()){
    shared_ptr<Node> node(frontier.front()); frontier.pop_front();
    string const word = node->word;
    for (auto it = dict.begin(); it != dict.end();){
      string const neighbor(*it);
      if (neighbor.size() != dest.size()){dict.erase(it++); continue;}
      if (off_by_one(word, neighbor)){
        if (dest == neighbor){
          result.push_back(neighbor);
          while(node){
            result.push_back(node->word);
            node = node->parent.lock();
          }
          return vector<string>(result.rbegin(),result.rend());
        }
        dict.erase(it++);
        shared_ptr<Node> it_n(new Node(neighbor));
        node->children.push_back(it_n);
        node->children.back()->parent = node;
        frontier.push_back(it_n);
      }else ++it;
    }//for it
  }//while not empty
  return result;
  //--------------------------
}

int main(){
  set<string> dict(dictParser());//it is sorted
  cout<<dict.size()<<endl;

  string sourceWord("cat");
  string destWord("dog");

  vector<string> path(mutationPath(sourceWord, destWord, dict));
  for(int ii = 0; ii < path.size(); ++ii){
    cout << path[ii] <<endl;
  }
  return 0;

}

No comments: