Compare commits

...

10 Commits

3 changed files with 450 additions and 64 deletions

View File

@@ -1,2 +1,3 @@
# Coding_for_Crosswords_Python_Version
Python implementation of the C++ crossword solver from [here](https://codingforcrosswords.com/)

227
a.c
View File

@@ -2,12 +2,15 @@
#include <string> // support for strings
#include <vector> // support for vectors
#include <unordered_map> // support for hash-tables
#include <unordered_set>
#include <assert.h>
#include <fstream> // support for reading files
using namespace std;
// For compiling C++ code
// g++ a.c -o a
// with optimizations
// g++ a.c -O2 -o a
string ToUpper(string s) {
string s2;
@@ -17,6 +20,20 @@ string ToUpper(string s) {
return s2;
}
// --------------------------------------------------------------------------------
// Hash set of strings; a set cannot hold the same string twice.
using StringSet = unordered_set<string>;

// Reports whether 's' is already a member of 'set'.
bool ExistsInSet(const StringSet& set, const string& s) {
  return set.count(s) != 0;
}

// Inserts 's' into 'set'.
// Precondition (checked by assert): 's' is not already a member.
void AddToSet(StringSet& set, const string& s) {
  const bool inserted = set.insert(s).second;
  assert(inserted);
  (void)inserted;  // silences the unused warning when asserts are compiled out
}
// --------------------------------------------------------------------------------
//-Point
struct Point {
@@ -37,7 +54,14 @@ ostream& operator<<(ostream& os, const Point& p) { // for printing
//-Span
struct Span {
Span(Point p, int l, bool v) : point(p), len(l), vert(v) {}
// Returns the grid coordinate of the i-th box of this span, counted from the
// span's starting point. 'i' must satisfy 0 <= i < len (asserted).
Point GetPoint(int i) const {
  assert(i >= 0 && i < len);
  // A vertical span advances along rows, a horizontal one along columns.
  const int row_step = vert ? i : 0;
  const int col_step = vert ? 0 : i;
  return Point(point.row + row_step, point.col + col_step);
}
friend ostream& operator<<(ostream& os, const Span& s);
Point point;
@@ -72,13 +96,13 @@ public:
delete w;
}
}
void FindWord(const string& s) const { // references are prefered instead of copies
// Returns NULL if no word matches the given pattern
const Words* FindWord(const string& s) const { // references are prefered instead of copies
auto it = word_map_.find(s);
if (it != word_map_.end()) {
for (const Word* w : it->second) {
cout << " " << w->word;
}
cout << "\n";
return &it->second; // address of the vector of words
} else {
return NULL;
}
}
bool IsWord(string s) const {
@@ -88,7 +112,6 @@ public:
} else {
return true;
}
//return word_map_.count(s) > 0; // True if word exists
}
void ComputeStats() {
assert(counts_.empty());
@@ -112,37 +135,34 @@ public:
}
// Registers 'w' in word_map_ under every pattern obtained by replacing any
// subset of its letters with '-' (2^len keys, from the word itself up to the
// all-dash pattern). Words longer than 7 letters are skipped to keep the
// number of generated keys small.
void CreatePatternHash(Word* w) {
  const int len = w->len();
  if (len > 7) {
    return;
  }
  const int num_patterns = 1 << len;
  for (int mask = 0; mask < num_patterns; ++mask) {
    string pattern = w->word;
    // Bit 'pos' of 'mask' decides whether letter 'pos' is blanked out.
    for (int pos = 0; pos < len; ++pos) {
      if (mask & (1 << pos)) {
        pattern[pos] = '-';
      }
    }
    word_map_[pattern].push_back(w);
  }
}
void ReadFromFile(string filename) {
void ReadFromFile(string filename, int max_size) {
ifstream f;
f.open(filename);
while (f.is_open() && !f.eof()) { // check for the file!
string line;
getline(f, line);
// cout << line << "\n";
if (!line.empty()) {
line = ToUpper(line);
int len = line.length();
if (line[len - 1] == '\r') {
line = line.substr(0, len - 1);
}
Word* w = new Word(line);
words_.push_back(w); // Word would be allocated on the heap
CreatePatternHash(w);
if (line.length() <= max_size) {
Word* w = new Word(line);
words_.push_back(w); // Word would be allocated on the heap
CreatePatternHash(w);
}
}
}
cout << "Read " << words_.size() << " words from file '"
@@ -160,6 +180,21 @@ private: // _ is used to indicate privacy
vector<int> counts_;
};
Library lib; // not ideal, but it is not so bad for this application
// --------------------------------------------------------------------------------
//-Attr
// Summary of what a span currently contains: blanks ('-'), letters, or both.
// Both flags start out false.
struct Attr {
  bool has_letters = false;
  bool has_blanks = false;

  // Blanks only.
  bool is_empty() const { return !has_letters && has_blanks; }
  // A mix of blanks and letters.
  bool is_partial() const { return has_letters && has_blanks; }
  // Letters only.
  bool is_full() const { return has_letters && !has_blanks; }
};
// --------------------------------------------------------------------------------
//-Grid
struct Grid {
Grid(string n) {
name = n;
@@ -172,12 +207,17 @@ struct Grid {
return lines[0].size();
}
}
// Larger of the grid's two dimensions.
int max_size() const {
  const int r = rows();
  const int c = cols();
  return (r < c) ? c : r;
}
// Reads the character stored in the box at 'p'.
// Precondition (asserted): 'p' lies inside the grid.
char box(const Point& p) const {
  assert(in_bounds(p));
  const auto& row = lines[p.row];
  return row[p.col];
}
// Stores character 'c' in the box at 'p'.
// Precondition (asserted): 'p' lies inside the grid.
void write_box(const Point& p, char c) {
  assert(in_bounds(p));
  auto& row = lines[p.row];
  row[p.col] = c;
}
// Returns true if point p is a '.' "block" in the grid
// 'p' must be in bounds
bool is_block(const Point& p) const {
@@ -193,6 +233,31 @@ struct Grid {
// True when 'p' names a box inside the grid: its row is in [0, rows()) and
// its column is in [0, cols()).
bool in_bounds(const Point& p) const {
  if (p.row < 0 || p.row >= rows()) {
    return false;
  }
  if (p.col < 0 || p.col >= cols()) {
    return false;
  }
  return true;
}
// Fills in attributes of the string
string GetString(const Span& s, Attr& attr) const {
int len = s.len;
string temp;
temp.resize(len);
for (int i=0; i<len; i++) {
Point p = s.GetPoint(i);
char c = box(p);
if (c == '-') {
attr.has_blanks = true;
} else if (c >= 'A' && c <= 'Z') {
attr.has_letters = true;
}
temp[i] = box(p);
}
return temp;
}
void WriteString(const Span& s, const string& t) {
int len = s.len;
assert(t.length() == len);
for (int i=0; i<len; i++) {
Point p = s.GetPoint(i);
write_box(p, t[i]);
}
}
// Next increments the point across the grid, one box at a time
// Returns true if point is still in bounds
bool Next(Point& p, bool vert) {
@@ -211,6 +276,27 @@ struct Grid {
}
return in_bounds(p);
}
// Advances 'p' by one box: down its column when 'vert', otherwise along its
// row. If that runs off the end of the current line, 'p' is moved to the
// start of the next line and false is returned; otherwise true is returned.
// No bounds check is made on the next line, so 'p' may end up outside the grid.
bool NextStopAtWrap(Point& p, bool vert) {
  if (vert) {
    if (++p.row < rows()) {
      return true;
    }
    p.row = 0;
    ++p.col;
    return false;
  }
  if (++p.col < cols()) {
    return true;
  }
  p.col = 0;
  ++p.row;
  return false;
}
void FillSpans(bool vert) {
Point p;
// check all spans
@@ -221,14 +307,13 @@ struct Grid {
}
if (!in_bounds(p)) return;
Point startp = p;
// cout << "SPAN START: " << p << "\n";
int len = 0;
bool keep_going = false;
do {
Next(p, vert);
keep_going = NextStopAtWrap(p, vert);
len++;
} while (in_bounds(p) && !is_block(p));
//cout << "END OF SPAN!!! len=" << len << "\n";
} while (keep_going && !is_block(p));
spans.push_back(Span(startp, len, vert));
}
}
@@ -244,7 +329,6 @@ struct Grid {
while (f.is_open() && !f.eof()) { // check for the file
string line;
getline(f, line);
// cout << line << "\n";
if (!line.empty() && line[0] != '#') {
lines.push_back(line);
}
@@ -258,7 +342,8 @@ struct Grid {
void Print() const {
cout << "Grid: " << name
<< " (rows=" << rows()
<< ", cols=" << cols() << ")\n";
<< ", cols=" << cols()
<< ", max_size=" << max_size() << ")\n";
for (string s : lines) {
cout << " " << s << "\n";
}
@@ -266,7 +351,8 @@ struct Grid {
void PrintSpans() const {
cout << "Spans:\n";
for (const Span& s : spans) {
cout << " " << s << "\n";
Attr attr;
cout << " " << s << " " << GetString(s, attr) << "\n";
}
}
string name; // strings are initialized empty
@@ -274,14 +360,99 @@ struct Grid {
Spans spans;
};
// --------------------------------------------------------------------------------
//-Slot
// A span of the grid paired with the text read from it.
struct Slot {
  // 's' is taken by const reference so it is copied exactly once, into 'span'.
  Slot(const Span& s, const string& p) : span(s), pattern(p) {}
  friend ostream& operator<<(ostream& os, const Slot& s);
  Span span;       // where the slot lies in the grid
  string pattern;  // characters along the span, e.g. letters and '-' blanks
};
typedef vector<Slot> Slots;
// Prints a slot as its span followed by the pattern in single quotes.
ostream& operator<<(ostream& os, const Slot& s) {
  return os << s.span << " '" << s.pattern << "'";
}
// --------------------------------------------------------------------------------
//-Solver
class Solver {
public:
Solver() {}
void Solve(const Grid& grid) { // reference to the grid
cout << "Solving this grid:\n";
grid.Print();
Loop(grid, 0);
}
private:
void Loop(Grid grid, int depth) { // full copy of the grid to allow recursion
depth++;
Slots empty_slots; // these are the ones we want to work on
Slots partial_slots;
Slots full_slots;
for (const Span& s : grid.spans) {
Attr attr;
string temp = grid.GetString(s, attr);
if (attr.is_empty()) {
empty_slots.push_back(Slot(s, temp));
} else if (attr.is_partial()) {
partial_slots.push_back(Slot(s, temp));
} else if (attr.is_full()) {
full_slots.push_back(Slot(s, temp));
}
}
int num_empty = empty_slots.size();
int num_partial = partial_slots.size();
int num_full = full_slots.size();
// need to check that all words so far are valid!
for (const Slot& s : full_slots) {
if (!lib.IsWord(s.pattern)) {
return;
}
}
// need to check that all words are unique! no duplicates allowed.
StringSet set;
for (const Slot& s : full_slots) {
if (ExistsInSet(set, s.pattern)) {
return;
}
AddToSet(set, s.pattern);
}
if (num_partial == 0 && num_empty == 0) {
cout << "SOLUTION!!\n";
grid.Print();
return;
}
assert(num_partial > 0);
CommitSlot(grid, partial_slots[0], depth);
}
void CommitSlot(Grid& grid, const Slot& slot, int depth) {
const Words* words = lib.FindWord(slot.pattern);
if (words) {
for (const Word* w : *words) {
grid.WriteString(slot.span, w->word);
Loop(grid, depth); // Recursion
}
}
}
};
// --------------------------------------------------------------------------------
int main() {
Library lib;
lib.ReadFromFile("top_12000.txt");
Grid grid("MY GRID");
Grid grid("MY GRID"); // grid lives on the stack
grid.LoadFromFile("test");
grid.Check();
grid.Print();
grid.FillSpans();
grid.PrintSpans();
lib.ReadFromFile("top_12000.txt", grid.max_size());
Solver solver;
solver.Solve(grid);
}

286
a.py
View File

@@ -1,6 +1,9 @@
from copy import copy, deepcopy
def ToUpper(s):
    """Return `s` converted to upper case."""
    upper = s.upper()
    return upper
## ----------------------------------------------------------------------------
##-Point
class Point:
@@ -14,15 +17,26 @@ class Point:
## ----------------------------------------------------------------------------
##-Span
class Span:
class Spans:
spans = []
def __init__(self, p, l, v):
self.point = p
self.len = l
self.vert = v
self.vert = v
def GetPoint(self, i):
    """Return the Point of the i-th box of this span (0 <= i < self.len, asserted)."""
    assert 0 <= i < self.len
    row, col = self.point.row, self.point.col
    # A vertical span advances along rows, a horizontal one along columns.
    if self.vert:
        row += i
    else:
        col += i
    return Point(row, col)
def __str__(self):
    """Format the span as '[<point> len=<len> vert=<vert>]'."""
    point, length, vert = self.point, self.len, self.vert
    return f'[{point} len={length} vert={vert}]'
## ----------------------------------------------------------------------------
##-Words
class Words:
@@ -36,21 +50,18 @@ class Words:
class Library:
def __init__(self):
# master vector of word
self.words_ = Words()
self.words_ = Words() # master vector of word
self.counts_ = {}
# hash table
self.word_map_ = {}
self.word_map_ = {} # hash table
# Returns NULL if can't find any matches to the given pattern
def FindWord(self, s):
print(list(key for key, values in self.word_map_.items() if s in values))
return [key for key, values in self.word_map_.items() if s in values]
def IsWord(self, s):
    """Return True if `s` is a key of self.word_map_."""
    known = self.word_map_
    return s in known
def ComputeStats(self):
# assert self.counts == {}
for i in range(18):
self.counts_[i] = []
for s in self.words_:
@@ -61,7 +72,6 @@ class Library:
def PrintStats(self):
print("Here are the counts of each word length")
for k,v in self.counts_.items():
# print(v)
if k != 0:
print(f"[{k}] {len(v)}")
@@ -71,38 +81,59 @@ class Library:
def CreatePatternHash(self, w):
    """Store in self.word_map_[w] every pattern of `w` with a subset of letters blanked.

    There are 2**len(w) patterns, from `w` itself up to the all-'-' string.
    Words longer than 7 letters are skipped.
    """
    len_w = len(w)
    if len_w > 7:
        return
    patterns = []
    for mask in range(1 << len_w):
        # Bit j of `mask` decides whether letter j is replaced by '-'.
        patterns.append(
            "".join("-" if (mask >> j) & 1 else ch for j, ch in enumerate(w))
        )
    self.word_map_[w] = patterns
def ReadFromFile(self, filename):
def ReadFromFile(self, filename, max_size):
with open(filename, 'r') as f:
for line in f:
line = ToUpper(line.rstrip())
self.words_.word.append(line)
self.CreatePatternHash(line)
len_w = len(line)
if len_w <= max_size:
self.words_.word.append(line)
self.CreatePatternHash(line)
print(f"Read {len(self.words_.word)} words from file '{filename}'")
def DebugBuckets(self):
for i, (k,v) in enumerate(self.word_map_.items()):
print(f"[{i}] {len(v)}")# {self.word_map_[i]}")
print(f"[{i}] {len(v)}")
lib = Library()
## ----------------------------------------------------------------------------
##-Attr
class Attr:
    """Flags describing a span's contents: blanks ('-'), letters, or both.

    The three predicates take no `self` and read the class-level flags, so
    they are called on the class itself (`Attr.is_empty()`).
    """
    has_letters = False
    has_blanks = False

    def is_empty():
        # Blanks only.
        blanks, letters = Attr.has_blanks, Attr.has_letters
        return blanks and not letters

    def is_partial():
        # A mix of blanks and letters.
        blanks, letters = Attr.has_blanks, Attr.has_letters
        return blanks and letters

    def is_full():
        # Letters only.
        blanks, letters = Attr.has_blanks, Attr.has_letters
        return not blanks and letters
## ----------------------------------------------------------------------------
##-Grid
class Grid:
# Creates a grid called `n` with no lines loaded yet.
def __init__(self, n):
self.name = n
self.lines = []
# NOTE(review): this aliases the class-level list Spans.spans rather than
# creating a new list, so every Grid built this way shares one span list -
# confirm that is intended.
self.sp = Spans.spans
def rows(self):
    """Return the number of rows, i.e. how many lines the grid holds."""
    line_count = len(self.lines)
    return line_count
@@ -112,11 +143,113 @@ class Grid:
return 0
else:
return len(self.lines[0])
def max_size(self):
    """Return the larger of the grid's two dimensions."""
    r, c = self.rows(), self.cols()
    return c if r < c else r
# Reads the character stored in the box at point 'p'.
# 'p' must be in bounds (asserted; 'p' is the assertion message).
def box(self, p):
    assert self.in_bounds(p), p
    row = self.lines[p.row]
    return row[p.col]
def write_box(self, p, c):
    """Store character `c` in the box at `p`; `p` must be in bounds (asserted)."""
    assert self.in_bounds(p), p
    row = self.lines[p.row]
    row[p.col] = c
# True when the box at 'p' holds '.', the marker for a block.
# 'p' must be in bounds.
def is_block(self, p):
    c = self.box(p)
    return c == '.'
def is_blank(self, p):
    """Return True when the box at `p` holds '-'."""
    c = self.box(p)
    return c == '-'
def is_letter(self, p):
    """Return True when the box at `p` holds a character in 'A'..'Z'."""
    return 'A' <= self.box(p) <= 'Z'
def in_bounds(self, p):
    """Return True when `p` lies inside the grid (row and column both in range)."""
    return 0 <= p.row < self.rows() and 0 <= p.col < self.cols()
# Returns the characters currently stored along span 'sp'.
# Side effect: sets attr.has_blanks if any box holds '-', and
# attr.has_letters if any box holds 'A'..'Z'. Flags are never cleared here.
def GetString(self, sp, attr):
    chars = []
    for i in range(sp.len):
        c = self.box(sp.GetPoint(i))
        if c == '-':
            attr.has_blanks = True
        elif 'A' <= c <= 'Z':
            attr.has_letters = True
        chars.append(c)
    return ''.join(chars)
def WriteString(self, sp, s):
    """Write `s` into the boxes of span `sp`, one character per box.

    `s` must be exactly `sp.len` characters long (asserted).
    """
    len_ = sp.len
    assert len(s) == len_
    for i in range(len_):
        self.write_box(sp.GetPoint(i), s[i])
# Advances 'p' by one box (down its column when 'vert', otherwise along its
# row), wrapping to the start of the next line at the end of the current one.
# Returns True if 'p' is still in bounds afterwards.
def Next(self, p, vert):
    if vert:
        p.row += 1
        wrapped = p.row >= self.rows()
        if wrapped:
            p.row = 0
            p.col += 1
    else:
        p.col += 1
        wrapped = p.col >= self.cols()
        if wrapped:
            p.col = 0
            p.row += 1
    return self.in_bounds(p)
# Like "Next", but reports wrapping instead of bounds: advances 'p' by one
# box and returns True if it stayed on the same line, or False if it had to
# wrap to the start of the next line.
def NextStopAtWrap(self, p, vert):
    if vert:
        p.row += 1
        if p.row < self.rows():
            return True
        p.row = 0
        p.col += 1
        return False
    p.col += 1
    if p.col < self.cols():
        return True
    p.col = 0
    p.row += 1
    return False
def FillSpans_(self, vert):
    """Scan the grid in one direction and append every span found to self.sp.

    A span starts at the first non-block box and runs until a block or the
    end of the line is reached.
    """
    cursor = Point()
    while self.in_bounds(cursor):
        # Skip over blocks to the start of the next span.
        while self.in_bounds(cursor) and self.is_block(cursor):
            self.Next(cursor, vert)
        if not self.in_bounds(cursor):
            return
        start = copy(cursor)
        length = 0
        # `cursor` is on a non-block box here, so at least one step is taken.
        while True:
            same_line = self.NextStopAtWrap(cursor, vert)
            length += 1
            if not same_line or self.is_block(cursor):
                break
        self.sp.append(Spans(start, length, vert))
def FillSpans(self):
    """Collect all spans: horizontal ones first, then vertical ones."""
    for vertical in (False, True):
        self.FillSpans_(vert=vertical)
def LoadFromFile(self, filename):
with open(filename, 'r') as f:
for line in f:
#print(f"{line.rstrip()} ({len(line.rstrip())})")
if not line.startswith('#'):
self.lines.append(list(line.rstrip()))
@@ -127,27 +260,108 @@ class Grid:
def Print(self):
print(f"Grid: {self.name} "
f"(rows={self.rows()},"
f" cols={self.cols()})")
f" cols={self.cols()},"
f" max_size={self.max_size()})")
for s in self.lines:
print(f" {''.join(s)}")
def PrintSpans(self):
    """Print every span in self.sp together with the text it currently holds."""
    print("Spans:")
    for span in self.sp:
        text = self.GetString(span, Attr())
        print(f" {span} {text}")
## ----------------------------------------------------------------------------
##-Slot
class Slot:
    """A span paired with the pattern text associated with it."""
    slots = []

    def __init__(self, s, p):
        self.span, self.pattern = s, p

    def __str__(self):
        span, pattern = self.span, self.pattern
        return f"{span} '{pattern}'"
## ----------------------------------------------------------------------------
##-Solver
class Solver:
    """Backtracking solver driven by the module-level `lib`.

    It repeatedly picks an unfinished slot, tries every word `lib` offers for
    the slot's pattern, and recurses. Every completed grid is printed.
    """

    def __init__(self):
        pass

    def ExistsInSet(self, set_, s):
        """Return True if `s` is already in `set_`."""
        return s in set_

    def AddToSet(self, set_, s):
        """Add `s` to `set_`; `s` must not be present yet (asserted)."""
        assert not self.ExistsInSet(set_, s)
        set_.add(s)

    def Solve(self, grid):
        """Print `grid`, then search it for solutions."""
        print(f"Solving this grid")
        grid.Print()
        self.Loop(grid, 0)

    def Loop(self, grid, depth):
        """One search step: classify the slots, validate, then commit one slot."""
        depth += 1
        empty_slots = []
        partial_slots = []  # these are the ones we prefer to work on
        full_slots = []
        for s in grid.sp:
            # The flags are read through the Attr class itself, so they are
            # reset on the class after every span.
            attr = Attr
            temp = grid.GetString(s, attr)
            if attr.is_empty():
                empty_slots.append(Slot(s, temp))
            elif attr.is_partial():
                partial_slots.append(Slot(s, temp))
            elif attr.is_full():
                full_slots.append(Slot(s, temp))
            Attr.has_letters = False
            Attr.has_blanks = False

        # Every completed word must be in the library...
        for slot in full_slots:
            if not lib.IsWord(slot.pattern):
                return
        # ...and no word may appear twice.
        my_set = set()
        for slot in full_slots:
            if self.ExistsInSet(my_set, slot.pattern):
                return
            self.AddToSet(my_set, slot.pattern)

        if not partial_slots and not empty_slots:
            print("SOLUTION!!")
            grid.Print()
            return

        # Prefer a partially filled slot; if there is none (e.g. the grid is
        # still completely blank), start from an empty one instead of
        # indexing into an empty list.
        next_slot = partial_slots[0] if partial_slots else empty_slots[0]
        self.CommitSlot(grid, next_slot, depth)

    def CommitSlot(self, grid, slot, depth):
        """Try every candidate word for `slot` on a private copy of `grid`."""
        grid = deepcopy(grid)  # keep the caller's grid untouched
        words = lib.FindWord(slot.pattern)
        if words:
            for w in words:
                grid.WriteString(slot.span, w)
                self.Loop(grid, depth)
## ----------------------------------------------------------------------------
if __name__ == "__main__":
lib = Library()
lib.ReadFromFile("top_12000.txt")
grid = Grid("MY GRID")
grid.LoadFromFile("test")
grid.LoadFromFile('test')
grid.Check()
grid.Print()
grid.FillSpans()
grid.PrintSpans()
p1 = Point()
p2 = Point(2, 1)
print(f"Point1 is {p1}")
print(f"Point2 is {p2}")
s1 = Span(p1, 3, True)
s2 = Span(p2, 5, False)
print(f"Span1 is {s1}")
print(f"Span2 is {s2}")
lib.ReadFromFile("top_12000.txt", grid.max_size())
solver = Solver()
solver.Solve(grid)