Module 10: Search the Library, Fast

This commit is contained in:
David Doblas Jiménez 2021-07-23 12:31:19 +02:00
parent 6df8d25296
commit 0c75398111
1 changed file with 56 additions and 107 deletions

163
a.c
View File

@ -1,6 +1,7 @@
#include <assert.h>
#include <cctype> // support for toupper
#include <cstddef> // support for size_t
#include <fstream> // support for reading files
#include <iostream> // library for printing
#include <string> // support for strings
#include <unordered_map> // support for hash-tables
#include <utility> // support for std::move
#include <vector> // support for vectors
using namespace std;
@ -8,172 +9,120 @@ using namespace std;
// For compiling C++ code
// g++ a.c -o a
// Returns a copy of `s` with every character converted to upper case.
// Characters without an upper-case form (digits, punctuation, ...) are
// returned unchanged.
std::string ToUpper(std::string s) {
  // `s` is already a private copy (passed by value), so convert it in place.
  for (char& c : s) {
    // std::toupper requires a value representable as unsigned char; passing
    // a negative char (any non-ASCII byte where char is signed) is undefined.
    c = static_cast<char>(std::toupper(static_cast<unsigned char>(c)));
  }
  return s;
}
// A single dictionary entry. It currently carries only the word's text.
struct Word {
  // Creates an entry with an empty word. A default constructor is required
  // by containers that value-initialize elements, such as
  // unordered_map::operator[].
  Word() = default;

  // Creates an entry for `s`. The string is taken by value and moved into
  // the member, so passing a temporary costs no extra copy. The constructor
  // is intentionally left non-explicit to stay compatible with existing
  // call sites.
  Word(std::string s) : word(std::move(s)) {}

  std::string word;  // the text of the word
};

// Ordered list of words.
using Words = std::vector<Word>;

// Hash table (from the STL) mapping a word's text to its entry.
using WordMap = std::unordered_map<std::string, Word>;
// Number of buckets the hand-written hash function distributes strings over.
const int num_buckets = 1001;

// Hash function: maps a non-empty string to a bucket index in
// [0, num_buckets).
//
// The hash is polynomial: every character is folded in as hash * 217 + c.
// The running value is unsigned so that overflow on long inputs wraps around
// with well-defined behavior. With a signed int the multiplication overflow
// is undefined, and abs() of the most negative int is undefined as well.
// For inputs that never overflow the result is the same as with signed
// arithmetic.
//
// Precondition (asserted): `s` is not empty.
int bucket(const std::string& s) {
  assert(!s.empty());
  unsigned int hash = 0;
  for (char c : s) {
    // Go through unsigned char so non-ASCII bytes contribute a positive value.
    hash = hash * 217u + static_cast<unsigned char>(c);
  }
  const int b = static_cast<int>(hash % static_cast<unsigned int>(num_buckets));
  assert(b >= 0 && b < num_buckets);
  return b;
}
class Library
{
class Library {
public:
Library()
{
shelves.resize(num_buckets);
}
bool IsWord(string s) const
{
for (Word w : shelves[bucket(s)])
{
if (s == w.word)
{
return true;
}
Library() {} // hash-tables are automatically initialized
bool IsWord(string s) const {
auto it = word_map_.find(s); // use iterator
if (it == word_map_.end()) {
return false; // if word is not found on hast-table
} else {
return true;
}
return false;
//return word_map_.count(s) > 0; // True if word exists
}
void ComputeStats()
{
assert(counts.empty());
counts.resize(18);
for (Word w : words)
{
void ComputeStats() {
assert(counts_.empty());
counts_.resize(18);
for (Word w : words_) {
int len = w.word.length();
if (len < 18)
{
counts[len]++;
if (len < 18) {
counts_[len]++;
}
}
}
void PrintStats() const
{
void PrintStats() const {
cout << "Here are the counts of each word length:\n";
for (int i = 1; i < counts.size(); i++)
{
cout << "[" << i << "] " << counts[i] << "\n";
for (int i = 1; i < counts_.size(); i++) {
cout << "[" << i << "] " << counts_[i] << "\n";
}
}
string GetWord(int i) const
{
assert(i >= 0 && i < words.size());
return words[i].word;
string GetWord(int i) const {
assert(i >= 0 && i < words_.size());
return words_[i].word;
}
void ReadFromFile(string filename)
{
void ReadFromFile(string filename) {
ifstream f;
f.open(filename);
while (!f.eof())
{
while (!f.eof()) {
string line;
getline(f, line);
// cout << line << "\n";
if (!line.empty())
{
if (!line.empty()) {
line = ToUpper(line);
int len = line.length();
if (line[len - 1] == '\r')
{
if (line[len - 1] == '\r') {
line = line.substr(0, len - 1);
}
words.push_back(Word(line));
shelves[bucket(line)].push_back(Word(line));
words_.push_back(Word(line));
word_map_[line] = Word(line); // create entry for the hash-table
}
}
cout << "Read " << words.size() << " words from file '"
cout << "Read " << words_.size() << " words from file '"
<< filename << "'\n";
}
void DebugBuckets() const
{
for (int i = 0; i < shelves.size(); i++)
{
cout << "[" << i << "] " << shelves[i].size() << "\n";
void DebugBuckets() const {
for (int i = 0; i < word_map_.bucket_count(); i++) {
cout << "[" << i << "] " << word_map_.bucket_size(i) << "\n";
}
}
private:
Words words;
vector<Words> shelves;
vector<int> counts;
private: // _ is used to indicate privacy
Words words_;
WordMap word_map_;
vector<int> counts_;
};
// A named grid of characters, stored as one string per row.
struct Grid {
  // Creates an empty grid labelled `n`.
  Grid(std::string n) : name(std::move(n)) {}

  // Number of rows currently loaded.
  int rows() const { return static_cast<int>(lines.size()); }

  // Number of columns, taken from the first row; 0 for an empty grid.
  int cols() const {
    return lines.empty() ? 0 : static_cast<int>(lines[0].size());
  }

  // Appends the rows found in `filename` to the grid. Empty lines and lines
  // starting with '#' are skipped. A file that cannot be opened adds no rows.
  void LoadFromFile(std::string filename) {
    // Open the file the caller asked for (not a hard-coded name).
    std::ifstream f(filename);
    std::string line;
    // Loop on the result of getline rather than on eof(): if the file failed
    // to open, eof() is never set and an eof()-driven loop would spin forever.
    while (std::getline(f, line)) {
      if (!line.empty() && line[0] != '#') {
        lines.push_back(line);
      }
    }
  }

  // Asserts that every row has the same length as the first one.
  void Check() const {
    for (const std::string& s : lines) {
      assert(s.size() == static_cast<std::size_t>(cols()));
    }
  }

  // Prints the grid's name and dimensions followed by one row per line.
  void Print() const {
    std::cout << "Grid: " << name
              << " (rows=" << rows()
              << ", cols=" << cols() << ")\n";
    for (const std::string& s : lines) {
      std::cout << " " << s << "\n";
    }
  }

  std::string name;                // label shown by Print()
  std::vector<std::string> lines;  // the rows of the grid
};
int main()
{
int main() {
Library lib;
lib.ReadFromFile("top_12000.txt");
//lib.ComputeStats();
@ -191,6 +139,7 @@ int main()
cout << lib.IsWord("DOG") << "\n";
cout << lib.IsWord("CAT") << "\n";
cout << lib.IsWord("THANKS") << "\n";
cout << lib.IsWord("TFAFAS") << "\n";
lib.DebugBuckets();
//Grid grid("MY GRID");