2021-07-13 17:27:14 +02:00
|
|
|
#include <assert.h>

#include <cctype>   // std::toupper
#include <cstddef>  // std::size_t
#include <fstream>  // support for reading files
#include <iostream> // library for printing
#include <string>   // support for strings
#include <vector>   // support for vectors
|
2021-07-13 17:27:14 +02:00
|
|
|
using namespace std;
|
2021-07-11 19:11:55 +02:00
|
|
|
|
|
|
|
// For compiling C++ code
|
|
|
|
// g++ a.c -o a
|
2021-07-16 19:17:37 +02:00
|
|
|
|
2021-07-19 22:15:11 +02:00
|
|
|
// Returns a copy of `s` with every character converted to upper case via
// std::toupper (so the conversion follows the current C locale).
//
// Each character is passed through `unsigned char` first: handing a negative
// `char` value (any byte >= 0x80 on platforms where `char` is signed) to
// std::toupper is undefined behavior.
std::string ToUpper(std::string s)
{
    // `s` is already a private copy (taken by value), so convert it in place.
    for (char& c : s)
    {
        c = static_cast<char>(std::toupper(static_cast<unsigned char>(c)));
    }
    return s;
}
|
|
|
|
|
|
|
|
// One dictionary entry. At present it only carries the text of the word.
struct Word
{
    // Builds an entry whose text is a copy of `s`.
    Word(std::string s) : word(s) {}

    std::string word; // the text of this entry
};

// A sequence of dictionary entries.
using Words = std::vector<Word>;
|
|
|
|
|
|
|
|
// Number of hash buckets the word table is split into.
const int num_buckets = 1001;

// Hash function: maps a non-empty string to a bucket index in
// [0, num_buckets).
//
// The polynomial hash (multiplier 217) is accumulated in unsigned arithmetic,
// which wraps on overflow with well-defined results. Accumulating in a signed
// `int` overflows for words of about five or more letters, which is undefined
// behavior, and abs() of a wrapped INT_MIN is still negative, which would
// produce a negative bucket. Bytes are read as `unsigned char` so the result
// does not depend on whether plain `char` is signed.
//
// Precondition (checked with assert): `s` is not empty.
int bucket(const std::string& s)
{
    assert(!s.empty());

    unsigned int hash = 0;
    for (unsigned char c : s)
    {
        hash = hash * 217u + c;
    }

    const int b = static_cast<int>(hash % static_cast<unsigned int>(num_buckets));
    assert(b >= 0 && b < num_buckets);
    return b;
}
|
|
|
|
|
|
|
|
class Library
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
Library()
|
|
|
|
{
|
|
|
|
shelves.resize(num_buckets);
|
|
|
|
}
|
|
|
|
bool IsWord(string s) const
|
|
|
|
{
|
|
|
|
for (Word w : shelves[bucket(s)])
|
|
|
|
{
|
|
|
|
if (s == w.word)
|
|
|
|
{
|
|
|
|
return true;
|
2021-07-16 19:17:37 +02:00
|
|
|
}
|
|
|
|
}
|
2021-07-19 22:15:11 +02:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
void ComputeStats()
|
|
|
|
{
|
|
|
|
assert(counts.empty());
|
|
|
|
counts.resize(18);
|
|
|
|
for (Word w : words)
|
|
|
|
{
|
|
|
|
int len = w.word.length();
|
|
|
|
if (len < 18)
|
|
|
|
{
|
|
|
|
counts[len]++;
|
2021-07-16 19:17:37 +02:00
|
|
|
}
|
|
|
|
}
|
2021-07-19 22:15:11 +02:00
|
|
|
}
|
|
|
|
void PrintStats() const
|
|
|
|
{
|
|
|
|
cout << "Here are the counts of each word length:\n";
|
|
|
|
for (int i = 1; i < counts.size(); i++)
|
|
|
|
{
|
|
|
|
cout << "[" << i << "] " << counts[i] << "\n";
|
2021-07-16 19:17:37 +02:00
|
|
|
}
|
2021-07-19 22:15:11 +02:00
|
|
|
}
|
|
|
|
string GetWord(int i) const
|
|
|
|
{
|
|
|
|
assert(i >= 0 && i < words.size());
|
|
|
|
return words[i].word;
|
|
|
|
}
|
|
|
|
void ReadFromFile(string filename)
|
|
|
|
{
|
|
|
|
ifstream f;
|
|
|
|
f.open(filename);
|
|
|
|
while (!f.eof())
|
|
|
|
{
|
|
|
|
string line;
|
|
|
|
getline(f, line);
|
|
|
|
// cout << line << "\n";
|
|
|
|
if (!line.empty())
|
2021-07-16 19:17:37 +02:00
|
|
|
{
|
2021-07-19 22:15:11 +02:00
|
|
|
line = ToUpper(line);
|
|
|
|
int len = line.length();
|
|
|
|
if (line[len - 1] == '\r')
|
2021-07-16 19:17:37 +02:00
|
|
|
{
|
2021-07-19 22:15:11 +02:00
|
|
|
line = line.substr(0, len - 1);
|
2021-07-16 19:17:37 +02:00
|
|
|
}
|
2021-07-19 22:15:11 +02:00
|
|
|
words.push_back(Word(line));
|
|
|
|
shelves[bucket(line)].push_back(Word(line));
|
2021-07-16 19:17:37 +02:00
|
|
|
}
|
|
|
|
}
|
2021-07-19 22:15:11 +02:00
|
|
|
cout << "Read " << words.size() << " words from file '"
|
|
|
|
<< filename << "'\n";
|
|
|
|
}
|
|
|
|
void DebugBuckets() const
|
|
|
|
{
|
|
|
|
for (int i = 0; i < shelves.size(); i++)
|
|
|
|
{
|
|
|
|
cout << "[" << i << "] " << shelves[i].size() << "\n";
|
|
|
|
}
|
|
|
|
}
|
2021-07-16 19:17:37 +02:00
|
|
|
|
2021-07-19 22:15:11 +02:00
|
|
|
private:
|
|
|
|
Words words;
|
|
|
|
vector<Words> shelves;
|
|
|
|
vector<int> counts;
|
|
|
|
};
|
2021-07-16 19:17:37 +02:00
|
|
|
|
2021-07-14 21:10:44 +02:00
|
|
|
// A named grid of characters, stored as one string per row.
struct Grid
{
    // Creates an empty grid called `n`.
    Grid(std::string n) : name(n) {}

    // Number of rows currently loaded.
    int rows() const { return static_cast<int>(lines.size()); }

    // Width of the grid, taken from its first row; 0 for an empty grid.
    int cols() const
    {
        return lines.empty() ? 0 : static_cast<int>(lines[0].size());
    }

    // Appends the rows found in `filename` to the grid. Empty lines and
    // lines starting with '#' are skipped, and a trailing '\r' (Windows line
    // ending) is removed from each line. If the file cannot be opened, an
    // error is written to std::cerr and the grid is left unchanged.
    void LoadFromFile(const std::string& filename)
    {
        // Open the file the caller asked for (not a hard-coded path).
        std::ifstream f(filename);
        if (!f)
        {
            // Looping on !f.eof() would spin forever here: a stream that
            // failed to open never reaches end-of-file.
            std::cerr << "Could not open grid file '" << filename << "'\n";
            return;
        }

        std::string line;
        while (std::getline(f, line))
        {
            if (!line.empty() && line.back() == '\r')
            {
                line.pop_back();
            }
            if (!line.empty() && line[0] != '#')
            {
                lines.push_back(line);
            }
        }
    }

    // Asserts that every row has the same width as the first row.
    void Check() const
    {
        for (const std::string& s : lines)
        {
            assert(s.size() == lines[0].size());
        }
    }

    // Prints the grid's name and dimensions, followed by its rows.
    void Print() const
    {
        std::cout << "Grid: " << name
                  << " (rows=" << rows()
                  << ", cols=" << cols() << ")\n";
        for (const std::string& s : lines)
        {
            std::cout << " " << s << "\n";
        }
    }

    std::string name;               // strings are initialized empty
    std::vector<std::string> lines; // one entry per grid row
};
|
2021-07-11 19:11:55 +02:00
|
|
|
|
2021-07-13 17:27:14 +02:00
|
|
|
int main()
|
|
|
|
{
|
2021-07-16 19:17:37 +02:00
|
|
|
Library lib;
|
|
|
|
lib.ReadFromFile("top_12000.txt");
|
2021-07-19 22:15:11 +02:00
|
|
|
//lib.ComputeStats();
|
|
|
|
//lib.PrintStats();
|
|
|
|
|
|
|
|
cout << lib.IsWord("DOG") << "\n";
|
|
|
|
cout << lib.IsWord("CAT") << "\n";
|
|
|
|
cout << lib.IsWord("THANKS") << "\n";
|
|
|
|
lib.DebugBuckets();
|
2021-07-13 17:27:14 +02:00
|
|
|
|
2021-07-16 19:17:37 +02:00
|
|
|
//Grid grid("MY GRID");
|
|
|
|
//grid.LoadFromFile("test");
|
|
|
|
//grid.Check();
|
|
|
|
//grid.Print();
|
2021-07-11 19:11:55 +02:00
|
|
|
}
|