Skip to content
Snippets Groups Projects
Commit 52febe52 authored by Tomáš Kořistka's avatar Tomáš Kořistka
Browse files

Changing inverted index to deque

parent c8277171
No related branches found
No related tags found
No related merge requests found
#include <iostream> #include <iostream>
#include <climits> #include <climits>
#include <utility>
#include "Computor.h" #include "Computor.h"
#include "exceptions/Exceptions.h" #include "exceptions/Exceptions.h"
   
using namespace std; using namespace std;
   
Computor::Computor(Space space, const Query &query) : space(space), query(query) {} Computor::Computor(Space space, Query query) : space(std::move(space)), query(std::move(query)) {}
   
   
map<int, double> Computor::compute() { map<int, double> Computor::compute() {
exhaustedIndices = {};
auto totalIndices = query.terms.size();
int lowestDocumentID = nextID();
space.forward(lowestDocumentID, query.terms);
map<int, double> results; map<int, double> results;
   
while (exhaustedIndices.size() != totalIndices) { availableTerms = query.termsKeyset;
results.emplace(lowestDocumentID, computeForDocument(lowestDocumentID));
lowestDocumentID = nextID();
space.forward(lowestDocumentID, query.terms);
}
return results;
}
   
while (!availableTerms.empty()) {
int ID = nextID(); //get lowest ID
double result = 0;
   
double Computor::computeForDocument(int ID) { for (const auto &term: availableTerms) /*Go through all the remaining terms*/ {
double result = 0; try {
double queryWeight = query.terms.at(term);
double documentWeight = space.getInvertedIndexByKey(term).getDocumentWeightByID(ID);
result += documentWeight * queryWeight;
}
catch (const IDNotFoundException &e) {//inverted index does not contain given ID
}
   
for (const auto &[term, weight]: query.terms) { catch (const EndOfIndexException &e) {
try { availableTerms.erase(term); //exhaust term
auto d = space.getInvertedIndexByKey(term).getDocumentWeightByID(ID); }
result += weight * d;
}
catch (EndOfIndexException &e) {
exhaustedIndices.emplace(term); //add another exhausted inverted index
}
catch (IDNotFoundException &e) {
continue;
} }
results[ID] = result;
} }
   
return result; return results;
} }
   
int Computor::nextID() {
int lowestID = INT_MAX;
   
int Computor::nextID() const { for (const auto &queryRecord: query.terms)
int nextID = INT_MAX; lowestID = min(space[queryRecord.first].getLowestID(), lowestID);
   
for (const auto &term: query.terms) return lowestID;
nextID = min(space[term.first].getLowestID(), nextID);
return nextID;
} }
   
...@@ -14,15 +14,15 @@ private: ...@@ -14,15 +14,15 @@ private:
//Attributes-------------- //Attributes--------------
Space space; Space space;
const Query query; const Query query;
std::set<std::string> exhaustedIndices; std::set<std::string> availableTerms;
   
//Methods----------------- //Methods-----------------
int nextID() const; int nextID();
   
double computeForDocument(int ID); double computeForDocument(int ID);
   
public: public:
Computor(Space space, const Query &query); Computor(Space space, Query query);
   
std::map<int, double> compute(); std::map<int, double> compute();
}; };
......
...@@ -25,7 +25,7 @@ map<string, InvertedIndex> InputParser::getInvertedIndices() { ...@@ -25,7 +25,7 @@ map<string, InvertedIndex> InputParser::getInvertedIndices() {
sort(weights.begin(), weights.end(), sort(weights.begin(), weights.end(),
[](const auto &a, const auto &b) { return a.getID() < b.getID(); }); [](const auto &a, const auto &b) { return a.getID() < b.getID(); });
   
invertedIndices.insert({term, InvertedIndex(term, weights)}); invertedIndices.emplace(term, InvertedIndex(weights));
} }
   
return invertedIndices; return invertedIndices;
......
...@@ -4,16 +4,21 @@ ...@@ -4,16 +4,21 @@
   
using namespace std; using namespace std;
   
InvertedIndex::InvertedIndex(string term, vector<DocumentWeight> documentWeights) InvertedIndex::InvertedIndex(deque<DocumentWeight> documentWeights)
: position(0), : documentWeights(move(documentWeights)) {}
term(move(term)),
documentWeights(move(documentWeights)) {}
   
double InvertedIndex::getDocumentWeightByID(int ID) { double InvertedIndex::getDocumentWeightByID(int ID) {
   
if (position + 1 == documentWeights.size()) if (position + 1 == documentWeights.size())
throw EndOfIndexException(); throw EndOfIndexException();
   
while (documentWeights.front().getID() != ID) {
documentWeights.pop_front();
}
return documentWeights.front().getID();
for (auto it = documentWeights.begin() + position; it != documentWeights.end(); it++) { for (auto it = documentWeights.begin() + position; it != documentWeights.end(); it++) {
int documentID = (*it).getID(); int documentID = (*it).getID();
   
...@@ -21,25 +26,8 @@ double InvertedIndex::getDocumentWeightByID(int ID) { ...@@ -21,25 +26,8 @@ double InvertedIndex::getDocumentWeightByID(int ID) {
throw IDNotFoundException(); throw IDNotFoundException();
   
if (documentID == ID) //match! if (documentID == ID) //match!
{
position = it - documentWeights.begin() + 1; //update position
return (*it).getWeight(); return (*it).getWeight();
}
}
/*
for (size_t i = position; position < documentWeights.size(); ++i) {
int documentID = documentWeights[i].getWeight();
if (documentID < ID)
throw IDNotFoundException();
if (documentID == ID) {
position = i + 1;
return documentWeights[i];
}
} }
*/
   
throw IDNotFoundException(); throw IDNotFoundException();
} }
...@@ -49,7 +37,7 @@ const DocumentWeight &InvertedIndex::operator[](size_t i) { ...@@ -49,7 +37,7 @@ const DocumentWeight &InvertedIndex::operator[](size_t i) {
} }
   
int InvertedIndex::getLowestID() const { int InvertedIndex::getLowestID() const {
return documentWeights[position].getID(); return documentWeights.front().getID();
} }
   
void InvertedIndex::forward(int ID) { void InvertedIndex::forward(int ID) {
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
   
   
#include <string> #include <string>
#include <vector> #include <deque>
#include "../DocumentWeight.h" #include "../DocumentWeight.h"
   
/** /**
...@@ -14,19 +14,16 @@ ...@@ -14,19 +14,16 @@
class InvertedIndex { class InvertedIndex {
private: private:
//Attributes------------- //Attributes-------------
size_t position; std::deque<DocumentWeight> documentWeights; /**<Inverted index list of documents & their weights in which the term appears */
std::string term;/**<Term identifier */
std::vector<DocumentWeight> documentWeights; /**<Inverted index list of documents & their weights in which the term appears */
   
public: public:
   
//Methods----------------- //Methods-----------------
/** /**
* Constructor * Constructor
* @param term Term identifier
* @param documentWeights Inverted index list of documents in which the term appears, and their weights * @param documentWeights Inverted index list of documents in which the term appears, and their weights
*/ */
InvertedIndex(std::string term, std::vector<DocumentWeight> documentWeights); InvertedIndex(std::deque<DocumentWeight> documentWeights);
   
/** /**
* @brief Finds the @ref DocumentWeight object with a given @ref DocumentWeight::ID in @ref documentWeights * @brief Finds the @ref DocumentWeight object with a given @ref DocumentWeight::ID in @ref documentWeights
...@@ -42,8 +39,6 @@ public: ...@@ -42,8 +39,6 @@ public:
   
int getLowestID() const; int getLowestID() const;
   
void forward(int ID);
const DocumentWeight &operator[](size_t i); const DocumentWeight &operator[](size_t i);
}; };
   
......
...@@ -6,5 +6,14 @@ ...@@ -6,5 +6,14 @@
   
using namespace std; using namespace std;
   
/**
 * Builds a query from its weighted terms and the acceptance threshold.
 * @param terms Terms and their weights in the query
 * @param threshold Acceptable result threshold
 */
Query::Query(std::map<std::string, double> terms, double threshold)
        : terms(std::move(terms)),
          // Inside the initializer list the bare name `terms` is the constructor parameter,
          // which has just been moved from. The member `terms` is declared before
          // `termsKeyset`, so it is already initialized here; derive the keyset from it.
          termsKeyset(getKeyset(this->terms)),
          threshold(threshold) {}
set<string> Query::getKeyset(const map<std::string, double> &terms) {
set<string> dummy;
for (const auto &term: terms)
dummy.emplace(term.first);
return dummy;
}
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
   
   
#include <map> #include <map>
#include <set>
#include <vector> #include <vector>
#include <string> #include <string>
#include "Space.h" #include "Space.h"
...@@ -14,6 +15,7 @@ class Query { ...@@ -14,6 +15,7 @@ class Query {
public: public:
//Attributes------------- //Attributes-------------
const std::map<std::string, double> terms; /**<Terms and their weights in the query */ const std::map<std::string, double> terms; /**<Terms and their weights in the query */
const std::set<std::string> termsKeyset; /**<Keyset of terms */
const double threshold; /**<Acceptable result threshold */ const double threshold; /**<Acceptable result threshold */
   
//Methods---------------- //Methods----------------
...@@ -22,7 +24,9 @@ public: ...@@ -22,7 +24,9 @@ public:
* @param terms Terms and their weights in the query * @param terms Terms and their weights in the query
* @param threshold Acceptable result threshold * @param threshold Acceptable result threshold
*/ */
explicit Query(std::map<std::string, double> terms, const double threshold); explicit Query(std::map<std::string, double> terms, double threshold);
static std::set<std::string> getKeyset(const std::map<std::string, double> &terms);
}; };
   
   
......
...@@ -17,7 +17,7 @@ InvertedIndex &Space::getInvertedIndexByKey(const string &key) { ...@@ -17,7 +17,7 @@ InvertedIndex &Space::getInvertedIndexByKey(const string &key) {
return terms.at(key); return terms.at(key);
} }
   
void Space::forward(int ID, const std::map<std::string, double> q) { void Space::forward(int ID, const std::map<std::string, double> &q) {
for (const auto &term: q) for (const auto &term: q)
terms.at(term.first).forward(ID); terms.at(term.first).forward(ID);
} }
......
...@@ -32,7 +32,7 @@ public: ...@@ -32,7 +32,7 @@ public:
   
const InvertedIndex &operator[](const std::string &key) const; const InvertedIndex &operator[](const std::string &key) const;
   
void forward(int ID, const std::map<std::string, double> q); void forward(int ID, const std::map<std::string, double> &q);
}; };
   
   
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment