diff --git a/querying/src/Computor.cpp b/querying/src/Computor.cpp
index 693500ef4e6c6ca0bb370a1be71683073bea3c6d..ebbd268690575abf2bc158a39dfa64fcc480083d 100644
--- a/querying/src/Computor.cpp
+++ b/querying/src/Computor.cpp
@@ -1,58 +1,57 @@
 #include <iostream>
 #include <climits>
+#include <utility>
 #include "Computor.h"
 #include "exceptions/Exceptions.h"
 
 using namespace std;
 
 
-Computor::Computor(Space space, const Query &query) : space(space), query(query) {}
+Computor::Computor(Space space, Query query) : space(std::move(space)), query(std::move(query)) {}
 
+/**
+ * @brief Scores documents against the query, one document ID per pass of the outer loop
+ * @return Map from document ID to the sum of documentWeight * queryWeight over the remaining terms
+ */
 map<int, double> Computor::compute() {
-    exhaustedIndices = {};
-    auto totalIndices = query.terms.size();
-
-    int lowestDocumentID = nextID();
-    space.forward(lowestDocumentID, query.terms);
 
     map<int, double> results;
-    while (exhaustedIndices.size() != totalIndices) {
-        results.emplace(lowestDocumentID, computeForDocument(lowestDocumentID));
-        lowestDocumentID = nextID();
-        space.forward(lowestDocumentID, query.terms);
-    }
-
-    return results;
-}
+    availableTerms = query.termsKeyset;
+    while (!availableTerms.empty()) {
+        int ID = nextID(); //get lowest ID
+        double result = 0;
 
-double Computor::computeForDocument(int ID) {
-    double result = 0;
+        // Explicit iterator: erase() inside a range-for would invalidate the hidden loop iterator
+        for (auto term = availableTerms.begin(); term != availableTerms.end();) /*Go through all the remaining terms*/ {
+            try {
+                double queryWeight = query.terms.at(*term);
+                double documentWeight = space.getInvertedIndexByKey(*term).getDocumentWeightByID(ID);
+                result += documentWeight * queryWeight;
+                ++term;
+            }
+            catch (const IDNotFoundException &e) {//inverted index does not contain given ID
+                ++term;
+            }
 
-    for (const auto &[term, weight]: query.terms) {
-        try {
-            auto d = space.getInvertedIndexByKey(term).getDocumentWeightByID(ID);
-            result += weight * d;
-        }
-
-        catch (EndOfIndexException &e) {
-            exhaustedIndices.emplace(term); //add another exhausted inverted index
-        }
-        catch (IDNotFoundException &e) {
-            continue;
+            catch (const EndOfIndexException &e) {
+                term = availableTerms.erase(term); //exhaust term, erase() hands back its successor
+            }
         }
+        results[ID] = result;
     }
 
-    return result;
+    return results;
 }
 
+// Smallest getLowestID() among the inverted indices of all query terms; INT_MAX when the query has none
+int Computor::nextID() {
+    int lowestID = INT_MAX;
 
-int Computor::nextID() const {
-    int nextID = INT_MAX;
+    for (const auto &queryRecord: query.terms)
+        lowestID = min(space[queryRecord.first].getLowestID(), lowestID);
 
-    for (const auto &term: query.terms)
-        nextID = min(space[term.first].getLowestID(), nextID);
-
-    return nextID;
+    return lowestID;
 }
 
 
+
diff --git a/querying/src/Computor.h b/querying/src/Computor.h
index 361517086a70e16080c95e69bd429875ef4d8bdb..9a9fa8620b424f2c6bbe677ca7bd9cfdf19b52d4 100644
--- a/querying/src/Computor.h
+++ b/querying/src/Computor.h
@@ -14,15 +14,14 @@ private:
     //Attributes--------------
     Space space;
     const Query query;
-    std::set<std::string> exhaustedIndices;
+    std::set<std::string> availableTerms;
 
     //Methods-----------------
-    int nextID() const;
+    int nextID();
 
-    double computeForDocument(int ID);
 
 public:
-    Computor(Space space, const Query &query);
+    Computor(Space space, Query query);
 
     std::map<int, double> compute();
 };
diff --git a/querying/src/inverted_index/InputParser.cpp b/querying/src/inverted_index/InputParser.cpp
index f8d1e99ec74764b19bcc8ced9d9effc0389129eb..a26ac60b18ea05033c32d15f863822efa55b1b26 100644
--- a/querying/src/inverted_index/InputParser.cpp
+++ b/querying/src/inverted_index/InputParser.cpp
@@ -25,7 +25,7 @@ map<string, InvertedIndex> InputParser::getInvertedIndices() {
 
         sort(weights.begin(), weights.end(),
              [](const auto &a, const auto &b) { return a.getID() < b.getID(); });
-        invertedIndices.insert({term, InvertedIndex(term, weights)});
+        invertedIndices.emplace(term, InvertedIndex(weights));
     }
 
     return invertedIndices;
diff --git a/querying/src/inverted_index/InvertedIndex.cpp b/querying/src/inverted_index/InvertedIndex.cpp
index 7a464a5b38a8483fbc4e1b3d22e7f6c300067697..ed048ee74cfecbafe9b4f5f1ac53d53db3d3832c 100644
--- a/querying/src/inverted_index/InvertedIndex.cpp
+++ b/querying/src/inverted_index/InvertedIndex.cpp
@@ -4,16 +4,21 @@
 using namespace std;
 
 
-InvertedIndex::InvertedIndex(string term, vector<DocumentWeight> documentWeights)
-        : position(0),
-          term(move(term)),
-          documentWeights(move(documentWeights)) {}
+InvertedIndex::InvertedIndex(deque<DocumentWeight> documentWeights)
+        : documentWeights(move(documentWeights)) {}
+
 
 double InvertedIndex::getDocumentWeightByID(int ID) {
     if (position + 1 == documentWeights.size())
         throw EndOfIndexException();
 
+    // InputParser sorts the weights by ascending ID, so entries below the requested ID are dropped
+    while (!documentWeights.empty() && documentWeights.front().getID() < ID)
+        documentWeights.pop_front();
+    if (!documentWeights.empty() && documentWeights.front().getID() == ID)
+        return documentWeights.front().getWeight(); // the weight, not the ID
+
 
     for (auto it = documentWeights.begin() + position; it != documentWeights.end(); it++) {
         int documentID = (*it).getID();
 
@@ -21,25 +26,8 @@ double InvertedIndex::getDocumentWeightByID(int ID) {
             throw IDNotFoundException();
 
         if (documentID == ID) //match!
-        {
-            position = it - documentWeights.begin() + 1; //update position
             return (*it).getWeight();
-        }
-    }
-
-    /*
-
-    for (size_t i = position; position < documentWeights.size(); ++i) {
-        int documentID = documentWeights[i].getWeight();
-
-        if (documentID < ID)
-            throw IDNotFoundException();
-        if (documentID == ID) {
-            position = i + 1;
-            return documentWeights[i];
-        }
     }
-*/
 
     throw IDNotFoundException();
 }
@@ -49,7 +37,7 @@ const DocumentWeight &InvertedIndex::operator[](size_t i) {
 }
 
 int InvertedIndex::getLowestID() const {
-    return documentWeights[position].getID();
+    return documentWeights.front().getID();
 }
 
 void InvertedIndex::forward(int ID) {
diff --git a/querying/src/inverted_index/InvertedIndex.h b/querying/src/inverted_index/InvertedIndex.h
index 1b9dee396a7dbb0aacef50d603dd527b871dd2c3..677715e996d746edde670ff9ea4eb77e3f491095 100644
--- a/querying/src/inverted_index/InvertedIndex.h
+++ b/querying/src/inverted_index/InvertedIndex.h
@@ -3,7 +3,7 @@
 
 
 #include <string>
-#include <vector>
+#include <deque>
 #include "../DocumentWeight.h"
 
 /**
@@ -14,19 +14,16 @@
 class InvertedIndex {
 private:
     //Attributes-------------
-    size_t position;
-    std::string term;/**<Term identifier */
-    std::vector<DocumentWeight> documentWeights; /**<Inverted index list of documents & their weights in which the term appears */
+    std::deque<DocumentWeight> documentWeights; /**<Inverted index list of documents & their weights in which the term appears */
 
 public:
     //Methods-----------------
 
     /**
      * Constructor
-     * @param term Term identifier
      * @param documentWeights Inverted index list of documents in which the term appears, and their weights
      */
-    InvertedIndex(std::string term, std::vector<DocumentWeight> documentWeights);
+    InvertedIndex(std::deque<DocumentWeight> documentWeights);
 
     /**
      * @brief Finds the @ref DocumentWeight object with a given @ref DocumentWeight::ID in @ref documentWeights
@@ -42,8 +39,6 @@ public:
 
     int getLowestID() const;
 
-    void forward(int ID);
-
     const DocumentWeight &operator[](size_t i);
 
 };
diff --git a/querying/src/space/Query.cpp b/querying/src/space/Query.cpp
index 7c3eac28ad0699a4e52ab689f3813bbfb731dc96..6d92bfe3d23fa63449caa41fca3182834f5f2ab7 100644
--- a/querying/src/space/Query.cpp
+++ b/querying/src/space/Query.cpp
@@ -6,5 +6,15 @@
 using namespace std;
 
 
-Query::Query(std::map<std::string, double> terms, const double threshold)
-        : terms(std::move(terms)), threshold(threshold) {}
+Query::Query(std::map<std::string, double> terms, double threshold)
+        : terms(std::move(terms)),
+          // the parameter `terms` was moved from on the line above, so the keyset is built from the member
+          termsKeyset(getKeyset(this->terms)),
+          threshold(threshold) {}
+
+set<string> Query::getKeyset(const map<std::string, double> &terms) {
+    set<string> dummy;
+    for (const auto &term: terms)
+        dummy.emplace(term.first);
+    return dummy;
+}
diff --git a/querying/src/space/Query.h b/querying/src/space/Query.h
index 0b23e779fbda4d6b2fd100a713b0bfa40cb11708..bda25cd2745fee69fe085734ac4ce3ad1a2024af 100644
--- a/querying/src/space/Query.h
+++ b/querying/src/space/Query.h
@@ -3,6 +3,7 @@
 
 
 #include <map>
+#include <set>
 #include <vector>
 #include <string>
 #include "Space.h"
@@ -14,6 +15,7 @@ class Query {
 public:
     //Attributes-------------
     const std::map<std::string, double> terms; /**<Terms and their weights in the query */
+    const std::set<std::string> termsKeyset; /**<Keyset of terms */
     const double threshold; /**<Acceptable result threshold */
 
     //Methods----------------
@@ -22,7 +24,14 @@ public:
      * @param terms Terms and their weights in the query
      * @param threshold Acceptable result threshold
      */
-    explicit Query(std::map<std::string, double> terms, const double threshold);
+    explicit Query(std::map<std::string, double> terms, double threshold);
+
+    /**
+     * @brief Collects the keys of a term map
+     * @param terms Terms and their weights
+     * @return Set holding every key of @p terms
+     */
+    static std::set<std::string> getKeyset(const std::map<std::string, double> &terms);
 
 };
 
diff --git a/querying/src/space/Space.cpp b/querying/src/space/Space.cpp
index 727dfdb3276bfd1393b15456b612009af6c40e44..f1c8fc23f7b391f53569f002710b9a843d69d1ed 100644
--- a/querying/src/space/Space.cpp
+++ b/querying/src/space/Space.cpp
@@ -17,7 +17,7 @@ InvertedIndex &Space::getInvertedIndexByKey(const string &key) {
     return terms.at(key);
 }
 
-void Space::forward(int ID, const std::map<std::string, double> q) {
+void Space::forward(int ID, const std::map<std::string, double> &q) {
     for (const auto &term: q)
         terms.at(term.first).forward(ID);
 }
diff --git a/querying/src/space/Space.h b/querying/src/space/Space.h
index 641355e22a01532165d6362f91bda1ed129f87b2..783bd0b2eed5755d4a5fb649138f58cd735c783b 100644
--- a/querying/src/space/Space.h
+++ b/querying/src/space/Space.h
@@ -32,7 +32,7 @@ public:
 
     const InvertedIndex &operator[](const std::string &key) const;
 
-    void forward(int ID, const std::map<std::string, double> q);
+    void forward(int ID, const std::map<std::string, double> &q);
 
 };
 