diff --git a/querying/CMakeLists.txt b/querying/CMakeLists.txt index 1d665ab9a01a0fd6b8956720e7319addcb4db98c..9114188d3eb1f78b0d88f01ee4e957d672d4045e 100644 --- a/querying/CMakeLists.txt +++ b/querying/CMakeLists.txt @@ -7,6 +7,6 @@ add_subdirectory(lib/SQLiteCpp) SET(CMAKE_CXX_FLAGS "-g -Wall -pedantic -Wextra") -add_executable(main src/main.cpp src/inverted_index/InvertedIndex.cpp src/inverted_index/InvertedIndex.h src/inverted_index/InputParser.cpp src/inverted_index/InputParser.h src/space/Space.cpp src/space/Space.h src/space/Query.cpp src/space/Query.h src/Computor.cpp src/Computor.h src/DocumentWeight.cpp src/DocumentWeight.h src/exceptions/Exceptions.h) +add_executable(main src/main.cpp src/inverted_index/InvertedIndex.cpp src/inverted_index/InvertedIndex.h src/inverted_index/InputParser.cpp src/inverted_index/InputParser.h src/space/Space.cpp src/space/Space.h src/space/Query.cpp src/space/Query.h src/Computor.cpp src/Computor.h src/Document.cpp src/Document.h src/exceptions/Exceptions.h) target_link_libraries(main SQLiteCpp) diff --git a/querying/src/Computor.cpp b/querying/src/Computor.cpp index ebbd268690575abf2bc158a39dfa64fcc480083d..68fffb541a076419e2c9f14547ce8499a1389de8 100644 --- a/querying/src/Computor.cpp +++ b/querying/src/Computor.cpp @@ -8,7 +8,6 @@ using namespace std; Computor::Computor(Space space, Query query) : space(std::move(space)), query(std::move(query)) {} - map<int, double> Computor::compute() { map<int, double> results; @@ -18,10 +17,10 @@ map<int, double> Computor::compute() { int ID = nextID(); //get lowest ID double result = 0; - for (const auto &term: availableTerms) /*Go through all the remaining terms*/ { + for (const auto& term: availableTerms) /*Go through all the remaining terms*/ { try { - double queryWeight = query.terms.at(term); double documentWeight = space.getInvertedIndexByKey(term).getDocumentWeightByID(ID); + double queryWeight = query.terms.at(term); result += documentWeight * queryWeight; } catch (const IDNotFoundException &e) {//inverted index does not contain given ID @@ -31,7 +30,8 @@ map<int, double> Computor::compute() { availableTerms.erase(term); //exhaust term } } - results[ID] = result; + if(result > query.threshold) + results[ID] = result; } return results; @@ -40,8 +40,8 @@ map<int, double> Computor::compute() { int Computor::nextID() { int lowestID = INT_MAX; - for (const auto &queryRecord: query.terms) - lowestID = min(space[queryRecord.first].getLowestID(), lowestID); + for (const auto &term: availableTerms) + lowestID = min(space[term].getLowestID(), lowestID); return lowestID; } diff --git a/querying/src/Document.cpp b/querying/src/Document.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7bcebb284ffd9732ac505b23f0de34815b6ee7bb --- /dev/null +++ b/querying/src/Document.cpp @@ -0,0 +1,16 @@ +// +// Created by tomas on 3/24/20. +// + +#include "Document.h" + +Document::Document(const int id, const double weight) : ID(id), weight(weight) {} + +int Document::getID() const { + return ID; +} + +double Document::getWeight() const { + return weight; +} + diff --git a/querying/src/Document.h b/querying/src/Document.h new file mode 100644 index 0000000000000000000000000000000000000000..aabf0d16c69c83364b0817366ab1e64d0039b82f --- /dev/null +++ b/querying/src/Document.h @@ -0,0 +1,26 @@ +// +// Created by tomas on 3/24/20. +// + +#ifndef QUERYING_DOCUMENT_H +#define QUERYING_DOCUMENT_H + + +class Document { +private: + int ID; + double weight; + +public: + Document(int id, double weight); + + Document() = default; + + int getID() const; + + double getWeight() const; + +}; + + +#endif //QUERYING_DOCUMENT_H diff --git a/querying/src/DocumentWeight.cpp b/querying/src/DocumentWeight.cpp deleted file mode 100644 index 198ea45e233dad3b92553bfae4877671348b427e..0000000000000000000000000000000000000000 --- a/querying/src/DocumentWeight.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// -// Created by tomas on 3/24/20. -// - -#include "DocumentWeight.h" - -DocumentWeight::DocumentWeight(const int id, const double weight) : ID(id), weight(weight) {} - -int DocumentWeight::getID() const { - return ID; -} - -double DocumentWeight::getWeight() const { - return weight; -} - diff --git a/querying/src/DocumentWeight.h b/querying/src/DocumentWeight.h deleted file mode 100644 index 299bab014aa731e538c19a57378a957e1bf7a9fe..0000000000000000000000000000000000000000 --- a/querying/src/DocumentWeight.h +++ /dev/null @@ -1,23 +0,0 @@ -// -// Created by tomas on 3/24/20. -// - -#ifndef QUERYING_DOCUMENTWEIGHT_H -#define QUERYING_DOCUMENTWEIGHT_H - - -class DocumentWeight { -private: - int ID; - double weight; - -public: - DocumentWeight(int id, double weight); - - int getID() const; - - double getWeight() const; -}; - - -#endif //QUERYING_DOCUMENTWEIGHT_H diff --git a/querying/src/inverted_index/InputParser.cpp b/querying/src/inverted_index/InputParser.cpp index a26ac60b18ea05033c32d15f863822efa55b1b26..b885d4f3a11a86a9659b17ba9b80df5eb1d65d34 100644 --- a/querying/src/inverted_index/InputParser.cpp +++ b/querying/src/inverted_index/InputParser.cpp @@ -5,6 +5,7 @@ #include "InputParser.h" #include <utility> +#include <deque> #include "../../lib/json.hpp" using namespace std; @@ -17,7 +18,7 @@ map<string, InvertedIndex> InputParser::getInvertedIndices() { json root = loadJsonFromFile(); map<string, InvertedIndex> invertedIndices; for (const auto &[term, documentIDs]: root.items()) { - vector<DocumentWeight> weights; + deque<Document> weights; for (const auto &[documentID, weight] : documentIDs.items()) weights.emplace_back(stoi(documentID), weight); diff --git a/querying/src/inverted_index/InvertedIndex.cpp b/querying/src/inverted_index/InvertedIndex.cpp index ed048ee74cfecbafe9b4f5f1ac53d53db3d3832c..c9af044b9159eda31df59d03e9312d6b640af4f6 100644 --- a/querying/src/inverted_index/InvertedIndex.cpp +++ b/querying/src/inverted_index/InvertedIndex.cpp @@ -4,50 +4,34 @@ using namespace std; -InvertedIndex::InvertedIndex(deque<DocumentWeight> documentWeights) - : documentWeights(move(documentWeights)) {} - +InvertedIndex::InvertedIndex(deque<Document> documentWeights) + : documents(move(documentWeights)) {} double InvertedIndex::getDocumentWeightByID(int ID) { + Document document{}; - if (position + 1 == documentWeights.size()) + if (documents.empty()) throw EndOfIndexException(); - - while (documentWeights.front().getID() != ID) { - documentWeights.pop_front(); - } - return documentWeights.front().getID(); - - for (auto it = documentWeights.begin() + position; it != documentWeights.end(); it++) { - int documentID = (*it).getID(); - - if (documentID < ID) + while ((document = documents.front()).getID() != ID) { + if (documents.empty()) //end of the index; index is exhausted + throw EndOfIndexException(); + if (document.getID() > ID) // IDs are sorted asc, i.e. if doc ID is higher, then the ID for is not in index throw IDNotFoundException(); - - if (documentID == ID) //match! - return (*it).getWeight(); + documents.pop_front(); } - throw IDNotFoundException(); + documents.pop_front(); + return document.getWeight(); } -const DocumentWeight &InvertedIndex::operator[](size_t i) { - return documentWeights[i]; +const Document &InvertedIndex::operator[](size_t i) { + return documents[i]; } int InvertedIndex::getLowestID() const { - return documentWeights.front().getID(); + return documents.front().getID(); } -void InvertedIndex::forward(int ID) { - auto it = documentWeights.begin() + position; - while (it != documentWeights.end() && (*it).getID() != ID) { - if (ID < (*it).getID()) - return; - position++; - it++; - } -} diff --git a/querying/src/inverted_index/InvertedIndex.h b/querying/src/inverted_index/InvertedIndex.h index 677715e996d746edde670ff9ea4eb77e3f491095..c682307f30592b2fd45feacc9ecb888b66e9b831 100644 --- a/querying/src/inverted_index/InvertedIndex.h +++ b/querying/src/inverted_index/InvertedIndex.h @@ -4,7 +4,7 @@ #include <string> #include <deque> -#include "../DocumentWeight.h" +#include "../Document.h" /** * @brief Data container of term and its inverted index list @@ -14,7 +14,7 @@ class InvertedIndex { private: //Attributes------------- - std::deque<DocumentWeight> documentWeights; /**<Inverted index list of documents & their weights in which the term appears */ + std::deque<Document> documents; /**<Inverted index list of documents & their weights in which the term appears */ public: @@ -23,23 +23,23 @@ public: * Constructor * @param documentWeights Inverted index list of documents in which the term appears, and their weights */ - InvertedIndex(std::deque<DocumentWeight> documentWeights); + explicit InvertedIndex(std::deque<Document> documentWeights); /** - * @brief Finds the @ref DocumentWeight object with a given @ref DocumentWeight::ID in @ref documentWeights + * @brief Finds the @ref Document object with a given @ref Document::ID in @ref documents * * Given an ID, looks through the container starting at @ref position. - * Assumes that the items are sorted by @ref DocumentWeight::ID, ascending. + * Assumes that the items are sorted by @ref Document::ID, ascending. * - * @param ID @ref DocumentWeight::ID to be found. + * @param ID @ref Document::ID to be found. * @throws IDNotFoundException if no Document is found or it had been processed already - * @return DocumentWeight with @ref DocumentWeight::ID == ID + * @return Document with @ref Document::ID == ID */ double getDocumentWeightByID(int ID); int getLowestID() const; - const DocumentWeight &operator[](size_t i); + const Document &operator[](size_t i); }; diff --git a/querying/src/main.cpp b/querying/src/main.cpp index e1d38790407679bb0da9fdf0d032fa7b20c09299..6356b01ae28cec7f7b2b704ea126dfa31ac6912e 100644 --- a/querying/src/main.cpp +++ b/querying/src/main.cpp @@ -11,9 +11,9 @@ int main() { Space space(InputParser("../../data/persistence/dummy.json").getInvertedIndices()); Query query({ - {"forest", 0.5}, - {"mountain", 0.4}, - {"nature", 0.3}}, 0); + {"forest", 0.2}, + {"mountain", 0.1}, + {"nature", 0.8}}, 0.5); auto res = Computor(space, query).compute(); for (const auto &[id, value]: res) diff --git a/querying/src/space/Query.cpp b/querying/src/space/Query.cpp index 6d92bfe3d23fa63449caa41fca3182834f5f2ab7..fc4a32260666179395a17687162d0c8e1d2f83a9 100644 --- a/querying/src/space/Query.cpp +++ b/querying/src/space/Query.cpp @@ -6,14 +6,14 @@ using namespace std; -Query::Query(std::map<std::string, double> terms, double threshold) - : terms(std::move(terms)), - termsKeyset(getKeyset(terms)), +Query::Query(std::map<std::string, double> t, double threshold) + : termsKeyset(getKeyset(t)), + terms(std::move(t)), threshold(threshold) {} -set<string> Query::getKeyset(const map<std::string, double> &terms) { +set<string> Query::getKeyset(const map<std::string, double> &t) { set<string> dummy; - for (const auto &term: terms) + for (const auto &term: t) dummy.emplace(term.first); return dummy; } diff --git a/querying/src/space/Query.h b/querying/src/space/Query.h index bda25cd2745fee69fe085734ac4ce3ad1a2024af..f724a7088786d5f6385253e0f7a9a326d58d5812 100644 --- a/querying/src/space/Query.h +++ b/querying/src/space/Query.h @@ -14,8 +14,8 @@ class Query { public: //Attributes------------- - const std::map<std::string, double> terms; /**<Terms and their weights in the query */ const std::set<std::string> termsKeyset; /**<Keyset of terms */ + const std::map<std::string, double> terms; /**<Terms and their weights in the query */ const double threshold; /**<Acceptable result threshold */ //Methods---------------- @@ -24,9 +24,9 @@ public: * @param terms Terms and their weights in the query * @param threshold Acceptable result threshold */ - explicit Query(std::map<std::string, double> terms, double threshold); + explicit Query(std::map<std::string, double> t, double threshold); - static std::set<std::string> getKeyset(const std::map<std::string, double> &terms); + static std::set<std::string> getKeyset(const std::map<std::string, double> &t); }; diff --git a/querying/src/space/Space.cpp b/querying/src/space/Space.cpp index f1c8fc23f7b391f53569f002710b9a843d69d1ed..e4bf9b58d9ce89e44f972a19a029c9328b103c79 100644 --- a/querying/src/space/Space.cpp +++ b/querying/src/space/Space.cpp @@ -17,10 +17,5 @@ InvertedIndex &Space::getInvertedIndexByKey(const string &key) { return terms.at(key); } -void Space::forward(int ID, const std::map<std::string, double> &q) { - for (const auto &term: q) - terms.at(term.first).forward(ID); -} -