From 84e3100081609453c0f0032b188e8078a8ee7d60 Mon Sep 17 00:00:00 2001 From: weirdwizardthomas <thomas.koristka@gmail.com> Date: Sat, 28 Mar 2020 13:49:07 +0100 Subject: [PATCH] Finished relevance calculation --- querying/CMakeLists.txt | 2 +- querying/src/Computor.cpp | 12 ++--- querying/src/Document.cpp | 16 +++++++ querying/src/Document.h | 26 +++++++++++ querying/src/DocumentWeight.cpp | 16 ------- querying/src/DocumentWeight.h | 23 ---------- querying/src/inverted_index/InputParser.cpp | 3 +- querying/src/inverted_index/InvertedIndex.cpp | 44 ++++++------------- querying/src/inverted_index/InvertedIndex.h | 16 +++---- querying/src/main.cpp | 6 +-- querying/src/space/Query.cpp | 10 ++--- querying/src/space/Query.h | 6 +-- querying/src/space/Space.cpp | 5 --- 13 files changed, 84 insertions(+), 101 deletions(-) create mode 100644 querying/src/Document.cpp create mode 100644 querying/src/Document.h delete mode 100644 querying/src/DocumentWeight.cpp delete mode 100644 querying/src/DocumentWeight.h diff --git a/querying/CMakeLists.txt b/querying/CMakeLists.txt index 1d665ab..9114188 100644 --- a/querying/CMakeLists.txt +++ b/querying/CMakeLists.txt @@ -7,6 +7,6 @@ add_subdirectory(lib/SQLiteCpp) SET(CMAKE_CXX_FLAGS "-g -Wall -pedantic -Wextra") -add_executable(main src/main.cpp src/inverted_index/InvertedIndex.cpp src/inverted_index/InvertedIndex.h src/inverted_index/InputParser.cpp src/inverted_index/InputParser.h src/space/Space.cpp src/space/Space.h src/space/Query.cpp src/space/Query.h src/Computor.cpp src/Computor.h src/DocumentWeight.cpp src/DocumentWeight.h src/exceptions/Exceptions.h) +add_executable(main src/main.cpp src/inverted_index/InvertedIndex.cpp src/inverted_index/InvertedIndex.h src/inverted_index/InputParser.cpp src/inverted_index/InputParser.h src/space/Space.cpp src/space/Space.h src/space/Query.cpp src/space/Query.h src/Computor.cpp src/Computor.h src/Document.cpp src/Document.h src/exceptions/Exceptions.h) target_link_libraries(main SQLiteCpp) diff --git a/querying/src/Computor.cpp b/querying/src/Computor.cpp index ebbd268..68fffb5 100644 --- a/querying/src/Computor.cpp +++ b/querying/src/Computor.cpp @@ -8,7 +8,6 @@ using namespace std; Computor::Computor(Space space, Query query) : space(std::move(space)), query(std::move(query)) {} - map<int, double> Computor::compute() { map<int, double> results; @@ -18,10 +17,10 @@ map<int, double> Computor::compute() { int ID = nextID(); //get lowest ID double result = 0; - for (const auto &term: availableTerms) /*Go through all the remaining terms*/ { + for (const auto& term: availableTerms) /*Go through all the remaining terms*/ { try { - double queryWeight = query.terms.at(term); double documentWeight = space.getInvertedIndexByKey(term).getDocumentWeightByID(ID); + double queryWeight = query.terms.at(term); result += documentWeight * queryWeight; } catch (const IDNotFoundException &e) {//inverted index does not contain given ID @@ -31,7 +30,8 @@ map<int, double> Computor::compute() { availableTerms.erase(term); //exhaust term } } - results[ID] = result; + if(result > query.threshold) + results[ID] = result; } return results; @@ -40,8 +40,8 @@ map<int, double> Computor::compute() { int Computor::nextID() { int lowestID = INT_MAX; - for (const auto &queryRecord: query.terms) - lowestID = min(space[queryRecord.first].getLowestID(), lowestID); + for (const auto &term: availableTerms) + lowestID = min(space[term].getLowestID(), lowestID); return lowestID; } diff --git a/querying/src/Document.cpp b/querying/src/Document.cpp new file mode 100644 index 0000000..7bcebb2 --- /dev/null +++ b/querying/src/Document.cpp @@ -0,0 +1,16 @@ +// +// Created by tomas on 3/24/20. +// + +#include "Document.h" + +Document::Document(const int id, const double weight) : ID(id), weight(weight) {} + +int Document::getID() const { + return ID; +} + +double Document::getWeight() const { + return weight; +} + diff --git a/querying/src/Document.h b/querying/src/Document.h new file mode 100644 index 0000000..aabf0d1 --- /dev/null +++ b/querying/src/Document.h @@ -0,0 +1,26 @@ +// +// Created by tomas on 3/24/20. +// + +#ifndef QUERYING_DOCUMENT_H +#define QUERYING_DOCUMENT_H + + +class Document { +private: + int ID; + double weight; + +public: + Document(int id, double weight); + + Document() = default; + + int getID() const; + + double getWeight() const; + +}; + + +#endif //QUERYING_DOCUMENT_H diff --git a/querying/src/DocumentWeight.cpp b/querying/src/DocumentWeight.cpp deleted file mode 100644 index 198ea45..0000000 --- a/querying/src/DocumentWeight.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// -// Created by tomas on 3/24/20. -// - -#include "DocumentWeight.h" - -DocumentWeight::DocumentWeight(const int id, const double weight) : ID(id), weight(weight) {} - -int DocumentWeight::getID() const { - return ID; -} - -double DocumentWeight::getWeight() const { - return weight; -} - diff --git a/querying/src/DocumentWeight.h b/querying/src/DocumentWeight.h deleted file mode 100644 index 299bab0..0000000 --- a/querying/src/DocumentWeight.h +++ /dev/null @@ -1,23 +0,0 @@ -// -// Created by tomas on 3/24/20. -// - -#ifndef QUERYING_DOCUMENTWEIGHT_H -#define QUERYING_DOCUMENTWEIGHT_H - - -class DocumentWeight { -private: - int ID; - double weight; - -public: - DocumentWeight(int id, double weight); - - int getID() const; - - double getWeight() const; -}; - - -#endif //QUERYING_DOCUMENTWEIGHT_H diff --git a/querying/src/inverted_index/InputParser.cpp b/querying/src/inverted_index/InputParser.cpp index a26ac60..b885d4f 100644 --- a/querying/src/inverted_index/InputParser.cpp +++ b/querying/src/inverted_index/InputParser.cpp @@ -5,6 +5,7 @@ #include "InputParser.h" #include <utility> +#include <deque> #include "../../lib/json.hpp" using namespace std; @@ -17,7 +18,7 @@ map<string, InvertedIndex> InputParser::getInvertedIndices() { json root = loadJsonFromFile(); map<string, InvertedIndex> invertedIndices; for (const auto &[term, documentIDs]: root.items()) { - vector<DocumentWeight> weights; + deque<Document> weights; for (const auto &[documentID, weight] : documentIDs.items()) weights.emplace_back(stoi(documentID), weight); diff --git a/querying/src/inverted_index/InvertedIndex.cpp b/querying/src/inverted_index/InvertedIndex.cpp index ed048ee..c9af044 100644 --- a/querying/src/inverted_index/InvertedIndex.cpp +++ b/querying/src/inverted_index/InvertedIndex.cpp @@ -4,50 +4,34 @@ using namespace std; -InvertedIndex::InvertedIndex(deque<DocumentWeight> documentWeights) - : documentWeights(move(documentWeights)) {} - +InvertedIndex::InvertedIndex(deque<Document> documentWeights) + : documents(move(documentWeights)) {} double InvertedIndex::getDocumentWeightByID(int ID) { + Document document{}; - if (position + 1 == documentWeights.size()) + if (documents.empty()) throw EndOfIndexException(); - - while (documentWeights.front().getID() != ID) { - documentWeights.pop_front(); - } - return documentWeights.front().getID(); - - for (auto it = documentWeights.begin() + position; it != documentWeights.end(); it++) { - int documentID = (*it).getID(); - - if (documentID < ID) + while ((document = documents.front()).getID() != ID) { + if (documents.empty()) //end of the index; index is exhausted + throw EndOfIndexException(); + if (document.getID() > ID) // IDs are sorted asc, i.e. if doc ID is higher, then the ID for is not in index throw IDNotFoundException(); - - if (documentID == ID) //match! - return (*it).getWeight(); + documents.pop_front(); } - throw IDNotFoundException(); + documents.pop_front(); + return document.getWeight(); } -const DocumentWeight &InvertedIndex::operator[](size_t i) { - return documentWeights[i]; +const Document &InvertedIndex::operator[](size_t i) { + return documents[i]; } int InvertedIndex::getLowestID() const { - return documentWeights.front().getID(); + return documents.front().getID(); } -void InvertedIndex::forward(int ID) { - auto it = documentWeights.begin() + position; - while (it != documentWeights.end() && (*it).getID() != ID) { - if (ID < (*it).getID()) - return; - position++; - it++; - } -} diff --git a/querying/src/inverted_index/InvertedIndex.h b/querying/src/inverted_index/InvertedIndex.h index 677715e..c682307 100644 --- a/querying/src/inverted_index/InvertedIndex.h +++ b/querying/src/inverted_index/InvertedIndex.h @@ -4,7 +4,7 @@ #include <string> #include <deque> -#include "../DocumentWeight.h" +#include "../Document.h" /** * @brief Data container of term and its inverted index list @@ -14,7 +14,7 @@ class InvertedIndex { private: //Attributes------------- - std::deque<DocumentWeight> documentWeights; /**<Inverted index list of documents & their weights in which the term appears */ + std::deque<Document> documents; /**<Inverted index list of documents & their weights in which the term appears */ public: @@ -23,23 +23,23 @@ public: * Constructor * @param documentWeights Inverted index list of documents in which the term appears, and their weights */ - InvertedIndex(std::deque<DocumentWeight> documentWeights); + explicit InvertedIndex(std::deque<Document> documentWeights); /** - * @brief Finds the @ref DocumentWeight object with a given @ref DocumentWeight::ID in @ref documentWeights + * @brief Finds the @ref Document object with a given @ref Document::ID in @ref documents * * Given an ID, looks through the container starting at @ref position. - * Assumes that the items are sorted by @ref DocumentWeight::ID, ascending. + * Assumes that the items are sorted by @ref Document::ID, ascending. * - * @param ID @ref DocumentWeight::ID to be found. + * @param ID @ref Document::ID to be found. * @throws IDNotFoundException if no Document is found or it had been processed already - * @return DocumentWeight with @ref DocumentWeight::ID == ID + * @return Document with @ref Document::ID == ID */ double getDocumentWeightByID(int ID); int getLowestID() const; - const DocumentWeight &operator[](size_t i); + const Document &operator[](size_t i); }; diff --git a/querying/src/main.cpp b/querying/src/main.cpp index e1d3879..6356b01 100644 --- a/querying/src/main.cpp +++ b/querying/src/main.cpp @@ -11,9 +11,9 @@ int main() { Space space(InputParser("../../data/persistence/dummy.json").getInvertedIndices()); Query query({ - {"forest", 0.5}, - {"mountain", 0.4}, - {"nature", 0.3}}, 0); + {"forest", 0.2}, + {"mountain", 0.1}, + {"nature", 0.8}}, 0.5); auto res = Computor(space, query).compute(); for (const auto &[id, value]: res) diff --git a/querying/src/space/Query.cpp b/querying/src/space/Query.cpp index 6d92bfe..fc4a322 100644 --- a/querying/src/space/Query.cpp +++ b/querying/src/space/Query.cpp @@ -6,14 +6,14 @@ using namespace std; -Query::Query(std::map<std::string, double> terms, double threshold) - : terms(std::move(terms)), - termsKeyset(getKeyset(terms)), +Query::Query(std::map<std::string, double> t, double threshold) + : termsKeyset(getKeyset(t)), + terms(std::move(t)), threshold(threshold) {} -set<string> Query::getKeyset(const map<std::string, double> &terms) { +set<string> Query::getKeyset(const map<std::string, double> &t) { set<string> dummy; - for (const auto &term: terms) + for (const auto &term: t) dummy.emplace(term.first); return dummy; } diff --git a/querying/src/space/Query.h b/querying/src/space/Query.h index bda25cd..f724a70 100644 --- a/querying/src/space/Query.h +++ b/querying/src/space/Query.h @@ -14,8 +14,8 @@ class Query { public: //Attributes------------- - const std::map<std::string, double> terms; /**<Terms and their weights in the query */ const std::set<std::string> termsKeyset; /**<Keyset of terms */ + const std::map<std::string, double> terms; /**<Terms and their weights in the query */ const double threshold; /**<Acceptable result threshold */ //Methods---------------- @@ -24,9 +24,9 @@ public: * @param terms Terms and their weights in the query * @param threshold Acceptable result threshold */ - explicit Query(std::map<std::string, double> terms, double threshold); + explicit Query(std::map<std::string, double> t, double threshold); - static std::set<std::string> getKeyset(const std::map<std::string, double> &terms); + static std::set<std::string> getKeyset(const std::map<std::string, double> &t); }; diff --git a/querying/src/space/Space.cpp b/querying/src/space/Space.cpp index f1c8fc2..e4bf9b5 100644 --- a/querying/src/space/Space.cpp +++ b/querying/src/space/Space.cpp @@ -17,10 +17,5 @@ InvertedIndex &Space::getInvertedIndexByKey(const string &key) { return terms.at(key); } -void Space::forward(int ID, const std::map<std::string, double> &q) { - for (const auto &term: q) - terms.at(term.first).forward(ID); -} - -- GitLab