diff --git a/querying/CMakeLists.txt b/querying/CMakeLists.txt index 9114188d3eb1f78b0d88f01ee4e957d672d4045e..c56e82a1fd6a7b765951d410e467695454eead95 100644 --- a/querying/CMakeLists.txt +++ b/querying/CMakeLists.txt @@ -7,6 +7,6 @@ add_subdirectory(lib/SQLiteCpp) SET(CMAKE_CXX_FLAGS "-g -Wall -pedantic -Wextra") -add_executable(main src/main.cpp src/inverted_index/InvertedIndex.cpp src/inverted_index/InvertedIndex.h src/inverted_index/InputParser.cpp src/inverted_index/InputParser.h src/space/Space.cpp src/space/Space.h src/space/Query.cpp src/space/Query.h src/Computor.cpp src/Computor.h src/Document.cpp src/Document.h src/exceptions/Exceptions.h) +add_executable(main src/main.cpp src/inverted_index/InvertedIndex.cpp src/inverted_index/InvertedIndex.h src/inverted_index/InvertedIndexJSONParser.cpp src/inverted_index/InvertedIndexJSONParser.h src/space/Space.cpp src/space/Space.h src/space/Query.cpp src/space/Query.h src/Computor.cpp src/Computor.h src/Document.cpp src/Document.h src/exceptions/Exceptions.h) target_link_libraries(main SQLiteCpp) diff --git a/querying/src/Computor.cpp b/querying/src/Computor.cpp index 68fffb541a076419e2c9f14547ce8499a1389de8..fce9f8d28bf771c9f1ee194627c6f443d886b20d 100644 --- a/querying/src/Computor.cpp +++ b/querying/src/Computor.cpp @@ -6,7 +6,9 @@ using namespace std; -Computor::Computor(Space space, Query query) : space(std::move(space)), query(std::move(query)) {} +Computor::Computor(Space space, Query query) + : space(std::move(space)), + query(std::move(query)) {} map<int, double> Computor::compute() { map<int, double> results; @@ -17,7 +19,7 @@ map<int, double> Computor::compute() { int ID = nextID(); //get lowest ID double result = 0; - for (const auto& term: availableTerms) /*Go through all the remaining terms*/ { + for (const auto &term: availableTerms) /*Go through all the remaining terms*/ { try { double documentWeight = space.getInvertedIndexByKey(term).getDocumentWeightByID(ID); double queryWeight = query.terms.at(term); @@ -30,7 +32,7 @@ map<int, double> Computor::compute() { availableTerms.erase(term); //exhaust term } } - if(result > query.threshold) + if (result > query.threshold) //filter out irrelevant documents results[ID] = result; } @@ -41,7 +43,7 @@ int Computor::nextID() { int lowestID = INT_MAX; for (const auto &term: availableTerms) - lowestID = min(space[term].getLowestID(), lowestID); + lowestID = min(space[term].getNextID(), lowestID); return lowestID; } diff --git a/querying/src/Computor.h b/querying/src/Computor.h index 9a9fa8620b424f2c6bbe677ca7bd9cfdf19b52d4..d9c358e623c9d234b4980222fd14463900e175c2 100644 --- a/querying/src/Computor.h +++ b/querying/src/Computor.h @@ -9,21 +9,37 @@ #include "space/Space.h" #include "space/Query.h" +/** + * @brief A class that encompasses the calculation of a document's relevancy to the query + * + * @author koristo1@fit.cvut.cz + */ class Computor { private: //Attributes-------------- - Space space; - const Query query; - std::set<std::string> availableTerms; + Space space; /**< Term space */ + std::set<std::string> availableTerms; /**< Non exhausted terms of the query */ //Methods----------------- + /** + * Finds the next ID for computation + * @return next eligible ID in @ref Space::terms + */ int nextID(); - double computeForDocument(int ID); - public: + //Attributes-------------- + const Query query;/**< Query against the space */ + + //Methods----------------- Computor(Space space, Query query); + /** + * @brief Computes relevancies of documents to the query + * + * Filters relevancies that are below @ref Query::threshold + * @return Map of document IDs and their relevancies to the query + */ std::map<int, double> compute(); }; diff --git a/querying/src/Document.cpp b/querying/src/Document.cpp index 7bcebb284ffd9732ac505b23f0de34815b6ee7bb..6fa1336a2a82ae75c69029ae1f935fc087323936 100644 --- a/querying/src/Document.cpp +++ b/querying/src/Document.cpp @@ -4,7 +4,8 @@ #include "Document.h" -Document::Document(const int id, const double weight) : ID(id), weight(weight) {} +Document::Document(int id, double weight) + : ID(id), weight(weight) {} int Document::getID() const { return ID; diff --git a/querying/src/Document.h b/querying/src/Document.h index aabf0d16c69c83364b0817366ab1e64d0039b82f..9b5f6d7e7fdf53c98f9f87ab22dc8fe23d06a84f 100644 --- a/querying/src/Document.h +++ b/querying/src/Document.h @@ -5,13 +5,19 @@ #ifndef QUERYING_DOCUMENT_H #define QUERYING_DOCUMENT_H - +/** + * @brief A data class of document's ID and it's weight for a given term + * + * @author koristo1@fit.cvut.cz + */ class Document { private: - int ID; - double weight; + //Attributes------------- + int ID; /**< Document's database ID */ + double weight; /**< Weight of the document for a given term */ public: + //Methods--------------- Document(int id, double weight); Document() = default; @@ -19,7 +25,6 @@ public: int getID() const; double getWeight() const; - }; diff --git a/querying/src/inverted_index/InvertedIndex.cpp b/querying/src/inverted_index/InvertedIndex.cpp index c9af044b9159eda31df59d03e9312d6b640af4f6..feb63b2488216652152e4bd1b4b95a9fa8132bb3 100644 --- a/querying/src/inverted_index/InvertedIndex.cpp +++ b/querying/src/inverted_index/InvertedIndex.cpp @@ -4,8 +4,8 @@ using namespace std; -InvertedIndex::InvertedIndex(deque<Document> documentWeights) - : documents(move(documentWeights)) {} +InvertedIndex::InvertedIndex(deque<Document> documents) + : documents(move(documents)) {} double InvertedIndex::getDocumentWeightByID(int ID) { Document document{}; @@ -25,11 +25,11 @@ double InvertedIndex::getDocumentWeightByID(int ID) { return document.getWeight(); } -const Document &InvertedIndex::operator[](size_t i) { - return documents[i]; +const Document &InvertedIndex::operator[](size_t index) { + return documents[index]; } -int InvertedIndex::getLowestID() const { +int InvertedIndex::getNextID() const { return documents.front().getID(); } diff --git a/querying/src/inverted_index/InvertedIndex.h b/querying/src/inverted_index/InvertedIndex.h index c682307f30592b2fd45feacc9ecb888b66e9b831..03a38008a257de1fe8c9ccbde411586b20452db1 100644 --- a/querying/src/inverted_index/InvertedIndex.h +++ b/querying/src/inverted_index/InvertedIndex.h @@ -7,7 +7,7 @@ #include "../Document.h" /** - * @brief Data container of term and its inverted index list + * @brief A data class of a term's inverted index list * * @author koristo1@fit.cvut.cz */ @@ -17,13 +17,12 @@ private: std::deque<Document> documents; /**<Inverted index list of documents & their weights in which the term appears */ public: - //Methods----------------- /** * Constructor - * @param documentWeights Inverted index list of documents in which the term appears, and their weights + * @param documents Inverted index list of documents in which the term appears, and their weights */ - explicit InvertedIndex(std::deque<Document> documentWeights); + explicit InvertedIndex(std::deque<Document> documents); /** * @brief Finds the @ref Document object with a given @ref Document::ID in @ref documents @@ -37,9 +36,18 @@ public: */ double getDocumentWeightByID(int ID); - int getLowestID() const; + /** + * @brief Returns the lowest (=first) document ID in documents + * @return Lowest ID in @ref InvertedIndex::documents + */ + int getNextID() const; - const Document &operator[](size_t i); + /** + * @brief Returns a @ref Document at @ref index position in @ref InvertedIndex::documents + * @param index of the element to return + * @return Document at @ref index + */ + const Document &operator[](size_t index); }; diff --git a/querying/src/inverted_index/InputParser.cpp b/querying/src/inverted_index/InvertedIndexJSONParser.cpp similarity index 74% rename from querying/src/inverted_index/InputParser.cpp rename to querying/src/inverted_index/InvertedIndexJSONParser.cpp index b885d4f3a11a86a9659b17ba9b80df5eb1d65d34..2ce067db5d85c6d8b527a27851ebadfaa82d93f4 100644 --- a/querying/src/inverted_index/InputParser.cpp +++ b/querying/src/inverted_index/InvertedIndexJSONParser.cpp @@ -2,7 +2,7 @@ // Created by tomas on 3/23/20. // -#include "InputParser.h" +#include "InvertedIndexJSONParser.h" #include <utility> #include <deque> @@ -12,11 +12,13 @@ using namespace std; using json = nlohmann::json; -InputParser::InputParser(string filePath) : filePath(move(filePath)) {} +InvertedIndexJSONParser::InvertedIndexJSONParser(string filePath) + : filePath(move(filePath)) {} -map<string, InvertedIndex> InputParser::getInvertedIndices() { +map<string, InvertedIndex> InvertedIndexJSONParser::parse() { json root = loadJsonFromFile(); map<string, InvertedIndex> invertedIndices; + for (const auto &[term, documentIDs]: root.items()) { deque<Document> weights; @@ -24,7 +26,7 @@ map<string, InvertedIndex> InputParser::getInvertedIndices() { weights.emplace_back(stoi(documentID), weight); sort(weights.begin(), weights.end(), - [](const auto &a, const auto &b) { return a.getID() < b.getID(); }); + [](const auto &a, const auto &b) { return a.getID() < b.getID(); }); //sort according to document ID invertedIndices.emplace(term, InvertedIndex(weights)); } @@ -32,7 +34,7 @@ map<string, InvertedIndex> InputParser::getInvertedIndices() { return invertedIndices; } -json InputParser::loadJsonFromFile() { +json InvertedIndexJSONParser::loadJsonFromFile() { fileStream = ifstream(filePath); if (fileStream.fail()) diff --git a/querying/src/inverted_index/InputParser.h b/querying/src/inverted_index/InvertedIndexJSONParser.h similarity index 72% rename from querying/src/inverted_index/InputParser.h rename to querying/src/inverted_index/InvertedIndexJSONParser.h index 5dd8af948eccd2fc1d1f94686495b1cb6f50b83f..fc6596a964e79e69620db90454fb1aeea4f753a6 100644 --- a/querying/src/inverted_index/InputParser.h +++ b/querying/src/inverted_index/InvertedIndexJSONParser.h @@ -1,5 +1,5 @@ -#ifndef QUERYING_INPUTPARSER_H -#define QUERYING_INPUTPARSER_H +#ifndef QUERYING_INVERTEDINDEXJSONPARSER_H +#define QUERYING_INVERTEDINDEXJSONPARSER_H #include <fstream> @@ -9,12 +9,11 @@ #include "InvertedIndex.h" /** - * @brief Class that handles loading a JSON file of inverted indices and parses it + * @brief A class that loads a JSON file of inverted indices and parses it * * @author koristo1@fit.cvut.cz - * */ -class InputParser { +class InvertedIndexJSONParser { private: //Attributes------------- std::ifstream fileStream; /**<File stream of the read JSON file*/ @@ -34,15 +33,15 @@ public: * Constructor * @param filePath Path of the JSON file to be processed */ - explicit InputParser(std::string filePath); + explicit InvertedIndexJSONParser(std::string filePath); /** * Loads and parses a JSON file specified by @ref filePath * @return Parsed data of the JSON file, * where key = term name, value = inverted index list of the term */ - std::map<std::string, InvertedIndex> getInvertedIndices(); + std::map<std::string, InvertedIndex> parse(); }; -#endif //QUERYING_INPUTPARSER_H +#endif //QUERYING_INVERTEDINDEXJSONPARSER_H diff --git a/querying/src/main.cpp b/querying/src/main.cpp index 6356b01ae28cec7f7b2b704ea126dfa31ac6912e..bd470eb32ecdf642b86bf23540945de79e685be2 100644 --- a/querying/src/main.cpp +++ b/querying/src/main.cpp @@ -1,6 +1,6 @@ #include <iostream> -#include "inverted_index/InputParser.h" +#include "inverted_index/InvertedIndexJSONParser.h" #include "space/Query.h" #include "space/Space.h" #include "Computor.h" @@ -8,7 +8,7 @@ using namespace std; int main() { - Space space(InputParser("../../data/persistence/dummy.json").getInvertedIndices()); + Space space(InvertedIndexJSONParser("../../data/persistence/dummy.json").parse()); Query query({ {"forest", 0.2}, diff --git a/querying/src/space/Query.cpp b/querying/src/space/Query.cpp index fc4a32260666179395a17687162d0c8e1d2f83a9..c282ecc7a9b6d81bae2858602ba9058f2f5c8d3f 100644 --- a/querying/src/space/Query.cpp +++ b/querying/src/space/Query.cpp @@ -7,11 +7,11 @@ using namespace std; Query::Query(std::map<std::string, double> t, double threshold) - : termsKeyset(getKeyset(t)), + : termsKeyset(createKeyset(t)), terms(std::move(t)), threshold(threshold) {} -set<string> Query::getKeyset(const map<std::string, double> &t) { +set<string> Query::createKeyset(const map<std::string, double> &t) { set<string> dummy; for (const auto &term: t) dummy.emplace(term.first); diff --git a/querying/src/space/Query.h b/querying/src/space/Query.h index f724a7088786d5f6385253e0f7a9a326d58d5812..d73589bd910b202b00070e7652f955343099031c 100644 --- a/querying/src/space/Query.h +++ b/querying/src/space/Query.h @@ -9,7 +9,9 @@ #include "Space.h" /** - * @brief A class representing user query against the collection + * @brief A data class representing user query against the collection + * + * @author koristo1@fit.cvut.cz */ class Query { public: @@ -26,7 +28,12 @@ public: */ explicit Query(std::map<std::string, double> t, double threshold); - static std::set<std::string> getKeyset(const std::map<std::string, double> &t); + /** + * Extracts the keyset from the @ref t map + * @param t A map from which the keyset is to extract + * @return keys of @ref t + */ + static std::set<std::string> createKeyset(const std::map<std::string, double> &t); }; diff --git a/querying/src/space/Space.cpp b/querying/src/space/Space.cpp index e4bf9b58d9ce89e44f972a19a029c9328b103c79..df868b3e881e0b6df21ab0108f24d680bac3b642 100644 --- a/querying/src/space/Space.cpp +++ b/querying/src/space/Space.cpp @@ -6,7 +6,8 @@ using namespace std; -Space::Space(map<string, InvertedIndex> terms) : terms(move(terms)) {} +Space::Space(map<string, InvertedIndex> terms) + : terms(move(terms)) {} const InvertedIndex &Space::operator[](const string &key) const { diff --git a/querying/src/space/Space.h b/querying/src/space/Space.h index 783bd0b2eed5755d4a5fb649138f58cd735c783b..a42675a0a1612645b2027b0d0e42122e25ce3613 100644 --- a/querying/src/space/Space.h +++ b/querying/src/space/Space.h @@ -20,19 +20,26 @@ private: std::map<std::string, InvertedIndex> terms; /**<Terms and their inverted indices in the collection*/ public: - //Methods---------------- /** * Constructor - * @param terms Terms and their inverted indices in the collection + * @param terms Terms defining the space; and their inverted indices */ explicit Space(std::map<std::string, InvertedIndex> terms); + /** + * @brief Gets an element from @ref Space::terms with key @ref key + * @param key Key of the element to find + * @return Value of @ref Space::terms at @ref key + */ InvertedIndex &getInvertedIndexByKey(const std::string &key); + /** + * @brief Gets an element from @ref Space::terms with key @ref key + * @param key Key of the element to find + * @return Value of @ref Space::terms at @ref key + */ const InvertedIndex &operator[](const std::string &key) const; - - void forward(int ID, const std::map<std::string, double> &q); };