From 16b2c1bdb1e209c55f24ffee2077eadc9a58a19b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Paukert?= <paukeluk@fit.cvut.cz> Date: Tue, 7 Apr 2020 21:28:31 +0200 Subject: [PATCH] Small refactoring --- querying/CMakeLists.txt | 3 +- querying/src/calculation/Computor.cpp | 4 +- querying/src/calculation/Computor.h | 9 ++--- querying/src/calculation/Space.cpp | 4 +- querying/src/calculation/Space.h | 6 +-- querying/src/database/Database.cpp | 38 +++++++++++++++++++ .../{DocumentCollection.h => Database.h} | 23 ++++++----- querying/src/database/Document.cpp | 3 +- querying/src/database/Document.h | 7 +--- querying/src/database/DocumentCollection.cpp | 26 ------------- querying/src/database/Terms.cpp | 17 --------- querying/src/database/Terms.h | 23 ----------- querying/src/main.cpp | 14 +------ querying/src/ui_wt/MainPage.cpp | 22 +++++------ querying/src/ui_wt/MainPage.h | 2 +- 15 files changed, 79 insertions(+), 122 deletions(-) create mode 100644 querying/src/database/Database.cpp rename querying/src/database/{DocumentCollection.h => Database.h} (50%) delete mode 100644 querying/src/database/DocumentCollection.cpp delete mode 100644 querying/src/database/Terms.cpp delete mode 100644 querying/src/database/Terms.h diff --git a/querying/CMakeLists.txt b/querying/CMakeLists.txt index edd3757..4b101ac 100644 --- a/querying/CMakeLists.txt +++ b/querying/CMakeLists.txt @@ -45,8 +45,7 @@ set(SOURCES src/calculation/WeightedDocument.cpp src/calculation/WeightedDocument.h src/util/QueryJSONParser.cpp src/util/QueryJSONParser.h src/exceptions/Exceptions.h - src/database/Terms.cpp src/database/Terms.h - src/database/DocumentCollection.cpp src/database/DocumentCollection.h + src/database/Database.cpp src/database/Database.h src/util/ArgumentParser.cpp src/util/ArgumentParser.h src/database/Document.cpp src/database/Document.h) diff --git a/querying/src/calculation/Computor.cpp b/querying/src/calculation/Computor.cpp index eb40b78..e005c66 100644 --- a/querying/src/calculation/Computor.cpp +++ b/querying/src/calculation/Computor.cpp @@ -13,9 +13,9 @@ Computor::Computor(Space space, Query query) : space(std::move(space)), query(std::move(query)) {} -vector<pair<int, double>> Computor::compute(Terms & collection, int document_id) { +vector<pair<int, double>> Computor::compute(Database & database, int document_id) { vector<pair<int, double>> results; - map<string, double> currentDocument = space.getTermsAndWeightsByID(collection, document_id); + map<string, double> currentDocument = space.getTermsAndWeightsByID(database, document_id); availableTerms = query.termsKeyset; diff --git a/querying/src/calculation/Computor.h b/querying/src/calculation/Computor.h index 292cad5..41896dd 100644 --- a/querying/src/calculation/Computor.h +++ b/querying/src/calculation/Computor.h @@ -9,7 +9,7 @@ #include "Space.h" #include "Query.h" -#include "./../database/Terms.h" +#include "./../database/Database.h" /** * @brief A class that encompasses the calculation of a document's relevancy to the query @@ -37,12 +37,11 @@ public: Computor(Space space, Query query); /** - * @brief Computes relevancies of documents to the query - * - * Filters relevancies that are below @ref Query::threshold + * @brief Computes relevancies of documents to the query, from results removes documents that have relevancies below @ref Query::threshold + * @param database, document_id Database connection and ID of current document (query) * @return Vector of pairs with document IDs and their similarity to the query, sorted by similarity desc */ - std::vector<std::pair<int, double>> compute(Terms & collection, int document_id); + std::vector<std::pair<int, double>> compute(Database & database, int document_id); }; diff --git a/querying/src/calculation/Space.cpp b/querying/src/calculation/Space.cpp index ddfb1a6..b7ea969 100644 --- a/querying/src/calculation/Space.cpp +++ b/querying/src/calculation/Space.cpp @@ -18,9 +18,9 @@ InvertedIndex &Space::getInvertedIndexByKey(const string &key) { return terms.at(key); } -const map<string, double> Space::getTermsAndWeightsByID(Terms & collection, int document_id) { +const map<string, double> Space::getTermsAndWeightsByID(Database & database, int document_id) { map<string, double> terms; - vector<string> dummy = collection.getTermsByDocumentID(document_id); + vector<string> dummy = database.getTermsByDocumentID(document_id); for (string term : dummy) { InvertedIndex tmp = getInvertedIndexByKey(term); diff --git a/querying/src/calculation/Space.h b/querying/src/calculation/Space.h index 2e62ee7..d3e643a 100644 --- a/querying/src/calculation/Space.h +++ b/querying/src/calculation/Space.h @@ -8,7 +8,7 @@ #include "InvertedIndex.h" #include "Query.h" -#include "./../database/Terms.h" +#include "./../database/Database.h" /** * @brief A class representing the vector space of the collection @@ -37,10 +37,10 @@ public: /** * @brief Finds all terms in DB which occurs in specific document - * @param collection, document_id Instance of Terms class with DB connection and document_id to process + * @param database, document_id Instance of DB connection and document_id to process * @return Map with terms as keys and weights as their values */ - const std::map<std::string, double> getTermsAndWeightsByID(Terms & collection, int document_id); + const std::map<std::string, double> getTermsAndWeightsByID(Database & database, int document_id); /** * @brief Gets an element from @ref Space::terms with key @ref key diff --git a/querying/src/database/Database.cpp b/querying/src/database/Database.cpp new file mode 100644 index 0000000..6aa7615 --- /dev/null +++ b/querying/src/database/Database.cpp @@ -0,0 +1,38 @@ +#include "Database.h" + +using namespace std; + +Database::Database(const string &path) + : db(path) {} + +vector<Document> Database::getDocumentsCollection() { + SQLite::Statement query(db, "SELECT id, filename FROM Document"); + + vector<Document> result; + + while (query.executeStep()) + result.emplace_back(query.getColumn("id"), query.getColumn("filename")); + + return result; +} + +Document Database::getDocumentByID(int id) { + SQLite::Statement query(db, "SELECT filename FROM Document WHERE id = :document_id"); + query.bind(":document_id", id); + query.executeStep(); + + return {id, query.getColumn("filename")}; +} + +vector<string> Database::getTermsByDocumentID(int document_id) { + vector<string> terms; + SQLite::Statement query(db, "SELECT Term.value FROM Term " + "JOIN TermDocumentOccurrence ON Term.id = TermDocumentOccurrence.Term_id " + "WHERE TermDocumentOccurrence.Document_id = :id"); + query.bind(":id", document_id); + + while(query.executeStep()) + terms.emplace_back(query.getColumn("value")); + + return terms; +} diff --git a/querying/src/database/DocumentCollection.h b/querying/src/database/Database.h similarity index 50% rename from querying/src/database/DocumentCollection.h rename to querying/src/database/Database.h index e6c6b56..cfb1d94 100644 --- a/querying/src/database/DocumentCollection.h +++ b/querying/src/database/Database.h @@ -1,27 +1,26 @@ -#ifndef QUERYING_DOCUMENTCOLLECTION_H -#define QUERYING_DOCUMENTCOLLECTION_H +#pragma once +#include <SQLiteCpp/SQLiteCpp.h> #include <vector> #include <string> -#include <lib/SQLiteCpp/include/SQLiteCpp/Database.h> #include "Document.h" /** - * A class that handles fetching Document rows from the Document table + * A class that handles fetching data from database */ -class DocumentCollection { +class Database { private: SQLite::Database db; /**< Database connection */ public: - explicit DocumentCollection(const std::string &path); + explicit Database(const std::string &path); /** * Fetches the entire table Document * @return Vector with all rows of table Document, sorted by ID */ - std::vector<Document> fetchCollection(); + std::vector<Document> getDocumentsCollection(); /** * Fetches a single Document row with a specific ID @@ -29,7 +28,11 @@ public: * @return Document row with ID equal to ID */ Document getDocumentByID(int id); -}; - -#endif //QUERYING_DOCUMENTCOLLECTION_H + /** + * @brief Finds all terms from specified document + * @param document_id to process + * @return Vector with strings which are in specified document + */ + std::vector<std::string> getTermsByDocumentID(int document_id); +}; diff --git a/querying/src/database/Document.cpp b/querying/src/database/Document.cpp index ee36d40..9885846 100644 --- a/querying/src/database/Document.cpp +++ b/querying/src/database/Document.cpp @@ -4,4 +4,5 @@ using namespace std; -Document::Document(const int id, string name) : id(id), name(move(name)) {} +Document::Document(int id, string name) + : id(id), name(move(name)) {} diff --git a/querying/src/database/Document.h b/querying/src/database/Document.h index bf0ac9e..80cad73 100644 --- a/querying/src/database/Document.h +++ b/querying/src/database/Document.h @@ -1,6 +1,4 @@ -#ifndef QUERYING_DOCUMENT_H -#define QUERYING_DOCUMENT_H - +#pragma once #include <string> @@ -16,6 +14,3 @@ public: Document(int id, std::string name); }; - - -#endif //QUERYING_DOCUMENT_H diff --git a/querying/src/database/DocumentCollection.cpp b/querying/src/database/DocumentCollection.cpp deleted file mode 100644 index 9406a4c..0000000 --- a/querying/src/database/DocumentCollection.cpp +++ /dev/null @@ -1,26 +0,0 @@ -#include "DocumentCollection.h" -#include <SQLiteCpp/SQLiteCpp.h> - -using namespace std; - -DocumentCollection::DocumentCollection(const string &path) - : db(path) {} - -std::vector<Document> DocumentCollection::fetchCollection() { - SQLite::Statement query(db, "SELECT id, filename FROM Document"); - - vector<Document> result; - - while (query.executeStep()) - result.emplace_back(query.getColumn("id"), query.getColumn("filename")); - - return result; -} - -Document DocumentCollection::getDocumentByID(int id) { - SQLite::Statement query(db, "SELECT filename FROM Document WHERE id = :document_id"); - query.bind(":document_id", id); - query.executeStep(); - - return {id, query.getColumn("filename")}; -} diff --git a/querying/src/database/Terms.cpp b/querying/src/database/Terms.cpp deleted file mode 100644 index 0efff72..0000000 --- a/querying/src/database/Terms.cpp +++ /dev/null @@ -1,17 +0,0 @@ -#include "Terms.h" - -Terms::Terms(const std::string &path) - : db(path) {} - -std::vector<std::string> Terms::getTermsByDocumentID(int document_id) { - std::vector<std::string> terms; - SQLite::Statement query(db, "SELECT Term.value FROM Term " - "JOIN TermDocumentOccurrence ON Term.id = TermDocumentOccurrence.Term_id " - "WHERE TermDocumentOccurrence.Document_id = :id"); - query.bind(":id", document_id); - - while(query.executeStep()) - terms.emplace_back(query.getColumn("value")); - - return terms; -} diff --git a/querying/src/database/Terms.h b/querying/src/database/Terms.h deleted file mode 100644 index 43bff9d..0000000 --- a/querying/src/database/Terms.h +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once - -#include <SQLiteCpp/SQLiteCpp.h> -#include <vector> -#include <string> - -/** - * A class that handles fetching Terms from the TermDocumentOccurrence table - */ -class Terms { -private: - SQLite::Database db; /**< Database connection */ - -public: - explicit Terms(const std::string &path); - - /** - * @brief Finds all terms from specified document - * @param document_id to process - * @return Vector with strings which are in specified document - */ - std::vector<std::string> getTermsByDocumentID(int document_id); -}; diff --git a/querying/src/main.cpp b/querying/src/main.cpp index 1d1429b..b8d1949 100644 --- a/querying/src/main.cpp +++ b/querying/src/main.cpp @@ -1,7 +1,6 @@ #include <iostream> #include <map> -//#include <QApplication> #include <src/util/ArgumentParser.h> #include "calculation/Query.h" @@ -9,9 +8,8 @@ #include "calculation/Computor.h" #include "util/QueryJSONParser.h" #include "util/InvertedIndexJSONParser.h" -//#include "src/ui/forms/mainform.h" #include "src/ui_wt/MainPage.h" -#include "src/database/DocumentCollection.h" +#include "src/database/Database.h" #include "src/database/Document.h" using namespace std; @@ -50,16 +48,6 @@ int main(int argc, char *argv[]) { DocumentCollection collection(argumentParser.getDatabasePath()); auto availableDocuments = collection.fetchCollection(); - //Start UI - QApplication application(argc, argv); - - MainForm mainForm; - mainForm - .setAvailableDocuments(availableDocuments) - .setOpenedDocument(availableDocuments.front()); - mainForm.show(); - - return QApplication::exec(); */ } diff --git a/querying/src/ui_wt/MainPage.cpp b/querying/src/ui_wt/MainPage.cpp index eb3a07a..c9c4fd2 100644 --- a/querying/src/ui_wt/MainPage.cpp +++ b/querying/src/ui_wt/MainPage.cpp @@ -13,7 +13,7 @@ #include "./../calculation/Computor.h" #include "./../util/QueryJSONParser.h" #include "./../util/InvertedIndexJSONParser.h" -#include "./../database/DocumentCollection.h" +#include "./../database/Database.h" #include "MainPage.h" @@ -21,8 +21,8 @@ MainPage::MainPage(const Wt::WEnvironment& env) : Wt::WApplication(env) { Space space(InvertedIndexJSONParser("./../../data/persistence/invertedList.json").parse()); - DocumentCollection collection("./../../data/persistence/docs_and_terms.db"); - auto availableDocuments = collection.fetchCollection(); + Database database("./../../data/persistence/docs_and_terms.db"); + auto availableDocuments = database.getDocumentsCollection(); auto container = root()->addWidget(Wt::cpp14::make_unique<Wt::WContainerWidget>()); auto buttonPtr = Wt::cpp14::make_unique<Wt::WPushButton>("Show me more!"); @@ -40,15 +40,15 @@ MainPage::MainPage(const Wt::WEnvironment& env) container->addWidget(std::move(buttonPtr)); button->clicked().connect([=] { - displayDetail(space, availableDocuments, container, availableDocuments.at(menu->currentIndex()).id); + displayDetail(space, container, availableDocuments.at(menu->currentIndex()).id); }); } std::string MainPage::getName(const std::string & path) { std::string name = path; - name = name.substr(name.find_last_of('/') + 1); - name = name.substr(0, name.find_last_of('.')); + size_t begin = name.find_last_of('/') + 1; + name = name.substr(begin, name.find_last_of('.') - begin); name.replace(name.find("___"), 3, ": "); return name; @@ -65,18 +65,18 @@ std::string MainPage::getDocument(const std::string & path) return content; } -void MainPage::displayDetail(Space space, const std::vector<Document> & availableDocuments, Wt::WContainerWidget * container, int document_id) +void MainPage::displayDetail(Space space, Wt::WContainerWidget * container, int document_id) { // deletes everything from current container container->clear(); // udelat to lepe, aby tu nemusely byt cesty napevno.. - Terms collection("./../../data/persistence/docs_and_terms.db"); - Document document = availableDocuments.at(document_id - 1); + Database database("./../../data/persistence/docs_and_terms.db"); + Document document = database.getDocumentByID(document_id); // threshold je nyni nastaven na -1 --> ve vysledku budou i uplne rozdilne dokumenty - Query query(space.getTermsAndWeightsByID(collection, document.id), -1); - auto result = Computor(space, query).compute(collection, document.id); + Query query(space.getTermsAndWeightsByID(database, document.id), -1); + auto result = Computor(space, query).compute(database, document.id); // dodelat proklikavani na zobrazene podobne dokumenty for (size_t i = 0; i < 10 && i < result.size(); i++) { diff --git a/querying/src/ui_wt/MainPage.h b/querying/src/ui_wt/MainPage.h index b6a5528..8165809 100644 --- a/querying/src/ui_wt/MainPage.h +++ b/querying/src/ui_wt/MainPage.h @@ -13,5 +13,5 @@ public: private: std::string getName(const std::string & path); std::string getDocument(const std::string & path); - void displayDetail(Space space, const std::vector<Document> & availableDocuments, Wt::WContainerWidget * container, int document_id); + void displayDetail(Space space, Wt::WContainerWidget * container, int document_id); }; -- GitLab