From 84e3100081609453c0f0032b188e8078a8ee7d60 Mon Sep 17 00:00:00 2001
From: weirdwizardthomas <thomas.koristka@gmail.com>
Date: Sat, 28 Mar 2020 13:49:07 +0100
Subject: [PATCH] Finished relevance calculation

---
 querying/CMakeLists.txt                       |  2 +-
 querying/src/Computor.cpp                     | 12 ++---
 querying/src/Document.cpp                     | 16 +++++++
 querying/src/Document.h                       | 26 +++++++++++
 querying/src/DocumentWeight.cpp               | 16 -------
 querying/src/DocumentWeight.h                 | 23 ----------
 querying/src/inverted_index/InputParser.cpp   |  3 +-
 querying/src/inverted_index/InvertedIndex.cpp | 44 ++++++-------------
 querying/src/inverted_index/InvertedIndex.h   | 16 +++----
 querying/src/main.cpp                         |  6 +--
 querying/src/space/Query.cpp                  | 10 ++---
 querying/src/space/Query.h                    |  6 +--
 querying/src/space/Space.cpp                  |  5 ---
 13 files changed, 84 insertions(+), 101 deletions(-)
 create mode 100644 querying/src/Document.cpp
 create mode 100644 querying/src/Document.h
 delete mode 100644 querying/src/DocumentWeight.cpp
 delete mode 100644 querying/src/DocumentWeight.h

diff --git a/querying/CMakeLists.txt b/querying/CMakeLists.txt
index 1d665ab..9114188 100644
--- a/querying/CMakeLists.txt
+++ b/querying/CMakeLists.txt
@@ -7,6 +7,6 @@ add_subdirectory(lib/SQLiteCpp)
 
 SET(CMAKE_CXX_FLAGS "-g -Wall -pedantic -Wextra")
 
-add_executable(main src/main.cpp src/inverted_index/InvertedIndex.cpp src/inverted_index/InvertedIndex.h src/inverted_index/InputParser.cpp src/inverted_index/InputParser.h src/space/Space.cpp src/space/Space.h src/space/Query.cpp src/space/Query.h src/Computor.cpp src/Computor.h src/DocumentWeight.cpp src/DocumentWeight.h src/exceptions/Exceptions.h)
+add_executable(main src/main.cpp src/inverted_index/InvertedIndex.cpp src/inverted_index/InvertedIndex.h src/inverted_index/InputParser.cpp src/inverted_index/InputParser.h src/space/Space.cpp src/space/Space.h src/space/Query.cpp src/space/Query.h src/Computor.cpp src/Computor.h src/Document.cpp src/Document.h src/exceptions/Exceptions.h)
 
 target_link_libraries(main SQLiteCpp)
diff --git a/querying/src/Computor.cpp b/querying/src/Computor.cpp
index ebbd268..68fffb5 100644
--- a/querying/src/Computor.cpp
+++ b/querying/src/Computor.cpp
@@ -8,7 +8,6 @@ using namespace std;
 
 Computor::Computor(Space space, Query query) : space(std::move(space)), query(std::move(query)) {}
 
-
 map<int, double> Computor::compute() {
     map<int, double> results;
 
@@ -18,10 +17,10 @@ map<int, double> Computor::compute() {
         int ID = nextID(); //get lowest ID
         double result = 0;
 
-        for (const auto &term: availableTerms) /*Go through all the remaining terms*/ {
+        for (const auto& term: availableTerms) /*Go through all the remaining terms*/ {
             try {
-                double queryWeight = query.terms.at(term);
                 double documentWeight = space.getInvertedIndexByKey(term).getDocumentWeightByID(ID);
+                double queryWeight = query.terms.at(term);
                 result += documentWeight * queryWeight;
             }
             catch (const IDNotFoundException &e) {//inverted index does not contain given ID
@@ -31,7 +30,8 @@ map<int, double> Computor::compute() {
                 availableTerms.erase(term); //exhaust term
             }
         }
-        results[ID] = result;
+        if(result > query.threshold)
+            results[ID] = result;
     }
 
     return results;
@@ -40,8 +40,8 @@ map<int, double> Computor::compute() {
 int Computor::nextID() {
     int lowestID = INT_MAX;
 
-    for (const auto &queryRecord: query.terms)
-        lowestID = min(space[queryRecord.first].getLowestID(), lowestID);
+    for (const auto &term: availableTerms) // scans only the terms still held in availableTerms
+        lowestID = min(space[term].getLowestID(), lowestID); // lowestID stays INT_MAX when availableTerms is empty
 
     return lowestID;
 }
diff --git a/querying/src/Document.cpp b/querying/src/Document.cpp
new file mode 100644
index 0000000..7bcebb2
--- /dev/null
+++ b/querying/src/Document.cpp
@@ -0,0 +1,16 @@
+//
+// Created by tomas on 3/24/20.
+//
+
+#include "Document.h"
+
+Document::Document(const int id, const double weight) : ID{id}, weight{weight} {} // stores the given ID/weight pair
+
+int Document::getID() const { // read-only accessor for the stored ID
+    return this->ID;
+}
+
+double Document::getWeight() const { // read-only accessor for the stored weight
+    return this->weight;
+}
+
diff --git a/querying/src/Document.h b/querying/src/Document.h
new file mode 100644
index 0000000..aabf0d1
--- /dev/null
+++ b/querying/src/Document.h
@@ -0,0 +1,26 @@
+//
+// Created by tomas on 3/24/20.
+//
+
+#ifndef QUERYING_DOCUMENT_H
+#define QUERYING_DOCUMENT_H
+
+
+class Document { // pairs a document ID with the weight stored for it
+private:
+    int ID = 0; /**<Document identifier; 0 when default-constructed */
+    double weight = 0.0; /**<Weight stored for the document; 0 when default-constructed */
+
+public:
+    Document(int id, double weight); // defined in Document.cpp
+
+    Document() = default; // members fall back to their default initializers, so the state is never indeterminate
+
+    int getID() const;
+
+    double getWeight() const;
+
+};
+
+
+#endif //QUERYING_DOCUMENT_H
diff --git a/querying/src/DocumentWeight.cpp b/querying/src/DocumentWeight.cpp
deleted file mode 100644
index 198ea45..0000000
--- a/querying/src/DocumentWeight.cpp
+++ /dev/null
@@ -1,16 +0,0 @@
-//
-// Created by tomas on 3/24/20.
-//
-
-#include "DocumentWeight.h"
-
-DocumentWeight::DocumentWeight(const int id, const double weight) : ID(id), weight(weight) {}
-
-int DocumentWeight::getID() const {
-    return ID;
-}
-
-double DocumentWeight::getWeight() const {
-    return weight;
-}
-
diff --git a/querying/src/DocumentWeight.h b/querying/src/DocumentWeight.h
deleted file mode 100644
index 299bab0..0000000
--- a/querying/src/DocumentWeight.h
+++ /dev/null
@@ -1,23 +0,0 @@
-//
-// Created by tomas on 3/24/20.
-//
-
-#ifndef QUERYING_DOCUMENTWEIGHT_H
-#define QUERYING_DOCUMENTWEIGHT_H
-
-
-class DocumentWeight {
-private:
-    int ID;
-    double weight;
-
-public:
-    DocumentWeight(int id, double weight);
-
-    int getID() const;
-
-    double getWeight() const;
-};
-
-
-#endif //QUERYING_DOCUMENTWEIGHT_H
diff --git a/querying/src/inverted_index/InputParser.cpp b/querying/src/inverted_index/InputParser.cpp
index a26ac60..b885d4f 100644
--- a/querying/src/inverted_index/InputParser.cpp
+++ b/querying/src/inverted_index/InputParser.cpp
@@ -5,6 +5,7 @@
 #include "InputParser.h"
 
 #include <utility>
+#include <deque>
 #include "../../lib/json.hpp"
 
 using namespace std;
@@ -17,7 +18,7 @@ map<string, InvertedIndex> InputParser::getInvertedIndices() {
     json root = loadJsonFromFile();
     map<string, InvertedIndex> invertedIndices;
     for (const auto &[term, documentIDs]: root.items()) {
-        vector<DocumentWeight> weights;
+        deque<Document> weights;
 
         for (const auto &[documentID, weight] :  documentIDs.items())
             weights.emplace_back(stoi(documentID), weight);
diff --git a/querying/src/inverted_index/InvertedIndex.cpp b/querying/src/inverted_index/InvertedIndex.cpp
index ed048ee..c9af044 100644
--- a/querying/src/inverted_index/InvertedIndex.cpp
+++ b/querying/src/inverted_index/InvertedIndex.cpp
@@ -4,50 +4,34 @@
 
 using namespace std;
 
-InvertedIndex::InvertedIndex(deque<DocumentWeight> documentWeights)
-        : documentWeights(move(documentWeights)) {}
-
+InvertedIndex::InvertedIndex(deque<Document> documentWeights) // taken by value, then moved into the member
+        : documents(std::move(documentWeights)) {}
 
 double InvertedIndex::getDocumentWeightByID(int ID) {
+    Document document{}; // copy of the current front entry; value-initialised until the first read
 
-    if (position + 1 == documentWeights.size())
+    if (documents.empty()) // nothing left to consume: the index is exhausted
         throw EndOfIndexException();
 
-
-    while (documentWeights.front().getID() != ID) {
-        documentWeights.pop_front();
-    }
-    return documentWeights.front().getID();
-
-    for (auto it = documentWeights.begin() + position; it != documentWeights.end(); it++) {
-        int documentID = (*it).getID();
-
-        if (documentID < ID)
+    while ((document = documents.front()).getID() != ID) {
+        if (document.getID() > ID) // IDs are sorted asc, i.e. if the front ID is higher, the requested ID is not in the index
             throw IDNotFoundException();
-
-        if (documentID == ID) //match!
-            return (*it).getWeight();
+        documents.pop_front(); // discard an entry whose ID is lower than the requested one
+        if (documents.empty()) // must run before the loop condition calls front() again: front() on an empty deque is undefined
+            throw EndOfIndexException();
     }
 
-    throw IDNotFoundException();
+    documents.pop_front(); // consume the matched entry so it is not returned twice
+    return document.getWeight();
 }
 
-const DocumentWeight &InvertedIndex::operator[](size_t i) {
-    return documentWeights[i];
+const Document &InvertedIndex::operator[](size_t i) { // returns a reference to the i-th entry of documents
+    return documents[i]; // std::deque::operator[] performs no bounds check
 }
 
 int InvertedIndex::getLowestID() const {
-    return documentWeights.front().getID();
+    return documents.front().getID(); // NOTE(review): front() on an empty deque is undefined - confirm callers never reach this once the index is exhausted
 }
 
-void InvertedIndex::forward(int ID) {
-    auto it = documentWeights.begin() + position;
 
-    while (it != documentWeights.end() && (*it).getID() != ID) {
-        if (ID < (*it).getID())
-            return;
-        position++;
-        it++;
-    }
-}
 
diff --git a/querying/src/inverted_index/InvertedIndex.h b/querying/src/inverted_index/InvertedIndex.h
index 677715e..c682307 100644
--- a/querying/src/inverted_index/InvertedIndex.h
+++ b/querying/src/inverted_index/InvertedIndex.h
@@ -4,7 +4,7 @@
 
 #include <string>
 #include <deque>
-#include "../DocumentWeight.h"
+#include "../Document.h"
 
 /**
  * @brief Data container of term and its inverted index list
@@ -14,7 +14,7 @@
 class InvertedIndex {
 private:
     //Attributes-------------
-    std::deque<DocumentWeight> documentWeights; /**<Inverted index list of documents & their weights in which the term appears */
+    std::deque<Document> documents; /**<Inverted index list of documents & their weights in which the term appears */
 
 public:
 
@@ -23,23 +23,23 @@ public:
      * Constructor
      * @param documentWeights Inverted index list of documents in which the term appears, and their weights
      */
-    InvertedIndex(std::deque<DocumentWeight> documentWeights);
+    explicit InvertedIndex(std::deque<Document> documentWeights);
 
     /**
-     * @brief Finds the @ref DocumentWeight object with a given @ref DocumentWeight::ID in @ref documentWeights
+     * @brief Finds the @ref Document object with a given @ref Document::ID in @ref documents
      *
      * Given an ID, looks through the container starting at @ref position.
-     * Assumes that the items are sorted by @ref DocumentWeight::ID, ascending.
+     * Assumes that the items are sorted by @ref Document::ID, ascending.
      *
-     * @param ID @ref DocumentWeight::ID to be found.
+     * @param ID @ref Document::ID to be found.
      * @throws IDNotFoundException if no Document is found or it had been processed already
-     * @return DocumentWeight with @ref DocumentWeight::ID ==  ID
+     * @return Document with @ref Document::ID ==  ID
      */
     double getDocumentWeightByID(int ID);
 
     int getLowestID() const;
 
-    const DocumentWeight &operator[](size_t i);
+    const Document &operator[](size_t i);
 };
 
 
diff --git a/querying/src/main.cpp b/querying/src/main.cpp
index e1d3879..6356b01 100644
--- a/querying/src/main.cpp
+++ b/querying/src/main.cpp
@@ -11,9 +11,9 @@ int main() {
     Space space(InputParser("../../data/persistence/dummy.json").getInvertedIndices());
 
     Query query({
-                        {"forest",   0.5},
-                        {"mountain", 0.4},
-                        {"nature",   0.3}}, 0);
+                        {"forest",   0.2},
+                        {"mountain", 0.1},
+                        {"nature",   0.8}}, 0.5);
 
     auto res = Computor(space, query).compute();
     for (const auto &[id, value]: res)
diff --git a/querying/src/space/Query.cpp b/querying/src/space/Query.cpp
index 6d92bfe..fc4a322 100644
--- a/querying/src/space/Query.cpp
+++ b/querying/src/space/Query.cpp
@@ -6,14 +6,14 @@
 
 using namespace std;
 
-Query::Query(std::map<std::string, double> terms, double threshold)
-        : terms(std::move(terms)),
-          termsKeyset(getKeyset(terms)),
+Query::Query(std::map<std::string, double> t, double threshold)
+        : termsKeyset(getKeyset(t)), // keys are copied out of t here, before t is moved from on the next line
+          terms(std::move(t)),
           threshold(threshold) {}
 
-set<string> Query::getKeyset(const map<std::string, double> &terms) {
+set<string> Query::getKeyset(const map<std::string, double> &t) { // builds the set of keys found in t
     set<string> dummy;
-    for (const auto &term: terms)
+    for (const auto &term: t) // term.first is the key of each map entry
         dummy.emplace(term.first);
     return dummy;
 }
diff --git a/querying/src/space/Query.h b/querying/src/space/Query.h
index bda25cd..f724a70 100644
--- a/querying/src/space/Query.h
+++ b/querying/src/space/Query.h
@@ -14,8 +14,8 @@
 class Query {
 public:
     //Attributes-------------
-    const std::map<std::string, double> terms; /**<Terms and their weights in the query */
     const std::set<std::string> termsKeyset; /**<Keyset of terms */
+    const std::map<std::string, double> terms; /**<Terms and their weights in the query */
     const double threshold; /**<Acceptable result threshold */
 
     //Methods----------------
@@ -24,9 +24,9 @@ public:
      * @param terms Terms and their weights in the query
      * @param threshold Acceptable result threshold
      */
-    explicit Query(std::map<std::string, double> terms, double threshold);
+    explicit Query(std::map<std::string, double> t, double threshold);
 
-    static std::set<std::string> getKeyset(const std::map<std::string, double> &terms);
+    static std::set<std::string> getKeyset(const std::map<std::string, double> &t);
 };
 
 
diff --git a/querying/src/space/Space.cpp b/querying/src/space/Space.cpp
index f1c8fc2..e4bf9b5 100644
--- a/querying/src/space/Space.cpp
+++ b/querying/src/space/Space.cpp
@@ -17,10 +17,5 @@ InvertedIndex &Space::getInvertedIndexByKey(const string &key) {
     return terms.at(key);
 }
 
-void Space::forward(int ID, const std::map<std::string, double> &q) {
-    for (const auto &term: q)
-        terms.at(term.first).forward(ID);
-}
-
 
 
-- 
GitLab