From 52febe524767618b248bd8b3ba03b3f8a97a68b8 Mon Sep 17 00:00:00 2001
From: Tomas Koristka <koristo1@fit.cvut.cz>
Date: Sat, 28 Mar 2020 11:43:05 +0100
Subject: [PATCH] Changing inverted index to deque

---
 querying/src/Computor.cpp                     | 59 ++++++++-----------
 querying/src/Computor.h                       |  6 +-
 querying/src/inverted_index/InputParser.cpp   |  2 +-
 querying/src/inverted_index/InvertedIndex.cpp | 32 ++++------
 querying/src/inverted_index/InvertedIndex.h   | 11 +---
 querying/src/space/Query.cpp                  | 13 +++-
 querying/src/space/Query.h                    |  6 +-
 querying/src/space/Space.cpp                  |  2 +-
 querying/src/space/Space.h                    |  2 +-
 9 files changed, 60 insertions(+), 73 deletions(-)

diff --git a/querying/src/Computor.cpp b/querying/src/Computor.cpp
index 693500e..ebbd268 100644
--- a/querying/src/Computor.cpp
+++ b/querying/src/Computor.cpp
@@ -1,58 +1,49 @@
 #include <iostream>
 #include <climits>
+#include <utility>
 #include "Computor.h"
 #include "exceptions/Exceptions.h"
 
 using namespace std;
 
-Computor::Computor(Space space, const Query &query) : space(space), query(query) {}
+Computor::Computor(Space space, Query query) : space(std::move(space)), query(std::move(query)) {} // both arguments are taken by value and moved into the members
 
 
 map<int, double> Computor::compute() {
-    exhaustedIndices = {};
-    auto totalIndices = query.terms.size();
-
-    int lowestDocumentID = nextID();
-    space.forward(lowestDocumentID, query.terms);
     map<int, double> results;
 
-    while (exhaustedIndices.size() != totalIndices) {
-        results.emplace(lowestDocumentID, computeForDocument(lowestDocumentID));
-        lowestDocumentID = nextID();
-        space.forward(lowestDocumentID, query.terms);
-    }
-
-    return results;
-}
+    availableTerms = query.termsKeyset; // start from every query term; a term is dropped once its index reports end-of-index
 
+    while (!availableTerms.empty()) {
+        int ID = nextID(); //get lowest ID
+        double result = 0;
 
-double Computor::computeForDocument(int ID) {
-    double result = 0;
+        for (const auto &term: set<string>(availableTerms)) /*Go through a snapshot of the remaining terms: the erase below must not invalidate the loop iterator*/ {
+            try {
+                double queryWeight = query.terms.at(term);
+                double documentWeight = space.getInvertedIndexByKey(term).getDocumentWeightByID(ID);
+                result += documentWeight * queryWeight;
+            }
+            catch (const IDNotFoundException &e) {//inverted index does not contain given ID
+            }
 
-    for (const auto &[term, weight]: query.terms) {
-        try {
-            auto d = space.getInvertedIndexByKey(term).getDocumentWeightByID(ID);
-            result += weight * d;
-        }
-
-        catch (EndOfIndexException &e) {
-            exhaustedIndices.emplace(term); //add another exhausted inverted index
-        }
-        catch (IDNotFoundException &e) {
-            continue;
+            catch (const EndOfIndexException &e) {
+                availableTerms.erase(term); //exhaust term (safe: the loop walks a copy, not availableTerms itself)
+            }
         }
+        results[ID] = result; // sum of documentWeight * queryWeight over the terms that contained this ID
     }
 
-    return result;
+    return results;
 }
 
+int Computor::nextID() { // smallest getLowestID() over the inverted indices of all query terms
+    int smallest = INT_MAX; // returned unchanged when the query has no terms
 
-int Computor::nextID() const {
-    int nextID = INT_MAX;
+    for (auto it = query.terms.begin(); it != query.terms.end(); ++it)
+        smallest = min(smallest, space[it->first].getLowestID());
 
-    for (const auto &term: query.terms)
-        nextID = min(space[term.first].getLowestID(), nextID);
-
-    return nextID;
+    return smallest;
 }
 
+
diff --git a/querying/src/Computor.h b/querying/src/Computor.h
index 3615170..9a9fa86 100644
--- a/querying/src/Computor.h
+++ b/querying/src/Computor.h
@@ -14,15 +14,15 @@ private:
     //Attributes--------------
     Space space;
     const Query query;
-    std::set<std::string> exhaustedIndices;
+    std::set<std::string> availableTerms;
 
     //Methods-----------------
-    int nextID() const;
+    int nextID();
 
     double computeForDocument(int ID);
 
 public:
-    Computor(Space space, const Query &query);
+    Computor(Space space, Query query);
 
     std::map<int, double> compute();
 };
diff --git a/querying/src/inverted_index/InputParser.cpp b/querying/src/inverted_index/InputParser.cpp
index f8d1e99..a26ac60 100644
--- a/querying/src/inverted_index/InputParser.cpp
+++ b/querying/src/inverted_index/InputParser.cpp
@@ -25,7 +25,7 @@ map<string, InvertedIndex> InputParser::getInvertedIndices() {
         sort(weights.begin(), weights.end(),
              [](const auto &a, const auto &b) { return a.getID() < b.getID(); });
 
-        invertedIndices.insert({term, InvertedIndex(term, weights)});
+        invertedIndices.emplace(term, InvertedIndex(weights));
     }
 
     return invertedIndices;
diff --git a/querying/src/inverted_index/InvertedIndex.cpp b/querying/src/inverted_index/InvertedIndex.cpp
index 7a464a5..ed048ee 100644
--- a/querying/src/inverted_index/InvertedIndex.cpp
+++ b/querying/src/inverted_index/InvertedIndex.cpp
@@ -4,16 +4,21 @@
 
 using namespace std;
 
-InvertedIndex::InvertedIndex(string term, vector<DocumentWeight> documentWeights)
-        : position(0),
-          term(move(term)),
-          documentWeights(move(documentWeights)) {}
+InvertedIndex::InvertedIndex(deque<DocumentWeight> documentWeights) // takes the posting list by value ...
+        : documentWeights(std::move(documentWeights)) {} // ... and moves it into the member
+
 
 double InvertedIndex::getDocumentWeightByID(int ID) {
 
     if (position + 1 == documentWeights.size())
         throw EndOfIndexException();
 
+
+    while (documentWeights.front().getID() != ID) {
+        documentWeights.pop_front();
+    }
+    return documentWeights.front().getID();
+
     for (auto it = documentWeights.begin() + position; it != documentWeights.end(); it++) {
         int documentID = (*it).getID();
 
@@ -21,25 +26,8 @@ double InvertedIndex::getDocumentWeightByID(int ID) {
             throw IDNotFoundException();
 
         if (documentID == ID) //match!
-        {
-            position = it - documentWeights.begin() + 1; //update position
             return (*it).getWeight();
-        }
-    }
-
-    /*
-
-    for (size_t i = position; position < documentWeights.size(); ++i) {
-        int documentID = documentWeights[i].getWeight();
-
-        if (documentID < ID)
-            throw IDNotFoundException();
-        if (documentID == ID) {
-            position = i + 1;
-            return documentWeights[i];
-        }
     }
-*/
 
     throw IDNotFoundException();
 }
@@ -49,7 +37,7 @@ const DocumentWeight &InvertedIndex::operator[](size_t i) {
 }
 
 int InvertedIndex::getLowestID() const {
-    return documentWeights[position].getID();
+    return documentWeights.front().getID(); // ID of the entry at the head of the deque; front() on an empty deque is undefined behaviour, so this must not be called once the deque is drained
 }
 
 void InvertedIndex::forward(int ID) {
diff --git a/querying/src/inverted_index/InvertedIndex.h b/querying/src/inverted_index/InvertedIndex.h
index 1b9dee3..677715e 100644
--- a/querying/src/inverted_index/InvertedIndex.h
+++ b/querying/src/inverted_index/InvertedIndex.h
@@ -3,7 +3,7 @@
 
 
 #include <string>
-#include <vector>
+#include <deque>
 #include "../DocumentWeight.h"
 
 /**
@@ -14,19 +14,16 @@
 class InvertedIndex {
 private:
     //Attributes-------------
-    size_t position;
-    std::string term;/**<Term identifier */
-    std::vector<DocumentWeight> documentWeights; /**<Inverted index list of documents & their weights in which the term appears */
+    std::deque<DocumentWeight> documentWeights; /**<Inverted index list of documents & their weights in which the term appears */
 
 public:
 
     //Methods-----------------
     /**
      * Constructor
-     * @param term Term identifier
      * @param documentWeights Inverted index list of documents in which the term appears, and their weights
      */
-    InvertedIndex(std::string term, std::vector<DocumentWeight> documentWeights);
+    InvertedIndex(std::deque<DocumentWeight> documentWeights);
 
     /**
      * @brief Finds the @ref DocumentWeight object with a given @ref DocumentWeight::ID in @ref documentWeights
@@ -42,8 +39,6 @@ public:
 
     int getLowestID() const;
 
-    void forward(int ID);
-
     const DocumentWeight &operator[](size_t i);
 };
 
diff --git a/querying/src/space/Query.cpp b/querying/src/space/Query.cpp
index 7c3eac2..6d92bfe 100644
--- a/querying/src/space/Query.cpp
+++ b/querying/src/space/Query.cpp
@@ -6,5 +6,14 @@
 
 using namespace std;
 
-Query::Query(std::map<std::string, double> terms, const double threshold)
-        : terms(std::move(terms)), threshold(threshold) {}
+Query::Query(std::map<std::string, double> terms, double threshold)
+        : terms(std::move(terms)), // after this initializer the parameter `terms` is moved-from (in practice empty)
+          termsKeyset(getKeyset(this->terms)), // read the already-initialized member; the bare name would be the moved-from parameter
+          threshold(threshold) {}
+
+set<string> Query::getKeyset(const map<std::string, double> &terms) { // collects the keys of `terms` into a set
+    set<string> keys;
+    for (auto it = terms.begin(); it != terms.end(); ++it)
+        keys.insert(it->first); // only the term name is kept; the weight (it->second) is ignored
+    return keys;
+}
diff --git a/querying/src/space/Query.h b/querying/src/space/Query.h
index 0b23e77..bda25cd 100644
--- a/querying/src/space/Query.h
+++ b/querying/src/space/Query.h
@@ -3,6 +3,7 @@
 
 
 #include <map>
+#include <set>
 #include <vector>
 #include <string>
 #include "Space.h"
@@ -14,6 +15,7 @@ class Query {
 public:
     //Attributes-------------
     const std::map<std::string, double> terms; /**<Terms and their weights in the query */
+    const std::set<std::string> termsKeyset; /**<Keyset of terms */
     const double threshold; /**<Acceptable result threshold */
 
     //Methods----------------
@@ -22,7 +24,9 @@ public:
      * @param terms Terms and their weights in the query
      * @param threshold Acceptable result threshold
      */
-    explicit Query(std::map<std::string, double> terms, const double threshold);
+    explicit Query(std::map<std::string, double> terms, double threshold);
+
+    static std::set<std::string> getKeyset(const std::map<std::string, double> &terms);
 };
 
 
diff --git a/querying/src/space/Space.cpp b/querying/src/space/Space.cpp
index 727dfdb..f1c8fc2 100644
--- a/querying/src/space/Space.cpp
+++ b/querying/src/space/Space.cpp
@@ -17,7 +17,7 @@ InvertedIndex &Space::getInvertedIndexByKey(const string &key) {
     return terms.at(key);
 }
 
-void Space::forward(int ID, const std::map<std::string, double> q) {
+void Space::forward(int ID, const std::map<std::string, double> &q) { // calls forward(ID) on the inverted index of every key in q; NOTE(review): this patch removes forward() from InvertedIndex.h - confirm this call still compiles
     for (const auto &term: q)
         terms.at(term.first).forward(ID);
 }
diff --git a/querying/src/space/Space.h b/querying/src/space/Space.h
index 641355e..783bd0b 100644
--- a/querying/src/space/Space.h
+++ b/querying/src/space/Space.h
@@ -32,7 +32,7 @@ public:
 
     const InvertedIndex &operator[](const std::string &key) const;
 
-    void forward(int ID, const std::map<std::string, double> q);
+    void forward(int ID, const std::map<std::string, double> &q);
 };
 
 
-- 
GitLab