From 1d087141652d0afe05d6dce815267bf88b0b3c85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Paukert?= <paukeluk@fit.cvut.cz>
Date: Tue, 28 Apr 2020 00:22:56 +0200
Subject: [PATCH] Improved searching in inverted index

---
 querying/CMakeLists.txt                    |  1 +
 querying/src/calculation/Computor.cpp      | 23 ++++++++++++----------
 querying/src/calculation/InvertedIndex.cpp | 10 ++++------
 querying/src/enum/EInvertedIndex.h         |  7 +++++++
 4 files changed, 25 insertions(+), 16 deletions(-)
 create mode 100644 querying/src/enum/EInvertedIndex.h

diff --git a/querying/CMakeLists.txt b/querying/CMakeLists.txt
index c36784d..5833e4f 100644
--- a/querying/CMakeLists.txt
+++ b/querying/CMakeLists.txt
@@ -39,6 +39,7 @@ set(SOURCES
         src/calculation/WeightedDocument.cpp src/calculation/WeightedDocument.h
         src/database/Database.cpp src/database/Database.h
         src/database/Document.cpp src/database/Document.h
+        src/enum/EInvertedIndex.h
         src/exceptions/Exceptions.h
         src/ui/Page.cpp src/ui/Page.h
         src/util/ArgumentParser.cpp src/util/ArgumentParser.h
diff --git a/querying/src/calculation/Computor.cpp b/querying/src/calculation/Computor.cpp
index 714b0f3..d458e38 100644
--- a/querying/src/calculation/Computor.cpp
+++ b/querying/src/calculation/Computor.cpp
@@ -5,6 +5,7 @@
 #include <cmath>
 
 #include "Computor.h"
+#include "../enum/EInvertedIndex.h"
 #include "../exceptions/Exceptions.h"
 
 using namespace std;
@@ -28,17 +29,19 @@ vector<pair<int, double>> Computor::compute(Database & database) {
         double result = 0, denominator = 0;
 
         for (const auto &term: availableTerms) /*Go through all the remaining terms*/ {
-            try {
-                double documentWeight = space.getInvertedIndexByKey(term).getDocumentWeightByID(ID);
-                double queryWeight = query.terms.at(term);
-                result += documentWeight * queryWeight;
-            }
-            catch (const IDNotFoundException &e) {//inverted index does not contain given ID
-            }
 
-            catch (const EndOfIndexException &e) {
-                availableTerms.erase(term); //exhaust term
+            double documentWeight = space.getInvertedIndexByKey(term).getDocumentWeightByID(ID);
+
+            if (documentWeight == EInvertedIndex::IDNotFound)
+                continue;
+
+            if (documentWeight == EInvertedIndex::EndOfIndex) {
+                availableTerms.erase(term);
+                continue;
             }
+
+            double queryWeight = query.terms.at(term);
+            result += documentWeight * queryWeight;
         }
 
         denominator = sqrt(vectorSizes[ID] * vectorQuerySize);
@@ -50,7 +53,7 @@ vector<pair<int, double>> Computor::compute(Database & database) {
             results.emplace_back(make_pair(ID, result));
     }
 
-    sort(results.begin(), results.end(), [] (const pair<int, double> & a, const pair<int, double> & b)
+    std::sort(results.begin(), results.end(), [] (const pair<int, double> & a, const pair<int, double> & b)
                                          { return a.second > b.second; });
 
     return results;
diff --git a/querying/src/calculation/InvertedIndex.cpp b/querying/src/calculation/InvertedIndex.cpp
index da9d0f0..8989af5 100644
--- a/querying/src/calculation/InvertedIndex.cpp
+++ b/querying/src/calculation/InvertedIndex.cpp
@@ -1,5 +1,6 @@
 #include "InvertedIndex.h"
 
+#include "../enum/EInvertedIndex.h"
 #include "../exceptions/Exceptions.h"
 
 using namespace std;
@@ -11,13 +12,13 @@ double InvertedIndex::getDocumentWeightByID(int ID) {
     WeightedDocument document{};
 
     if (documents.empty())
-        throw EndOfIndexException();
+        return EInvertedIndex::EndOfIndex;
 
     while ((document = documents.front()).getID() != ID) {
         if (documents.empty()) //end of the index; index is exhausted
-            throw EndOfIndexException();
+            return EInvertedIndex::EndOfIndex;
         if (document.getID() > ID) // IDs are sorted asc, i.e. if doc ID is higher, then the ID for is not in index
-            throw IDNotFoundException();
+            return EInvertedIndex::IDNotFound;
         documents.pop_front();
     }
 
@@ -32,6 +33,3 @@ const WeightedDocument &InvertedIndex::operator[](size_t index) {
 int InvertedIndex::getNextID() const {
     return documents.front().getID();
 }
-
-
-
diff --git a/querying/src/enum/EInvertedIndex.h b/querying/src/enum/EInvertedIndex.h
new file mode 100644
index 0000000..3fd4039
--- /dev/null
+++ b/querying/src/enum/EInvertedIndex.h
@@ -0,0 +1,7 @@
+#pragma once
+/** Sentinel codes that InvertedIndex::getDocumentWeightByID returns (as double) instead of a weight.
+ *  Kept negative so they cannot equal a non-negative weight; the values 5 / 6 could collide with one.
+ *  NOTE(review): assumes document weights are never negative -- confirm against the indexer. */
+enum EInvertedIndex {
+    EndOfIndex = -1 /* index has no documents left */, IDNotFound = -2 /* requested ID is not in the index */
+};
-- 
GitLab