From 1d087141652d0afe05d6dce815267bf88b0b3c85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Paukert?= <paukeluk@fit.cvut.cz> Date: Tue, 28 Apr 2020 00:22:56 +0200 Subject: [PATCH] Improved searching in inverted index --- querying/CMakeLists.txt | 1 + querying/src/calculation/Computor.cpp | 23 ++++++++++++---------- querying/src/calculation/InvertedIndex.cpp | 10 ++++------ querying/src/enum/EInvertedIndex.h | 7 +++++++ 4 files changed, 25 insertions(+), 16 deletions(-) create mode 100644 querying/src/enum/EInvertedIndex.h diff --git a/querying/CMakeLists.txt b/querying/CMakeLists.txt index c36784d..5833e4f 100644 --- a/querying/CMakeLists.txt +++ b/querying/CMakeLists.txt @@ -39,6 +39,7 @@ set(SOURCES src/calculation/WeightedDocument.cpp src/calculation/WeightedDocument.h src/database/Database.cpp src/database/Database.h src/database/Document.cpp src/database/Document.h + src/enum/EInvertedIndex.h src/exceptions/Exceptions.h src/ui/Page.cpp src/ui/Page.h src/util/ArgumentParser.cpp src/util/ArgumentParser.h diff --git a/querying/src/calculation/Computor.cpp b/querying/src/calculation/Computor.cpp index 714b0f3..d458e38 100644 --- a/querying/src/calculation/Computor.cpp +++ b/querying/src/calculation/Computor.cpp @@ -5,6 +5,7 @@ #include <cmath> #include "Computor.h" +#include "../enum/EInvertedIndex.h" #include "../exceptions/Exceptions.h" using namespace std; @@ -28,17 +29,19 @@ vector<pair<int, double>> Computor::compute(Database & database) { double result = 0, denominator = 0; for (const auto &term: availableTerms) /*Go through all the remaining terms*/ { - try { - double documentWeight = space.getInvertedIndexByKey(term).getDocumentWeightByID(ID); - double queryWeight = query.terms.at(term); - result += documentWeight * queryWeight; - } - catch (const IDNotFoundException &e) {//inverted index does not contain given ID - } - catch (const EndOfIndexException &e) { - availableTerms.erase(term); //exhaust term + double documentWeight = space.getInvertedIndexByKey(term).getDocumentWeightByID(ID); + + if (documentWeight == EInvertedIndex::IDNotFound) + continue; + + if (documentWeight == EInvertedIndex::EndOfIndex) { + availableTerms.erase(term); + continue; } + + double queryWeight = query.terms.at(term); + result += documentWeight * queryWeight; } denominator = sqrt(vectorSizes[ID] * vectorQuerySize); @@ -50,7 +53,7 @@ vector<pair<int, double>> Computor::compute(Database & database) { results.emplace_back(make_pair(ID, result)); } - sort(results.begin(), results.end(), [] (const pair<int, double> & a, const pair<int, double> & b) + std::sort(results.begin(), results.end(), [] (const pair<int, double> & a, const pair<int, double> & b) { return a.second > b.second; }); return results; diff --git a/querying/src/calculation/InvertedIndex.cpp b/querying/src/calculation/InvertedIndex.cpp index da9d0f0..8989af5 100644 --- a/querying/src/calculation/InvertedIndex.cpp +++ b/querying/src/calculation/InvertedIndex.cpp @@ -1,5 +1,6 @@ #include "InvertedIndex.h" +#include "../enum/EInvertedIndex.h" #include "../exceptions/Exceptions.h" using namespace std; @@ -11,13 +12,13 @@ double InvertedIndex::getDocumentWeightByID(int ID) { WeightedDocument document{}; if (documents.empty()) - throw EndOfIndexException(); + return EInvertedIndex::EndOfIndex; while ((document = documents.front()).getID() != ID) { if (documents.empty()) //end of the index; index is exhausted - throw EndOfIndexException(); + return EInvertedIndex::EndOfIndex; if (document.getID() > ID) // IDs are sorted asc, i.e. if doc ID is higher, then the ID for is not in index - throw IDNotFoundException(); + return EInvertedIndex::IDNotFound; documents.pop_front(); } @@ -32,6 +33,3 @@ const WeightedDocument &InvertedIndex::operator[](size_t index) { int InvertedIndex::getNextID() const { return documents.front().getID(); } - - - diff --git a/querying/src/enum/EInvertedIndex.h b/querying/src/enum/EInvertedIndex.h new file mode 100644 index 0000000..3fd4039 --- /dev/null +++ b/querying/src/enum/EInvertedIndex.h @@ -0,0 +1,7 @@ +/** + * Enum for returning special value when searching in inverted index + */ + +enum EInvertedIndex { + EndOfIndex = 5, IDNotFound +}; -- GitLab