Skip to content
Snippets Groups Projects
Commit 1d087141 authored by Lukáš Paukert
Browse files

Improved searching in inverted index

parent d13161d0
No related branches found
No related tags found
No related merge requests found
......@@ -39,6 +39,7 @@ set(SOURCES
src/calculation/WeightedDocument.cpp src/calculation/WeightedDocument.h
src/database/Database.cpp src/database/Database.h
src/database/Document.cpp src/database/Document.h
src/enum/EInvertedIndex.h
src/exceptions/Exceptions.h
src/ui/Page.cpp src/ui/Page.h
src/util/ArgumentParser.cpp src/util/ArgumentParser.h
......
......@@ -5,6 +5,7 @@
#include <cmath>
 
#include "Computor.h"
#include "../enum/EInvertedIndex.h"
#include "../exceptions/Exceptions.h"
 
using namespace std;
......@@ -28,17 +29,19 @@ vector<pair<int, double>> Computor::compute(Database & database) {
double result = 0, denominator = 0;
 
for (const auto &term: availableTerms) /*Go through all the remaining terms*/ {
try {
double documentWeight = space.getInvertedIndexByKey(term).getDocumentWeightByID(ID);
double queryWeight = query.terms.at(term);
result += documentWeight * queryWeight;
}
catch (const IDNotFoundException &e) {//inverted index does not contain given ID
}
 
catch (const EndOfIndexException &e) {
availableTerms.erase(term); //exhaust term
double documentWeight = space.getInvertedIndexByKey(term).getDocumentWeightByID(ID);
if (documentWeight == EInvertedIndex::IDNotFound)
continue;
if (documentWeight == EInvertedIndex::EndOfIndex) {
availableTerms.erase(term);
continue;
}
double queryWeight = query.terms.at(term);
result += documentWeight * queryWeight;
}
 
denominator = sqrt(vectorSizes[ID] * vectorQuerySize);
......@@ -50,7 +53,7 @@ vector<pair<int, double>> Computor::compute(Database & database) {
results.emplace_back(make_pair(ID, result));
}
 
sort(results.begin(), results.end(), [] (const pair<int, double> & a, const pair<int, double> & b)
std::sort(results.begin(), results.end(), [] (const pair<int, double> & a, const pair<int, double> & b)
{ return a.second > b.second; });
 
return results;
......
#include "InvertedIndex.h"
 
#include "../enum/EInvertedIndex.h"
#include "../exceptions/Exceptions.h"
 
using namespace std;
......@@ -11,13 +12,13 @@ double InvertedIndex::getDocumentWeightByID(int ID) {
WeightedDocument document{};
 
if (documents.empty())
throw EndOfIndexException();
return EInvertedIndex::EndOfIndex;
 
while ((document = documents.front()).getID() != ID) {
if (documents.empty()) //end of the index; index is exhausted
throw EndOfIndexException();
return EInvertedIndex::EndOfIndex;
if (document.getID() > ID) // IDs are sorted asc, i.e. if doc ID is higher, then the ID for is not in index
throw IDNotFoundException();
return EInvertedIndex::IDNotFound;
documents.pop_front();
}
 
......@@ -32,6 +33,3 @@ const WeightedDocument &InvertedIndex::operator[](size_t index) {
/**
 * Reports the ID of the document currently at the front of this index.
 *
 * The front element of `documents` is read unconditionally; no emptiness
 * check is made here.
 * NOTE(review): if `documents` is a standard sequence container, calling this
 * on an empty index is undefined behavior - confirm callers guard against it.
 *
 * @return ID of the first document held in `documents`
 */
int InvertedIndex::getNextID() const {
    const auto &head = documents.front();
    return head.getID();
}
/**
 * Sentinel codes reported by an inverted-index lookup in place of a weight.
 *
 * The lookup returns these enumerators through a double and callers compare
 * the returned double against them, so the enum stays unscoped (implicitly
 * convertible to a number).
 * NOTE(review): a genuine weight equal to 5 or 6 would be indistinguishable
 * from these codes - confirm weights can never take those values.
 */
enum EInvertedIndex {
    EndOfIndex = 5, ///< the index is exhausted; no further documents remain
    IDNotFound = 6  ///< the requested document ID is not present in the index
};
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment