Skip to content
Snippets Groups Projects
Commit d54ee66a authored by Lukáš Paukert's avatar Lukáš Paukert
Browse files

Sequential search

parent caa9d41b
No related branches found
No related tags found
No related merge requests found
...@@ -35,6 +35,7 @@ set(SOURCES ...@@ -35,6 +35,7 @@ set(SOURCES
src/calculation/Computor.cpp src/calculation/Computor.h src/calculation/Computor.cpp src/calculation/Computor.h
src/calculation/InvertedIndex.cpp src/calculation/InvertedIndex.h src/calculation/InvertedIndex.cpp src/calculation/InvertedIndex.h
src/calculation/Query.cpp src/calculation/Query.h src/calculation/Query.cpp src/calculation/Query.h
src/calculation/SequentialSearch.cpp src/calculation/SequentialSearch.h
src/calculation/Space.cpp src/calculation/Space.h src/calculation/Space.cpp src/calculation/Space.h
src/calculation/WeightedDocument.cpp src/calculation/WeightedDocument.h src/calculation/WeightedDocument.cpp src/calculation/WeightedDocument.h
src/database/Database.cpp src/database/Database.h src/database/Database.cpp src/database/Database.h
......
...@@ -9,11 +9,19 @@ body { ...@@ -9,11 +9,19 @@ body {
   
button { button {
font-size: 20px; font-size: 20px;
margin: 25px; margin: 10px;
padding: 5px; padding: 5px;
width: 250px; width: 250px;
} }
   
label {
font-size: 20px;
}
input {
margin: 30px 10px 0px 0px;
}
.navigation { .navigation {
width: 700px; width: 700px;
list-style: none; list-style: none;
......
#include <algorithm>
#include <cmath>
#include "SequentialSearch.h"
#include "./../database/Document.h"
using namespace std;
vector<pair<int, double>> SequentialSearch::search(Database & database, int queryDocument) {
vector<pair<int, double>> results;
map<int, double> vectorSizes = database.getVectorSizes();
vector<Document> availableDocuments = database.getDocumentsCollection();
for (const Document & document : availableDocuments) {
double result = 0, denominator = 0;
vector<pair<double, double>> termsWeights = database.getTermsWeights(queryDocument, document.id);
for (const pair<double, double> & record : termsWeights)
result += record.first * record.second;
denominator = sqrt(vectorSizes[queryDocument] * vectorSizes[document.id]);
// Input should not be zero vector but if it is, do not divide and "just" return wrong result..
if (denominator != 0)
result = result / denominator;
results.emplace_back(make_pair(document.id, result));
}
sort(results.begin(), results.end(), [] (const pair<int, double> & a, const pair<int, double> & b)
{ return a.second > b.second; });
return results;
}
#include <utility>
#include <vector>
#include "./../database/Database.h"
namespace SequentialSearch {
/**
 * @brief Sequential search: compares the query document with every document in the database
 * @param database Database connection
 * @param queryDocument ID of document which represents query
 * @return Vector of pairs (document ID, similarity score), sorted by score in descending order
 */
std::vector<std::pair<int, double>> search(Database & database, int queryDocument);
}
...@@ -24,12 +24,12 @@ Document Database::getDocumentByID(int id) { ...@@ -24,12 +24,12 @@ Document Database::getDocumentByID(int id) {
return {id, query.getColumn(0)}; return {id, query.getColumn(0)};
} }
   
map<string, double> Database::getTermsAndWightsByDocumentID(int document_id) { map<string, double> Database::getTermsAndWightsByDocumentID(int documentID) {
map<string, double> termsAndWeights; map<string, double> termsAndWeights;
SQLite::Statement query(db, "SELECT Term.value, TermDocumentOccurrence.weight FROM Term " SQLite::Statement query(db, "SELECT Term.value, TermDocumentOccurrence.weight FROM Term "
"JOIN TermDocumentOccurrence ON Term.id = TermDocumentOccurrence.Term_id " "JOIN TermDocumentOccurrence ON Term.id = TermDocumentOccurrence.Term_id "
"WHERE TermDocumentOccurrence.Document_id = :id"); "WHERE TermDocumentOccurrence.Document_id = :documentID");
query.bind(":id", document_id); query.bind(":documentID", documentID);
   
while(query.executeStep()) while(query.executeStep())
termsAndWeights[query.getColumn(0)] = query.getColumn(1); termsAndWeights[query.getColumn(0)] = query.getColumn(1);
...@@ -48,3 +48,21 @@ map<int, double> Database::getVectorSizes() { ...@@ -48,3 +48,21 @@ map<int, double> Database::getVectorSizes() {
   
return vectorSizes; return vectorSizes;
} }
vector<pair<double, double>> Database::getTermsWeights(int queryDocumentID, int documentID) {
vector<pair<double, double>> results;
SQLite::Statement query(db, "SELECT Term_id AS Term_id_1, weight AS weight_1, weight_2 "
"FROM TermDocumentOccurrence "
"JOIN (SELECT Term_id AS Term_id_2, weight AS weight_2 "
"FROM TermDocumentOccurrence "
"WHERE Document_id = :documentID) "
"ON Term_id_1 = Term_id_2 "
"WHERE Document_id = :queryDocumentID");
query.bind(":queryDocumentID", queryDocumentID);
query.bind(":documentID", documentID);
while(query.executeStep())
results.emplace_back(make_pair(query.getColumn("weight_1"), query.getColumn("weight_2")));
return results;
}
...@@ -32,14 +32,22 @@ public: ...@@ -32,14 +32,22 @@ public:
   
/** /**
* @brief Finds all terms and their weights from specified document * @brief Finds all terms and their weights from specified document
* @param document_id Document's ID to process * @param documentID Document's ID to process
* @return Map with terms as keys and weights as their values * @return Map with terms as keys and weights as their values
*/ */
std::map<std::string, double> getTermsAndWightsByDocumentID(int document_id); std::map<std::string, double> getTermsAndWightsByDocumentID(int documentID);
   
/** /**
* @brief Computes size of vector for every document in database * @brief Computes size of vector for every document in database
* @return Map with document_id as key and size of vector as value * @return Map with document_id as key and size of vector as value
*/ */
std::map<int, double> getVectorSizes(); std::map<int, double> getVectorSizes();
/**
 * @brief Gets weights of the terms which occur in both given documents
 * @param queryDocumentID ID of document which is used as query
 * @param documentID ID of compared document
 * @return Vector of pairs, one per shared term: first is the weight of the term in the document
 *         which is used as query, second is the weight of the term in the compared document;
 *         the vector is empty when the two documents share no term
 */
std::vector<std::pair<double, double>> getTermsWeights(int queryDocumentID, int documentID);
}; };
#include <Wt/WCheckBox.h>
#include <Wt/WText.h> #include <Wt/WText.h>
#include <Wt/WMenu.h> #include <Wt/WMenu.h>
#include <Wt/WPushButton.h> #include <Wt/WPushButton.h>
...@@ -6,6 +7,7 @@ ...@@ -6,6 +7,7 @@
   
#include "./../calculation/Query.h" #include "./../calculation/Query.h"
#include "./../calculation/Computor.h" #include "./../calculation/Computor.h"
#include "./../calculation/SequentialSearch.h"
#include "./../util/InvertedIndexJSONParser.h" #include "./../util/InvertedIndexJSONParser.h"
#include "Page.h" #include "Page.h"
   
...@@ -35,10 +37,14 @@ void Page::displayMainPage() ...@@ -35,10 +37,14 @@ void Page::displayMainPage()
} }
menu->select(0); menu->select(0);
   
Wt::WCheckBox *checkBox = container->addNew<Wt::WCheckBox>("Search using inverted index");
checkBox->setInline(false);
checkBox->setChecked(true);
container->addWidget(std::move(buttonPtr)); container->addWidget(std::move(buttonPtr));
   
button->clicked().connect([=] { button->clicked().connect([=] {
displayDetail(availableDocuments.at(menu->currentIndex()).id); displayDetail(availableDocuments.at(menu->currentIndex()).id, checkBox->isChecked());
}); });
} }
   
...@@ -63,7 +69,7 @@ std::string Page::getContent(const std::string & path) ...@@ -63,7 +69,7 @@ std::string Page::getContent(const std::string & path)
return content; return content;
} }
   
void Page::displayDetail(int document_id) void Page::displayDetail(int document_id, bool useInvertedIndex)
{ {
container->clear(); container->clear();
   
...@@ -73,9 +79,16 @@ void Page::displayDetail(int document_id) ...@@ -73,9 +79,16 @@ void Page::displayDetail(int document_id)
auto nextBookButtonPtr = Wt::cpp14::make_unique<Wt::WPushButton>("Jump to selected book"); auto nextBookButtonPtr = Wt::cpp14::make_unique<Wt::WPushButton>("Jump to selected book");
auto nextBookButton = nextBookButtonPtr.get(); auto nextBookButton = nextBookButtonPtr.get();
   
// threshold je nyni nastaven na -1 --> ve vysledku budou i uplne rozdilne dokumenty std::vector<std::pair<int, double>> result;
Query query(database.getTermsAndWightsByDocumentID(document_id), -1);
auto result = Computor(space, query).compute(database); if (useInvertedIndex) {
// threshold is currently set to -1 --> the result will also contain completely different documents
Query query(database.getTermsAndWightsByDocumentID(document_id), -1);
result = Computor(space, query).compute(database);
}
else {
result = SequentialSearch::search(database, document_id);
}
   
container->addNew<Wt::WText>("<h1>The most similar books</h1>"); container->addNew<Wt::WText>("<h1>The most similar books</h1>");
...@@ -86,6 +99,10 @@ void Page::displayDetail(int document_id) ...@@ -86,6 +99,10 @@ void Page::displayDetail(int document_id)
} }
menu->select(0); menu->select(0);
   
Wt::WCheckBox *checkBox = container->addNew<Wt::WCheckBox>("Search using inverted index");
checkBox->setInline(false);
checkBox->setChecked(true);
container->addWidget(std::move(nextBookButtonPtr)); container->addWidget(std::move(nextBookButtonPtr));
container->addWidget(std::move(mainPageButtonPtr)); container->addWidget(std::move(mainPageButtonPtr));
   
...@@ -97,7 +114,7 @@ void Page::displayDetail(int document_id) ...@@ -97,7 +114,7 @@ void Page::displayDetail(int document_id)
   
nextBookButton->clicked().connect([=] { nextBookButton->clicked().connect([=] {
if (menu->currentIndex() != 0) if (menu->currentIndex() != 0)
displayDetail(result.at(menu->currentIndex()).first); displayDetail(result.at(menu->currentIndex()).first, checkBox->isChecked());
}); });
   
mainPageButton->clicked().connect([=] { mainPageButton->clicked().connect([=] {
......
...@@ -42,6 +42,7 @@ private: ...@@ -42,6 +42,7 @@ private:
/** /**
* @brief Displays page with document content and similar documents * @brief Displays page with document content and similar documents
* @param document_id ID of document to display * @param document_id ID of document to display
* @param useInvertedIndex bool that determines if should be inverted index used on search for similar documents
*/ */
void displayDetail(int document_id); void displayDetail(int document_id, bool useInvertedIndex);
}; };
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment