Skip to content
Snippets Groups Projects
Commit ee2bfe75 authored by Lukáš Paukert
Browse files

WIP: Fixed computation of cosine similarity

parent a64c3ca1
No related branches found
No related tags found
No related merge requests found
......@@ -13,15 +13,21 @@ Computor::Computor(Space space, Query query)
: space(std::move(space)),
query(std::move(query)) {}
 
vector<pair<int, double>> Computor::compute(Database & database, int document_id) {
vector<pair<int, double>> Computor::compute(Database & database) {
vector<pair<int, double>> results;
map<string, double> currentDocument = space.getTermsAndWeightsByID(database, document_id);
map<string, double> computedDocument;
 
availableTerms = query.termsKeyset;
 
double vectorQuerySize;
for (const auto & entry : query.terms)
vectorQuerySize += entry.second * entry.second;
while (!availableTerms.empty()) {
int ID = nextID(); //get lowest ID
double result = 0, denominator = 0, tmp = 0;
double result = 0, denominator = 0;
// hrozne zpomaluje beh programu (v radu sekund!!) --> vymyslet jine reseni...
computedDocument = space.getTermsAndWeightsByID(database, ID);
 
for (const auto &term: availableTerms) /*Go through all the remaining terms*/ {
try {
......@@ -37,13 +43,10 @@ vector<pair<int, double>> Computor::compute(Database & database, int document_id
}
}
 
for (const auto & entry : query.terms)
for (const auto & entry : computedDocument)
denominator += entry.second * entry.second;
for (const auto & entry : currentDocument)
tmp += entry.second * entry.second;
denominator = sqrt(denominator * tmp);
denominator = sqrt(denominator * vectorQuerySize);
// Input should not be zero vector but if it is, do not divide and "just" return wrong result..
if (denominator != 0)
result = result / denominator;
......
......@@ -38,10 +38,10 @@ public:
 
/**
* @brief Computes relevancies of documents to the query, from results removes documents that have relevancies below @ref Query::threshold
* @param database, document_id Database connection and ID of current document (query)
* @param database Database connection
* @return Vector of pairs with document IDs and their similarity to the query, sorted by similarity desc
*/
std::vector<std::pair<int, double>> compute(Database & database, int document_id);
std::vector<std::pair<int, double>> compute(Database & database);
};
 
 
......
......@@ -22,7 +22,7 @@ const map<string, double> Space::getTermsAndWeightsByID(Database & database, int
map<string, double> terms;
vector<string> dummy = database.getTermsByDocumentID(document_id);
 
for (string term : dummy) {
for (const string & term : dummy) {
InvertedIndex tmp = getInvertedIndexByKey(term);
terms[term] = tmp.getDocumentWeightByID(document_id);
}
......
......@@ -76,7 +76,7 @@ void MainPage::displayDetail(Space space, Wt::WContainerWidget * container, int
 
// threshold je nyni nastaven na -1 --> ve vysledku budou i uplne rozdilne dokumenty
Query query(space.getTermsAndWeightsByID(database, document.id), -1);
auto result = Computor(space, query).compute(database, document.id);
auto result = Computor(space, query).compute(database);
 
// dodelat proklikavani na zobrazene podobne dokumenty
for (size_t i = 0; i < 10 && i < result.size(); i++) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment