Skip to content
Snippets Groups Projects
Commit 4059a7ce authored by Lukáš Paukert's avatar Lukáš Paukert
Browse files

First version of weight calculation, not storing as JSON yet

parent 1f326fcf
No related branches found
No related tags found
No related merge requests found
# custom
data/
.idea/
.vscode/
 
# Byte-compiled / optimized / DLL files
__pycache__/
......
# Požadavky
- kompilátor podporující minimálně standard C++11
- vytvořit adresář ```lib``` v kořenovém adresáři projektu pro následující knihovny
- knihovna pro práci s SQLite:
  - ke stažení [ZDE](https://github.com/SRombauts/SQLiteCpp/releases)
  - rozbalit do adresáře ```lib``` a složku přejmenovat na ```SQLiteCpp```
- knihovna pro práci s JSON soubory:
  - ke stažení [ZDE](https://github.com/nlohmann/json/releases)
  - stačí stáhnout hlavičkový soubor ```json.hpp``` a umístit ho do adresáře ```lib```
- pro sestavení lze využít skript ```src/weight_calculation/build.sh```
- binárka se poté nachází v ```src/weight_calculation/build/main```; je nutné ji spouštět z adresáře ```build```, protože program používá relativní cesty k datům (```./../../../data/persistance/```)
# Build definition for the weight-calculation executable "main".
# Minimum CMake version, project name and project version
cmake_minimum_required(VERSION 3.1)
project(VectorModel VERSION 1.0)
# C++ 11 compiler is required
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Add subdirectory SQLiteCpp with all necessary files.
# The library sources live outside this source tree (in <project root>/lib), so the
# second argument names the binary directory explicitly, as CMake requires for
# out-of-tree source directories.
add_subdirectory(./../../lib/SQLiteCpp ./SQLite)
# The executable is built from the single translation unit main.cpp and linked
# against the SQLiteCpp target defined by the subdirectory above.
add_executable(main main.cpp)
target_link_libraries(main SQLiteCpp)
#!/bin/bash
# Configures and builds the project with CMake (Debug configuration) in the
# "build" directory located next to this script.
# Exit on first error
set -e
# Work relative to the script's own location so the script can be started from
# any directory (e.g. as src/weight_calculation/build.sh from the project root);
# otherwise "build" would be created in the caller's directory and "cmake .."
# would not find the CMakeLists.txt.
cd "$(dirname "${BASH_SOURCE[0]}")"
# Create dir build
mkdir -p build
cd build
# Configure
cmake -DCMAKE_BUILD_TYPE=Debug ..
# Build
cmake --build .
#include <cstdint>
#include <cstdlib>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <SQLiteCpp/SQLiteCpp.h>
#include "../../lib/json.hpp"
using json = nlohmann::json;
/**
 * @brief Writes one line with the normalized weights of @p term to @p ostream.
 *
 * The line has the form "<term>: <Document_id> <weight> <Document_id> <weight> ...".
 * Each weight is the "count" column of a matching TermDocumentOccurrence row divided
 * by the number stored under @p term in @p maxOccurrences.
 *
 * @param db             Database queried for the occurrences of the term.
 * @param ostream        Output stream the line is written to.
 * @param maxOccurrences JSON object providing the divisor for each term
 *                       (presumably the term's maximum occurrence count; verify against the producer of the file).
 * @param term           Term whose weights are calculated.
 * @return true on success; false when @p term has no usable entry in @p maxOccurrences
 *         or when a JSON/SQLite error occurs (the error is reported on std::cout).
 */
bool calculateWeight(SQLite::Database & db, std::ofstream & ostream, const json & maxOccurrences, const std::string & term)
{
    try
    {
        // at() throws for a missing key, whereas operator[] on a const json object
        // has undefined behaviour in that case.
        const uint32_t occurrences = maxOccurrences.at(term).get<uint32_t>();
        if (occurrences == 0)
        {
            // Dividing by zero would write inf/nan weights into the output file.
            std::cout << "Invalid occurrence count 0 for term '" << term << "'" << std::endl;
            return false;
        }

        SQLite::Statement query(db, "SELECT * FROM TermDocumentOccurrence JOIN Term ON TermDocumentOccurrence.Term_id = Term.id WHERE Term.value = :term");
        query.bind(":term", term);

        ostream << term << ":" << std::setprecision(20);
        while (query.executeStep())
        {
            const double weight = query.getColumn("count").getInt() / static_cast<double>(occurrences);
            ostream << " " << query.getColumn("Document_id") << " " << weight;
        }
        ostream << '\n';
    }
    catch (const json::exception& e)
    {
        // Missing term or a value that is not a number.
        std::cout << "JSON exception: " << e.what() << std::endl;
        return false;
    }
    catch (const std::exception& e)
    {
        std::cout << "SQLite exception: " << e.what() << std::endl;
        return false;
    }
    return true;
}
/**
 * @brief Calculates and writes the weights of every term stored in the database.
 *
 * Opens the database at "./../../../data/persistance/db" (relative to the working
 * directory), reads all values from the Term table and calls calculateWeight()
 * for each of them.
 *
 * @param ostream        Output stream passed on to calculateWeight().
 * @param maxOccurrences JSON object passed on to calculateWeight().
 * @return true when all terms were processed; false as soon as one term fails or
 *         a database error occurs (the error is reported on std::cout).
 */
bool process(std::ofstream & ostream, const json & maxOccurrences)
{
    try
    {
        // The database is an input of this program. Without OPEN_CREATE a wrong path
        // is reported as an open error instead of silently creating an empty database
        // file and failing later on the missing Term table.
        SQLite::Database db("./../../../data/persistance/db", SQLite::OPEN_READWRITE);
        SQLite::Statement query(db, "SELECT value FROM Term");
        while (query.executeStep())
        {
            if (!calculateWeight(db, ostream, maxOccurrences, query.getColumn("value")))
                return false;
        }
    }
    catch (const std::exception& e)
    {
        std::cout << "SQLite exception: " << e.what() << std::endl;
        return false;
    }
    return true;
}
/**
 * @brief Entry point: loads 'most_frequent_words.json', calculates the term weights
 *        and writes them to 'invertedList.json'.
 *
 * Both files are addressed relative to the working directory
 * ("./../../../data/persistance/").
 *
 * @return EXIT_SUCCESS when the output was written, EXIT_FAILURE otherwise.
 */
int main()
{
    std::ifstream istream("./../../../data/persistance/most_frequent_words.json");
    std::ofstream ostream("./../../../data/persistance/invertedList.json");
    if (istream.fail() || ostream.fail())
    {
        std::cout << "Cannot open/find file 'most_frequent_words.json' or cannot create file 'invertedList.json'" << std::endl;
        return EXIT_FAILURE;
    }

    json maxOccurrences;
    try
    {
        // Parsing throws on malformed JSON; without the handler the program
        // would be terminated by the uncaught exception.
        istream >> maxOccurrences;
    }
    catch (const std::exception& e)
    {
        std::cout << "Cannot parse file 'most_frequent_words.json': " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
    istream.close();
    if (istream.fail())
    {
        std::cout << "Something went wrong with file 'most_frequent_words.json'" << std::endl;
        return EXIT_FAILURE;
    }

    if (!process(ostream, maxOccurrences))
        return EXIT_FAILURE;

    // Closing flushes the buffered output, so write errors show up here.
    ostream.close();
    if (ostream.fail())
    {
        std::cout << "Something went wrong during writing to output file 'invertedList.json'" << std::endl;
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment