diff --git a/astringology2/src/astringology.cpp b/astringology2/src/astringology.cpp index c69944ec8014f9783349ad0e635c7fdf96232a30..4b75041ba0544b22da0cf62489b920b2cf972e2e 100644 --- a/astringology2/src/astringology.cpp +++ b/astringology2/src/astringology.cpp @@ -31,6 +31,10 @@ int main ( int argc, char * argv[] ) { allowed.push_back ( "deadZoneUsingBadCharacterShift" ); allowed.push_back ( "exactMatchingAutomaton" ); + allowed.push_back ( "hammingMatchingAutomaton" ); + allowed.push_back ( "levenshteinMatchingAutomaton" ); + allowed.push_back ( "generalizedLevenshteinMatchingAutomaton" ); + allowed.push_back ( "exactNondeterministicFactorAutomaton" ); allowed.push_back ( "exactSubsequenceAutomaton" ); allowed.push_back ( "exactNondeterministicSubsequenceAutomaton" ); @@ -54,6 +58,13 @@ int main ( int argc, char * argv[] ) { allowed.push_back ( "goodSuffixShiftTable" ); allowed.push_back ( "hammingMatchDynamicProgramming" ); + allowed.push_back ( "levenshteinMatchDynamicProgramming" ); + allowed.push_back ( "generalizedLevenshteinMatchDynamicProgramming" ); + + allowed.push_back ( "exactMatchBitParalelism"); + allowed.push_back ( "hammingMatchBitParalelism" ); + allowed.push_back ( "levenshteinMatchBitParalelism" ); + allowed.push_back ( "generalizedLevenshteinMatchBitParalelism"); TCLAP::ValuesConstraint < std::string > allowedVals ( allowed ); @@ -107,7 +118,13 @@ int main ( int argc, char * argv[] ) { || algorithm.getValue ( ) == "compressedBitParallelIndex" || algorithm.getValue ( ) == "suffixArray" || algorithm.getValue ( ) == "suffixAutomaton" - || algorithm.getValue ( ) == "hammingMatchDynamicProgramming" ) { + || algorithm.getValue ( ) == "hammingMatchDynamicProgramming" + || algorithm.getValue ( ) == "levenshteinMatchDynamicProgramming" + || algorithm.getValue ( ) == "generalizedLevenshteinMatchDynamicProgramming" + || algorithm.getValue ( ) == "exactMatchBitParalelism" + || algorithm.getValue ( ) == "hammingMatchBitParalelism" + || algorithm.getValue ( ) == "levenshteinMatchBitParalelism" + || algorithm.getValue ( ) == "generalizedLevenshteinMatchBitParalelism" ) { std::string input; if ( subjectInput.getValue ( ).size ( ) == 0 ) input = "-"; @@ -131,13 +148,23 @@ int main ( int argc, char * argv[] ) { || algorithm.getValue ( ) == "backwardOracleMatching" || algorithm.getValue ( ) == "backwardDAWGMatching" || algorithm.getValue ( ) == "exactMatchingAutomaton" + || algorithm.getValue ( ) == "hammingMatchingAutomaton" + || algorithm.getValue ( ) == "levenshteinMatchingAutomaton" + || algorithm.getValue ( ) == "generalizedLevenshteinMatchingAutomaton" || algorithm.getValue ( ) == "exactFactorOracleAutomaton" || algorithm.getValue ( ) == "exactNondeterministicSuffixAutomaton" || algorithm.getValue ( ) == "bndmMatcher" || algorithm.getValue ( ) == "badCharacterShiftTable" || algorithm.getValue ( ) == "quickSearchBadCharacterShiftTable" || algorithm.getValue ( ) == "goodSuffixShiftTable" - || algorithm.getValue ( ) == "hammingMatchDynamicProgramming" ) { + || algorithm.getValue ( ) == "hammingMatchDynamicProgramming" + || algorithm.getValue ( ) == "levenshteinMatchDynamicProgramming" + || algorithm.getValue ( ) == "generalizedLevenshteinMatchDynamicProgramming" + || algorithm.getValue ( ) == "exactMatchBitParalelism" + || algorithm.getValue ( ) == "hammingMatchBitParalelism" + || algorithm.getValue ( ) == "levenshteinMatchBitParalelism" + || algorithm.getValue ( ) == "generalizedLevenshteinMatchBitParalelism" ) { + std::string input; if ( patternInput.getValue ( ).size ( ) == 0 ) input = "-"; @@ -164,7 +191,15 @@ int main ( int argc, char * argv[] ) { parser.parse ( )->run ( environment ); } - if ( algorithm.getValue ( ) == "hammingMatchDynamicProgramming" ) { + if ( algorithm.getValue ( ) == "hammingMatchDynamicProgramming" + || algorithm.getValue ( ) == "levenshteinMatchDynamicProgramming" + || algorithm.getValue ( ) == "generalizedLevenshteinMatchDynamicProgramming" + || algorithm.getValue ( ) == "hammingMatchingAutomaton" + || algorithm.getValue ( ) == "levenshteinMatchingAutomaton" + || algorithm.getValue ( ) == "generalizedLevenshteinMatchingAutomaton" + || algorithm.getValue ( ) == "hammingMatchBitParalelism" + || algorithm.getValue ( ) == "levenshteinMatchBitParalelism" + || algorithm.getValue ( ) == "generalizedLevenshteinMatchBitParalelism" ) { unsigned input = distanceInput.getValue ( ); environment.setVariable ( "distance", input ); @@ -223,6 +258,13 @@ int main ( int argc, char * argv[] ) { cliCommand = "execute stringology::matching::BNDMMatcherConstruction $pattern > $output"; } else if ( algorithm.getValue ( ) == "exactMatchingAutomaton" ) { cliCommand = "execute stringology::matching::ExactMatchingAutomaton $pattern > $output"; + } else if ( algorithm.getValue ( ) == "hammingMatchingAutomaton" ) { + cliCommand = "execute stringology::matching::HammingMatchingAutomaton $pattern $distance > $output"; + } else if ( algorithm.getValue ( ) == "levenshteinMatchingAutomaton" ) { + cliCommand = "execute stringology::matching::LevenshteinMatchingAutomaton $pattern $distance > $output"; + } else if ( algorithm.getValue ( ) == "generalizedLevenshteinMatchingAutomaton" ) { + cliCommand = "execute stringology::matching::GeneralizedLevenshteinMatchingAutomaton $pattern $distance > $output"; + } else if ( algorithm.getValue ( ) == "borderArray" ) { cliCommand = "execute string::properties::BorderArray $subject > $output"; @@ -234,7 +276,19 @@ int main ( int argc, char * argv[] ) { cliCommand = "execute string::properties::GoodSuffixShiftTable $pattern > $output"; } else if ( algorithm.getValue ( ) == "hammingMatchDynamicProgramming" ) { cliCommand = "execute stringology::simulations::HammingDynamicProgramming $subject $pattern $distance > $output"; - } else { + } else if ( algorithm.getValue ( ) == "levenshteinMatchDynamicProgramming" ) { + cliCommand = "execute stringology::simulations::LevenshteinDynamicProgramming $subject $pattern $distance > $output"; + } else if ( algorithm.getValue ( ) == "generalizedLevenshteinMatchDynamicProgramming" ) { + cliCommand = "execute stringology::simulations::GeneralizedLevenshteinDynamicProgramming $subject $pattern $distance > $output"; + } else if ( algorithm.getValue ( ) == "exactMatchBitParalelism" ) { + cliCommand = "execute stringology::simulations::ExactBitParalelism $subject $pattern > $output"; + } else if ( algorithm.getValue ( ) == "hammingMatchBitParalelism" ) { + cliCommand = "execute stringology::simulations::HammingBitParalelism $subject $pattern $distance > $output"; + } else if ( algorithm.getValue ( ) == "levenshteinMatchBitParalelism" ) { + cliCommand = "execute stringology::simulations::LevenshteinBitParalelism $subject $pattern $distance > $output"; + } else if ( algorithm.getValue ( ) == "generalizedLevenshteinMatchBitParalelism" ) { + cliCommand = "execute stringology::simulations::GeneralizedLevenshteinBitParalelism $subject $pattern $distance > $output"; + } else { throw exception::CommonException ( "Invalid algorithm" ); } diff --git a/tests.astringology.sh b/tests.astringology.sh index 5d8dd193074debc02c08ac2b0bd9954cdef0f8c8..07339f02d72cee817cdee6155f107bbef4089a50 100755 --- a/tests.astringology.sh +++ b/tests.astringology.sh @@ -212,6 +212,36 @@ function runTest { clearResults } + +# $1 - aconversions2 sequence +# $2 - first tested entity +# $3 - second tested entity +function runTestAgainst { + echo $1 + echo -ne "\t" + + initResults + bgxgrp="" + + # random tests + for i in $(seq 1 $TESTCASE_ITERATIONS ); + do + SUBJECT_FILE=$(mktemp) + PATTERN_FILE=$(mktemp) + + cat <(generateSubject) > $SUBJECT_FILE + cat <(generatePattern) > $PATTERN_FILE + + bgxlimit ${JOBS} runTest2 "$2" "$3" "$SUBJECT_FILE" "$PATTERN_FILE" + done + + bgxwait + + outputResults + clearResults +} + + runTest "DAWG Factors" "./astringology2 -a suffixAutomaton -s \"\$SUBJECT_FILE\" | ./aquery2 -q suffixAutomatonFactors -p \"\$PATTERN_FILE\" | ./astat2 -p size" runTest "BNDM Matcher" "./astringology2 -a bndmMatcher -p \"\$PATTERN_FILE\" | ./astringology2 -a bndmOccurrences -p - -s \"\$SUBJECT_FILE\" | ./astat2 -p size" runTest "Exact Boyer Moore" "./astringology2 -a boyerMoore -s \"\$SUBJECT_FILE\" -p <(./aaccess2 --string alphabet -o add -i \"\$PATTERN_FILE\" -c <(./aaccess2 --string alphabet -o get -i \"\$SUBJECT_FILE\")) | ./astat2 -p size" @@ -225,3 +255,15 @@ runTest "Exact Reversed Boyer Moore Horspool" "./astringology2 -a reversedBoyerM runTest "Quick Search" "./astringology2 -a quickSearch -s \"\$SUBJECT_FILE\" -p <(./aaccess2 --string alphabet -o add -i \"\$PATTERN_FILE\" -c <(./aaccess2 --string alphabet -o get -i \"\$SUBJECT_FILE\")) | ./astat2 -p size" runTest "Exact Matching Automaton" "./arun2 -t occurrences -a <(./astringology2 -a exactMatchingAutomaton -p <(./aaccess2 --string alphabet -o add -i \"\$PATTERN_FILE\" -c <(./aaccess2 --string alphabet -o get -i \"\$SUBJECT_FILE\")) | ./adeterminize2) -i \"\$SUBJECT_FILE\" | ./astat2 -p size" runTest "Exact Dead Zone Using Bad Character Shift" "./astringology2 -a deadZoneUsingBadCharacterShift -s \"\$SUBJECT_FILE\" -p <(./aaccess2 --string alphabet -o add -i \"\$PATTERN_FILE\" -c <(./aaccess2 --string alphabet -o get -i \"\$SUBJECT_FILE\")) | ./astat2 -p size" + +runTestAgainst "Hamming automaton vs. dynamic programing" "./arun2 -t occurrences -i \"\$SUBJECT_FILE\" -a <(./astringology2 -a hammingMatchingAutomaton -p <(./aaccess2 --string alphabet -o add -i \"\$PATTERN_FILE\" -c <(./aaccess2 --string alphabet -o get -i \"\$SUBJECT_FILE\")) -d 3 | ./adeterminize2) | ./astat2 -p size" "./astringology2 -a hammingMatchDynamicProgramming -p \"\$PATTERN_FILE\" -s \"\$SUBJECT_FILE\" -d 3 | ./astat2 -p size" +runTestAgainst "Hamming automaton vs. bit paralelism" "./arun2 -t occurrences -i \"\$SUBJECT_FILE\" -a <(./astringology2 -a hammingMatchingAutomaton -p <(./aaccess2 --string alphabet -o add -i \"\$PATTERN_FILE\" -c <(./aaccess2 --string alphabet -o get -i \"\$SUBJECT_FILE\")) -d 3 | ./adeterminize2) | ./astat2 -p size" "./astringology2 -a hammingMatchBitParalelism -p \"\$PATTERN_FILE\" -s \"\$SUBJECT_FILE\" -d 3 | ./astat2 -p size" +runTestAgainst "Hamming dynamic programming vs. bit paralelism" "./astringology2 -a hammingMatchDynamicProgramming -p \"\$PATTERN_FILE\" -s \"\$SUBJECT_FILE\" -d 3 | ./astat2 -p size" "./astringology2 -a hammingMatchBitParalelism -p \"\$PATTERN_FILE\" -s \"\$SUBJECT_FILE\" -d 3 | ./astat2 -p size" + +runTestAgainst "Levenshtein automaton vs. dynamic programing" "./arun2 -t occurrences -i \"\$SUBJECT_FILE\" -a <(./astringology2 -a levenshteinMatchingAutomaton -p <(./aaccess2 --string alphabet -o add -i \"\$PATTERN_FILE\" -c <(./aaccess2 --string alphabet -o get -i \"\$SUBJECT_FILE\")) -d 3 | ./aepsilon2 | ./adeterminize2) | ./astat2 -p size" "./astringology2 -a levenshteinMatchDynamicProgramming -p \"\$PATTERN_FILE\" -s \"\$SUBJECT_FILE\" -d 3 | ./astat2 -p size" +runTestAgainst "Levenshtein automaton vs. bit paralelism" "./arun2 -t occurrences -i \"\$SUBJECT_FILE\" -a <(./astringology2 -a levenshteinMatchingAutomaton -p <(./aaccess2 --string alphabet -o add -i \"\$PATTERN_FILE\" -c <(./aaccess2 --string alphabet -o get -i \"\$SUBJECT_FILE\")) -d 3 | ./aepsilon2 | ./adeterminize2) | ./astat2 -p size" "./astringology2 -a levenshteinMatchBitParalelism -p \"\$PATTERN_FILE\" -s \"\$SUBJECT_FILE\" -d 3 | ./astat2 -p size" +runTestAgainst "Levenshtein dynamic programming vs. bit paralelism" "./astringology2 -a levenshteinMatchDynamicProgramming -p \"\$PATTERN_FILE\" -s \"\$SUBJECT_FILE\" -d 3 | ./astat2 -p size" "./astringology2 -a levenshteinMatchBitParalelism -p \"\$PATTERN_FILE\" -s \"\$SUBJECT_FILE\" -d 3 | ./astat2 -p size" + +runTestAgainst "Generalized Levenshtein automaton vs. dynamic programing" "./arun2 -t occurrences -i \"\$SUBJECT_FILE\" -a <(./astringology2 -a generalizedLevenshteinMatchingAutomaton -p <(./aaccess2 --string alphabet -o add -i \"\$PATTERN_FILE\" -c <(./aaccess2 --string alphabet -o get -i \"\$SUBJECT_FILE\")) -d 3 | ./aepsilon2 | ./adeterminize2) | ./astat2 -p size" "./astringology2 -a generalizedLevenshteinMatchDynamicProgramming -p \"\$PATTERN_FILE\" -s \"\$SUBJECT_FILE\" -d 3 | ./astat2 -p size" +runTestAgainst "Generalized Levenshtein automaton vs. bit paralelism" "./arun2 -t occurrences -i \"\$SUBJECT_FILE\" -a <(./astringology2 -a generalizedLevenshteinMatchingAutomaton -p <(./aaccess2 --string alphabet -o add -i \"\$PATTERN_FILE\" -c <(./aaccess2 --string alphabet -o get -i \"\$SUBJECT_FILE\")) -d 3 | ./aepsilon2 | ./adeterminize2) | ./astat2 -p size" "./astringology2 -a generalizedLevenshteinMatchBitParalelism -p \"\$PATTERN_FILE\" -s \"\$SUBJECT_FILE\" -d 3 | ./astat2 -p size" +runTestAgainst "Generalized Levenshtein dynamic programming vs. bit paralelism" "./astringology2 -a generalizedLevenshteinMatchDynamicProgramming -p \"\$PATTERN_FILE\" -s \"\$SUBJECT_FILE\" -d 3 | ./astat2 -p size" "./astringology2 -a generalizedLevenshteinMatchBitParalelism -p \"\$PATTERN_FILE\" -s \"\$SUBJECT_FILE\" -d 3 | ./astat2 -p size"