diff --git a/src/main/java/edu/cvut/fit/kw/vmm/alignment_solution/AlignedSequencePair.java b/src/main/java/edu/cvut/fit/kw/vmm/alignment_solution/AlignedSequencePair.java new file mode 100644 index 0000000000000000000000000000000000000000..3e72c47971112062df3b14ef92d21d95a6b477a4 --- /dev/null +++ b/src/main/java/edu/cvut/fit/kw/vmm/alignment_solution/AlignedSequencePair.java @@ -0,0 +1,42 @@ +package edu.cvut.fit.kw.vmm.alignment_solution; + +import edu.cvut.fit.kw.vmm.DnaSequence; + +/** + * Contains pair of sequences and information how to align them to have matching part of the sequences on the same + * position. + */ +public class AlignedSequencePair { + + private final AlignmentSolution alignment; + private final DnaSequence seq0; + private final DnaSequence seq1; + + public AlignedSequencePair(DnaSequence seq0, DnaSequence seq1, AlignmentSolution alignment) { + this.seq0 = seq0; + this.seq1 = seq1; + this.alignment = alignment; + } + + /** + * @return information how to align them to have matching part of the sequences on the same position. It specifies + * how to transform first string into the second one. 
+ */ + public AlignmentSolution getAlignment() { + return alignment; + } + + /** + * @return first DNA sequence + */ + public DnaSequence getFirstSequence() { + return seq0; + } + + /** + * @return second DNA sequence + */ + public DnaSequence getSecondSequence() { + return seq1; + } +} diff --git a/src/main/java/edu/cvut/fit/kw/vmm/backend/SimilarDnaFinder.java b/src/main/java/edu/cvut/fit/kw/vmm/backend/SimilarDnaFinder.java index a2c5c9468d4042537af6be233ed64f6f5521133a..bba7750e4f4e628405c2538909e9024215ed89e5 100644 --- a/src/main/java/edu/cvut/fit/kw/vmm/backend/SimilarDnaFinder.java +++ b/src/main/java/edu/cvut/fit/kw/vmm/backend/SimilarDnaFinder.java @@ -1,28 +1,32 @@ package edu.cvut.fit.kw.vmm.backend; +import edu.cvut.fit.kw.vmm.alignment_solution.AlignedSequencePair; + import java.util.List; /** - * Class containing methods for finding similiar DNA to the specified one + * Class containing methods for finding DNA similar to the specified one */ public interface SimilarDnaFinder { /** * Finds k similar DNA sequences - * @param sequence sequences similar to this will be found. It is not case-sensitive + * @param sequence sequences similar to this will be found * @param alignment type of alignment (local/global) * @param k number of similar sequences to find - * @return list of k dna sequences + * @return list of k dna sequence pairs - first sequence is always the one specified in input, second is the one + * found in DNA collection */ - List<String> findKSimilar(String sequence, AlignmentType alignment, int k); + List<AlignedSequencePair> findKSimilar(String sequence, AlignmentType alignment, int k); /** * Finds DNA sequences with similarity to the given string higher than specified threshold - * @param sequence sequences similar to this will be found. 
It is not case-sensitive + * @param sequence sequences similar to this will be found * @param alignment type of alignment (local/global) * @param minSimilarity only sequences with value higher or equal this are included in result - * @return list of most similar DNA sequences + * @return list of most similar DNA sequences - first sequence is always the one specified in input, second is the + * one found in DNA collection */ - List<String> findSimilarWithRange(String sequence, AlignmentType alignment, double minSimilarity); + List<AlignedSequencePair> findSimilarWithRange(String sequence, AlignmentType alignment, double minSimilarity); } diff --git a/src/main/java/edu/cvut/fit/kw/vmm/backend/SimilarDnaFinderImpl.java b/src/main/java/edu/cvut/fit/kw/vmm/backend/SimilarDnaFinderImpl.java index 98118b0f3fc635157ce94c722b000f61a58136f0..5c655341c9a8d7bc7509245733ddad7916023669 100644 --- a/src/main/java/edu/cvut/fit/kw/vmm/backend/SimilarDnaFinderImpl.java +++ b/src/main/java/edu/cvut/fit/kw/vmm/backend/SimilarDnaFinderImpl.java @@ -1,40 +1,80 @@ package edu.cvut.fit.kw.vmm.backend; +import edu.cvut.fit.kw.vmm.DnaSequence; +import edu.cvut.fit.kw.vmm.alignment_solution.AlignedSequencePair; import edu.cvut.fit.kw.vmm.alignment_solution.AlignmentSolution; +import edu.cvut.fit.kw.vmm.backend.sequence_containers.SequenceContainer; import edu.cvut.fit.kw.vmm.backend.sequences_comparement.NeedlemanWunsch; +import edu.cvut.fit.kw.vmm.backend.sequences_comparement.SequenceComparator; -import java.util.ArrayList; import java.util.LinkedList; import java.util.List; +/** + * Finds similar DNA sequences in specified container + */ public class SimilarDnaFinderImpl implements SimilarDnaFinder { - List<String> sequences; - List<AlignmentSolution> similarSequences; + private static final String INPUT_SEQUENCE_NAME = "input"; + private final SequenceContainer container; /** - * Creates container of sequences. These sequences are stored in specified files. 
- * @param inputFiles paths of files containing DNA sequences. If path points to a directory, it recursively finds - * all files in it and load as DNS sequence files. + * Creates new instance of class for finding similar sequences to the specified one in the sequence container. + * @param container object containing all the sequences which will be compared to the specified one */ - public SimilarDnaFinderImpl(List<String> inputFiles) { - sequences = new ArrayList<>(); - similarSequences = new LinkedList<>(); + public SimilarDnaFinderImpl(SequenceContainer container) { + this.container = container; } @Override - public List<String> findKSimilar(String sequence, AlignmentType alignment, int k) { - List<String> best; - for(String refSeq : sequences) { - NeedlemanWunsch solver = new NeedlemanWunsch(refSeq, sequence); - AlignmentSolution solution = solver.solve(); - // TODO: saving the best results + public List<AlignedSequencePair> findKSimilar(String inputSeqString, AlignmentType alignmentType, int k) { + DnaSequence inputSequence = new DnaSequence(INPUT_SEQUENCE_NAME, inputSeqString); + List<AlignedSequencePair> similarSequences = new LinkedList<>(); // First has lowest similarity, last highest + for(DnaSequence sequence : container) { + String seqString = sequence.getSequence(); + SequenceComparator comparator = null; + if(alignmentType == AlignmentType.GLOBAL) { + comparator = new NeedlemanWunsch(inputSeqString, seqString); + AlignmentSolution alignment = comparator.solve(); + tryFitInList(inputSequence, sequence, alignment, similarSequences, k); + } + } + return similarSequences; + } + + private void tryFitInList(DnaSequence seq0, + DnaSequence seq1, + AlignmentSolution alignment, + List<AlignedSequencePair> similarSequences, + int maxSize) { + int firstHigherScoreValue = 0; // First sequence in the list which has score higher than this one + while(firstHigherScoreValue < similarSequences.size() && + 
similarSequences.get(firstHigherScoreValue).getAlignment().getSimilarity() < alignment.getSimilarity()) { + firstHigherScoreValue++; + } + if(firstHigherScoreValue > 0 || similarSequences.size() < maxSize) { + similarSequences.add(firstHigherScoreValue, new AlignedSequencePair(seq0, seq1, alignment)); + if(similarSequences.size() > maxSize) { + similarSequences.remove(0); + } } - return null; // TODO } @Override - public List<String> findSimilarWithRange(String sequence, AlignmentType alignment, double minSimilarity) { - return null; + public List<AlignedSequencePair> findSimilarWithRange(String inputSeqString, AlignmentType alignmentType, double minSimilarity) { + DnaSequence inputSequence = new DnaSequence(INPUT_SEQUENCE_NAME, inputSeqString); + List<AlignedSequencePair> similarSequences = new LinkedList<>(); // First has lowest similarity, last highest + for(DnaSequence sequence : container) { + String seqString = sequence.getSequence(); + SequenceComparator comparator = null; + if(alignmentType == AlignmentType.GLOBAL) { + comparator = new NeedlemanWunsch(inputSeqString, seqString); + AlignmentSolution alignment = comparator.solve(); + if(alignment.getSimilarity() >= minSimilarity) { + similarSequences.add(new AlignedSequencePair(inputSequence, sequence, alignment)); + } + } + } + return similarSequences; } }