From 4099d2eb7db68eb4dd1a27da0fc355fa0108d735 Mon Sep 17 00:00:00 2001
From: Petr Wudi <petr.wudi@gmail.com>
Date: Fri, 17 Nov 2017 23:56:39 +0100
Subject: [PATCH] Modify SimilarDnaFinder interface and change implementation
 according to it, closes #5

---
 .../AlignedSequencePair.java                  | 42 ++++++++++
 .../fit/kw/vmm/backend/SimilarDnaFinder.java  | 18 +++--
 .../kw/vmm/backend/SimilarDnaFinderImpl.java  | 76 ++++++++++++++-----
 3 files changed, 111 insertions(+), 25 deletions(-)
 create mode 100644 src/main/java/edu/cvut/fit/kw/vmm/alignment_solution/AlignedSequencePair.java

diff --git a/src/main/java/edu/cvut/fit/kw/vmm/alignment_solution/AlignedSequencePair.java b/src/main/java/edu/cvut/fit/kw/vmm/alignment_solution/AlignedSequencePair.java
new file mode 100644
index 0000000..3e72c47
--- /dev/null
+++ b/src/main/java/edu/cvut/fit/kw/vmm/alignment_solution/AlignedSequencePair.java
@@ -0,0 +1,42 @@
+package edu.cvut.fit.kw.vmm.alignment_solution;
+
+import edu.cvut.fit.kw.vmm.DnaSequence;
+
+/**
+ * Contains a pair of sequences and information on how to align them so that the matching parts of the sequences are
+ * at the same position.
+ */
+public class AlignedSequencePair {
+
+    private final AlignmentSolution alignment;
+    private final DnaSequence seq0;
+    private final DnaSequence seq1;
+
+    public AlignedSequencePair(DnaSequence seq0, DnaSequence seq1, AlignmentSolution alignment) {
+        this.seq0 = seq0;
+        this.seq1 = seq1;
+        this.alignment = alignment;
+    }
+
+    /**
+     * @return information on how to align the sequences so that their matching parts are at the same position. It
+     * specifies how to transform the first string into the second one.
+     */
+    public AlignmentSolution getAlignment() {
+        return alignment;
+    }
+
+    /**
+     * @return first DNA sequence
+     */
+    public DnaSequence getFirstSequence() {
+        return seq0;
+    }
+
+    /**
+     * @return second DNA sequence
+     */
+    public DnaSequence getSecondSequence() {
+        return seq1;
+    }
+}
diff --git a/src/main/java/edu/cvut/fit/kw/vmm/backend/SimilarDnaFinder.java b/src/main/java/edu/cvut/fit/kw/vmm/backend/SimilarDnaFinder.java
index a2c5c94..bba7750 100644
--- a/src/main/java/edu/cvut/fit/kw/vmm/backend/SimilarDnaFinder.java
+++ b/src/main/java/edu/cvut/fit/kw/vmm/backend/SimilarDnaFinder.java
@@ -1,28 +1,32 @@
 package edu.cvut.fit.kw.vmm.backend;
 
+import edu.cvut.fit.kw.vmm.alignment_solution.AlignedSequencePair;
+
 import java.util.List;
 
 /**
- * Class containing methods for finding similiar DNA to the specified one
+ * Class containing methods for finding DNA similar to the specified one
  */
 public interface SimilarDnaFinder {
 
     /**
      * Finds k similar DNA sequences
-     * @param sequence sequences similar to this will be found. It is not case-sensitive
+     * @param sequence sequences similar to this will be found
      * @param alignment type of alignment (local/global)
      * @param k number of similar sequences to find
-     * @return list of k dna sequences
+     * @return list of k dna sequence pairs - first sequence is always the one specified in input, second is the one
+     * found in DNA collection
      */
-    List<String> findKSimilar(String sequence, AlignmentType alignment, int k);
+    List<AlignedSequencePair> findKSimilar(String sequence, AlignmentType alignment, int k);
 
     /**
      * Finds DNA sequences with similarity to the given string higher than specified threshold
-     * @param sequence sequences similar to this will be found. It is not case-sensitive
+     * @param sequence sequences similar to this will be found
      * @param alignment type of alignment (local/global)
      * @param minSimilarity only sequences with value higher or equal this are included in result
-     * @return list of most similar DNA sequences
+     * @return list of most similar DNA sequence pairs - first sequence is always the one specified in input, second is
+     * the one found in DNA collection
      */
-    List<String> findSimilarWithRange(String sequence, AlignmentType alignment, double minSimilarity);
+    List<AlignedSequencePair> findSimilarWithRange(String sequence, AlignmentType alignment, double minSimilarity);
 
 }
diff --git a/src/main/java/edu/cvut/fit/kw/vmm/backend/SimilarDnaFinderImpl.java b/src/main/java/edu/cvut/fit/kw/vmm/backend/SimilarDnaFinderImpl.java
index 98118b0..5c65534 100644
--- a/src/main/java/edu/cvut/fit/kw/vmm/backend/SimilarDnaFinderImpl.java
+++ b/src/main/java/edu/cvut/fit/kw/vmm/backend/SimilarDnaFinderImpl.java
@@ -1,40 +1,80 @@
 package edu.cvut.fit.kw.vmm.backend;
 
+import edu.cvut.fit.kw.vmm.DnaSequence;
+import edu.cvut.fit.kw.vmm.alignment_solution.AlignedSequencePair;
 import edu.cvut.fit.kw.vmm.alignment_solution.AlignmentSolution;
+import edu.cvut.fit.kw.vmm.backend.sequence_containers.SequenceContainer;
 import edu.cvut.fit.kw.vmm.backend.sequences_comparement.NeedlemanWunsch;
+import edu.cvut.fit.kw.vmm.backend.sequences_comparement.SequenceComparator;
 
-import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.List;
 
+/**
+ * Finds similar DNA sequences in specified container
+ */
 public class SimilarDnaFinderImpl implements SimilarDnaFinder {
 
-    List<String> sequences;
-    List<AlignmentSolution> similarSequences;
+    private static final String INPUT_SEQUENCE_NAME = "input";
+    private final SequenceContainer container;
 
     /**
-     * Creates container of sequences. These sequences are stored in specified files.
-     * @param inputFiles paths of files containing DNA sequences. If path points to a directory, it recursively finds
-     *                   all files in it and load as DNS sequence files.
+     * Creates new instance of class for finding similar sequences to the specified one in the sequence container.
+     * @param container object containing all the sequences which will be compared to the specified one
      */
-    public SimilarDnaFinderImpl(List<String> inputFiles) {
-        sequences = new ArrayList<>();
-        similarSequences = new LinkedList<>();
+    public SimilarDnaFinderImpl(SequenceContainer container) {
+        this.container = container;
     }
 
     @Override
-    public List<String> findKSimilar(String sequence, AlignmentType alignment, int k) {
-        List<String> best;
-        for(String refSeq : sequences) {
-            NeedlemanWunsch solver = new NeedlemanWunsch(refSeq, sequence);
-            AlignmentSolution solution = solver.solve();
-            // TODO: saving the best results
+    public List<AlignedSequencePair> findKSimilar(String inputSeqString, AlignmentType alignmentType, int k) {
+        DnaSequence inputSequence = new DnaSequence(INPUT_SEQUENCE_NAME, inputSeqString);
+        List<AlignedSequencePair> similarSequences = new LinkedList<>(); // First has lowest similarity, last highest
+        for(DnaSequence sequence : container) {
+            String seqString = sequence.getSequence();
+            SequenceComparator comparator = null;
+            if(alignmentType == AlignmentType.GLOBAL) {
+                comparator = new NeedlemanWunsch(inputSeqString, seqString);
+                AlignmentSolution alignment = comparator.solve();
+                tryFitInList(inputSequence, sequence, alignment, similarSequences, k);
+            }
+        }
+        return similarSequences;
+    }
+
+    private void tryFitInList(DnaSequence seq0,
+                              DnaSequence seq1,
+                              AlignmentSolution alignment,
+                              List<AlignedSequencePair> similarSequences,
+                              int maxSize) {
+        int firstHigherScoreValue = 0; // Index of the first element whose similarity is not lower than the new one
+        while(firstHigherScoreValue < similarSequences.size() &&
+                similarSequences.get(firstHigherScoreValue).getAlignment().getSimilarity() < alignment.getSimilarity()) {
+            firstHigherScoreValue++;
+        }
+        if(firstHigherScoreValue > 0 || similarSequences.size() < maxSize) {
+            similarSequences.add(firstHigherScoreValue, new AlignedSequencePair(seq0, seq1, alignment));
+            if(similarSequences.size() > maxSize) {
+                similarSequences.remove(0);
+            }
         }
-        return null; // TODO
     }
 
     @Override
-    public List<String> findSimilarWithRange(String sequence, AlignmentType alignment, double minSimilarity) {
-        return null;
+    public List<AlignedSequencePair> findSimilarWithRange(String inputSeqString, AlignmentType alignmentType, double minSimilarity) {
+        DnaSequence inputSequence = new DnaSequence(INPUT_SEQUENCE_NAME, inputSeqString);
+        List<AlignedSequencePair> similarSequences = new LinkedList<>(); // Kept in container iteration order, not sorted
+        for(DnaSequence sequence : container) {
+            String seqString = sequence.getSequence();
+            SequenceComparator comparator = null;
+            if(alignmentType == AlignmentType.GLOBAL) {
+                comparator = new NeedlemanWunsch(inputSeqString, seqString);
+                AlignmentSolution alignment = comparator.solve();
+                if(alignment.getSimilarity() >= minSimilarity) {
+                    similarSequences.add(new AlignedSequencePair(inputSequence, sequence, alignment));
+                }
+            }
+        }
+        return similarSequences;
     }
 }
-- 
GitLab