...
 
Commits (2)
This diff is collapsed.
......@@ -7,7 +7,7 @@ def load_picture_dictionary(module):
imagesDictionary = dict()
logging.info("loading image URLs")
with open('files/processed_data.csv') as csv_file:
with open('../files/processed_data.csv') as csv_file:
csv_reader = csv.reader(csv_file, delimiter=',')
line_count = 0
for row in csv_reader:
......
......@@ -6,7 +6,7 @@ from pathlib import Path
import gensim
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
LOCAL_MODULE = "models/local-module.bin"
LOCAL_MODULE = "../models/local-module.bin"
ITERATIONS = 1
......@@ -22,7 +22,7 @@ def load_local_module(limit=None):
def load_google_module(limit=None):
logging.info("loading google module")
return load_module_from_file("models/GoogleNews-vectors-negative300.bin", limit=limit)
return load_module_from_file("../models/GoogleNews-vectors-negative300.bin", limit=limit)
def load_module_from_file(file_name, bin=True, limit=None):
......@@ -30,7 +30,7 @@ def load_module_from_file(file_name, bin=True, limit=None):
def train_local_module():
documents = list(read_input("files/reviews_data.txt.gz"))
documents = list(read_input("../files/reviews_data.txt.gz"))
logging.info("reading data file done")
model = gensim.models.Word2Vec(documents, size=150, window=10, min_count=2, workers=multiprocessing.cpu_count(),
......
This diff is collapsed.
......@@ -8,7 +8,7 @@ from nltk.tokenize import word_tokenize
import src.ImageManager as ImageManager
import src.ModuleManager as ModuleManager
google_module = ModuleManager.load_google_module(limit=10000000)
google_module = ModuleManager.load_google_module(limit=2500000)
local_module = ModuleManager.load_local_module()
google_image_dictionary = ImageManager.load_picture_dictionary(google_module)
local_image_dictionary = ImageManager.load_picture_dictionary(local_module)
......@@ -18,7 +18,7 @@ stopWords = set(stopwords.words('english'))
def findImage(text):
all = parseText(text)
print(all)
#print(all)
top_words = Counter(all).most_common(3)
......@@ -35,12 +35,15 @@ def findImage(text):
def search_module(word, module, images):
try:
return images[module.wv.most_similar_to_given(word, list(images.keys()))]
label = module.wv.most_similar_to_given(word, list(images.keys()))
similarity = module.wv.similarity(w1=word, w2=label)
return label, similarity, images[label]
except KeyError:
return None
def parseText(text):
text=text.replace("[","")
text = text.replace("]", "")
sentences = nltk.sent_tokenize(text)
tagged_words = []
......
#import src.Word2Image as wi
from flask import Flask, render_template, request
app = Flask(__name__)
@app.route('/')
def index():
    """Serve the search page, including image matches when text was submitted.

    Reads the ``text`` query-string parameter (empty string when absent).
    For non-empty text, the value returned by ``wi.findImage(text)`` is
    handed to the template as ``results`` with ``render_result=True``;
    otherwise ``results`` is an empty list and ``render_result`` is False.

    Returns:
        The rendered ``index.html`` template.
    """
    text = request.args.get("text", "")
    render_result = bool(text)
    results = []
    if render_result:
        # Import at call time: the only other import of this module sits
        # under the ``__main__`` guard, so relying on it raises NameError
        # whenever the app is served another way (``flask run``, a WSGI
        # server, a test client). Python caches imported modules, so only
        # the first call pays the import cost.
        import src.Word2Image as wi
        results = wi.findImage(text)
    return render_template(
        "index.html",
        text=text,
        results=results,
        render_result=render_result,
    )
if __name__ == '__main__':
    # Script entry point. The image-lookup module is imported here rather
    # than at the top of the file, so it is only loaded when this file is
    # executed directly; the import binds the module-level name ``wi``.
    import src.Word2Image as wi

    # Start Flask's built-in development server.
    # NOTE(review): debug=True turns on the reloader and the interactive
    # debugger; keep this for local development only.
    app.run(debug=True)
\ No newline at end of file
<html>
<body>
<form action = "/" method = "GET">
<textarea rows="4" cols="50" name="text" placeholder="Input your text">{{text}}</textarea>
<p><input type = "submit" value = "submit" /></p>
</form>
{% if render_result %}
{% for word in results[0] %}
<table class="tg">
<tr>
<th colspan="2">{{word[0]}}</th>
</tr>
<tr>
<td>Google</td>
<td>Local</td>
</tr>
<tr>
<td>{{results[1][word[0]][0]}}</td>
<td>{{results[2][word[0]][0]}}</td>
</tr>
<tr>
<td>{{results[1][word[0]][1]}}</td>
<td>{{results[2][word[0]][1]}}</td>
</tr>
<tr>
<td><img src="{{results[1][word[0]][2]}}" alt="{{results[1][word[0]][0]}}" width="250" height="200"></td>
<td><img src="{{results[2][word[0]][2]}}" alt="{{results[2][word[0]][0]}}" width="250" height="200"></td>
</tr>
</table>
{% endfor %}
{% endif %}
</body>
</html>
\ No newline at end of file