Commit 0ef2de73 authored by Martin Znamenáček

java parser

parent 45209add
......@@ -10,4 +10,4 @@ gen
models
files
java
target
.idea
*.iml
*.csv
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the mvi-parser CSV tool (see parser.Parser). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>cz.fit.cvut</groupId>
    <artifactId>mvi-parser</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <!-- Pin the source encoding so the build does not depend on the platform default. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>
    <dependencies>
        <!-- CSV reading/writing (CSVFormat, CSVPrinter, CSVRecord). -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-csv</artifactId>
            <version>1.6</version>
        </dependency>
        <!-- StringUtils helpers. -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.8.1</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.0</version>
                <configuration>
                    <!--
                      Java 9+ release values carry no "1." prefix: javac rejects "1.11"
                      with "invalid source release". The correct value for Java 11 is "11".
                    -->
                    <source>11</source>
                    <target>11</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
\ No newline at end of file
package parser;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang3.StringUtils;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
/**
 * Joins three CSV files from the working directory into a single
 * {@code processed_data.csv} that maps a label's display text to an image URL.
 *
 * <p>Inputs (file names are fixed):
 * <ul>
 *   <li>{@code label-text.csv} - no header row; column 0 is the label id, column 1 its text</li>
 *   <li>{@code image-label.csv} - header row; uses columns {@code ImageID}, {@code LabelName},
 *       {@code Confidence}</li>
 *   <li>{@code image-url.csv} - header row; uses columns {@code ImageID}, {@code OriginalURL}</li>
 * </ul>
 *
 * <p>Data source referenced by the original author:
 * https://storage.googleapis.com/openimages/web/download.html
 */
public class Parser {

    private static final String LABEL_TEXT_FILE = "label-text.csv";
    private static final String IMAGE_LABEL_FILE = "image-label.csv";
    private static final String IMAGE_URL_FILE = "image-url.csv";
    private static final String OUTPUT_FILE = "processed_data.csv";

    /**
     * Reads the three input CSV files, joins them and writes {@code processed_data.csv}.
     * Progress messages are printed to standard output after each stage.
     *
     * @param args ignored
     * @throws IOException if an input file cannot be opened or read, or the output cannot be written
     */
    public static void main(String[] args) throws IOException {
        Map<String, String> processedMap;

        // All three inputs are opened up front so a missing file fails before any parsing work,
        // and try-with-resources guarantees every reader is closed even when parsing throws.
        try (Reader labelText = openUtf8(LABEL_TEXT_FILE);
             Reader imageLabel = openUtf8(IMAGE_LABEL_FILE);
             Reader imageUrl = openUtf8(IMAGE_URL_FILE)) {

            Map<String, String> labelImageIdMap = parseImageLabel(imageLabel);
            System.out.println("labelImageIdMap done");

            Map<String, String> labelTextMap = parseLabelText(labelText);
            System.out.println("labelTextMap done");

            Map<String, String> imageUrlMap = parseImageUrl(imageUrl);
            System.out.println("imageUrlMap done");

            processedMap = process(labelImageIdMap, labelTextMap, imageUrlMap);
            System.out.println("processedMap done");
        }

        writeResult(processedMap);
        System.out.println("Done");
    }

    /** Opens {@code fileName} for reading with an explicit UTF-8 charset instead of the platform default. */
    private static Reader openUtf8(String fileName) throws IOException {
        return Files.newBufferedReader(Paths.get(fileName), StandardCharsets.UTF_8);
    }

    /**
     * Builds a {@code LabelName -> ImageID} map from the image/label CSV.
     *
     * <p>Rows whose {@code Confidence} column is exactly the string {@code "0"} are skipped
     * (any other spelling, e.g. {@code "0.0"}, is kept). When a label occurs on several rows
     * the last row wins, so each label ends up associated with a single image.
     *
     * @param reader source of the CSV; the first record is consumed as the header row
     * @return mutable map from label name to image id
     * @throws IOException if the CSV cannot be read
     */
    private static Map<String, String> parseImageLabel(Reader reader) throws IOException {
        Map<String, String> imageLabelMap = new HashMap<>();
        // withFirstRecordAsHeader() already derives the column names from the first row,
        // so no explicit header array is needed (an earlier explicit one was overridden by it).
        Iterable<CSVRecord> records = CSVFormat.DEFAULT
                .withFirstRecordAsHeader()
                .parse(reader);
        for (CSVRecord record : records) {
            if ("0".equals(record.get("Confidence"))) {
                continue;
            }
            imageLabelMap.put(record.get("LabelName"), record.get("ImageID"));
        }
        return imageLabelMap;
    }

    /**
     * Builds a {@code label id -> label text} map from a header-less two-column CSV.
     *
     * @param reader source of the CSV
     * @return mutable map from column 0 to column 1 of every row that has at least two columns
     * @throws IOException if the CSV cannot be read
     */
    private static Map<String, String> parseLabelText(Reader reader) throws IOException {
        Map<String, String> labelTextMap = new HashMap<>();
        Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(reader);
        for (CSVRecord record : records) {
            // A short row would make record.get(1) throw ArrayIndexOutOfBoundsException;
            // skip it rather than abort the whole run.
            if (record.size() < 2) {
                continue;
            }
            labelTextMap.put(record.get(0), record.get(1));
        }
        return labelTextMap;
    }

    /**
     * Builds an {@code ImageID -> OriginalURL} map from the image URL CSV.
     *
     * @param reader source of the CSV; the first record is consumed as the header row
     * @return mutable map from image id to original URL
     * @throws IOException if the CSV cannot be read
     */
    private static Map<String, String> parseImageUrl(Reader reader) throws IOException {
        Map<String, String> imageUrlMap = new HashMap<>();
        Iterable<CSVRecord> records = CSVFormat.DEFAULT
                .withFirstRecordAsHeader()
                .parse(reader);
        for (CSVRecord record : records) {
            imageUrlMap.put(record.get("ImageID"), record.get("OriginalURL"));
        }
        return imageUrlMap;
    }

    /**
     * Joins the three lookup maps into {@code label text -> image URL}.
     *
     * <p>A label is dropped when its text contains whitespace, when no image is recorded for it,
     * or when that image has no URL. If two labels share the same text, the one visited later
     * overwrites the earlier entry.
     *
     * @param labelImageMap label name to image id
     * @param labelTextMap  label name to display text
     * @param imageUrlMap   image id to URL
     * @return mutable map from label text to image URL
     */
    private static Map<String, String> process(Map<String, String> labelImageMap,
                                               Map<String, String> labelTextMap,
                                               Map<String, String> imageUrlMap) {
        Map<String, String> processedMap = new HashMap<>();
        // Iterate entries so each label's text is looked up once instead of up to three times.
        for (Map.Entry<String, String> entry : labelTextMap.entrySet()) {
            String text = entry.getValue();
            if (StringUtils.containsWhitespace(text)) {
                continue;
            }
            String imageId = labelImageMap.get(entry.getKey());
            if (imageId == null) {
                continue;
            }
            String imageUrl = imageUrlMap.get(imageId);
            if (imageUrl == null) {
                continue;
            }
            processedMap.put(text, imageUrl);
        }
        return processedMap;
    }

    /**
     * Writes the joined data to {@code processed_data.csv} with a {@code Label,URL} header row,
     * replacing any existing file of that name.
     *
     * @param processedMap label text to image URL
     * @throws IOException if the file cannot be created or written
     */
    private static void writeResult(Map<String, String> processedMap) throws IOException {
        // The writer is declared as its own resource so it is closed even if the
        // CSVPrinter constructor (which prints the header) throws.
        try (Writer out = Files.newBufferedWriter(Paths.get(OUTPUT_FILE), StandardCharsets.UTF_8);
             CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT.withHeader("Label", "URL"))) {
            for (Map.Entry<String, String> entry : processedMap.entrySet()) {
                printer.printRecord(entry.getKey(), entry.getValue());
            }
        }
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment