diff --git a/src/main/java/org/gcube/dataanalysis/geo/test/projections/GeolocateCountry.java b/src/main/java/org/gcube/dataanalysis/geo/test/projections/GeolocateCountry.java index 74e81ae..da1459e 100644 --- a/src/main/java/org/gcube/dataanalysis/geo/test/projections/GeolocateCountry.java +++ b/src/main/java/org/gcube/dataanalysis/geo/test/projections/GeolocateCountry.java @@ -1,11 +1,15 @@ package org.gcube.dataanalysis.geo.test.projections; import java.io.BufferedReader; +import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; +import java.io.FileWriter; import java.util.ArrayList; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import org.gcube.contentmanagement.graphtools.utils.MathFunctions; import org.gcube.contentmanagement.lexicalmatcher.utils.DistanceCalculator; @@ -41,7 +45,7 @@ public class GeolocateCountry { fr.close(); } - public static void main(String[] args) throws Exception { + public static void main2(String[] args) throws Exception { BufferedReader fr = new BufferedReader(new FileReader(new File(faoreport))); String line = fr.readLine(); parseCentroidsFile(); @@ -51,27 +55,96 @@ public class GeolocateCountry { while (line != null) { List p = Transformations.parseCVSString(line, ","); String country = p.get(1); - //TO DO rebuild the original CSV file + // TO DO rebuild the original CSV file String suggestion = yetDone.get(country); - if (suggestion==null){ - suggestion = getCentroid(country,capitals,0.6); - if (suggestion.length()==0) - suggestion = getCentroid(country,centroids,0.3); - + if (suggestion == null) { + suggestion = getCentroid(country, capitals, 0.6); + if (suggestion.length() == 0) + suggestion = getCentroid(country, centroids, 0.3); + yetDone.put(country, suggestion); } - System.out.println(line+","+suggestion); - + System.out.println(line + "," + suggestion); + line = fr.readLine(); } fr.close(); } + public static void main(String[] args) throws Exception { + String file = "LargeTS.csv"; + System.out.println("Processing"); + List countries = GeolocateCountry.geoLocateCountries(1, file); + System.out.println("Dumping"); + BufferedWriter bw = new BufferedWriter(new FileWriter(new File("LargeTsGeo.csv"))); + for (String country:countries){ + bw.write(country+"\n"); + } + bw.close(); + System.out.println("Done"); + } + + public static List geoLocateCountries(int idxCountryColumn, String file) throws Exception { + BufferedReader fr = new BufferedReader(new FileReader(new File(file))); + String line = fr.readLine(); + parseCentroidsFile(); + parseWorldCapitalsFile(); + line = fr.readLine(); + List yetDone = new ArrayList(); + + while (line != null) { + List p = Transformations.parseCVSString(line, ","); + String country = p.get(idxCountryColumn); + String suggestion = null; + suggestion = getCentroid(country, capitals, 0.6); + if (suggestion.length() == 0) + suggestion = getCentroid(country, centroids, 0.3); + if (suggestion==null || suggestion.length()==0) + suggestion = ",,,,"; + String outstring = country + "," + suggestion; + yetDone.add(outstring); +// System.out.println(outstring); + line = fr.readLine(); + } + + fr.close(); + return yetDone; + } + + public static Map geoLocateCountriesWithNoDuplicates(int idxCountryColumn, String file) throws Exception { + BufferedReader fr = new BufferedReader(new FileReader(new File(file))); + String line = fr.readLine(); + parseCentroidsFile(); + parseWorldCapitalsFile(); + line = fr.readLine(); + LinkedHashMap yetDone = new LinkedHashMap(); + + while (line != null) { + List p = Transformations.parseCVSString(line, ","); + String country = p.get(idxCountryColumn); + String suggestion = yetDone.get(country); + if (suggestion == null) { + suggestion = getCentroid(country, capitals, 0.6); + if (suggestion.length() == 0) + suggestion = getCentroid(country, centroids, 0.3); + + yetDone.put(country, suggestion); + } + + System.out.println(line + "," + suggestion); + + line = fr.readLine(); + } + + fr.close(); + return yetDone; + } + static HashMap centroids = new HashMap(); static HashMap capitals = new HashMap(); - + public static void parseCentroidsFile() throws Exception { BufferedReader fr = new BufferedReader(new FileReader(new File("countriescentroids.txt"))); String line = fr.readLine(); @@ -103,7 +176,7 @@ public class GeolocateCountry { fr.close(); } - + public static String getCentroid(String country, HashMap centroids, double threshold) { String c = centroids.get(country); @@ -113,36 +186,33 @@ public class GeolocateCountry { if (c == null) { for (String key : centroids.keySet()) { if (key.length() > 0) { -/* - if (key.contains(country) || country.contains(key)) { - if (sb.length() > 0) - sb.append("/"); - - sb.append(key + "," + centroids.get(key) + "("+0.8+")"+" "); - } else { - */ - double score = dc.CD(false, country, key,true,false); - if (score > threshold) { - int i = 0; - for (Double cscore : scores){ - if (cscore 0) sb.append("/"); + * + * sb.append(key + "," + centroids.get(key) + "("+0.8+")"+" "); } else { + */ + double score = dc.CD(false, country, key, true, false); + if (score > threshold) { + int i = 0; + for (Double cscore : scores) { + if (cscore < score) + break; + i++; } -// } + sb.add(i, key + "," + centroids.get(key) + "," + MathFunctions.roundDecimal(score, 2)); + scores.add(i, score); + } + + // } } } - if (sb.size()>0) + if (sb.size() > 0) return sb.get(0).toString(); else return ""; } else - return country+","+c+ ","+1; + return country + "," + c + "," + 1; } }