Gianpaolo Coro 2015-02-04 16:38:12 +00:00
parent b2931efb2a
commit 45aa7df7c6
1 changed files with 103 additions and 33 deletions

View File

@ -1,11 +1,15 @@
package org.gcube.dataanalysis.geo.test.projections;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.gcube.contentmanagement.graphtools.utils.MathFunctions;
import org.gcube.contentmanagement.lexicalmatcher.utils.DistanceCalculator;
@ -41,7 +45,7 @@ public class GeolocateCountry {
fr.close();
}
public static void main(String[] args) throws Exception {
public static void main2(String[] args) throws Exception {
BufferedReader fr = new BufferedReader(new FileReader(new File(faoreport)));
String line = fr.readLine();
parseCentroidsFile();
@ -51,27 +55,96 @@ public class GeolocateCountry {
while (line != null) {
List<String> p = Transformations.parseCVSString(line, ",");
String country = p.get(1);
//TO DO rebuild the original CSV file
// TO DO rebuild the original CSV file
String suggestion = yetDone.get(country);
if (suggestion==null){
suggestion = getCentroid(country,capitals,0.6);
if (suggestion.length()==0)
suggestion = getCentroid(country,centroids,0.3);
if (suggestion == null) {
suggestion = getCentroid(country, capitals, 0.6);
if (suggestion.length() == 0)
suggestion = getCentroid(country, centroids, 0.3);
yetDone.put(country, suggestion);
}
System.out.println(line+","+suggestion);
System.out.println(line + "," + suggestion);
line = fr.readLine();
}
fr.close();
}
/**
 * Geolocates the countries listed in "LargeTS.csv" and dumps the result,
 * one entry per line, to "LargeTsGeo.csv".
 *
 * The country name is taken from column index 1 of the input file.
 *
 * @param args unused
 * @throws Exception if the input cannot be processed or the output cannot be written
 */
public static void main(String[] args) throws Exception {
    String file = "LargeTS.csv";
    System.out.println("Processing");
    List<String> countries = GeolocateCountry.geoLocateCountries(1, file);
    System.out.println("Dumping");
    BufferedWriter bw = new BufferedWriter(new FileWriter(new File("LargeTsGeo.csv")));
    try {
        for (String country : countries) {
            bw.write(country + "\n");
        }
    } finally {
        // always release the file handle, even when a write fails half-way
        bw.close();
    }
    System.out.println("Done");
}
/**
 * Produces one "country,suggestion" entry for every data line of a CSV file.
 *
 * The first line of the file is read and discarded (presumably a header row - confirm).
 * For each following line the country is looked up first in the capitals table
 * (threshold 0.6) and, when that yields nothing, in the centroids table (threshold 0.3).
 * When neither lookup yields a suggestion the placeholder ",,,," is used instead.
 * Duplicated countries are NOT collapsed: the result has one entry per input line.
 *
 * @param idxCountryColumn zero-based index of the CSV column holding the country name
 * @param file path of the CSV file to read
 * @return the entries, in input order
 * @throws Exception if the file or the lookup tables cannot be read, or a line has
 *         fewer columns than idxCountryColumn requires
 */
public static List<String> geoLocateCountries(int idxCountryColumn, String file) throws Exception {
    BufferedReader fr = new BufferedReader(new FileReader(new File(file)));
    try {
        // skip the first line before loading the lookup tables (same order as before)
        String line = fr.readLine();
        parseCentroidsFile();
        parseWorldCapitalsFile();
        line = fr.readLine();
        List<String> yetDone = new ArrayList<String>();
        while (line != null) {
            List<String> p = Transformations.parseCVSString(line, ",");
            String country = p.get(idxCountryColumn);
            String suggestion = getCentroid(country, capitals, 0.6);
            // guard against null BEFORE dereferencing, so the fallback chain cannot NPE
            if (suggestion == null || suggestion.length() == 0)
                suggestion = getCentroid(country, centroids, 0.3);
            if (suggestion == null || suggestion.length() == 0)
                suggestion = ",,,,";
            yetDone.add(country + "," + suggestion);
            line = fr.readLine();
        }
        return yetDone;
    } finally {
        // release the input file even when parsing or lookup fails
        fr.close();
    }
}
/**
 * Geolocates the countries of a CSV file, computing the suggestion only once per
 * distinct country name.
 *
 * The first line of the file is read and discarded (presumably a header row - confirm).
 * For each following line the country is looked up in the cache; on a miss it is
 * resolved against the capitals table (threshold 0.6) and then, if that yields an
 * empty string, against the centroids table (threshold 0.3), and the outcome is cached.
 * Every processed line is echoed to standard output followed by "," and its suggestion.
 *
 * @param idxCountryColumn zero-based index of the CSV column holding the country name
 * @param file path of the CSV file to read
 * @return country name to suggestion, in order of first appearance; the suggestion is
 *         an empty string when no match was found
 * @throws Exception if the file or the lookup tables cannot be read, or a line has
 *         fewer columns than idxCountryColumn requires
 */
public static Map<String, String> geoLocateCountriesWithNoDuplicates(int idxCountryColumn, String file) throws Exception {
    BufferedReader fr = new BufferedReader(new FileReader(new File(file)));
    try {
        // skip the first line before loading the lookup tables (same order as before)
        String line = fr.readLine();
        parseCentroidsFile();
        parseWorldCapitalsFile();
        line = fr.readLine();
        // LinkedHashMap keeps the countries in order of first appearance
        LinkedHashMap<String, String> yetDone = new LinkedHashMap<String, String>();
        while (line != null) {
            List<String> p = Transformations.parseCVSString(line, ",");
            String country = p.get(idxCountryColumn);
            String suggestion = yetDone.get(country);
            if (suggestion == null) {
                suggestion = getCentroid(country, capitals, 0.6);
                if (suggestion.length() == 0)
                    suggestion = getCentroid(country, centroids, 0.3);
                yetDone.put(country, suggestion);
            }
            System.out.println(line + "," + suggestion);
            line = fr.readLine();
        }
        return yetDone;
    } finally {
        // release the input file even when parsing or lookup fails
        fr.close();
    }
}
// Lookup table whose keys are matched against country names by getCentroid(...).
// Used by the geolocation methods as the fallback source (threshold 0.3) when the
// capitals lookup gives no suggestion. Presumably filled by parseCentroidsFile() - confirm.
static HashMap<String, String> centroids = new HashMap<String, String>();
// Lookup table consulted first by the geolocation methods (threshold 0.6); keys are
// matched against country names by getCentroid(...).
// Presumably filled by parseWorldCapitalsFile() - confirm.
static HashMap<String, String> capitals = new HashMap<String, String>();
public static void parseCentroidsFile() throws Exception {
BufferedReader fr = new BufferedReader(new FileReader(new File("countriescentroids.txt")));
String line = fr.readLine();
@ -103,7 +176,7 @@ public class GeolocateCountry {
fr.close();
}
public static String getCentroid(String country, HashMap<String, String> centroids, double threshold) {
String c = centroids.get(country);
@ -113,36 +186,33 @@ public class GeolocateCountry {
if (c == null) {
for (String key : centroids.keySet()) {
if (key.length() > 0) {
/*
if (key.contains(country) || country.contains(key)) {
if (sb.length() > 0)
sb.append("/");
sb.append(key + "," + centroids.get(key) + "("+0.8+")"+" ");
} else {
*/
double score = dc.CD(false, country, key,true,false);
if (score > threshold) {
int i = 0;
for (Double cscore : scores){
if (cscore<score)
break;
i++;
}
sb.add(i,key + "," + centroids.get(key) + ","+MathFunctions.roundDecimal(score,2));
scores.add(i,score);
/*
* if (key.contains(country) || country.contains(key)) { if (sb.length() > 0) sb.append("/");
*
* sb.append(key + "," + centroids.get(key) + "("+0.8+")"+" "); } else {
*/
double score = dc.CD(false, country, key, true, false);
if (score > threshold) {
int i = 0;
for (Double cscore : scores) {
if (cscore < score)
break;
i++;
}
// }
sb.add(i, key + "," + centroids.get(key) + "," + MathFunctions.roundDecimal(score, 2));
scores.add(i, score);
}
// }
}
}
if (sb.size()>0)
if (sb.size() > 0)
return sb.get(0).toString();
else
return "";
} else
return country+","+c+ ","+1;
return country + "," + c + "," + 1;
}
}