git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-analysis/EcologicalEngineGeoSpatialExtension@111671 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
b2931efb2a
commit
45aa7df7c6
|
@ -1,11 +1,15 @@
|
|||
package org.gcube.dataanalysis.geo.test.projections;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
import java.io.FileWriter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.gcube.contentmanagement.graphtools.utils.MathFunctions;
|
||||
import org.gcube.contentmanagement.lexicalmatcher.utils.DistanceCalculator;
|
||||
|
@ -41,7 +45,7 @@ public class GeolocateCountry {
|
|||
fr.close();
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
public static void main2(String[] args) throws Exception {
|
||||
BufferedReader fr = new BufferedReader(new FileReader(new File(faoreport)));
|
||||
String line = fr.readLine();
|
||||
parseCentroidsFile();
|
||||
|
@ -51,27 +55,96 @@ public class GeolocateCountry {
|
|||
while (line != null) {
|
||||
List<String> p = Transformations.parseCVSString(line, ",");
|
||||
String country = p.get(1);
|
||||
//TO DO rebuild the original CSV file
|
||||
// TO DO rebuild the original CSV file
|
||||
String suggestion = yetDone.get(country);
|
||||
if (suggestion==null){
|
||||
suggestion = getCentroid(country,capitals,0.6);
|
||||
if (suggestion.length()==0)
|
||||
suggestion = getCentroid(country,centroids,0.3);
|
||||
|
||||
if (suggestion == null) {
|
||||
suggestion = getCentroid(country, capitals, 0.6);
|
||||
if (suggestion.length() == 0)
|
||||
suggestion = getCentroid(country, centroids, 0.3);
|
||||
|
||||
yetDone.put(country, suggestion);
|
||||
}
|
||||
|
||||
System.out.println(line+","+suggestion);
|
||||
|
||||
System.out.println(line + "," + suggestion);
|
||||
|
||||
line = fr.readLine();
|
||||
}
|
||||
|
||||
fr.close();
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String file = "LargeTS.csv";
|
||||
System.out.println("Processing");
|
||||
List<String> countries = GeolocateCountry.geoLocateCountries(1, file);
|
||||
System.out.println("Dumping");
|
||||
BufferedWriter bw = new BufferedWriter(new FileWriter(new File("LargeTsGeo.csv")));
|
||||
for (String country:countries){
|
||||
bw.write(country+"\n");
|
||||
}
|
||||
bw.close();
|
||||
System.out.println("Done");
|
||||
}
|
||||
|
||||
public static List<String> geoLocateCountries(int idxCountryColumn, String file) throws Exception {
|
||||
BufferedReader fr = new BufferedReader(new FileReader(new File(file)));
|
||||
String line = fr.readLine();
|
||||
parseCentroidsFile();
|
||||
parseWorldCapitalsFile();
|
||||
line = fr.readLine();
|
||||
List<String> yetDone = new ArrayList<String>();
|
||||
|
||||
while (line != null) {
|
||||
List<String> p = Transformations.parseCVSString(line, ",");
|
||||
String country = p.get(idxCountryColumn);
|
||||
String suggestion = null;
|
||||
suggestion = getCentroid(country, capitals, 0.6);
|
||||
if (suggestion.length() == 0)
|
||||
suggestion = getCentroid(country, centroids, 0.3);
|
||||
if (suggestion==null || suggestion.length()==0)
|
||||
suggestion = ",,,,";
|
||||
String outstring = country + "," + suggestion;
|
||||
yetDone.add(outstring);
|
||||
// System.out.println(outstring);
|
||||
line = fr.readLine();
|
||||
}
|
||||
|
||||
fr.close();
|
||||
return yetDone;
|
||||
}
|
||||
|
||||
public static Map<String, String> geoLocateCountriesWithNoDuplicates(int idxCountryColumn, String file) throws Exception {
|
||||
BufferedReader fr = new BufferedReader(new FileReader(new File(file)));
|
||||
String line = fr.readLine();
|
||||
parseCentroidsFile();
|
||||
parseWorldCapitalsFile();
|
||||
line = fr.readLine();
|
||||
LinkedHashMap<String, String> yetDone = new LinkedHashMap<String, String>();
|
||||
|
||||
while (line != null) {
|
||||
List<String> p = Transformations.parseCVSString(line, ",");
|
||||
String country = p.get(idxCountryColumn);
|
||||
String suggestion = yetDone.get(country);
|
||||
if (suggestion == null) {
|
||||
suggestion = getCentroid(country, capitals, 0.6);
|
||||
if (suggestion.length() == 0)
|
||||
suggestion = getCentroid(country, centroids, 0.3);
|
||||
|
||||
yetDone.put(country, suggestion);
|
||||
}
|
||||
|
||||
System.out.println(line + "," + suggestion);
|
||||
|
||||
line = fr.readLine();
|
||||
}
|
||||
|
||||
fr.close();
|
||||
return yetDone;
|
||||
}
|
||||
|
||||
static HashMap<String, String> centroids = new HashMap<String, String>();
|
||||
static HashMap<String, String> capitals = new HashMap<String, String>();
|
||||
|
||||
|
||||
public static void parseCentroidsFile() throws Exception {
|
||||
BufferedReader fr = new BufferedReader(new FileReader(new File("countriescentroids.txt")));
|
||||
String line = fr.readLine();
|
||||
|
@ -103,7 +176,7 @@ public class GeolocateCountry {
|
|||
|
||||
fr.close();
|
||||
}
|
||||
|
||||
|
||||
public static String getCentroid(String country, HashMap<String, String> centroids, double threshold) {
|
||||
|
||||
String c = centroids.get(country);
|
||||
|
@ -113,36 +186,33 @@ public class GeolocateCountry {
|
|||
if (c == null) {
|
||||
for (String key : centroids.keySet()) {
|
||||
if (key.length() > 0) {
|
||||
/*
|
||||
if (key.contains(country) || country.contains(key)) {
|
||||
if (sb.length() > 0)
|
||||
sb.append("/");
|
||||
|
||||
sb.append(key + "," + centroids.get(key) + "("+0.8+")"+" ");
|
||||
} else {
|
||||
*/
|
||||
double score = dc.CD(false, country, key,true,false);
|
||||
if (score > threshold) {
|
||||
int i = 0;
|
||||
for (Double cscore : scores){
|
||||
if (cscore<score)
|
||||
break;
|
||||
i++;
|
||||
}
|
||||
|
||||
sb.add(i,key + "," + centroids.get(key) + ","+MathFunctions.roundDecimal(score,2));
|
||||
scores.add(i,score);
|
||||
/*
|
||||
* if (key.contains(country) || country.contains(key)) { if (sb.length() > 0) sb.append("/");
|
||||
*
|
||||
* sb.append(key + "," + centroids.get(key) + "("+0.8+")"+" "); } else {
|
||||
*/
|
||||
double score = dc.CD(false, country, key, true, false);
|
||||
if (score > threshold) {
|
||||
int i = 0;
|
||||
for (Double cscore : scores) {
|
||||
if (cscore < score)
|
||||
break;
|
||||
i++;
|
||||
}
|
||||
|
||||
// }
|
||||
sb.add(i, key + "," + centroids.get(key) + "," + MathFunctions.roundDecimal(score, 2));
|
||||
scores.add(i, score);
|
||||
}
|
||||
|
||||
// }
|
||||
}
|
||||
}
|
||||
if (sb.size()>0)
|
||||
if (sb.size() > 0)
|
||||
return sb.get(0).toString();
|
||||
else
|
||||
return "";
|
||||
} else
|
||||
return country+","+c+ ","+1;
|
||||
return country + "," + c + "," + 1;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue