forked from D-Net/dnet-hadoop
Merge pull request '[beta] implementation of countryMatch and addition of workflow parameters' (#451) from openorgs_fixes into beta
Reviewed-on: D-Net/dnet-hadoop#451
This commit is contained in:
commit
c7634c55c7
|
@ -0,0 +1,47 @@
|
|||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import com.wcohen.ss.AbstractStringDistance;
|
||||
|
||||
import eu.dnetlib.pace.config.Config;
|
||||
import eu.dnetlib.pace.tree.support.AbstractStringComparator;
|
||||
import eu.dnetlib.pace.tree.support.ComparatorClass;
|
||||
|
||||
@ComparatorClass("countryMatch")
|
||||
public class CountryMatch extends AbstractStringComparator {
|
||||
|
||||
public CountryMatch(Map<String, String> params) {
|
||||
super(params, new com.wcohen.ss.JaroWinkler());
|
||||
}
|
||||
|
||||
public CountryMatch(final double weight) {
|
||||
super(weight, new com.wcohen.ss.JaroWinkler());
|
||||
}
|
||||
|
||||
protected CountryMatch(final double weight, final AbstractStringDistance ssalgo) {
|
||||
super(weight, ssalgo);
|
||||
}
|
||||
|
||||
@Override
|
||||
public double distance(final String a, final String b, final Config conf) {
|
||||
if (a.isEmpty() || b.isEmpty()) {
|
||||
return -1.0; // return -1 if a field is missing
|
||||
}
|
||||
if (a.equalsIgnoreCase("unknown") || b.equalsIgnoreCase("unknown")) {
|
||||
return -1.0; // return -1 if a country is UNKNOWN
|
||||
}
|
||||
|
||||
return a.equals(b) ? 1.0 : 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getWeight() {
|
||||
return super.weight;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected double normalize(final double d) {
|
||||
return d;
|
||||
}
|
||||
}
|
|
@ -336,4 +336,23 @@ public class ComparatorTest extends AbstractPaceTest {
|
|||
System.out.println("compare = " + compare);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void countryMatch() {
|
||||
|
||||
CountryMatch countryMatch = new CountryMatch(params);
|
||||
|
||||
double result = countryMatch.distance("UNKNOWN", "UNKNOWN", conf);
|
||||
assertEquals(-1.0, result);
|
||||
|
||||
result = countryMatch.distance("CHILE", "UNKNOWN", conf);
|
||||
assertEquals(-1.0, result);
|
||||
|
||||
result = countryMatch.distance("CHILE", "ITALY", conf);
|
||||
assertEquals(0.0, result);
|
||||
|
||||
result = countryMatch.distance("CHILE", "CHILE", conf);
|
||||
assertEquals(1.0, result);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -15,4 +15,12 @@
|
|||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
<!-- Default value for the hiveMetastoreUris workflow property: a thrift:// URI on port 9083.
     NOTE(review): the host name contains "test" - presumably a test-cluster endpoint; confirm
     it is overridden for other environments. -->
<property>
|
||||
<name>hiveMetastoreUris</name>
|
||||
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||
</property>
|
||||
<!-- Default value for the pivotHistoryDatabase workflow property.
     NOTE(review): the value shows no visible text (it may hold an invisible character rather
     than being truly empty) - presumably a deliberate "no database" default; confirm before
     editing or trimming it. -->
<property>
|
||||
<name>pivotHistoryDatabase</name>
|
||||
<value>​</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -198,6 +198,8 @@
|
|||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
<arg>--actionSetId</arg><arg>${actionSetId}</arg>
|
||||
<arg>--cutConnectedComponent</arg><arg>${cutConnectedComponent}</arg>
|
||||
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
|
||||
<arg>--pivotHistoryDatabase</arg><arg>${pivotHistoryDatabase}</arg>
|
||||
</spark>
|
||||
<ok to="PrepareOrgRels"/>
|
||||
<error to="Kill"/>
|
||||
|
|
|
@ -49,7 +49,7 @@
|
|||
},
|
||||
{
|
||||
"field": "country",
|
||||
"comparator": "exactMatch",
|
||||
"comparator": "countryMatch",
|
||||
"weight": 1,
|
||||
"countIfUndefined": "true",
|
||||
"params": {}
|
||||
|
|
Loading…
Reference in New Issue