forked from D-Net/dnet-hadoop
Merge pull request '[beta] implementation of countryMatch and addition of workflow parameters' (#451) from openorgs_fixes into beta
Reviewed-on: D-Net/dnet-hadoop#451
This commit is contained in:
commit
c7634c55c7
|
@ -0,0 +1,47 @@
|
||||||
|
package eu.dnetlib.pace.tree;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import com.wcohen.ss.AbstractStringDistance;
|
||||||
|
|
||||||
|
import eu.dnetlib.pace.config.Config;
|
||||||
|
import eu.dnetlib.pace.tree.support.AbstractStringComparator;
|
||||||
|
import eu.dnetlib.pace.tree.support.ComparatorClass;
|
||||||
|
|
||||||
|
@ComparatorClass("countryMatch")
|
||||||
|
public class CountryMatch extends AbstractStringComparator {
|
||||||
|
|
||||||
|
public CountryMatch(Map<String, String> params) {
|
||||||
|
super(params, new com.wcohen.ss.JaroWinkler());
|
||||||
|
}
|
||||||
|
|
||||||
|
public CountryMatch(final double weight) {
|
||||||
|
super(weight, new com.wcohen.ss.JaroWinkler());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected CountryMatch(final double weight, final AbstractStringDistance ssalgo) {
|
||||||
|
super(weight, ssalgo);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double distance(final String a, final String b, final Config conf) {
|
||||||
|
if (a.isEmpty() || b.isEmpty()) {
|
||||||
|
return -1.0; // return -1 if a field is missing
|
||||||
|
}
|
||||||
|
if (a.equalsIgnoreCase("unknown") || b.equalsIgnoreCase("unknown")) {
|
||||||
|
return -1.0; // return -1 if a country is UNKNOWN
|
||||||
|
}
|
||||||
|
|
||||||
|
return a.equals(b) ? 1.0 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double getWeight() {
|
||||||
|
return super.weight;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected double normalize(final double d) {
|
||||||
|
return d;
|
||||||
|
}
|
||||||
|
}
|
|
@ -336,4 +336,23 @@ public class ComparatorTest extends AbstractPaceTest {
|
||||||
System.out.println("compare = " + compare);
|
System.out.println("compare = " + compare);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void countryMatch() {
|
||||||
|
|
||||||
|
CountryMatch countryMatch = new CountryMatch(params);
|
||||||
|
|
||||||
|
double result = countryMatch.distance("UNKNOWN", "UNKNOWN", conf);
|
||||||
|
assertEquals(-1.0, result);
|
||||||
|
|
||||||
|
result = countryMatch.distance("CHILE", "UNKNOWN", conf);
|
||||||
|
assertEquals(-1.0, result);
|
||||||
|
|
||||||
|
result = countryMatch.distance("CHILE", "ITALY", conf);
|
||||||
|
assertEquals(0.0, result);
|
||||||
|
|
||||||
|
result = countryMatch.distance("CHILE", "CHILE", conf);
|
||||||
|
assertEquals(1.0, result);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,4 +15,12 @@
|
||||||
<name>oozie.action.sharelib.for.spark</name>
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
<value>spark2</value>
|
<value>spark2</value>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hiveMetastoreUris</name>
|
||||||
|
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>pivotHistoryDatabase</name>
|
||||||
|
<value>​</value>
|
||||||
|
</property>
|
||||||
</configuration>
|
</configuration>
|
|
@ -198,6 +198,8 @@
|
||||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||||
<arg>--actionSetId</arg><arg>${actionSetId}</arg>
|
<arg>--actionSetId</arg><arg>${actionSetId}</arg>
|
||||||
<arg>--cutConnectedComponent</arg><arg>${cutConnectedComponent}</arg>
|
<arg>--cutConnectedComponent</arg><arg>${cutConnectedComponent}</arg>
|
||||||
|
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
|
||||||
|
<arg>--pivotHistoryDatabase</arg><arg>${pivotHistoryDatabase}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="PrepareOrgRels"/>
|
<ok to="PrepareOrgRels"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -49,7 +49,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "country",
|
"field": "country",
|
||||||
"comparator": "exactMatch",
|
"comparator": "countryMatch",
|
||||||
"weight": 1,
|
"weight": 1,
|
||||||
"countIfUndefined": "true",
|
"countIfUndefined": "true",
|
||||||
"params": {}
|
"params": {}
|
||||||
|
|
Loading…
Reference in New Issue