1
0
Fork 0

Merge pull request '[beta] implementation of countryMatch and addition of workflow parameters' (#451) from openorgs_fixes into beta

Reviewed-on: D-Net/dnet-hadoop#451
This commit is contained in:
Claudio Atzori 2024-07-01 09:22:56 +02:00
commit c7634c55c7
5 changed files with 77 additions and 1 deletions

View File

@ -0,0 +1,47 @@
package eu.dnetlib.pace.tree;
import java.util.Map;
import com.wcohen.ss.AbstractStringDistance;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.tree.support.AbstractStringComparator;
import eu.dnetlib.pace.tree.support.ComparatorClass;
@ComparatorClass("countryMatch")
public class CountryMatch extends AbstractStringComparator {
public CountryMatch(Map<String, String> params) {
super(params, new com.wcohen.ss.JaroWinkler());
}
public CountryMatch(final double weight) {
super(weight, new com.wcohen.ss.JaroWinkler());
}
protected CountryMatch(final double weight, final AbstractStringDistance ssalgo) {
super(weight, ssalgo);
}
@Override
public double distance(final String a, final String b, final Config conf) {
if (a.isEmpty() || b.isEmpty()) {
return -1.0; // return -1 if a field is missing
}
if (a.equalsIgnoreCase("unknown") || b.equalsIgnoreCase("unknown")) {
return -1.0; // return -1 if a country is UNKNOWN
}
return a.equals(b) ? 1.0 : 0;
}
@Override
public double getWeight() {
return super.weight;
}
@Override
protected double normalize(final double d) {
return d;
}
}

View File

@ -336,4 +336,23 @@ public class ComparatorTest extends AbstractPaceTest {
System.out.println("compare = " + compare);
}
@Test
public void countryMatch() {
CountryMatch countryMatch = new CountryMatch(params);
double result = countryMatch.distance("UNKNOWN", "UNKNOWN", conf);
assertEquals(-1.0, result);
result = countryMatch.distance("CHILE", "UNKNOWN", conf);
assertEquals(-1.0, result);
result = countryMatch.distance("CHILE", "ITALY", conf);
assertEquals(0.0, result);
result = countryMatch.distance("CHILE", "CHILE", conf);
assertEquals(1.0, result);
}
}

View File

@ -15,4 +15,12 @@
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>pivotHistoryDatabase</name>
<value>&#x200B;</value>
</property>
</configuration>

View File

@ -198,6 +198,8 @@
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
<arg>--actionSetId</arg><arg>${actionSetId}</arg>
<arg>--cutConnectedComponent</arg><arg>${cutConnectedComponent}</arg>
<arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
<arg>--pivotHistoryDatabase</arg><arg>${pivotHistoryDatabase}</arg>
</spark>
<ok to="PrepareOrgRels"/>
<error to="Kill"/>

View File

@ -49,7 +49,7 @@
},
{
"field": "country",
"comparator": "exactMatch",
"comparator": "countryMatch",
"weight": 1,
"countIfUndefined": "true",
"params": {}