forked from D-Net/dnet-hadoop
parent
89b7bc84f2
commit
fed13e083e
|
@ -1,10 +1,5 @@
|
|||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import com.wcohen.ss.AbstractStringDistance;
|
||||
import eu.dnetlib.pace.config.Config;
|
||||
import eu.dnetlib.pace.tree.support.AbstractStringComparator;
|
||||
import eu.dnetlib.pace.tree.support.ComparatorClass;
|
||||
import org.joda.time.DateTime;
|
||||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import java.time.DateTimeException;
|
||||
import java.time.LocalDate;
|
||||
|
@ -13,55 +8,60 @@ import java.time.format.DateTimeFormatter;
|
|||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import com.wcohen.ss.AbstractStringDistance;
|
||||
|
||||
import eu.dnetlib.pace.config.Config;
|
||||
import eu.dnetlib.pace.tree.support.AbstractStringComparator;
|
||||
import eu.dnetlib.pace.tree.support.ComparatorClass;
|
||||
|
||||
@ComparatorClass("dateRange")
|
||||
public class DateRange extends AbstractStringComparator {
|
||||
|
||||
int YEAR_RANGE;
|
||||
int YEAR_RANGE;
|
||||
|
||||
public DateRange(Map<String, String> params) {
|
||||
super(params, new com.wcohen.ss.JaroWinkler());
|
||||
YEAR_RANGE = Integer.parseInt(params.getOrDefault("year_range", "3"));
|
||||
}
|
||||
public DateRange(Map<String, String> params) {
|
||||
super(params, new com.wcohen.ss.JaroWinkler());
|
||||
YEAR_RANGE = Integer.parseInt(params.getOrDefault("year_range", "3"));
|
||||
}
|
||||
|
||||
public DateRange(final double weight) {
|
||||
super(weight, new com.wcohen.ss.JaroWinkler());
|
||||
}
|
||||
public DateRange(final double weight) {
|
||||
super(weight, new com.wcohen.ss.JaroWinkler());
|
||||
}
|
||||
|
||||
protected DateRange(final double weight, final AbstractStringDistance ssalgo) {
|
||||
super(weight, ssalgo);
|
||||
}
|
||||
protected DateRange(final double weight, final AbstractStringDistance ssalgo) {
|
||||
super(weight, ssalgo);
|
||||
}
|
||||
|
||||
public static boolean isNumeric(String str) {
|
||||
return str.matches("\\d+"); //match a number with optional '-' and decimal.
|
||||
}
|
||||
public static boolean isNumeric(String str) {
|
||||
return str.matches("\\d+"); // match a number with optional '-' and decimal.
|
||||
}
|
||||
|
||||
@Override
|
||||
public double distance(final String a, final String b, final Config conf) {
|
||||
if (a.isEmpty() || b.isEmpty()) {
|
||||
return -1.0; // return -1 if a field is missing
|
||||
}
|
||||
@Override
|
||||
public double distance(final String a, final String b, final Config conf) {
|
||||
if (a.isEmpty() || b.isEmpty()) {
|
||||
return -1.0; // return -1 if a field is missing
|
||||
}
|
||||
|
||||
try {
|
||||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd", Locale.ENGLISH);
|
||||
LocalDate d1 = LocalDate.parse(a, formatter);
|
||||
LocalDate d2 = LocalDate.parse(b, formatter);
|
||||
Period period = Period.between(d1, d2);
|
||||
try {
|
||||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd", Locale.ENGLISH);
|
||||
LocalDate d1 = LocalDate.parse(a, formatter);
|
||||
LocalDate d2 = LocalDate.parse(b, formatter);
|
||||
Period period = Period.between(d1, d2);
|
||||
|
||||
return period.getYears() <= YEAR_RANGE? 1.0 : 0.0;
|
||||
}
|
||||
catch (DateTimeException e) {
|
||||
return -1.0;
|
||||
}
|
||||
return period.getYears() <= YEAR_RANGE ? 1.0 : 0.0;
|
||||
} catch (DateTimeException e) {
|
||||
return -1.0;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getWeight() {
|
||||
return super.weight;
|
||||
}
|
||||
@Override
|
||||
public double getWeight() {
|
||||
return super.weight;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected double normalize(final double d) {
|
||||
return d;
|
||||
}
|
||||
@Override
|
||||
protected double normalize(final double d) {
|
||||
return d;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -62,7 +62,7 @@ public class JsonListMatch extends AbstractListComparator {
|
|||
|
||||
Set<String> types = Sets.intersection(typesA, typesB);
|
||||
|
||||
if (types.isEmpty()) // if no common type, it is impossible to compare
|
||||
if (types.isEmpty()) // if no common type, it is impossible to compare
|
||||
return -1;
|
||||
|
||||
ca = ca.stream().filter(s -> types.contains(s.split("::")[0])).collect(Collectors.toSet());
|
||||
|
|
|
@ -72,14 +72,34 @@ public class ComparatorTest extends AbstractPaceTest {
|
|||
CodeMatch codeMatch = new CodeMatch(params);
|
||||
|
||||
// names have different codes
|
||||
assertEquals(0.0, codeMatch.distance("physical oceanography at ctd station june 1998 ev02a", "physical oceanography at ctd station june 1998 ir02", conf));
|
||||
assertEquals(
|
||||
0.0,
|
||||
codeMatch
|
||||
.distance(
|
||||
"physical oceanography at ctd station june 1998 ev02a",
|
||||
"physical oceanography at ctd station june 1998 ir02", conf));
|
||||
|
||||
// names have same code
|
||||
assertEquals(1.0, codeMatch.distance("physical oceanography at ctd station june 1998 ev02a", "physical oceanography at ctd station june 1998 ev02a", conf));
|
||||
assertEquals(
|
||||
1.0,
|
||||
codeMatch
|
||||
.distance(
|
||||
"physical oceanography at ctd station june 1998 ev02a",
|
||||
"physical oceanography at ctd station june 1998 ev02a", conf));
|
||||
|
||||
// code is not in both names
|
||||
assertEquals(-1, codeMatch.distance("physical oceanography at ctd station june 1998", "physical oceanography at ctd station june 1998 ev02a", conf));
|
||||
assertEquals(1.0, codeMatch.distance("physical oceanography at ctd station june 1998", "physical oceanography at ctd station june 1998", conf));
|
||||
assertEquals(
|
||||
-1,
|
||||
codeMatch
|
||||
.distance(
|
||||
"physical oceanography at ctd station june 1998",
|
||||
"physical oceanography at ctd station june 1998 ev02a", conf));
|
||||
assertEquals(
|
||||
1.0,
|
||||
codeMatch
|
||||
.distance(
|
||||
"physical oceanography at ctd station june 1998", "physical oceanography at ctd station june 1998",
|
||||
conf));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -275,7 +295,7 @@ public class ComparatorTest extends AbstractPaceTest {
|
|||
Arrays
|
||||
.asList(
|
||||
"{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":null,\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:actionset\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"grid\",\"classname\":\"GRID Identifier\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"grid_1\"}",
|
||||
"{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":null,\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:actionset\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"ror\",\"classname\":\"Research Organization Registry\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"ror_1\"}"),
|
||||
"{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":null,\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:actionset\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"ror\",\"classname\":\"Research Organization Registry\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"ror_1\"}"),
|
||||
"authors");
|
||||
List<String> b = createFieldList(
|
||||
Arrays
|
||||
|
|
|
@ -130,7 +130,7 @@ public class ResultTagger implements Serializable {
|
|||
// log.info("Remove constraints for " + communityId);
|
||||
if (conf.getRemoveConstraintsMap().keySet().contains(communityId) &&
|
||||
conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null &&
|
||||
!conf.getRemoveConstraintsMap().get(communityId).getCriteria().isEmpty() &&
|
||||
!conf.getRemoveConstraintsMap().get(communityId).getCriteria().isEmpty() &&
|
||||
conf
|
||||
.getRemoveConstraintsMap()
|
||||
.get(communityId)
|
||||
|
@ -228,7 +228,7 @@ public class ResultTagger implements Serializable {
|
|||
.forEach(communityId -> {
|
||||
if (!removeCommunities.contains(communityId) &&
|
||||
conf.getSelectionConstraintsMap().get(communityId).getCriteria() != null &&
|
||||
!conf.getSelectionConstraintsMap().get(communityId).getCriteria().isEmpty() &&
|
||||
!conf.getSelectionConstraintsMap().get(communityId).getCriteria().isEmpty() &&
|
||||
conf
|
||||
.getSelectionConstraintsMap()
|
||||
.get(communityId)
|
||||
|
|
|
@ -915,7 +915,8 @@ class MappersTest {
|
|||
|
||||
@Test
|
||||
void testODFRecord_guidelines4() throws IOException {
|
||||
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_guidelines4.xml")));
|
||||
final String xml = IOUtils
|
||||
.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_guidelines4.xml")));
|
||||
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
|
||||
|
||||
final Publication p = (Publication) list.get(0);
|
||||
|
|
|
@ -5,7 +5,6 @@ import java.io.StringReader;
|
|||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.schema.solr.PersonTopic;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
|
@ -40,6 +39,7 @@ import eu.dnetlib.dhp.schema.solr.OpenAccessColor;
|
|||
import eu.dnetlib.dhp.schema.solr.OpenAccessRoute;
|
||||
import eu.dnetlib.dhp.schema.solr.Organization;
|
||||
import eu.dnetlib.dhp.schema.solr.Person;
|
||||
import eu.dnetlib.dhp.schema.solr.PersonTopic;
|
||||
import eu.dnetlib.dhp.schema.solr.Pid;
|
||||
import eu.dnetlib.dhp.schema.solr.Project;
|
||||
import eu.dnetlib.dhp.schema.solr.Result;
|
||||
|
@ -216,11 +216,14 @@ public class ProvisionModelSupport {
|
|||
}
|
||||
|
||||
private static List<PersonTopic> mapPersonTopics(List<eu.dnetlib.dhp.schema.oaf.PersonTopic> subjects) {
|
||||
return Optional.ofNullable(subjects)
|
||||
.map(ss -> ss.stream()
|
||||
.map(ProvisionModelSupport::mapPersonTopic)
|
||||
.collect(Collectors.toList()))
|
||||
.orElse(null);
|
||||
return Optional
|
||||
.ofNullable(subjects)
|
||||
.map(
|
||||
ss -> ss
|
||||
.stream()
|
||||
.map(ProvisionModelSupport::mapPersonTopic)
|
||||
.collect(Collectors.toList()))
|
||||
.orElse(null);
|
||||
}
|
||||
|
||||
private static PersonTopic mapPersonTopic(eu.dnetlib.dhp.schema.oaf.PersonTopic pt) {
|
||||
|
|
Loading…
Reference in New Issue