1
0
Fork 0

events recognition

This commit is contained in:
Michele Artini 2020-05-15 12:25:37 +02:00
parent a832658296
commit 2a4e68a292
18 changed files with 281 additions and 140 deletions

View File

@ -17,18 +17,18 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.broker.model.Event; import eu.dnetlib.dhp.broker.model.Event;
import eu.dnetlib.dhp.broker.model.EventFactory; import eu.dnetlib.dhp.broker.model.EventFactory;
import eu.dnetlib.dhp.broker.oa.util.EnrichMissingAbstract; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAbstract;
import eu.dnetlib.dhp.broker.oa.util.EnrichMissingAuthorOrcid; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAuthorOrcid;
import eu.dnetlib.dhp.broker.oa.util.EnrichMissingOpenAccess; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingOpenAccess;
import eu.dnetlib.dhp.broker.oa.util.EnrichMissingPid; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPid;
import eu.dnetlib.dhp.broker.oa.util.EnrichMissingProject; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingProject;
import eu.dnetlib.dhp.broker.oa.util.EnrichMissingPublicationDate; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationDate;
import eu.dnetlib.dhp.broker.oa.util.EnrichMissingSubject; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingSubject;
import eu.dnetlib.dhp.broker.oa.util.EnrichMoreOpenAccess; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreOpenAccess;
import eu.dnetlib.dhp.broker.oa.util.EnrichMorePid; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMorePid;
import eu.dnetlib.dhp.broker.oa.util.EnrichMoreSubject; import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreSubject;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.UpdateMatcher;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;

View File

@ -1,11 +1,12 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.matchers;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMissingAbstract extends UpdateMatcher<String> { public class EnrichMissingAbstract extends UpdateMatcher<String> {

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.matchers;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
@ -7,6 +7,7 @@ import java.util.List;
import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.lang3.tuple.Pair;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMissingAuthorOrcid extends UpdateMatcher<Pair<String, String>> { public class EnrichMissingAuthorOrcid extends UpdateMatcher<Pair<String, String>> {

View File

@ -0,0 +1,55 @@
package eu.dnetlib.dhp.broker.oa.matchers;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import eu.dnetlib.broker.objects.Instance;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMissingOpenAccess extends UpdateMatcher<Instance> {
public EnrichMissingOpenAccess() {
super(true);
}
@Override
protected List<UpdateInfo<Instance>> findUpdates(final Result source, final Result target) {
final long count = target
.getInstance()
.stream()
.map(i -> i.getAccessright().getClassid())
.filter(right -> right.equals(BrokerConstants.OPEN_ACCESS))
.count();
if (count > 0) {
return Arrays.asList();
}
return source
.getInstance()
.stream()
.filter(i -> i.getAccessright().getClassid().equals(BrokerConstants.OPEN_ACCESS))
.map(ConversionUtils::oafInstanceToBrokerInstances)
.flatMap(s -> s)
.map(i -> generateUpdateInfo(i, source, target))
.collect(Collectors.toList());
}
@Override
public UpdateInfo<Instance> generateUpdateInfo(final Instance highlightValue,
final Result source,
final Result target) {
return new UpdateInfo<>(
Topic.ENRICH_MISSING_OA_VERSION,
highlightValue, source, target,
(p, i) -> p.getInstances().add(i),
Instance::getUrl);
}
}

View File

@ -1,11 +1,14 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.matchers;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.stream.Collectors;
import eu.dnetlib.broker.objects.Pid; import eu.dnetlib.broker.objects.Pid;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMissingPid extends UpdateMatcher<Pid> { public class EnrichMissingPid extends UpdateMatcher<Pid> {
@ -16,8 +19,18 @@ public class EnrichMissingPid extends UpdateMatcher<Pid> {
@Override @Override
protected List<UpdateInfo<Pid>> findUpdates(final Result source, final Result target) { protected List<UpdateInfo<Pid>> findUpdates(final Result source, final Result target) {
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); final long count = target.getPid().size();
return Arrays.asList();
if (count > 0) {
return Arrays.asList();
}
return source
.getPid()
.stream()
.map(ConversionUtils::oafPidToBrokerPid)
.map(i -> generateUpdateInfo(i, source, target))
.collect(Collectors.toList());
} }
@Override @Override

View File

@ -1,11 +1,12 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.matchers;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import eu.dnetlib.broker.objects.Project; import eu.dnetlib.broker.objects.Project;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMissingProject extends UpdateMatcher<Project> { public class EnrichMissingProject extends UpdateMatcher<Project> {
@ -21,7 +22,8 @@ public class EnrichMissingProject extends UpdateMatcher<Project> {
} }
@Override @Override
public UpdateInfo<Project> generateUpdateInfo(final Project highlightValue, final Result source, public UpdateInfo<Project> generateUpdateInfo(final Project highlightValue,
final Result source,
final Result target) { final Result target) {
return new UpdateInfo<>( return new UpdateInfo<>(
Topic.ENRICH_MISSING_PROJECT, Topic.ENRICH_MISSING_PROJECT,

View File

@ -1,10 +1,11 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.matchers;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMissingPublicationDate extends UpdateMatcher<String> { public class EnrichMissingPublicationDate extends UpdateMatcher<String> {

View File

@ -0,0 +1,53 @@
package eu.dnetlib.dhp.broker.oa.matchers;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class EnrichMissingSubject extends UpdateMatcher<Pair<String, String>> {
public EnrichMissingSubject() {
super(true);
}
@Override
protected List<UpdateInfo<Pair<String, String>>> findUpdates(final Result source, final Result target) {
final Set<String> existingTypes = target
.getSubject()
.stream()
.map(StructuredProperty::getQualifier)
.map(Qualifier::getClassid)
.collect(Collectors.toSet());
return source
.getPid()
.stream()
.filter(pid -> !existingTypes.contains(pid.getQualifier().getClassid()))
.map(ConversionUtils::oafSubjectToPair)
.map(i -> generateUpdateInfo(i, source, target))
.collect(Collectors.toList());
}
@Override
public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue,
final Result source,
final Result target) {
return new UpdateInfo<>(
Topic.fromPath("ENRICH/MISSING/SUBJECT/" + highlightValue.getLeft()),
highlightValue, source, target,
(p, pair) -> p.getSubjects().add(pair.getRight()),
pair -> pair.getLeft() + "::" + pair.getRight());
}
}

View File

@ -0,0 +1,53 @@
package eu.dnetlib.dhp.broker.oa.matchers;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import eu.dnetlib.broker.objects.Instance;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMoreOpenAccess extends UpdateMatcher<Instance> {
public EnrichMoreOpenAccess() {
super(true);
}
@Override
protected List<UpdateInfo<Instance>> findUpdates(final Result source, final Result target) {
final Set<String> urls = target
.getInstance()
.stream()
.filter(i -> i.getAccessright().getClassid().equals(BrokerConstants.OPEN_ACCESS))
.map(i -> i.getUrl())
.flatMap(List::stream)
.collect(Collectors.toSet());
return source
.getInstance()
.stream()
.filter(i -> i.getAccessright().getClassid().equals(BrokerConstants.OPEN_ACCESS))
.map(ConversionUtils::oafInstanceToBrokerInstances)
.flatMap(s -> s)
.filter(i -> !urls.contains(i.getUrl()))
.map(i -> generateUpdateInfo(i, source, target))
.collect(Collectors.toList());
}
@Override
public UpdateInfo<Instance> generateUpdateInfo(final Instance highlightValue,
final Result source,
final Result target) {
return new UpdateInfo<>(
Topic.ENRICH_MORE_OA_VERSION,
highlightValue, source, target,
(p, i) -> p.getInstances().add(i),
Instance::getUrl);
}
}

View File

@ -1,11 +1,14 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.matchers;
import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import eu.dnetlib.broker.objects.Pid; import eu.dnetlib.broker.objects.Pid;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMorePid extends UpdateMatcher<Pid> { public class EnrichMorePid extends UpdateMatcher<Pid> {
@ -16,8 +19,19 @@ public class EnrichMorePid extends UpdateMatcher<Pid> {
@Override @Override
protected List<UpdateInfo<Pid>> findUpdates(final Result source, final Result target) { protected List<UpdateInfo<Pid>> findUpdates(final Result source, final Result target) {
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); final Set<String> existingPids = target
return Arrays.asList(); .getPid()
.stream()
.map(pid -> pid.getQualifier().getClassid() + "::" + pid.getValue())
.collect(Collectors.toSet());
return source
.getPid()
.stream()
.filter(pid -> !existingPids.contains(pid.getQualifier().getClassid() + "::" + pid.getValue()))
.map(ConversionUtils::oafPidToBrokerPid)
.map(i -> generateUpdateInfo(i, source, target))
.collect(Collectors.toList());
} }
@Override @Override

View File

@ -1,12 +1,15 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.matchers;
import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.lang3.tuple.Pair;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMoreSubject extends UpdateMatcher<Pair<String, String>> { public class EnrichMoreSubject extends UpdateMatcher<Pair<String, String>> {
@ -17,18 +20,25 @@ public class EnrichMoreSubject extends UpdateMatcher<Pair<String, String>> {
@Override @Override
protected List<UpdateInfo<Pair<String, String>>> findUpdates(final Result source, final Result target) { protected List<UpdateInfo<Pair<String, String>>> findUpdates(final Result source, final Result target) {
// MESHEUROPMC final Set<String> existingSubjects = target
// ARXIV .getSubject()
// JEL .stream()
// DDC .map(pid -> pid.getQualifier().getClassid() + "::" + pid.getValue())
// ACM .collect(Collectors.toSet());
return Arrays.asList(); return source
.getPid()
.stream()
.filter(pid -> !existingSubjects.contains(pid.getQualifier().getClassid() + "::" + pid.getValue()))
.map(ConversionUtils::oafSubjectToPair)
.map(i -> generateUpdateInfo(i, source, target))
.collect(Collectors.toList());
} }
@Override @Override
public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue, public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue,
final Result source, final Result target) { final Result source,
final Result target) {
return new UpdateInfo<>( return new UpdateInfo<>(
Topic.fromPath("ENRICH/MORE/SUBJECT/" + highlightValue.getLeft()), Topic.fromPath("ENRICH/MORE/SUBJECT/" + highlightValue.getLeft()),

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.matchers;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
@ -10,6 +10,7 @@ import java.util.Map;
import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.schema.oaf.Field; import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;

View File

@ -0,0 +1,7 @@
package eu.dnetlib.dhp.broker.oa.util;
public class BrokerConstants {
public final static String OPEN_ACCESS = "OPEN";
}

View File

@ -0,0 +1,36 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.stream.Stream;
import org.apache.commons.lang3.tuple.Pair;
import eu.dnetlib.broker.objects.Instance;
import eu.dnetlib.broker.objects.Pid;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class ConversionUtils {
public static Stream<Instance> oafInstanceToBrokerInstances(final eu.dnetlib.dhp.schema.oaf.Instance i) {
return i.getUrl().stream().map(url -> {
final Instance r = new Instance();
r.setUrl(url);
r.setInstancetype(i.getInstancetype().getClassid());
r.setLicense(BrokerConstants.OPEN_ACCESS);
r.setHostedby(i.getHostedby().getValue());
return r;
});
}
public static Pid oafPidToBrokerPid(final StructuredProperty sp) {
final Pid pid = new Pid();
pid.setValue(sp.getValue());
pid.setType(sp.getQualifier().getClassid());
return pid;
}
public static final Pair<String, String> oafSubjectToPair(final StructuredProperty sp) {
return Pair.of(sp.getQualifier().getClassid(), sp.getValue());
}
}

View File

@ -1,33 +0,0 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays;
import java.util.List;
import eu.dnetlib.broker.objects.Instance;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMissingOpenAccess extends UpdateMatcher<Instance> {
public EnrichMissingOpenAccess() {
super(true);
}
@Override
protected List<UpdateInfo<Instance>> findUpdates(final Result source, final Result target) {
return Arrays.asList();
}
@Override
public UpdateInfo<Instance> generateUpdateInfo(final Instance highlightValue, final Result source,
final Result target) {
return new UpdateInfo<>(
Topic.ENRICH_MISSING_OA_VERSION,
highlightValue, source, target,
(p, i) -> p.getInstances().add(i),
Instance::getUrl);
}
}

View File

@ -1,40 +0,0 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang3.tuple.Pair;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMissingSubject extends UpdateMatcher<Pair<String, String>> {
public EnrichMissingSubject() {
super(true);
}
@Override
protected List<UpdateInfo<Pair<String, String>>> findUpdates(final Result source, final Result target) {
// MESHEUROPMC
// ARXIV
// JEL
// DDC
// ACM
return Arrays.asList();
}
@Override
public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue,
final Result source, final Result target) {
return new UpdateInfo<>(
Topic.fromPath("ENRICH/MISSING/SUBJECT/" + highlightValue.getLeft()),
highlightValue, source, target,
(p, pair) -> p.getSubjects().add(pair.getRight()),
pair -> pair.getLeft() + "::" + pair.getRight());
}
}

View File

@ -1,33 +0,0 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays;
import java.util.List;
import eu.dnetlib.broker.objects.Instance;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMoreOpenAccess extends UpdateMatcher<Instance> {
public EnrichMoreOpenAccess() {
super(true);
}
@Override
protected List<UpdateInfo<Instance>> findUpdates(final Result source, final Result target) {
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
return Arrays.asList();
}
@Override
public UpdateInfo<Instance> generateUpdateInfo(final Instance highlightValue, final Result source,
final Result target) {
return new UpdateInfo<>(
Topic.ENRICH_MORE_OA_VERSION,
highlightValue, source, target,
(p, i) -> p.getInstances().add(i),
Instance::getUrl);
}
}

View File

@ -25,7 +25,7 @@ public final class UpdateInfo<T> {
private final float trust; private final float trust;
protected UpdateInfo(final Topic topic, final T highlightValue, final Result source, final Result target, public UpdateInfo(final Topic topic, final T highlightValue, final Result source, final Result target,
final BiConsumer<Publication, T> compileHighlight, final BiConsumer<Publication, T> compileHighlight,
final Function<T, String> highlightToString) { final Function<T, String> highlightToString) {
this.topic = topic; this.topic = topic;