This commit is contained in:
Claudio Atzori 2020-05-13 17:01:17 +02:00
commit 12bfa6702e
15 changed files with 363 additions and 194 deletions

View File

@ -29,31 +29,32 @@ public class EventFactory {
"yyyy-MM-dd" "yyyy-MM-dd"
}; };
public static Event newBrokerEvent(final Result source, final Result target, final UpdateInfo<?> updateInfo) { public static Event newBrokerEvent(final UpdateInfo<?> updateInfo) {
final long now = new Date().getTime(); final long now = new Date().getTime();
final Event res = new Event(); final Event res = new Event();
final Map<String, Object> map = createMapFromResult(target, source, updateInfo); final Map<String, Object> map = createMapFromResult(updateInfo);
final String payload = createPayload(target, updateInfo); final String payload = createPayload(updateInfo);
final String eventId = calculateEventId( final String eventId = calculateEventId(
updateInfo.getTopic(), target.getOriginalId().get(0), updateInfo.getHighlightValueAsString()); updateInfo.getTopicPath(), updateInfo.getTarget().getOriginalId().get(0),
updateInfo.getHighlightValueAsString());
res.setEventId(eventId); res.setEventId(eventId);
res.setProducerId(PRODUCER_ID); res.setProducerId(PRODUCER_ID);
res.setPayload(payload); res.setPayload(payload);
res.setMap(map); res.setMap(map);
res.setTopic(updateInfo.getTopic()); res.setTopic(updateInfo.getTopicPath());
res.setCreationDate(now); res.setCreationDate(now);
res.setExpiryDate(calculateExpiryDate(now)); res.setExpiryDate(calculateExpiryDate(now));
res.setInstantMessage(false); res.setInstantMessage(false);
return res; return res;
} }
private static String createPayload(final Result result, final UpdateInfo<?> updateInfo) { private static String createPayload(final UpdateInfo<?> updateInfo) {
final OpenAireEventPayload payload = new OpenAireEventPayload(); final OpenAireEventPayload payload = new OpenAireEventPayload();
// TODO // TODO
@ -62,32 +63,34 @@ public class EventFactory {
return payload.toJSON(); return payload.toJSON();
} }
private static Map<String, Object> createMapFromResult(final Result oaf, final Result source, private static Map<String, Object> createMapFromResult(final UpdateInfo<?> updateInfo) {
final UpdateInfo<?> updateInfo) {
final Map<String, Object> map = new HashMap<>(); final Map<String, Object> map = new HashMap<>();
final List<KeyValue> collectedFrom = oaf.getCollectedfrom(); final Result source = updateInfo.getSource();
final Result target = updateInfo.getTarget();
final List<KeyValue> collectedFrom = target.getCollectedfrom();
if (collectedFrom.size() == 1) { if (collectedFrom.size() == 1) {
map.put("target_datasource_id", collectedFrom.get(0).getKey()); map.put("target_datasource_id", collectedFrom.get(0).getKey());
map.put("target_datasource_name", collectedFrom.get(0).getValue()); map.put("target_datasource_name", collectedFrom.get(0).getValue());
} }
final List<String> ids = oaf.getOriginalId(); final List<String> ids = target.getOriginalId();
if (ids.size() > 0) { if (ids.size() > 0) {
map.put("target_publication_id", ids.get(0)); map.put("target_publication_id", ids.get(0));
} }
final List<StructuredProperty> titles = oaf.getTitle(); final List<StructuredProperty> titles = target.getTitle();
if (titles.size() > 0) { if (titles.size() > 0) {
map.put("target_publication_title", titles.get(0)); map.put("target_publication_title", titles.get(0));
} }
final long date = parseDateTolong(oaf.getDateofacceptance().getValue()); final long date = parseDateTolong(target.getDateofacceptance().getValue());
if (date > 0) { if (date > 0) {
map.put("target_dateofacceptance", date); map.put("target_dateofacceptance", date);
} }
final List<StructuredProperty> subjects = oaf.getSubject(); final List<StructuredProperty> subjects = target.getSubject();
if (subjects.size() > 0) { if (subjects.size() > 0) {
map map
.put( .put(
@ -95,7 +98,7 @@ public class EventFactory {
subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList())); subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList()));
} }
final List<Author> authors = oaf.getAuthor(); final List<Author> authors = target.getAuthor();
if (authors.size() > 0) { if (authors.size() > 0) {
map map
.put( .put(

View File

@ -0,0 +1,52 @@
package eu.dnetlib.dhp.broker.model;
/**
 * Broker topics that an update can be published under.
 *
 * <p>Each constant carries the slash-separated topic path used to identify it
 * (for example {@code ENRICH/MISSING/ABSTRACT}). The declaration order of the
 * constants is kept stable so that ordinals do not change.
 */
public enum Topic {

	// ENRICHMENT MISSING
	ENRICH_MISSING_OA_VERSION("ENRICH/MISSING/OPENACCESS_VERSION"),
	ENRICH_MISSING_ABSTRACT("ENRICH/MISSING/ABSTRACT"),
	ENRICH_MISSING_PUBLICATION_DATE("ENRICH/MISSING/PUBLICATION_DATE"),
	ENRICH_MISSING_PID("ENRICH/MISSING/PID"),
	ENRICH_MISSING_PROJECT("ENRICH/MISSING/PROJECT"),
	ENRICH_MISSING_SOFTWARE("ENRICH/MISSING/SOFTWARE"),
	ENRICH_MISSING_SUBJECT_MESHEUROPMC("ENRICH/MISSING/SUBJECT/MESHEUROPMC"),
	ENRICH_MISSING_SUBJECT_ARXIV("ENRICH/MISSING/SUBJECT/ARXIV"),
	ENRICH_MISSING_SUBJECT_JEL("ENRICH/MISSING/SUBJECT/JEL"),
	ENRICH_MISSING_SUBJECT_DDC("ENRICH/MISSING/SUBJECT/DDC"),
	ENRICH_MISSING_SUBJECT_ACM("ENRICH/MISSING/SUBJECT/ACM"),
	ENRICH_MISSING_SUBJECT_RVK("ENRICH/MISSING/SUBJECT/RVK"),
	ENRICH_MISSING_AUTHOR_ORCID("ENRICH/MISSING/AUTHOR/ORCID"),

	// ENRICHMENT MORE
	ENRICH_MORE_PID("ENRICH/MORE/PID"),
	ENRICH_MORE_OA_VERSION("ENRICH/MORE/OPENACCESS_VERSION"),
	ENRICH_MORE_ABSTRACT("ENRICH/MORE/ABSTRACT"),
	ENRICH_MORE_PUBLICATION_DATE("ENRICH/MORE/PUBLICATION_DATE"),
	ENRICH_MORE_PROJECT("ENRICH/MORE/PROJECT"),
	ENRICH_MORE_SUBJECT_MESHEUROPMC("ENRICH/MORE/SUBJECT/MESHEUROPMC"),
	ENRICH_MORE_SUBJECT_ARXIV("ENRICH/MORE/SUBJECT/ARXIV"),
	ENRICH_MORE_SUBJECT_JEL("ENRICH/MORE/SUBJECT/JEL"),
	ENRICH_MORE_SUBJECT_DDC("ENRICH/MORE/SUBJECT/DDC"),
	ENRICH_MORE_SUBJECT_ACM("ENRICH/MORE/SUBJECT/ACM"),
	ENRICH_MORE_SUBJECT_RVK("ENRICH/MORE/SUBJECT/RVK"),

	// ADDITION
	ADD_BY_PROJECT("ADD/BY_PROJECT");

	// Enum constants are shared singletons, so their state must not be reassignable.
	protected final String path;

	Topic(final String path) {
		this.path = path;
	}

	/**
	 * @return the slash-separated path of this topic
	 */
	public String getPath() {
		return this.path;
	}

	/**
	 * Looks up the topic whose path is exactly equal to the given string.
	 *
	 * @param path the topic path to search for (a {@code null} path matches nothing)
	 * @return the matching topic, or {@code null} when no constant has that path
	 */
	public static Topic fromPath(final String path) {
		for (final Topic t : Topic.values()) {
			if (t.getPath().equals(path)) {
				return t;
			}
		}
		// Callers must be prepared for an unknown path: no exception is raised here.
		return null;
	}
}

View File

@ -14,8 +14,6 @@ import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.broker.model.Event; import eu.dnetlib.dhp.broker.model.Event;
import eu.dnetlib.dhp.broker.model.EventFactory; import eu.dnetlib.dhp.broker.model.EventFactory;
@ -30,6 +28,7 @@ import eu.dnetlib.dhp.broker.oa.util.EnrichMoreOpenAccess;
import eu.dnetlib.dhp.broker.oa.util.EnrichMorePid; import eu.dnetlib.dhp.broker.oa.util.EnrichMorePid;
import eu.dnetlib.dhp.broker.oa.util.EnrichMoreSubject; import eu.dnetlib.dhp.broker.oa.util.EnrichMoreSubject;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.UpdateMatcher;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
@ -37,7 +36,16 @@ public class GenerateEventsApplication {
private static final Logger log = LoggerFactory.getLogger(GenerateEventsApplication.class); private static final Logger log = LoggerFactory.getLogger(GenerateEventsApplication.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final UpdateMatcher<?> enrichMissingAbstract = new EnrichMissingAbstract();
private static final UpdateMatcher<?> enrichMissingAuthorOrcid = new EnrichMissingAuthorOrcid();
private static final UpdateMatcher<?> enrichMissingOpenAccess = new EnrichMissingOpenAccess();
private static final UpdateMatcher<?> enrichMissingPid = new EnrichMissingPid();
private static final UpdateMatcher<?> enrichMissingProject = new EnrichMissingProject();
private static final UpdateMatcher<?> enrichMissingPublicationDate = new EnrichMissingPublicationDate();
private static final UpdateMatcher<?> enrichMissingSubject = new EnrichMissingSubject();
private static final UpdateMatcher<?> enrichMoreOpenAccess = new EnrichMoreOpenAccess();
private static final UpdateMatcher<?> enrichMorePid = new EnrichMorePid();
private static final UpdateMatcher<?> enrichMoreSubject = new EnrichMoreSubject();
public static void main(final String[] args) throws Exception { public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@ -76,37 +84,22 @@ public class GenerateEventsApplication {
} }
private List<Event> generateEvents(final Result... children) { private List<Event> generateEvents(final Result... children) {
final List<Event> list = new ArrayList<>();
for (final Result source : children) {
for (final Result target : children) {
if (source != target) {
list
.addAll(
findUpdates(source, target)
.stream()
.map(info -> EventFactory.newBrokerEvent(source, target, info))
.collect(Collectors.toList()));
}
}
}
return list;
}
private List<UpdateInfo<?>> findUpdates(final Result source, final Result target) {
final List<UpdateInfo<?>> list = new ArrayList<>(); final List<UpdateInfo<?>> list = new ArrayList<>();
list.addAll(EnrichMissingAbstract.findUpdates(source, target));
list.addAll(EnrichMissingAuthorOrcid.findUpdates(source, target)); for (final Result target : children) {
list.addAll(EnrichMissingOpenAccess.findUpdates(source, target)); list.addAll(enrichMissingAbstract.searchUpdatesForRecord(target, children));
list.addAll(EnrichMissingPid.findUpdates(source, target)); list.addAll(enrichMissingAuthorOrcid.searchUpdatesForRecord(target, children));
list.addAll(EnrichMissingProject.findUpdates(source, target)); list.addAll(enrichMissingOpenAccess.searchUpdatesForRecord(target, children));
list.addAll(EnrichMissingPublicationDate.findUpdates(source, target)); list.addAll(enrichMissingPid.searchUpdatesForRecord(target, children));
list.addAll(EnrichMissingSubject.findUpdates(source, target)); list.addAll(enrichMissingProject.searchUpdatesForRecord(target, children));
list.addAll(EnrichMoreOpenAccess.findUpdates(source, target)); list.addAll(enrichMissingPublicationDate.searchUpdatesForRecord(target, children));
list.addAll(EnrichMorePid.findUpdates(source, target)); list.addAll(enrichMissingSubject.searchUpdatesForRecord(target, children));
list.addAll(EnrichMoreSubject.findUpdates(source, target)); list.addAll(enrichMoreOpenAccess.searchUpdatesForRecord(target, children));
return list; list.addAll(enrichMorePid.searchUpdatesForRecord(target, children));
list.addAll(enrichMoreSubject.searchUpdatesForRecord(target, children));
}
return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList());
} }
} }

View File

@ -1,31 +1,35 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import eu.dnetlib.broker.objects.OpenAireEventPayload; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMissingAbstract extends UpdateInfo<String> { public class EnrichMissingAbstract extends UpdateMatcher<String> {
public static List<EnrichMissingAbstract> findUpdates(final Result source, final Result target) { public EnrichMissingAbstract() {
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); super(false);
return Arrays.asList();
}
private EnrichMissingAbstract(final String highlightValue, final float trust) {
super("ENRICH/MISSING/ABSTRACT", highlightValue, trust);
} }
@Override @Override
public void compileHighlight(final OpenAireEventPayload payload) { protected List<UpdateInfo<String>> findUpdates(final Result source, final Result target) {
payload.getHighlight().getAbstracts().add(getHighlightValue()); if (isMissing(target.getDescription()) && !isMissing(source.getDescription())) {
return Arrays.asList(generateUpdateInfo(source.getDescription().get(0).getValue(), source, target));
}
return new ArrayList<>();
} }
@Override @Override
public String getHighlightValueAsString() { public UpdateInfo<String> generateUpdateInfo(final String highlightValue, final Result source,
return getHighlightValue(); final Result target) {
return new UpdateInfo<>(
Topic.ENRICH_MISSING_ABSTRACT,
highlightValue, source, target,
(p, s) -> p.getAbstracts().add(s),
s -> s);
} }
} }

View File

@ -4,28 +4,30 @@ package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import eu.dnetlib.broker.objects.OpenAireEventPayload; import org.apache.commons.lang3.tuple.Pair;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMissingAuthorOrcid extends UpdateInfo<String> { public class EnrichMissingAuthorOrcid extends UpdateMatcher<Pair<String, String>> {
public static List<EnrichMissingAuthorOrcid> findUpdates(final Result source, final Result target) { public EnrichMissingAuthorOrcid() {
super(true);
}
@Override
protected List<UpdateInfo<Pair<String, String>>> findUpdates(final Result source, final Result target) {
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
return Arrays.asList(); return Arrays.asList();
} }
private EnrichMissingAuthorOrcid(final String highlightValue, final float trust) {
super("ENRICH/MISSING/AUTHOR/ORCID", highlightValue, trust);
}
@Override @Override
public void compileHighlight(final OpenAireEventPayload payload) { public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue,
// TODO final Result source, final Result target) {
return new UpdateInfo<>(
Topic.ENRICH_MISSING_AUTHOR_ORCID,
highlightValue, source, target,
(p, pair) -> p.getCreators().add(pair.getLeft() + " - ORCID: " + pair.getRight()),
pair -> pair.getLeft() + "::" + pair.getRight());
} }
@Override
public String getHighlightValueAsString() {
return getHighlightValue();
}
} }

View File

@ -5,28 +5,29 @@ import java.util.Arrays;
import java.util.List; import java.util.List;
import eu.dnetlib.broker.objects.Instance; import eu.dnetlib.broker.objects.Instance;
import eu.dnetlib.broker.objects.OpenAireEventPayload; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMissingOpenAccess extends UpdateInfo<Instance> { public class EnrichMissingOpenAccess extends UpdateMatcher<Instance> {
public EnrichMissingOpenAccess() {
super(true);
}
@Override
protected List<UpdateInfo<Instance>> findUpdates(final Result source, final Result target) {
public static List<EnrichMissingOpenAccess> findUpdates(final Result source, final Result target) {
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
return Arrays.asList(); return Arrays.asList();
} }
private EnrichMissingOpenAccess(final Instance highlightValue, final float trust) {
super("ENRICH/MISSING/OPENACCESS_VERSION", highlightValue, trust);
}
@Override @Override
public void compileHighlight(final OpenAireEventPayload payload) { public UpdateInfo<Instance> generateUpdateInfo(final Instance highlightValue, final Result source,
payload.getHighlight().getInstances().add(getHighlightValue()); final Result target) {
} return new UpdateInfo<>(
Topic.ENRICH_MISSING_OA_VERSION,
@Override highlightValue, source, target,
public String getHighlightValueAsString() { (p, i) -> p.getInstances().add(i),
return getHighlightValue().getUrl(); Instance::getUrl);
} }
} }

View File

@ -4,29 +4,29 @@ package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import eu.dnetlib.broker.objects.OpenAireEventPayload;
import eu.dnetlib.broker.objects.Pid; import eu.dnetlib.broker.objects.Pid;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMissingPid extends UpdateInfo<Pid> { public class EnrichMissingPid extends UpdateMatcher<Pid> {
public static List<EnrichMissingPid> findUpdates(final Result source, final Result target) { public EnrichMissingPid() {
super(true);
}
@Override
protected List<UpdateInfo<Pid>> findUpdates(final Result source, final Result target) {
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
return Arrays.asList(); return Arrays.asList();
} }
private EnrichMissingPid(final Pid highlightValue, final float trust) {
super("ENRICH/MISSING/PID", highlightValue, trust);
}
@Override @Override
public void compileHighlight(final OpenAireEventPayload payload) { public UpdateInfo<Pid> generateUpdateInfo(final Pid highlightValue, final Result source, final Result target) {
payload.getHighlight().getPids().add(getHighlightValue()); return new UpdateInfo<>(
} Topic.ENRICH_MISSING_PID,
highlightValue, source, target,
@Override (p, pid) -> p.getPids().add(pid),
public String getHighlightValueAsString() { pid -> pid.getType() + "::" + pid.getValue());
return getHighlightValue().getType() + "::" + getHighlightValue().getValue();
} }
} }

View File

@ -4,30 +4,30 @@ package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import eu.dnetlib.broker.objects.OpenAireEventPayload;
import eu.dnetlib.broker.objects.Project; import eu.dnetlib.broker.objects.Project;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMissingProject extends UpdateInfo<Project> { public class EnrichMissingProject extends UpdateMatcher<Project> {
public static List<EnrichMissingProject> findUpdates(final Result source, final Result target) { public EnrichMissingProject() {
super(true);
}
@Override
protected List<UpdateInfo<Project>> findUpdates(final Result source, final Result target) {
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
return Arrays.asList(); return Arrays.asList();
} }
private EnrichMissingProject(final Project highlightValue, final float trust) {
super("ENRICH/MISSING/PROJECT", highlightValue, trust);
}
@Override @Override
public void compileHighlight(final OpenAireEventPayload payload) { public UpdateInfo<Project> generateUpdateInfo(final Project highlightValue, final Result source,
payload.getHighlight().getProjects().add(getHighlightValue()); final Result target) {
} return new UpdateInfo<>(
Topic.ENRICH_MISSING_PROJECT,
@Override highlightValue, source, target,
public String getHighlightValueAsString() { (p, prj) -> p.getProjects().add(prj),
return getHighlightValue().getFunder() + "::" + getHighlightValue().getFundingProgram() prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode());
+ getHighlightValue().getCode();
} }
} }

View File

@ -4,28 +4,29 @@ package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import eu.dnetlib.broker.objects.OpenAireEventPayload; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMissingPublicationDate extends UpdateInfo<String> { public class EnrichMissingPublicationDate extends UpdateMatcher<String> {
public static List<EnrichMissingPublicationDate> findUpdates(final Result source, final Result target) { public EnrichMissingPublicationDate() {
super(false);
}
@Override
protected List<UpdateInfo<String>> findUpdates(final Result source, final Result target) {
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
return Arrays.asList(); return Arrays.asList();
} }
private EnrichMissingPublicationDate(final String highlightValue, final float trust) {
super("ENRICH/MISSING/PUBLICATION_DATE", highlightValue, trust);
}
@Override @Override
public void compileHighlight(final OpenAireEventPayload payload) { public UpdateInfo<String> generateUpdateInfo(final String highlightValue, final Result source,
payload.getHighlight().setPublicationdate(getHighlightValue()); final Result target) {
} return new UpdateInfo<>(
Topic.ENRICH_MISSING_PUBLICATION_DATE,
@Override highlightValue, source, target,
public String getHighlightValueAsString() { (p, date) -> p.setPublicationdate(date),
return getHighlightValue(); s -> s);
} }
} }

View File

@ -4,12 +4,19 @@ package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import eu.dnetlib.broker.objects.OpenAireEventPayload; import org.apache.commons.lang3.tuple.Pair;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMissingSubject extends UpdateInfo<String> { public class EnrichMissingSubject extends UpdateMatcher<Pair<String, String>> {
public static List<EnrichMissingSubject> findUpdates(final Result source, final Result target) { public EnrichMissingSubject() {
super(true);
}
@Override
protected List<UpdateInfo<Pair<String, String>>> findUpdates(final Result source, final Result target) {
// MESHEUROPMC // MESHEUROPMC
// ARXIV // ARXIV
// JEL // JEL
@ -19,18 +26,15 @@ public class EnrichMissingSubject extends UpdateInfo<String> {
return Arrays.asList(); return Arrays.asList();
} }
private EnrichMissingSubject(final String subjectClassification, final String highlightValue, final float trust) {
super("ENRICH/MISSING/SUBJECT/" + subjectClassification, highlightValue, trust);
}
@Override @Override
public void compileHighlight(final OpenAireEventPayload payload) { public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue,
payload.getHighlight().getSubjects().add(getHighlightValue()); final Result source, final Result target) {
}
@Override return new UpdateInfo<>(
public String getHighlightValueAsString() { Topic.fromPath("ENRICH/MISSING/SUBJECT/" + highlightValue.getLeft()),
return getHighlightValue(); highlightValue, source, target,
(p, pair) -> p.getSubjects().add(pair.getRight()),
pair -> pair.getLeft() + "::" + pair.getRight());
} }
} }

View File

@ -5,28 +5,29 @@ import java.util.Arrays;
import java.util.List; import java.util.List;
import eu.dnetlib.broker.objects.Instance; import eu.dnetlib.broker.objects.Instance;
import eu.dnetlib.broker.objects.OpenAireEventPayload; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMoreOpenAccess extends UpdateInfo<Instance> { public class EnrichMoreOpenAccess extends UpdateMatcher<Instance> {
public static List<EnrichMoreOpenAccess> findUpdates(final Result source, final Result target) { public EnrichMoreOpenAccess() {
super(true);
}
@Override
protected List<UpdateInfo<Instance>> findUpdates(final Result source, final Result target) {
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
return Arrays.asList(); return Arrays.asList();
} }
private EnrichMoreOpenAccess(final Instance highlightValue, final float trust) {
super("ENRICH/MORE/OPENACCESS_VERSION", highlightValue, trust);
}
@Override @Override
public void compileHighlight(final OpenAireEventPayload payload) { public UpdateInfo<Instance> generateUpdateInfo(final Instance highlightValue, final Result source,
payload.getHighlight().getInstances().add(getHighlightValue()); final Result target) {
} return new UpdateInfo<>(
Topic.ENRICH_MORE_OA_VERSION,
@Override highlightValue, source, target,
public String getHighlightValueAsString() { (p, i) -> p.getInstances().add(i),
return getHighlightValue().getUrl(); Instance::getUrl);
} }
} }

View File

@ -4,29 +4,29 @@ package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import eu.dnetlib.broker.objects.OpenAireEventPayload;
import eu.dnetlib.broker.objects.Pid; import eu.dnetlib.broker.objects.Pid;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMorePid extends UpdateInfo<Pid> { public class EnrichMorePid extends UpdateMatcher<Pid> {
public static List<EnrichMorePid> findUpdates(final Result source, final Result target) { public EnrichMorePid() {
super(true);
}
@Override
protected List<UpdateInfo<Pid>> findUpdates(final Result source, final Result target) {
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
return Arrays.asList(); return Arrays.asList();
} }
private EnrichMorePid(final Pid highlightValue, final float trust) {
super("ENRICH/MORE/PID", highlightValue, trust);
}
@Override @Override
public void compileHighlight(final OpenAireEventPayload payload) { public UpdateInfo<Pid> generateUpdateInfo(final Pid highlightValue, final Result source, final Result target) {
payload.getHighlight().getPids().add(getHighlightValue()); return new UpdateInfo<>(
} Topic.ENRICH_MORE_PID,
highlightValue, source, target,
@Override (p, pid) -> p.getPids().add(pid),
public String getHighlightValueAsString() { pid -> pid.getType() + "::" + pid.getValue());
return getHighlightValue().getType() + "::" + getHighlightValue().getValue();
} }
} }

View File

@ -4,12 +4,19 @@ package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import eu.dnetlib.broker.objects.OpenAireEventPayload; import org.apache.commons.lang3.tuple.Pair;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class EnrichMoreSubject extends UpdateInfo<String> { public class EnrichMoreSubject extends UpdateMatcher<Pair<String, String>> {
public static List<EnrichMoreSubject> findUpdates(final Result source, final Result target) { public EnrichMoreSubject() {
super(true);
}
@Override
protected List<UpdateInfo<Pair<String, String>>> findUpdates(final Result source, final Result target) {
// MESHEUROPMC // MESHEUROPMC
// ARXIV // ARXIV
// JEL // JEL
@ -19,18 +26,15 @@ public class EnrichMoreSubject extends UpdateInfo<String> {
return Arrays.asList(); return Arrays.asList();
} }
private EnrichMoreSubject(final String subjectClassification, final String highlightValue, final float trust) {
super("ENRICH/MORE/SUBJECT/" + subjectClassification, highlightValue, trust);
}
@Override @Override
public void compileHighlight(final OpenAireEventPayload payload) { public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue,
payload.getHighlight().getSubjects().add(getHighlightValue()); final Result source, final Result target) {
}
@Override return new UpdateInfo<>(
public String getHighlightValueAsString() { Topic.fromPath("ENRICH/MORE/SUBJECT/" + highlightValue.getLeft()),
return getHighlightValue(); highlightValue, source, target,
(p, pair) -> p.getSubjects().add(pair.getRight()),
pair -> pair.getLeft() + "::" + pair.getRight());
} }
} }

View File

@ -1,36 +1,77 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import java.util.function.BiConsumer;
import java.util.function.Function;
import eu.dnetlib.broker.objects.OpenAireEventPayload; import eu.dnetlib.broker.objects.OpenAireEventPayload;
import eu.dnetlib.broker.objects.Publication;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.oaf.Result;
public abstract class UpdateInfo<T> { public final class UpdateInfo<T> {
private final String topic; private final Topic topic;
private final T highlightValue; private final T highlightValue;
private final Result source;
private final Result target;
private final BiConsumer<Publication, T> compileHighlight;
private final Function<T, String> highlightToString;
private final float trust; private final float trust;
protected UpdateInfo(final String topic, final T highlightValue, final float trust) { protected UpdateInfo(final Topic topic, final T highlightValue, final Result source, final Result target,
final BiConsumer<Publication, T> compileHighlight,
final Function<T, String> highlightToString) {
this.topic = topic; this.topic = topic;
this.highlightValue = highlightValue; this.highlightValue = highlightValue;
this.trust = trust; this.source = source;
this.target = target;
this.compileHighlight = compileHighlight;
this.highlightToString = highlightToString;
this.trust = calculateTrust(source, target);
} }
public T getHighlightValue() { public T getHighlightValue() {
return highlightValue; return highlightValue;
} }
public Result getSource() {
return source;
}
public Result getTarget() {
return target;
}
private float calculateTrust(final Result source, final Result target) {
// TODO
return 0.9f;
}
protected Topic getTopic() {
return topic;
}
public String getTopicPath() {
return topic.getPath();
}
public float getTrust() { public float getTrust() {
return trust; return trust;
} }
public String getTopic() { public void compileHighlight(final OpenAireEventPayload payload) {
return topic; compileHighlight.accept(payload.getHighlight(), getHighlightValue());
} }
abstract public void compileHighlight(OpenAireEventPayload payload); public String getHighlightValueAsString() {
return highlightToString.apply(getHighlightValue());
abstract public String getHighlightValueAsString(); }
} }

View File

@ -0,0 +1,63 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Result;
/**
 * Base class for matchers that compare a record against other records and
 * produce {@link UpdateInfo} objects describing possible updates.
 *
 * @param <T> the type of the highlight value carried by the produced updates
 */
public abstract class UpdateMatcher<T> {

	// true: every distinct update is returned; false: only the single most trusted one.
	private final boolean multipleUpdate;

	/**
	 * @param multipleUpdate whether {@link #searchUpdatesForRecord(Result, Result...)} may
	 *                       return more than one update for the same record
	 */
	public UpdateMatcher(final boolean multipleUpdate) {
		this.multipleUpdate = multipleUpdate;
	}

	/**
	 * Collects the updates that the other records suggest for {@code res}.
	 *
	 * <p>Every record in {@code others} that is not the same instance as {@code res} is used
	 * as a source. Updates are deduplicated by the MD5 hex digest of their highlight value
	 * rendered as a string; among duplicates the one with the highest trust is kept.
	 *
	 * @param res    the record to find updates for (the target)
	 * @param others the candidate source records; may include {@code res} itself, which is skipped
	 * @return all distinct updates when multiple updates are allowed, otherwise a single-element
	 *         list holding the update with the highest trust; empty when nothing was found
	 */
	public Collection<UpdateInfo<T>> searchUpdatesForRecord(final Result res, final Result... others) {

		final Map<String, UpdateInfo<T>> infoMap = new HashMap<>();

		for (final Result source : others) {
			if (source != res) {
				for (final UpdateInfo<T> info : findUpdates(source, res)) {
					final String s = DigestUtils.md5Hex(info.getHighlightValueAsString());
					final UpdateInfo<T> known = infoMap.get(s);
					// Store the first occurrence, and replace it only with a more trusted duplicate.
					if (known == null || known.getTrust() < info.getTrust()) {
						infoMap.put(s, info);
					}
				}
			}
		}

		final Collection<UpdateInfo<T>> values = infoMap.values();

		if (values.isEmpty() || multipleUpdate) {
			return values;
		}

		// Single-update mode: keep only the most trusted candidate (values is non-empty here).
		final UpdateInfo<T> best = values
			.stream()
			.max((o1, o2) -> Float.compare(o1.getTrust(), o2.getTrust()))
			.get();
		return Arrays.asList(best);
	}

	/**
	 * Finds the updates that {@code source} can contribute to {@code target}.
	 *
	 * @param source the record providing the information
	 * @param target the record that would receive the update
	 * @return the updates found
	 */
	protected abstract List<UpdateInfo<T>> findUpdates(Result source, Result target);

	/**
	 * Builds the {@link UpdateInfo} for a highlight value found in {@code source}
	 * and proposed for {@code target}.
	 *
	 * @param highlightValue the value to propose
	 * @param source         the record the value comes from
	 * @param target         the record the value is proposed for
	 * @return the update description
	 */
	protected abstract UpdateInfo<T> generateUpdateInfo(final T highlightValue, final Result source,
		final Result target);

	/**
	 * Tells whether a list of string fields carries no usable value.
	 *
	 * @param list the fields to inspect, possibly {@code null}
	 * @return {@code true} when the list is {@code null} or empty, or when its first
	 *         element is {@code null} or has a blank value
	 */
	protected static boolean isMissing(final List<Field<String>> list) {
		return list == null
			|| list.isEmpty()
			|| list.get(0) == null
			|| StringUtils.isBlank(list.get(0).getValue());
	}

}