forked from antonis.lempesis/dnet-hadoop
Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-hadoop
This commit is contained in:
commit
e7eb4f377e
|
@ -1,4 +1,4 @@
|
||||||
<workflow-app name="blacklisting" xmlns="uri:oozie:workflow:0.5">
|
<workflow-app name="blacklist_relations" xmlns="uri:oozie:workflow:0.5">
|
||||||
<parameters>
|
<parameters>
|
||||||
<property>
|
<property>
|
||||||
<name>postgresURL</name>
|
<name>postgresURL</name>
|
||||||
|
@ -102,7 +102,7 @@
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="copy_projects">
|
<action name="copy_project">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
<name-node>${nameNode}</name-node>
|
<name-node>${nameNode}</name-node>
|
||||||
|
@ -113,7 +113,7 @@
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="copy_datasources">
|
<action name="copy_datasource">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
<name-node>${nameNode}</name-node>
|
<name-node>${nameNode}</name-node>
|
||||||
|
|
|
@ -29,31 +29,32 @@ public class EventFactory {
|
||||||
"yyyy-MM-dd"
|
"yyyy-MM-dd"
|
||||||
};
|
};
|
||||||
|
|
||||||
public static Event newBrokerEvent(final Result source, final Result target, final UpdateInfo<?> updateInfo) {
|
public static Event newBrokerEvent(final UpdateInfo<?> updateInfo) {
|
||||||
|
|
||||||
final long now = new Date().getTime();
|
final long now = new Date().getTime();
|
||||||
|
|
||||||
final Event res = new Event();
|
final Event res = new Event();
|
||||||
|
|
||||||
final Map<String, Object> map = createMapFromResult(target, source, updateInfo);
|
final Map<String, Object> map = createMapFromResult(updateInfo);
|
||||||
|
|
||||||
final String payload = createPayload(target, updateInfo);
|
final String payload = createPayload(updateInfo);
|
||||||
|
|
||||||
final String eventId = calculateEventId(
|
final String eventId = calculateEventId(
|
||||||
updateInfo.getTopic(), target.getOriginalId().get(0), updateInfo.getHighlightValueAsString());
|
updateInfo.getTopicPath(), updateInfo.getTarget().getOriginalId().get(0),
|
||||||
|
updateInfo.getHighlightValueAsString());
|
||||||
|
|
||||||
res.setEventId(eventId);
|
res.setEventId(eventId);
|
||||||
res.setProducerId(PRODUCER_ID);
|
res.setProducerId(PRODUCER_ID);
|
||||||
res.setPayload(payload);
|
res.setPayload(payload);
|
||||||
res.setMap(map);
|
res.setMap(map);
|
||||||
res.setTopic(updateInfo.getTopic());
|
res.setTopic(updateInfo.getTopicPath());
|
||||||
res.setCreationDate(now);
|
res.setCreationDate(now);
|
||||||
res.setExpiryDate(calculateExpiryDate(now));
|
res.setExpiryDate(calculateExpiryDate(now));
|
||||||
res.setInstantMessage(false);
|
res.setInstantMessage(false);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String createPayload(final Result result, final UpdateInfo<?> updateInfo) {
|
private static String createPayload(final UpdateInfo<?> updateInfo) {
|
||||||
final OpenAireEventPayload payload = new OpenAireEventPayload();
|
final OpenAireEventPayload payload = new OpenAireEventPayload();
|
||||||
// TODO
|
// TODO
|
||||||
|
|
||||||
|
@ -62,32 +63,34 @@ public class EventFactory {
|
||||||
return payload.toJSON();
|
return payload.toJSON();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Map<String, Object> createMapFromResult(final Result oaf, final Result source,
|
private static Map<String, Object> createMapFromResult(final UpdateInfo<?> updateInfo) {
|
||||||
final UpdateInfo<?> updateInfo) {
|
|
||||||
final Map<String, Object> map = new HashMap<>();
|
final Map<String, Object> map = new HashMap<>();
|
||||||
|
|
||||||
final List<KeyValue> collectedFrom = oaf.getCollectedfrom();
|
final Result source = updateInfo.getSource();
|
||||||
|
final Result target = updateInfo.getTarget();
|
||||||
|
|
||||||
|
final List<KeyValue> collectedFrom = target.getCollectedfrom();
|
||||||
if (collectedFrom.size() == 1) {
|
if (collectedFrom.size() == 1) {
|
||||||
map.put("target_datasource_id", collectedFrom.get(0).getKey());
|
map.put("target_datasource_id", collectedFrom.get(0).getKey());
|
||||||
map.put("target_datasource_name", collectedFrom.get(0).getValue());
|
map.put("target_datasource_name", collectedFrom.get(0).getValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
final List<String> ids = oaf.getOriginalId();
|
final List<String> ids = target.getOriginalId();
|
||||||
if (ids.size() > 0) {
|
if (ids.size() > 0) {
|
||||||
map.put("target_publication_id", ids.get(0));
|
map.put("target_publication_id", ids.get(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
final List<StructuredProperty> titles = oaf.getTitle();
|
final List<StructuredProperty> titles = target.getTitle();
|
||||||
if (titles.size() > 0) {
|
if (titles.size() > 0) {
|
||||||
map.put("target_publication_title", titles.get(0));
|
map.put("target_publication_title", titles.get(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
final long date = parseDateTolong(oaf.getDateofacceptance().getValue());
|
final long date = parseDateTolong(target.getDateofacceptance().getValue());
|
||||||
if (date > 0) {
|
if (date > 0) {
|
||||||
map.put("target_dateofacceptance", date);
|
map.put("target_dateofacceptance", date);
|
||||||
}
|
}
|
||||||
|
|
||||||
final List<StructuredProperty> subjects = oaf.getSubject();
|
final List<StructuredProperty> subjects = target.getSubject();
|
||||||
if (subjects.size() > 0) {
|
if (subjects.size() > 0) {
|
||||||
map
|
map
|
||||||
.put(
|
.put(
|
||||||
|
@ -95,7 +98,7 @@ public class EventFactory {
|
||||||
subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList()));
|
subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
|
|
||||||
final List<Author> authors = oaf.getAuthor();
|
final List<Author> authors = target.getAuthor();
|
||||||
if (authors.size() > 0) {
|
if (authors.size() > 0) {
|
||||||
map
|
map
|
||||||
.put(
|
.put(
|
||||||
|
|
|
@ -0,0 +1,52 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.broker.model;
|
||||||
|
|
||||||
|
public enum Topic {
|
||||||
|
|
||||||
|
// ENRICHMENT MISSING
|
||||||
|
ENRICH_MISSING_OA_VERSION("ENRICH/MISSING/OPENACCESS_VERSION"), ENRICH_MISSING_ABSTRACT(
|
||||||
|
"ENRICH/MISSING/ABSTRACT"), ENRICH_MISSING_PUBLICATION_DATE(
|
||||||
|
"ENRICH/MISSING/PUBLICATION_DATE"), ENRICH_MISSING_PID(
|
||||||
|
"ENRICH/MISSING/PID"), ENRICH_MISSING_PROJECT("ENRICH/MISSING/PROJECT"), ENRICH_MISSING_SOFTWARE(
|
||||||
|
"ENRICH/MISSING/SOFTWARE"), ENRICH_MISSING_SUBJECT_MESHEUROPMC(
|
||||||
|
"ENRICH/MISSING/SUBJECT/MESHEUROPMC"), ENRICH_MISSING_SUBJECT_ARXIV(
|
||||||
|
"ENRICH/MISSING/SUBJECT/ARXIV"), ENRICH_MISSING_SUBJECT_JEL(
|
||||||
|
"ENRICH/MISSING/SUBJECT/JEL"), ENRICH_MISSING_SUBJECT_DDC(
|
||||||
|
"ENRICH/MISSING/SUBJECT/DDC"), ENRICH_MISSING_SUBJECT_ACM(
|
||||||
|
"ENRICH/MISSING/SUBJECT/ACM"), ENRICH_MISSING_SUBJECT_RVK(
|
||||||
|
"ENRICH/MISSING/SUBJECT/RVK"), ENRICH_MISSING_AUTHOR_ORCID(
|
||||||
|
"ENRICH/MISSING/AUTHOR/ORCID"),
|
||||||
|
|
||||||
|
// ENRICHMENT MORE
|
||||||
|
ENRICH_MORE_PID("ENRICH/MORE/PID"), ENRICH_MORE_OA_VERSION("ENRICH/MORE/OPENACCESS_VERSION"), ENRICH_MORE_ABSTRACT(
|
||||||
|
"ENRICH/MORE/ABSTRACT"), ENRICH_MORE_PUBLICATION_DATE("ENRICH/MORE/PUBLICATION_DATE"), ENRICH_MORE_PROJECT(
|
||||||
|
"ENRICH/MORE/PROJECT"), ENRICH_MORE_SUBJECT_MESHEUROPMC(
|
||||||
|
"ENRICH/MORE/SUBJECT/MESHEUROPMC"), ENRICH_MORE_SUBJECT_ARXIV(
|
||||||
|
"ENRICH/MORE/SUBJECT/ARXIV"), ENRICH_MORE_SUBJECT_JEL(
|
||||||
|
"ENRICH/MORE/SUBJECT/JEL"), ENRICH_MORE_SUBJECT_DDC(
|
||||||
|
"ENRICH/MORE/SUBJECT/DDC"), ENRICH_MORE_SUBJECT_ACM(
|
||||||
|
"ENRICH/MORE/SUBJECT/ACM"), ENRICH_MORE_SUBJECT_RVK("ENRICH/MORE/SUBJECT/RVK"),
|
||||||
|
|
||||||
|
// ADDITION
|
||||||
|
ADD_BY_PROJECT("ADD/BY_PROJECT");
|
||||||
|
|
||||||
|
Topic(final String path) {
|
||||||
|
this.path = path;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected String path;
|
||||||
|
|
||||||
|
public String getPath() {
|
||||||
|
return this.path;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Topic fromPath(final String path) {
|
||||||
|
for (final Topic t : Topic.values()) {
|
||||||
|
if (t.getPath().equals(path)) {
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -14,8 +14,6 @@ import org.apache.spark.sql.SparkSession;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.broker.model.Event;
|
import eu.dnetlib.dhp.broker.model.Event;
|
||||||
import eu.dnetlib.dhp.broker.model.EventFactory;
|
import eu.dnetlib.dhp.broker.model.EventFactory;
|
||||||
|
@ -30,6 +28,7 @@ import eu.dnetlib.dhp.broker.oa.util.EnrichMoreOpenAccess;
|
||||||
import eu.dnetlib.dhp.broker.oa.util.EnrichMorePid;
|
import eu.dnetlib.dhp.broker.oa.util.EnrichMorePid;
|
||||||
import eu.dnetlib.dhp.broker.oa.util.EnrichMoreSubject;
|
import eu.dnetlib.dhp.broker.oa.util.EnrichMoreSubject;
|
||||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||||
|
import eu.dnetlib.dhp.broker.oa.util.UpdateMatcher;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
|
@ -37,7 +36,16 @@ public class GenerateEventsApplication {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(GenerateEventsApplication.class);
|
private static final Logger log = LoggerFactory.getLogger(GenerateEventsApplication.class);
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final UpdateMatcher<?> enrichMissingAbstract = new EnrichMissingAbstract();
|
||||||
|
private static final UpdateMatcher<?> enrichMissingAuthorOrcid = new EnrichMissingAuthorOrcid();
|
||||||
|
private static final UpdateMatcher<?> enrichMissingOpenAccess = new EnrichMissingOpenAccess();
|
||||||
|
private static final UpdateMatcher<?> enrichMissingPid = new EnrichMissingPid();
|
||||||
|
private static final UpdateMatcher<?> enrichMissingProject = new EnrichMissingProject();
|
||||||
|
private static final UpdateMatcher<?> enrichMissingPublicationDate = new EnrichMissingPublicationDate();
|
||||||
|
private static final UpdateMatcher<?> enrichMissingSubject = new EnrichMissingSubject();
|
||||||
|
private static final UpdateMatcher<?> enrichMoreOpenAccess = new EnrichMoreOpenAccess();
|
||||||
|
private static final UpdateMatcher<?> enrichMorePid = new EnrichMorePid();
|
||||||
|
private static final UpdateMatcher<?> enrichMoreSubject = new EnrichMoreSubject();
|
||||||
|
|
||||||
public static void main(final String[] args) throws Exception {
|
public static void main(final String[] args) throws Exception {
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
@ -76,37 +84,22 @@ public class GenerateEventsApplication {
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<Event> generateEvents(final Result... children) {
|
private List<Event> generateEvents(final Result... children) {
|
||||||
final List<Event> list = new ArrayList<>();
|
|
||||||
|
|
||||||
for (final Result source : children) {
|
|
||||||
for (final Result target : children) {
|
|
||||||
if (source != target) {
|
|
||||||
list
|
|
||||||
.addAll(
|
|
||||||
findUpdates(source, target)
|
|
||||||
.stream()
|
|
||||||
.map(info -> EventFactory.newBrokerEvent(source, target, info))
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return list;
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<UpdateInfo<?>> findUpdates(final Result source, final Result target) {
|
|
||||||
final List<UpdateInfo<?>> list = new ArrayList<>();
|
final List<UpdateInfo<?>> list = new ArrayList<>();
|
||||||
list.addAll(EnrichMissingAbstract.findUpdates(source, target));
|
|
||||||
list.addAll(EnrichMissingAuthorOrcid.findUpdates(source, target));
|
for (final Result target : children) {
|
||||||
list.addAll(EnrichMissingOpenAccess.findUpdates(source, target));
|
list.addAll(enrichMissingAbstract.searchUpdatesForRecord(target, children));
|
||||||
list.addAll(EnrichMissingPid.findUpdates(source, target));
|
list.addAll(enrichMissingAuthorOrcid.searchUpdatesForRecord(target, children));
|
||||||
list.addAll(EnrichMissingProject.findUpdates(source, target));
|
list.addAll(enrichMissingOpenAccess.searchUpdatesForRecord(target, children));
|
||||||
list.addAll(EnrichMissingPublicationDate.findUpdates(source, target));
|
list.addAll(enrichMissingPid.searchUpdatesForRecord(target, children));
|
||||||
list.addAll(EnrichMissingSubject.findUpdates(source, target));
|
list.addAll(enrichMissingProject.searchUpdatesForRecord(target, children));
|
||||||
list.addAll(EnrichMoreOpenAccess.findUpdates(source, target));
|
list.addAll(enrichMissingPublicationDate.searchUpdatesForRecord(target, children));
|
||||||
list.addAll(EnrichMorePid.findUpdates(source, target));
|
list.addAll(enrichMissingSubject.searchUpdatesForRecord(target, children));
|
||||||
list.addAll(EnrichMoreSubject.findUpdates(source, target));
|
list.addAll(enrichMoreOpenAccess.searchUpdatesForRecord(target, children));
|
||||||
return list;
|
list.addAll(enrichMorePid.searchUpdatesForRecord(target, children));
|
||||||
|
list.addAll(enrichMoreSubject.searchUpdatesForRecord(target, children));
|
||||||
|
}
|
||||||
|
|
||||||
|
return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,31 +1,35 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.broker.oa.util;
|
package eu.dnetlib.dhp.broker.oa.util;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import eu.dnetlib.broker.objects.OpenAireEventPayload;
|
import eu.dnetlib.dhp.broker.model.Topic;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
public class EnrichMissingAbstract extends UpdateInfo<String> {
|
public class EnrichMissingAbstract extends UpdateMatcher<String> {
|
||||||
|
|
||||||
public static List<EnrichMissingAbstract> findUpdates(final Result source, final Result target) {
|
public EnrichMissingAbstract() {
|
||||||
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
|
super(false);
|
||||||
return Arrays.asList();
|
|
||||||
}
|
|
||||||
|
|
||||||
private EnrichMissingAbstract(final String highlightValue, final float trust) {
|
|
||||||
super("ENRICH/MISSING/ABSTRACT", highlightValue, trust);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void compileHighlight(final OpenAireEventPayload payload) {
|
protected List<UpdateInfo<String>> findUpdates(final Result source, final Result target) {
|
||||||
payload.getHighlight().getAbstracts().add(getHighlightValue());
|
if (isMissing(target.getDescription()) && !isMissing(source.getDescription())) {
|
||||||
|
return Arrays.asList(generateUpdateInfo(source.getDescription().get(0).getValue(), source, target));
|
||||||
|
}
|
||||||
|
return new ArrayList<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getHighlightValueAsString() {
|
public UpdateInfo<String> generateUpdateInfo(final String highlightValue, final Result source,
|
||||||
return getHighlightValue();
|
final Result target) {
|
||||||
|
return new UpdateInfo<>(
|
||||||
|
Topic.ENRICH_MISSING_ABSTRACT,
|
||||||
|
highlightValue, source, target,
|
||||||
|
(p, s) -> p.getAbstracts().add(s),
|
||||||
|
s -> s);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,28 +4,30 @@ package eu.dnetlib.dhp.broker.oa.util;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import eu.dnetlib.broker.objects.OpenAireEventPayload;
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.broker.model.Topic;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
public class EnrichMissingAuthorOrcid extends UpdateInfo<String> {
|
public class EnrichMissingAuthorOrcid extends UpdateMatcher<Pair<String, String>> {
|
||||||
|
|
||||||
public static List<EnrichMissingAuthorOrcid> findUpdates(final Result source, final Result target) {
|
public EnrichMissingAuthorOrcid() {
|
||||||
|
super(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<UpdateInfo<Pair<String, String>>> findUpdates(final Result source, final Result target) {
|
||||||
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
|
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
|
||||||
return Arrays.asList();
|
return Arrays.asList();
|
||||||
}
|
}
|
||||||
|
|
||||||
private EnrichMissingAuthorOrcid(final String highlightValue, final float trust) {
|
|
||||||
super("ENRICH/MISSING/AUTHOR/ORCID", highlightValue, trust);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void compileHighlight(final OpenAireEventPayload payload) {
|
public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue,
|
||||||
// TODO
|
final Result source, final Result target) {
|
||||||
|
return new UpdateInfo<>(
|
||||||
|
Topic.ENRICH_MISSING_AUTHOR_ORCID,
|
||||||
|
highlightValue, source, target,
|
||||||
|
(p, pair) -> p.getCreators().add(pair.getLeft() + " - ORCID: " + pair.getRight()),
|
||||||
|
pair -> pair.getLeft() + "::" + pair.getRight());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getHighlightValueAsString() {
|
|
||||||
return getHighlightValue();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,28 +5,29 @@ import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import eu.dnetlib.broker.objects.Instance;
|
import eu.dnetlib.broker.objects.Instance;
|
||||||
import eu.dnetlib.broker.objects.OpenAireEventPayload;
|
import eu.dnetlib.dhp.broker.model.Topic;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
public class EnrichMissingOpenAccess extends UpdateInfo<Instance> {
|
public class EnrichMissingOpenAccess extends UpdateMatcher<Instance> {
|
||||||
|
|
||||||
|
public EnrichMissingOpenAccess() {
|
||||||
|
super(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<UpdateInfo<Instance>> findUpdates(final Result source, final Result target) {
|
||||||
|
|
||||||
public static List<EnrichMissingOpenAccess> findUpdates(final Result source, final Result target) {
|
|
||||||
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
|
|
||||||
return Arrays.asList();
|
return Arrays.asList();
|
||||||
}
|
}
|
||||||
|
|
||||||
private EnrichMissingOpenAccess(final Instance highlightValue, final float trust) {
|
|
||||||
super("ENRICH/MISSING/OPENACCESS_VERSION", highlightValue, trust);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void compileHighlight(final OpenAireEventPayload payload) {
|
public UpdateInfo<Instance> generateUpdateInfo(final Instance highlightValue, final Result source,
|
||||||
payload.getHighlight().getInstances().add(getHighlightValue());
|
final Result target) {
|
||||||
}
|
return new UpdateInfo<>(
|
||||||
|
Topic.ENRICH_MISSING_OA_VERSION,
|
||||||
@Override
|
highlightValue, source, target,
|
||||||
public String getHighlightValueAsString() {
|
(p, i) -> p.getInstances().add(i),
|
||||||
return getHighlightValue().getUrl();
|
Instance::getUrl);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,29 +4,29 @@ package eu.dnetlib.dhp.broker.oa.util;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import eu.dnetlib.broker.objects.OpenAireEventPayload;
|
|
||||||
import eu.dnetlib.broker.objects.Pid;
|
import eu.dnetlib.broker.objects.Pid;
|
||||||
|
import eu.dnetlib.dhp.broker.model.Topic;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
public class EnrichMissingPid extends UpdateInfo<Pid> {
|
public class EnrichMissingPid extends UpdateMatcher<Pid> {
|
||||||
|
|
||||||
public static List<EnrichMissingPid> findUpdates(final Result source, final Result target) {
|
public EnrichMissingPid() {
|
||||||
|
super(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<UpdateInfo<Pid>> findUpdates(final Result source, final Result target) {
|
||||||
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
|
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
|
||||||
return Arrays.asList();
|
return Arrays.asList();
|
||||||
}
|
}
|
||||||
|
|
||||||
private EnrichMissingPid(final Pid highlightValue, final float trust) {
|
|
||||||
super("ENRICH/MISSING/PID", highlightValue, trust);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void compileHighlight(final OpenAireEventPayload payload) {
|
public UpdateInfo<Pid> generateUpdateInfo(final Pid highlightValue, final Result source, final Result target) {
|
||||||
payload.getHighlight().getPids().add(getHighlightValue());
|
return new UpdateInfo<>(
|
||||||
}
|
Topic.ENRICH_MISSING_PID,
|
||||||
|
highlightValue, source, target,
|
||||||
@Override
|
(p, pid) -> p.getPids().add(pid),
|
||||||
public String getHighlightValueAsString() {
|
pid -> pid.getType() + "::" + pid.getValue());
|
||||||
return getHighlightValue().getType() + "::" + getHighlightValue().getValue();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,30 +4,30 @@ package eu.dnetlib.dhp.broker.oa.util;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import eu.dnetlib.broker.objects.OpenAireEventPayload;
|
|
||||||
import eu.dnetlib.broker.objects.Project;
|
import eu.dnetlib.broker.objects.Project;
|
||||||
|
import eu.dnetlib.dhp.broker.model.Topic;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
public class EnrichMissingProject extends UpdateInfo<Project> {
|
public class EnrichMissingProject extends UpdateMatcher<Project> {
|
||||||
|
|
||||||
public static List<EnrichMissingProject> findUpdates(final Result source, final Result target) {
|
public EnrichMissingProject() {
|
||||||
|
super(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<UpdateInfo<Project>> findUpdates(final Result source, final Result target) {
|
||||||
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
|
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
|
||||||
return Arrays.asList();
|
return Arrays.asList();
|
||||||
}
|
}
|
||||||
|
|
||||||
private EnrichMissingProject(final Project highlightValue, final float trust) {
|
|
||||||
super("ENRICH/MISSING/PROJECT", highlightValue, trust);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void compileHighlight(final OpenAireEventPayload payload) {
|
public UpdateInfo<Project> generateUpdateInfo(final Project highlightValue, final Result source,
|
||||||
payload.getHighlight().getProjects().add(getHighlightValue());
|
final Result target) {
|
||||||
}
|
return new UpdateInfo<>(
|
||||||
|
Topic.ENRICH_MISSING_PROJECT,
|
||||||
@Override
|
highlightValue, source, target,
|
||||||
public String getHighlightValueAsString() {
|
(p, prj) -> p.getProjects().add(prj),
|
||||||
return getHighlightValue().getFunder() + "::" + getHighlightValue().getFundingProgram()
|
prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode());
|
||||||
+ getHighlightValue().getCode();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,28 +4,29 @@ package eu.dnetlib.dhp.broker.oa.util;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import eu.dnetlib.broker.objects.OpenAireEventPayload;
|
import eu.dnetlib.dhp.broker.model.Topic;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
public class EnrichMissingPublicationDate extends UpdateInfo<String> {
|
public class EnrichMissingPublicationDate extends UpdateMatcher<String> {
|
||||||
|
|
||||||
public static List<EnrichMissingPublicationDate> findUpdates(final Result source, final Result target) {
|
public EnrichMissingPublicationDate() {
|
||||||
|
super(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<UpdateInfo<String>> findUpdates(final Result source, final Result target) {
|
||||||
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
|
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
|
||||||
return Arrays.asList();
|
return Arrays.asList();
|
||||||
}
|
}
|
||||||
|
|
||||||
private EnrichMissingPublicationDate(final String highlightValue, final float trust) {
|
|
||||||
super("ENRICH/MISSING/PUBLICATION_DATE", highlightValue, trust);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void compileHighlight(final OpenAireEventPayload payload) {
|
public UpdateInfo<String> generateUpdateInfo(final String highlightValue, final Result source,
|
||||||
payload.getHighlight().setPublicationdate(getHighlightValue());
|
final Result target) {
|
||||||
}
|
return new UpdateInfo<>(
|
||||||
|
Topic.ENRICH_MISSING_PUBLICATION_DATE,
|
||||||
@Override
|
highlightValue, source, target,
|
||||||
public String getHighlightValueAsString() {
|
(p, date) -> p.setPublicationdate(date),
|
||||||
return getHighlightValue();
|
s -> s);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,12 +4,19 @@ package eu.dnetlib.dhp.broker.oa.util;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import eu.dnetlib.broker.objects.OpenAireEventPayload;
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.broker.model.Topic;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
public class EnrichMissingSubject extends UpdateInfo<String> {
|
public class EnrichMissingSubject extends UpdateMatcher<Pair<String, String>> {
|
||||||
|
|
||||||
public static List<EnrichMissingSubject> findUpdates(final Result source, final Result target) {
|
public EnrichMissingSubject() {
|
||||||
|
super(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<UpdateInfo<Pair<String, String>>> findUpdates(final Result source, final Result target) {
|
||||||
// MESHEUROPMC
|
// MESHEUROPMC
|
||||||
// ARXIV
|
// ARXIV
|
||||||
// JEL
|
// JEL
|
||||||
|
@ -19,18 +26,15 @@ public class EnrichMissingSubject extends UpdateInfo<String> {
|
||||||
return Arrays.asList();
|
return Arrays.asList();
|
||||||
}
|
}
|
||||||
|
|
||||||
private EnrichMissingSubject(final String subjectClassification, final String highlightValue, final float trust) {
|
|
||||||
super("ENRICH/MISSING/SUBJECT/" + subjectClassification, highlightValue, trust);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void compileHighlight(final OpenAireEventPayload payload) {
|
public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue,
|
||||||
payload.getHighlight().getSubjects().add(getHighlightValue());
|
final Result source, final Result target) {
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
return new UpdateInfo<>(
|
||||||
public String getHighlightValueAsString() {
|
Topic.fromPath("ENRICH/MISSING/SUBJECT/" + highlightValue.getLeft()),
|
||||||
return getHighlightValue();
|
highlightValue, source, target,
|
||||||
|
(p, pair) -> p.getSubjects().add(pair.getRight()),
|
||||||
|
pair -> pair.getLeft() + "::" + pair.getRight());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,28 +5,29 @@ import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import eu.dnetlib.broker.objects.Instance;
|
import eu.dnetlib.broker.objects.Instance;
|
||||||
import eu.dnetlib.broker.objects.OpenAireEventPayload;
|
import eu.dnetlib.dhp.broker.model.Topic;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
public class EnrichMoreOpenAccess extends UpdateInfo<Instance> {
|
public class EnrichMoreOpenAccess extends UpdateMatcher<Instance> {
|
||||||
|
|
||||||
public static List<EnrichMoreOpenAccess> findUpdates(final Result source, final Result target) {
|
public EnrichMoreOpenAccess() {
|
||||||
|
super(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<UpdateInfo<Instance>> findUpdates(final Result source, final Result target) {
|
||||||
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
|
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
|
||||||
return Arrays.asList();
|
return Arrays.asList();
|
||||||
}
|
}
|
||||||
|
|
||||||
private EnrichMoreOpenAccess(final Instance highlightValue, final float trust) {
|
|
||||||
super("ENRICH/MORE/OPENACCESS_VERSION", highlightValue, trust);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void compileHighlight(final OpenAireEventPayload payload) {
|
public UpdateInfo<Instance> generateUpdateInfo(final Instance highlightValue, final Result source,
|
||||||
payload.getHighlight().getInstances().add(getHighlightValue());
|
final Result target) {
|
||||||
}
|
return new UpdateInfo<>(
|
||||||
|
Topic.ENRICH_MORE_OA_VERSION,
|
||||||
@Override
|
highlightValue, source, target,
|
||||||
public String getHighlightValueAsString() {
|
(p, i) -> p.getInstances().add(i),
|
||||||
return getHighlightValue().getUrl();
|
Instance::getUrl);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,29 +4,29 @@ package eu.dnetlib.dhp.broker.oa.util;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import eu.dnetlib.broker.objects.OpenAireEventPayload;
|
|
||||||
import eu.dnetlib.broker.objects.Pid;
|
import eu.dnetlib.broker.objects.Pid;
|
||||||
|
import eu.dnetlib.dhp.broker.model.Topic;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
public class EnrichMorePid extends UpdateInfo<Pid> {
|
public class EnrichMorePid extends UpdateMatcher<Pid> {
|
||||||
|
|
||||||
public static List<EnrichMorePid> findUpdates(final Result source, final Result target) {
|
public EnrichMorePid() {
|
||||||
|
super(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<UpdateInfo<Pid>> findUpdates(final Result source, final Result target) {
|
||||||
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
|
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
|
||||||
return Arrays.asList();
|
return Arrays.asList();
|
||||||
}
|
}
|
||||||
|
|
||||||
private EnrichMorePid(final Pid highlightValue, final float trust) {
|
|
||||||
super("ENRICH/MORE/PID", highlightValue, trust);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void compileHighlight(final OpenAireEventPayload payload) {
|
public UpdateInfo<Pid> generateUpdateInfo(final Pid highlightValue, final Result source, final Result target) {
|
||||||
payload.getHighlight().getPids().add(getHighlightValue());
|
return new UpdateInfo<>(
|
||||||
}
|
Topic.ENRICH_MORE_PID,
|
||||||
|
highlightValue, source, target,
|
||||||
@Override
|
(p, pid) -> p.getPids().add(pid),
|
||||||
public String getHighlightValueAsString() {
|
pid -> pid.getType() + "::" + pid.getValue());
|
||||||
return getHighlightValue().getType() + "::" + getHighlightValue().getValue();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,12 +4,19 @@ package eu.dnetlib.dhp.broker.oa.util;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import eu.dnetlib.broker.objects.OpenAireEventPayload;
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.broker.model.Topic;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
public class EnrichMoreSubject extends UpdateInfo<String> {
|
public class EnrichMoreSubject extends UpdateMatcher<Pair<String, String>> {
|
||||||
|
|
||||||
public static List<EnrichMoreSubject> findUpdates(final Result source, final Result target) {
|
public EnrichMoreSubject() {
|
||||||
|
super(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<UpdateInfo<Pair<String, String>>> findUpdates(final Result source, final Result target) {
|
||||||
// MESHEUROPMC
|
// MESHEUROPMC
|
||||||
// ARXIV
|
// ARXIV
|
||||||
// JEL
|
// JEL
|
||||||
|
@ -19,18 +26,15 @@ public class EnrichMoreSubject extends UpdateInfo<String> {
|
||||||
return Arrays.asList();
|
return Arrays.asList();
|
||||||
}
|
}
|
||||||
|
|
||||||
private EnrichMoreSubject(final String subjectClassification, final String highlightValue, final float trust) {
|
|
||||||
super("ENRICH/MORE/SUBJECT/" + subjectClassification, highlightValue, trust);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void compileHighlight(final OpenAireEventPayload payload) {
|
public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue,
|
||||||
payload.getHighlight().getSubjects().add(getHighlightValue());
|
final Result source, final Result target) {
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
return new UpdateInfo<>(
|
||||||
public String getHighlightValueAsString() {
|
Topic.fromPath("ENRICH/MORE/SUBJECT/" + highlightValue.getLeft()),
|
||||||
return getHighlightValue();
|
highlightValue, source, target,
|
||||||
|
(p, pair) -> p.getSubjects().add(pair.getRight()),
|
||||||
|
pair -> pair.getLeft() + "::" + pair.getRight());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,36 +1,77 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.broker.oa.util;
|
package eu.dnetlib.dhp.broker.oa.util;
|
||||||
|
|
||||||
|
import java.util.function.BiConsumer;
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
import eu.dnetlib.broker.objects.OpenAireEventPayload;
|
import eu.dnetlib.broker.objects.OpenAireEventPayload;
|
||||||
|
import eu.dnetlib.broker.objects.Publication;
|
||||||
|
import eu.dnetlib.dhp.broker.model.Topic;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
public abstract class UpdateInfo<T> {
|
public final class UpdateInfo<T> {
|
||||||
|
|
||||||
private final String topic;
|
private final Topic topic;
|
||||||
|
|
||||||
private final T highlightValue;
|
private final T highlightValue;
|
||||||
|
|
||||||
|
private final Result source;
|
||||||
|
|
||||||
|
private final Result target;
|
||||||
|
|
||||||
|
private final BiConsumer<Publication, T> compileHighlight;
|
||||||
|
|
||||||
|
private final Function<T, String> highlightToString;
|
||||||
|
|
||||||
private final float trust;
|
private final float trust;
|
||||||
|
|
||||||
protected UpdateInfo(final String topic, final T highlightValue, final float trust) {
|
protected UpdateInfo(final Topic topic, final T highlightValue, final Result source, final Result target,
|
||||||
|
final BiConsumer<Publication, T> compileHighlight,
|
||||||
|
final Function<T, String> highlightToString) {
|
||||||
this.topic = topic;
|
this.topic = topic;
|
||||||
this.highlightValue = highlightValue;
|
this.highlightValue = highlightValue;
|
||||||
this.trust = trust;
|
this.source = source;
|
||||||
|
this.target = target;
|
||||||
|
this.compileHighlight = compileHighlight;
|
||||||
|
this.highlightToString = highlightToString;
|
||||||
|
this.trust = calculateTrust(source, target);
|
||||||
}
|
}
|
||||||
|
|
||||||
public T getHighlightValue() {
|
public T getHighlightValue() {
|
||||||
return highlightValue;
|
return highlightValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Result getSource() {
|
||||||
|
return source;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Result getTarget() {
|
||||||
|
return target;
|
||||||
|
}
|
||||||
|
|
||||||
|
private float calculateTrust(final Result source, final Result target) {
|
||||||
|
// TODO
|
||||||
|
return 0.9f;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Topic getTopic() {
|
||||||
|
return topic;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTopicPath() {
|
||||||
|
return topic.getPath();
|
||||||
|
}
|
||||||
|
|
||||||
public float getTrust() {
|
public float getTrust() {
|
||||||
return trust;
|
return trust;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getTopic() {
|
public void compileHighlight(final OpenAireEventPayload payload) {
|
||||||
return topic;
|
compileHighlight.accept(payload.getHighlight(), getHighlightValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
abstract public void compileHighlight(OpenAireEventPayload payload);
|
public String getHighlightValueAsString() {
|
||||||
|
return highlightToString.apply(getHighlightValue());
|
||||||
abstract public String getHighlightValueAsString();
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,63 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.broker.oa.util;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.commons.codec.digest.DigestUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
|
public abstract class UpdateMatcher<T> {
|
||||||
|
|
||||||
|
private final boolean multipleUpdate;
|
||||||
|
|
||||||
|
public UpdateMatcher(final boolean multipleUpdate) {
|
||||||
|
this.multipleUpdate = multipleUpdate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Collection<UpdateInfo<T>> searchUpdatesForRecord(final Result res, final Result... others) {
|
||||||
|
|
||||||
|
final Map<String, UpdateInfo<T>> infoMap = new HashMap<>();
|
||||||
|
|
||||||
|
for (final Result source : others) {
|
||||||
|
if (source != res) {
|
||||||
|
for (final UpdateInfo<T> info : findUpdates(source, res)) {
|
||||||
|
final String s = DigestUtils.md5Hex(info.getHighlightValueAsString());
|
||||||
|
if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) {
|
||||||
|
} else {
|
||||||
|
infoMap.put(s, info);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final Collection<UpdateInfo<T>> values = infoMap.values();
|
||||||
|
|
||||||
|
if (values.isEmpty() || multipleUpdate) {
|
||||||
|
return values;
|
||||||
|
} else {
|
||||||
|
final UpdateInfo<T> v = values
|
||||||
|
.stream()
|
||||||
|
.sorted((o1, o2) -> Float.compare(o1.getTrust(), o2.getTrust()))
|
||||||
|
.findFirst()
|
||||||
|
.get();
|
||||||
|
return Arrays.asList(v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract List<UpdateInfo<T>> findUpdates(Result source, Result target);
|
||||||
|
|
||||||
|
protected abstract UpdateInfo<T> generateUpdateInfo(final T highlightValue, final Result source,
|
||||||
|
final Result target);
|
||||||
|
|
||||||
|
protected static boolean isMissing(final List<Field<String>> list) {
|
||||||
|
return list == null || list.isEmpty() || StringUtils.isBlank(list.get(0).getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1,6 +1,7 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.bulktag;
|
package eu.dnetlib.dhp.bulktag;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir;
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
@ -84,6 +85,7 @@ public class SparkBulkTagJob {
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
|
removeOutputDir(spark, outputPath);
|
||||||
execBulkTag(spark, inputPath, outputPath, protoMappingParams, resultClazz, cc);
|
execBulkTag(spark, inputPath, outputPath, protoMappingParams, resultClazz, cc);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
@ -69,13 +69,16 @@ public class SparkCountryPropagationJob {
|
||||||
runWithSparkSession(
|
runWithSparkSession(
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> execPropagation(
|
spark -> {
|
||||||
|
removeOutputDir(spark, outputPath);
|
||||||
|
execPropagation(
|
||||||
spark,
|
spark,
|
||||||
sourcePath,
|
sourcePath,
|
||||||
preparedInfoPath,
|
preparedInfoPath,
|
||||||
outputPath,
|
outputPath,
|
||||||
resultClazz,
|
resultClazz,
|
||||||
saveGraph));
|
saveGraph);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <R extends Result> void execPropagation(
|
private static <R extends Result> void execPropagation(
|
||||||
|
|
|
@ -74,9 +74,7 @@ public class PrepareResultOrcidAssociationStep1 {
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
if (isTest(parser)) {
|
|
||||||
removeOutputDir(spark, outputPath);
|
removeOutputDir(spark, outputPath);
|
||||||
}
|
|
||||||
prepareInfo(
|
prepareInfo(
|
||||||
spark, inputRelationPath, inputResultPath, outputResultPath, resultClazz, allowedsemrel);
|
spark, inputRelationPath, inputResultPath, outputResultPath, resultClazz, allowedsemrel);
|
||||||
});
|
});
|
||||||
|
|
|
@ -50,9 +50,7 @@ public class PrepareResultOrcidAssociationStep2 {
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
if (isTest(parser)) {
|
|
||||||
removeOutputDir(spark, outputPath);
|
removeOutputDir(spark, outputPath);
|
||||||
}
|
|
||||||
mergeInfo(spark, inputPath, outputPath);
|
mergeInfo(spark, inputPath, outputPath);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,11 +70,10 @@ public class SparkOrcidToResultFromSemRelJob {
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
if (isTest(parser)) {
|
|
||||||
removeOutputDir(spark, outputPath);
|
removeOutputDir(spark, outputPath);
|
||||||
}
|
if (saveGraph) {
|
||||||
if (saveGraph)
|
|
||||||
execPropagation(spark, possibleUpdates, inputPath, outputPath, resultClazz);
|
execPropagation(spark, possibleUpdates, inputPath, outputPath, resultClazz);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -60,6 +60,8 @@ public class PrepareProjectResultsAssociation {
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
|
removeOutputDir(spark, potentialUpdatePath);
|
||||||
|
removeOutputDir(spark, alreadyLinkedPath);
|
||||||
prepareResultProjProjectResults(
|
prepareResultProjProjectResults(
|
||||||
spark,
|
spark,
|
||||||
inputPath,
|
inputPath,
|
||||||
|
|
|
@ -55,9 +55,7 @@ public class PrepareResultCommunitySet {
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
if (isTest(parser)) {
|
|
||||||
removeOutputDir(spark, outputPath);
|
removeOutputDir(spark, outputPath);
|
||||||
}
|
|
||||||
prepareInfo(spark, inputPath, outputPath, organizationMap);
|
prepareInfo(spark, inputPath, outputPath, organizationMap);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
@ -68,11 +68,10 @@ public class SparkResultToCommunityFromOrganizationJob {
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
if (isTest(parser)) {
|
|
||||||
removeOutputDir(spark, outputPath);
|
removeOutputDir(spark, outputPath);
|
||||||
}
|
if (saveGraph) {
|
||||||
if (saveGraph)
|
|
||||||
execPropagation(spark, inputPath, outputPath, resultClazz, possibleupdatespath);
|
execPropagation(spark, inputPath, outputPath, resultClazz, possibleupdatespath);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -58,30 +58,15 @@ public class PrepareResultInstRepoAssociation {
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
readNeededResources(spark, inputPath);
|
readNeededResources(spark, inputPath);
|
||||||
|
|
||||||
|
removeOutputDir(spark, datasourceOrganizationPath);
|
||||||
prepareDatasourceOrganization(spark, datasourceOrganizationPath);
|
prepareDatasourceOrganization(spark, datasourceOrganizationPath);
|
||||||
|
|
||||||
|
removeOutputDir(spark, alreadyLinkedPath);
|
||||||
prepareAlreadyLinkedAssociation(spark, alreadyLinkedPath);
|
prepareAlreadyLinkedAssociation(spark, alreadyLinkedPath);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void prepareAlreadyLinkedAssociation(
|
|
||||||
SparkSession spark, String alreadyLinkedPath) {
|
|
||||||
String query = "Select source resultId, collect_set(target) organizationSet "
|
|
||||||
+ "from relation "
|
|
||||||
+ "where datainfo.deletedbyinference = false "
|
|
||||||
+ "and relClass = '"
|
|
||||||
+ RELATION_RESULT_ORGANIZATION_REL_CLASS
|
|
||||||
+ "' "
|
|
||||||
+ "group by source";
|
|
||||||
|
|
||||||
spark
|
|
||||||
.sql(query)
|
|
||||||
.as(Encoders.bean(ResultOrganizationSet.class))
|
|
||||||
// TODO retry to stick with datasets
|
|
||||||
.toJavaRDD()
|
|
||||||
.map(r -> OBJECT_MAPPER.writeValueAsString(r))
|
|
||||||
.saveAsTextFile(alreadyLinkedPath, GzipCodec.class);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void readNeededResources(SparkSession spark, String inputPath) {
|
private static void readNeededResources(SparkSession spark, String inputPath) {
|
||||||
Dataset<Datasource> datasource = readPath(spark, inputPath + "/datasource", Datasource.class);
|
Dataset<Datasource> datasource = readPath(spark, inputPath + "/datasource", Datasource.class);
|
||||||
datasource.createOrReplaceTempView("datasource");
|
datasource.createOrReplaceTempView("datasource");
|
||||||
|
@ -119,4 +104,24 @@ public class PrepareResultInstRepoAssociation {
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.json(datasourceOrganizationPath);
|
.json(datasourceOrganizationPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void prepareAlreadyLinkedAssociation(
|
||||||
|
SparkSession spark, String alreadyLinkedPath) {
|
||||||
|
String query = "Select source resultId, collect_set(target) organizationSet "
|
||||||
|
+ "from relation "
|
||||||
|
+ "where datainfo.deletedbyinference = false "
|
||||||
|
+ "and relClass = '"
|
||||||
|
+ RELATION_RESULT_ORGANIZATION_REL_CLASS
|
||||||
|
+ "' "
|
||||||
|
+ "group by source";
|
||||||
|
|
||||||
|
spark
|
||||||
|
.sql(query)
|
||||||
|
.as(Encoders.bean(ResultOrganizationSet.class))
|
||||||
|
// TODO retry to stick with datasets
|
||||||
|
.toJavaRDD()
|
||||||
|
.map(r -> OBJECT_MAPPER.writeValueAsString(r))
|
||||||
|
.saveAsTextFile(alreadyLinkedPath, GzipCodec.class);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -83,10 +83,8 @@ public class SparkResultToOrganizationFromIstRepoJob {
|
||||||
conf,
|
conf,
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
if (isTest(parser)) {
|
|
||||||
removeOutputDir(spark, outputPath);
|
removeOutputDir(spark, outputPath);
|
||||||
}
|
if (saveGraph) {
|
||||||
if (saveGraph)
|
|
||||||
execPropagation(
|
execPropagation(
|
||||||
spark,
|
spark,
|
||||||
datasourceorganization,
|
datasourceorganization,
|
||||||
|
@ -94,6 +92,7 @@ public class SparkResultToOrganizationFromIstRepoJob {
|
||||||
inputPath,
|
inputPath,
|
||||||
outputPath,
|
outputPath,
|
||||||
resultClazz);
|
resultClazz);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,17 @@
|
||||||
</property>
|
</property>
|
||||||
</parameters>
|
</parameters>
|
||||||
|
|
||||||
|
<global>
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>${oozieActionShareLibForSpark2}</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
</global>
|
||||||
|
|
||||||
<start to="reset_outputpath"/>
|
<start to="reset_outputpath"/>
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
|
@ -42,8 +53,6 @@
|
||||||
|
|
||||||
<action name="copy_relation">
|
<action name="copy_relation">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
<arg>${nameNode}/${sourcePath}/relation</arg>
|
||||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
<arg>${nameNode}/${outputPath}/relation</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -53,8 +62,6 @@
|
||||||
|
|
||||||
<action name="copy_organization">
|
<action name="copy_organization">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
<arg>${nameNode}/${sourcePath}/organization</arg>
|
||||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
<arg>${nameNode}/${outputPath}/organization</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -64,8 +71,6 @@
|
||||||
|
|
||||||
<action name="copy_projects">
|
<action name="copy_projects">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
<arg>${nameNode}/${sourcePath}/project</arg>
|
||||||
<arg>${nameNode}/${outputPath}/project</arg>
|
<arg>${nameNode}/${outputPath}/project</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -75,8 +80,6 @@
|
||||||
|
|
||||||
<action name="copy_datasources">
|
<action name="copy_datasources">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
||||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
<arg>${nameNode}/${outputPath}/datasource</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -95,8 +98,6 @@
|
||||||
|
|
||||||
<action name="join_bulktag_publication">
|
<action name="join_bulktag_publication">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<master>yarn-cluster</master>
|
<master>yarn-cluster</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>bulkTagging-publication</name>
|
<name>bulkTagging-publication</name>
|
||||||
|
@ -124,8 +125,6 @@
|
||||||
|
|
||||||
<action name="join_bulktag_dataset">
|
<action name="join_bulktag_dataset">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<master>yarn-cluster</master>
|
<master>yarn-cluster</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>bulkTagging-dataset</name>
|
<name>bulkTagging-dataset</name>
|
||||||
|
@ -153,8 +152,6 @@
|
||||||
|
|
||||||
<action name="join_bulktag_otherresearchproduct">
|
<action name="join_bulktag_otherresearchproduct">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<master>yarn-cluster</master>
|
<master>yarn-cluster</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>bulkTagging-orp</name>
|
<name>bulkTagging-orp</name>
|
||||||
|
@ -182,8 +179,6 @@
|
||||||
|
|
||||||
<action name="join_bulktag_software">
|
<action name="join_bulktag_software">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<master>yarn-cluster</master>
|
<master>yarn-cluster</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>bulkTagging-software</name>
|
<name>bulkTagging-software</name>
|
||||||
|
|
|
@ -19,6 +19,17 @@
|
||||||
|
|
||||||
</parameters>
|
</parameters>
|
||||||
|
|
||||||
|
<global>
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>${oozieActionShareLibForSpark2}</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
</global>
|
||||||
|
|
||||||
<start to="reset_outputpath"/>
|
<start to="reset_outputpath"/>
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
|
@ -43,8 +54,6 @@
|
||||||
|
|
||||||
<action name="copy_relation">
|
<action name="copy_relation">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
<arg>${nameNode}/${sourcePath}/relation</arg>
|
||||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
<arg>${nameNode}/${outputPath}/relation</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -54,18 +63,15 @@
|
||||||
|
|
||||||
<action name="copy_organization">
|
<action name="copy_organization">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
<arg>${nameNode}/${sourcePath}/organization</arg>
|
||||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
<arg>${nameNode}/${outputPath}/organization</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
<ok to="copy_wait"/>
|
<ok to="copy_wait"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="copy_projects">
|
<action name="copy_projects">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
<arg>${nameNode}/${sourcePath}/project</arg>
|
||||||
<arg>${nameNode}/${outputPath}/project</arg>
|
<arg>${nameNode}/${outputPath}/project</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -75,8 +81,6 @@
|
||||||
|
|
||||||
<action name="copy_datasources">
|
<action name="copy_datasources">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
||||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
<arg>${nameNode}/${outputPath}/datasource</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
|
|
@ -57,6 +57,7 @@
|
||||||
<ok to="copy_wait"/>
|
<ok to="copy_wait"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="copy_projects">
|
<action name="copy_projects">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
@ -81,7 +82,6 @@
|
||||||
|
|
||||||
<join name="copy_wait" to="fork_prepare_assoc_step1"/>
|
<join name="copy_wait" to="fork_prepare_assoc_step1"/>
|
||||||
|
|
||||||
|
|
||||||
<fork name="fork_prepare_assoc_step1">
|
<fork name="fork_prepare_assoc_step1">
|
||||||
<path start="join_prepare_publication"/>
|
<path start="join_prepare_publication"/>
|
||||||
<path start="join_prepare_dataset"/>
|
<path start="join_prepare_dataset"/>
|
||||||
|
@ -230,8 +230,8 @@
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="fork-join-exec-propagation"/>
|
<ok to="fork-join-exec-propagation"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<fork name="fork-join-exec-propagation">
|
<fork name="fork-join-exec-propagation">
|
||||||
<path start="join_propagate_publication"/>
|
<path start="join_propagate_publication"/>
|
||||||
<path start="join_propagate_dataset"/>
|
<path start="join_propagate_dataset"/>
|
||||||
|
@ -271,6 +271,7 @@
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="join_propagate_dataset">
|
<action name="join_propagate_dataset">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
|
@ -302,6 +303,7 @@
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="join_propagate_otherresearchproduct">
|
<action name="join_propagate_otherresearchproduct">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
|
@ -333,6 +335,7 @@
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="join_propagate_software">
|
<action name="join_propagate_software">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
|
|
|
@ -14,6 +14,17 @@
|
||||||
</property>
|
</property>
|
||||||
</parameters>
|
</parameters>
|
||||||
|
|
||||||
|
<global>
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>${oozieActionShareLibForSpark2}</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
</global>
|
||||||
|
|
||||||
<start to="reset_outputpath"/>
|
<start to="reset_outputpath"/>
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
|
@ -42,8 +53,6 @@
|
||||||
|
|
||||||
<action name="copy_relation">
|
<action name="copy_relation">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
<arg>${nameNode}/${sourcePath}/relation</arg>
|
||||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
<arg>${nameNode}/${outputPath}/relation</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -53,8 +62,6 @@
|
||||||
|
|
||||||
<action name="copy_publication">
|
<action name="copy_publication">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/publication</arg>
|
<arg>${nameNode}/${sourcePath}/publication</arg>
|
||||||
<arg>${nameNode}/${outputPath}/publication</arg>
|
<arg>${nameNode}/${outputPath}/publication</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -64,8 +71,6 @@
|
||||||
|
|
||||||
<action name="copy_dataset">
|
<action name="copy_dataset">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/dataset</arg>
|
<arg>${nameNode}/${sourcePath}/dataset</arg>
|
||||||
<arg>${nameNode}/${outputPath}/dataset</arg>
|
<arg>${nameNode}/${outputPath}/dataset</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -75,8 +80,6 @@
|
||||||
|
|
||||||
<action name="copy_orp">
|
<action name="copy_orp">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
|
<arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
|
||||||
<arg>${nameNode}/${outputPath}/otherresearchproduct</arg>
|
<arg>${nameNode}/${outputPath}/otherresearchproduct</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -86,28 +89,24 @@
|
||||||
|
|
||||||
<action name="copy_software">
|
<action name="copy_software">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/software</arg>
|
<arg>${nameNode}/${sourcePath}/software</arg>
|
||||||
<arg>${nameNode}/${outputPath}/software</arg>
|
<arg>${nameNode}/${outputPath}/software</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
<ok to="wait"/>
|
<ok to="wait"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="copy_organization">
|
<action name="copy_organization">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
<arg>${nameNode}/${sourcePath}/organization</arg>
|
||||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
<arg>${nameNode}/${outputPath}/organization</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
<ok to="wait"/>
|
<ok to="wait"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="copy_projects">
|
<action name="copy_projects">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
<arg>${nameNode}/${sourcePath}/project</arg>
|
||||||
<arg>${nameNode}/${outputPath}/project</arg>
|
<arg>${nameNode}/${outputPath}/project</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -117,8 +116,6 @@
|
||||||
|
|
||||||
<action name="copy_datasources">
|
<action name="copy_datasources">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
||||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
<arg>${nameNode}/${outputPath}/datasource</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
|
|
@ -14,6 +14,17 @@
|
||||||
</property>
|
</property>
|
||||||
</parameters>
|
</parameters>
|
||||||
|
|
||||||
|
<global>
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>${oozieActionShareLibForSpark2}</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
</global>
|
||||||
|
|
||||||
<start to="reset_outputpath"/>
|
<start to="reset_outputpath"/>
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
|
@ -38,8 +49,6 @@
|
||||||
|
|
||||||
<action name="copy_relation">
|
<action name="copy_relation">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
<arg>${nameNode}/${sourcePath}/relation</arg>
|
||||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
<arg>${nameNode}/${outputPath}/relation</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -49,8 +58,6 @@
|
||||||
|
|
||||||
<action name="copy_organization">
|
<action name="copy_organization">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
<arg>${nameNode}/${sourcePath}/organization</arg>
|
||||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
<arg>${nameNode}/${outputPath}/organization</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -60,8 +67,6 @@
|
||||||
|
|
||||||
<action name="copy_projects">
|
<action name="copy_projects">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
<arg>${nameNode}/${sourcePath}/project</arg>
|
||||||
<arg>${nameNode}/${outputPath}/project</arg>
|
<arg>${nameNode}/${outputPath}/project</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -71,8 +76,6 @@
|
||||||
|
|
||||||
<action name="copy_datasources">
|
<action name="copy_datasources">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
||||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
<arg>${nameNode}/${outputPath}/datasource</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -101,8 +104,8 @@
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||||
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
<arg>--organizationtoresultcommunitymap</arg><arg>${organizationtoresultcommunitymap}</arg>
|
<arg>--organizationtoresultcommunitymap</arg><arg>${organizationtoresultcommunitymap}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="fork-join-exec-propagation"/>
|
<ok to="fork-join-exec-propagation"/>
|
||||||
|
@ -136,9 +139,9 @@
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
|
@ -165,9 +168,9 @@
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
|
@ -194,9 +197,9 @@
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
|
@ -223,9 +226,9 @@
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
|
|
|
@ -10,6 +10,17 @@
|
||||||
</property>
|
</property>
|
||||||
</parameters>
|
</parameters>
|
||||||
|
|
||||||
|
<global>
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>${oozieActionShareLibForSpark2}</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
|
</global>
|
||||||
|
|
||||||
<start to="reset_outputpath"/>
|
<start to="reset_outputpath"/>
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
|
@ -38,8 +49,6 @@
|
||||||
|
|
||||||
<action name="copy_relation">
|
<action name="copy_relation">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
<arg>${nameNode}/${sourcePath}/relation</arg>
|
||||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
<arg>${nameNode}/${outputPath}/relation</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -49,8 +58,6 @@
|
||||||
|
|
||||||
<action name="copy_publication">
|
<action name="copy_publication">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/publication</arg>
|
<arg>${nameNode}/${sourcePath}/publication</arg>
|
||||||
<arg>${nameNode}/${outputPath}/publication</arg>
|
<arg>${nameNode}/${outputPath}/publication</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -60,8 +67,6 @@
|
||||||
|
|
||||||
<action name="copy_dataset">
|
<action name="copy_dataset">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/dataset</arg>
|
<arg>${nameNode}/${sourcePath}/dataset</arg>
|
||||||
<arg>${nameNode}/${outputPath}/dataset</arg>
|
<arg>${nameNode}/${outputPath}/dataset</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -71,8 +76,6 @@
|
||||||
|
|
||||||
<action name="copy_orp">
|
<action name="copy_orp">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
|
<arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
|
||||||
<arg>${nameNode}/${outputPath}/otherresearchproduct</arg>
|
<arg>${nameNode}/${outputPath}/otherresearchproduct</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -82,8 +85,6 @@
|
||||||
|
|
||||||
<action name="copy_software">
|
<action name="copy_software">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/software</arg>
|
<arg>${nameNode}/${sourcePath}/software</arg>
|
||||||
<arg>${nameNode}/${outputPath}/software</arg>
|
<arg>${nameNode}/${outputPath}/software</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -93,8 +94,6 @@
|
||||||
|
|
||||||
<action name="copy_organization">
|
<action name="copy_organization">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
<arg>${nameNode}/${sourcePath}/organization</arg>
|
||||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
<arg>${nameNode}/${outputPath}/organization</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -104,8 +103,6 @@
|
||||||
|
|
||||||
<action name="copy_projects">
|
<action name="copy_projects">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
<arg>${nameNode}/${sourcePath}/project</arg>
|
||||||
<arg>${nameNode}/${outputPath}/project</arg>
|
<arg>${nameNode}/${outputPath}/project</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -115,8 +112,6 @@
|
||||||
|
|
||||||
<action name="copy_datasources">
|
<action name="copy_datasources">
|
||||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
||||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
<arg>${nameNode}/${outputPath}/datasource</arg>
|
||||||
</distcp>
|
</distcp>
|
||||||
|
@ -125,6 +120,7 @@
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<join name="wait" to="prepare_result_organization_association"/>
|
<join name="wait" to="prepare_result_organization_association"/>
|
||||||
|
|
||||||
<action name="prepare_result_organization_association">
|
<action name="prepare_result_organization_association">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
|
@ -176,12 +172,12 @@
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
||||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
||||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||||
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||||
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -206,12 +202,12 @@
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
||||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
||||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||||
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||||
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -236,12 +232,12 @@
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
||||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
||||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||||
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||||
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -266,12 +262,12 @@
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
||||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
||||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||||
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
|
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||||
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -127,7 +127,6 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final List<Oaf> oafs = new ArrayList<>();
|
final List<Oaf> oafs = new ArrayList<>();
|
||||||
|
|
||||||
switch (type.toLowerCase()) {
|
switch (type.toLowerCase()) {
|
||||||
case "":
|
|
||||||
case "publication":
|
case "publication":
|
||||||
final Publication p = new Publication();
|
final Publication p = new Publication();
|
||||||
populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
||||||
|
@ -138,7 +137,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
case "dataset":
|
case "dataset":
|
||||||
final Dataset d = new Dataset();
|
final Dataset d = new Dataset();
|
||||||
populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
||||||
d.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
|
d.setResulttype(DATASET_DEFAULT_RESULTTYPE);
|
||||||
d.setStoragedate(prepareDatasetStorageDate(doc, info));
|
d.setStoragedate(prepareDatasetStorageDate(doc, info));
|
||||||
d.setDevice(prepareDatasetDevice(doc, info));
|
d.setDevice(prepareDatasetDevice(doc, info));
|
||||||
d.setSize(prepareDatasetSize(doc, info));
|
d.setSize(prepareDatasetSize(doc, info));
|
||||||
|
@ -158,6 +157,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info));
|
s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info));
|
||||||
oafs.add(s);
|
oafs.add(s);
|
||||||
break;
|
break;
|
||||||
|
case "":
|
||||||
case "otherresearchproducts":
|
case "otherresearchproducts":
|
||||||
default:
|
default:
|
||||||
final OtherResearchProduct o = new OtherResearchProduct();
|
final OtherResearchProduct o = new OtherResearchProduct();
|
||||||
|
|
Loading…
Reference in New Issue