enrichment steps #38
|
@ -10,6 +10,7 @@ public class Dataset extends Result implements Serializable {
|
|||
|
||||
private Field<String> storagedate;
|
||||
|
||||
// candidate for removal
|
||||
private Field<String> device;
|
||||
|
||||
private Field<String> size;
|
||||
|
|
|
@ -2,8 +2,10 @@
|
|||
package eu.dnetlib.dhp.schema.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class Result extends OafEntity implements Serializable {
|
||||
|
||||
|
@ -248,13 +250,24 @@ public class Result extends OafEntity implements Serializable {
|
|||
StructuredProperty baseMainTitle = null;
|
||||
if (title != null) {
|
||||
baseMainTitle = getMainTitle(title);
|
||||
title.remove(baseMainTitle);
|
||||
if (baseMainTitle != null) {
|
||||
final StructuredProperty p = baseMainTitle;
|
||||
title = title.stream().filter(t -> t != p).collect(Collectors.toList());
|
||||
}
|
||||
//
|
||||
//
|
||||
// title.remove(baseMainTitle);
|
||||
}
|
||||
|
||||
// Pick the main title of the incoming record and detach it from the incoming
// record's own title list, mirroring what is done for this record's title list.
StructuredProperty newMainTitle = null;
if (r.getTitle() != null) {
	newMainTitle = getMainTitle(r.getTitle());
	if (newMainTitle != null) {
		final StructuredProperty p = newMainTitle;
		// Filter by identity on r's titles (not on this.title): filtering this.title
		// left r's list untouched and failed with an NPE when this.title was null.
		r.setTitle(r.getTitle().stream().filter(t -> t != p).collect(Collectors.toList()));
	}
}
|
||||
|
||||
if (newMainTitle != null && compareTrust(this, r) < 0)
|
||||
|
|
|
@ -10,8 +10,10 @@ public class Software extends Result implements Serializable {
|
|||
|
||||
private List<Field<String>> documentationUrl;
|
||||
|
||||
// candidate for removal
|
||||
private List<StructuredProperty> license;
|
||||
|
||||
// candidate for removal
|
||||
private Field<String> codeRepositoryUrl;
|
||||
|
||||
private Qualifier programmingLanguage;
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
<dependency>
|
||||
<groupId>eu.dnetlib</groupId>
|
||||
<artifactId>dnet-openaire-broker-common</artifactId>
|
||||
<version>[2.0.0,3.0.0)</version>
|
||||
<version>[2.0.1,3.0.0)</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
|
|
@ -12,7 +12,6 @@ import org.apache.commons.codec.digest.DigestUtils;
|
|||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.time.DateUtils;
|
||||
|
||||
import eu.dnetlib.broker.objects.OpenAireEventPayload;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||
|
@ -37,15 +36,13 @@ public class EventFactory {
|
|||
|
||||
final Map<String, Object> map = createMapFromResult(updateInfo);
|
||||
|
||||
final String payload = createPayload(updateInfo);
|
||||
|
||||
final String eventId = calculateEventId(
|
||||
updateInfo.getTopicPath(), updateInfo.getTarget().getOriginalId().get(0),
|
||||
updateInfo.getHighlightValueAsString());
|
||||
|
||||
res.setEventId(eventId);
|
||||
res.setProducerId(PRODUCER_ID);
|
||||
res.setPayload(payload);
|
||||
res.setPayload(updateInfo.asBrokerPayload().toJSON());
|
||||
res.setMap(map);
|
||||
res.setTopic(updateInfo.getTopicPath());
|
||||
res.setCreationDate(now);
|
||||
|
@ -54,15 +51,6 @@ public class EventFactory {
|
|||
return res;
|
||||
}
|
||||
|
||||
private static String createPayload(final UpdateInfo<?> updateInfo) {
|
||||
final OpenAireEventPayload payload = new OpenAireEventPayload();
|
||||
// TODO
|
||||
|
||||
updateInfo.compileHighlight(payload);
|
||||
|
||||
return payload.toJSON();
|
||||
}
|
||||
|
||||
private static Map<String, Object> createMapFromResult(final UpdateInfo<?> updateInfo) {
|
||||
final Map<String, Object> map = new HashMap<>();
|
||||
|
||||
|
|
|
@ -43,21 +43,21 @@ import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublic
|
|||
import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsSupplementedBy;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationIsSupplementedTo;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.relatedPublications.EnrichMissingPublicationReferences;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.relatedSoftware.EnrichMissingSoftware;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.relatedSoftware.EnrichMoreSoftware;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingAbstract;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingAuthorOrcid;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingOpenAccess;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPid;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingPublicationDate;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingSoftware;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMissingSubject;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreOpenAccess;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMorePid;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSoftware;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSubject;
|
||||
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
@ -128,10 +128,13 @@ public class GenerateEventsApplication {
|
|||
|
||||
// RDDs are immutable: union() returns a NEW RDD, so the result has to be
// re-assigned, otherwise only the initial empty RDD would ever be saved.
JavaRDD<Event> eventsRdd = sc.emptyRDD();

for (final Class<? extends Result> r1 : BrokerConstants.RESULT_CLASSES) {
	eventsRdd = eventsRdd.union(generateSimpleEvents(spark, graphPath, r1));

	for (final Class<? extends Result> r2 : BrokerConstants.RESULT_CLASSES) {
		final JavaRDD<Event> relationEvents = generateRelationEvents(spark, graphPath, r1, r2);
		// Skip pairs for which no relation events were produced.
		if (relationEvents != null) {
			eventsRdd = eventsRdd.union(relationEvents);
		}
	}
}

eventsRdd.saveAsTextFile(eventsPath, GzipCodec.class);
|
||||
});
|
||||
|
@ -187,6 +190,38 @@ public class GenerateEventsApplication {
|
|||
return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private static <SRC extends Result, TRG extends OafEntity> JavaRDD<Event> generateRelationEvents(
|
||||
final SparkSession spark,
|
||||
final String graphPath,
|
||||
final Class<SRC> sourceClass,
|
||||
final Class<TRG> targetClass) {
|
||||
|
||||
final Dataset<SRC> sources = readPath(
|
||||
spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), sourceClass)
|
||||
.filter(r -> r.getDataInfo().getDeletedbyinference());
|
||||
|
||||
final Dataset<TRG> targets = readPath(
|
||||
spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), targetClass);
|
||||
|
||||
final Dataset<Relation> mergedRels = readPath(spark, graphPath + "/relation", Relation.class)
|
||||
.filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS));
|
||||
|
||||
final Dataset<Relation> rels = readPath(spark, graphPath + "/relation", Relation.class)
|
||||
.filter(r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS));
|
||||
|
||||
if (targetClass == Project.class) {
|
||||
// TODO join using: generateProjectsEvents
|
||||
} else if (targetClass == Software.class) {
|
||||
// TODO join using: generateSoftwareEvents
|
||||
} else if (targetClass == Publication.class) {
|
||||
// TODO join using: generatePublicationRelatedEvents
|
||||
} else if (targetClass == eu.dnetlib.dhp.schema.oaf.Dataset.class) {
|
||||
// TODO join using: generateDatasetRelatedEvents
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private List<Event> generateProjectsEvents(final Collection<Pair<Result, List<Project>>> childrenWithProjects) {
|
||||
final List<UpdateInfo<?>> list = new ArrayList<>();
|
||||
|
||||
|
|
|
@ -3,11 +3,13 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedProjects;
|
|||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
@ -22,8 +24,17 @@ public class EnrichMissingProject
|
|||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Project>> findUpdates(final Pair<Result, List<Project>> source,
|
||||
final Pair<Result, List<Project>> target) {
|
||||
// TODO
|
||||
return Arrays.asList();
|
||||
|
||||
if (source.getRight().isEmpty()) {
|
||||
return Arrays.asList();
|
||||
} else {
|
||||
return target
|
||||
.getRight()
|
||||
.stream()
|
||||
.map(ConversionUtils::oafProjectToBrokerProject)
|
||||
.map(p -> generateUpdateInfo(p, source, target))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -1,13 +1,15 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedProjects;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
@ -21,8 +23,20 @@ public class EnrichMoreProject extends UpdateMatcher<Pair<Result, List<Project>>
|
|||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Project>> findUpdates(final Pair<Result, List<Project>> source,
|
||||
final Pair<Result, List<Project>> target) {
|
||||
// TODO
|
||||
return Arrays.asList();
|
||||
|
||||
final Set<String> existingProjects = source
|
||||
.getRight()
|
||||
.stream()
|
||||
.map(Project::getId)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
return target
|
||||
.getRight()
|
||||
.stream()
|
||||
.filter(p -> !existingProjects.contains(p.getId()))
|
||||
.map(ConversionUtils::oafProjectToBrokerProject)
|
||||
.map(p -> generateUpdateInfo(p, source, target))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -39,7 +39,7 @@ public abstract class AbstractEnrichMissingPublication
|
|||
.getRight()
|
||||
.stream()
|
||||
.filter(d -> !existingPublications.contains(d.getId()))
|
||||
.map(ConversionUtils::oafPublicationToBrokerPublication)
|
||||
.map(ConversionUtils::oafResultToBrokerPublication)
|
||||
.map(i -> generateUpdateInfo(i, source, target))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
|
|
|
@ -1,13 +1,15 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.simple;
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedSoftware;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
|
@ -23,8 +25,17 @@ public class EnrichMissingSoftware
|
|||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Software>> findUpdates(
|
||||
final Pair<Result, List<Software>> source,
|
||||
final Pair<Result, List<Software>> target) {
|
||||
// TODO
|
||||
return Arrays.asList();
|
||||
|
||||
if (source.getRight().isEmpty()) {
|
||||
return Arrays.asList();
|
||||
} else {
|
||||
return target
|
||||
.getRight()
|
||||
.stream()
|
||||
.map(ConversionUtils::oafSoftwareToBrokerSoftware)
|
||||
.map(p -> generateUpdateInfo(p, source, target))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
|
@ -1,13 +1,15 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.simple;
|
||||
package eu.dnetlib.dhp.broker.oa.matchers.relatedSoftware;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
|
@ -23,8 +25,20 @@ public class EnrichMoreSoftware
|
|||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Software>> findUpdates(
|
||||
final Pair<Result, List<Software>> source,
|
||||
final Pair<Result, List<Software>> target) {
|
||||
// TODO
|
||||
return Arrays.asList();
|
||||
|
||||
final Set<String> existingSoftwares = source
|
||||
.getRight()
|
||||
.stream()
|
||||
.map(Software::getId)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
return target
|
||||
.getRight()
|
||||
.stream()
|
||||
.filter(p -> !existingSoftwares.contains(p.getId()))
|
||||
.map(ConversionUtils::oafSoftwareToBrokerSoftware)
|
||||
.map(p -> generateUpdateInfo(p, source, target))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@Override
|
|
@ -37,7 +37,7 @@ public class EnrichMissingOpenAccess extends UpdateMatcher<Result, Instance> {
|
|||
.stream()
|
||||
.filter(i -> i.getAccessright().getClassid().equals(BrokerConstants.OPEN_ACCESS))
|
||||
.map(ConversionUtils::oafInstanceToBrokerInstances)
|
||||
.flatMap(s -> s)
|
||||
.flatMap(List::stream)
|
||||
.map(i -> generateUpdateInfo(i, source, target))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ public class EnrichMoreOpenAccess extends UpdateMatcher<Result, Instance> {
|
|||
.stream()
|
||||
.filter(i -> i.getAccessright().getClassid().equals(BrokerConstants.OPEN_ACCESS))
|
||||
.map(ConversionUtils::oafInstanceToBrokerInstances)
|
||||
.flatMap(s -> s)
|
||||
.flatMap(List::stream)
|
||||
.filter(i -> !urls.contains(i.getUrl()))
|
||||
.map(i -> generateUpdateInfo(i, source, target))
|
||||
.collect(Collectors.toList());
|
||||
|
|
|
@ -1,9 +1,21 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
|
||||
public class BrokerConstants {
|
||||
|
||||
public final static String OPEN_ACCESS = "OPEN";
|
||||
public final static String IS_MERGED_IN_CLASS = "isMergedIn";
|
||||
public static final String OPEN_ACCESS = "OPEN";
|
||||
public static final String IS_MERGED_IN_CLASS = "isMergedIn";
|
||||
|
||||
public static final List<Class<? extends Result>> RESULT_CLASSES = Arrays
|
||||
.asList(Publication.class, Dataset.class, Software.class, OtherResearchProduct.class);
|
||||
|
||||
}
|
||||
|
|
|
@ -1,49 +1,184 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.util;
|
||||
|
||||
import java.util.stream.Stream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.DocumentHelper;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.broker.objects.Instance;
|
||||
import eu.dnetlib.broker.objects.Pid;
|
||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.ExternalReference;
|
||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||
import eu.dnetlib.dhp.schema.oaf.Instance;
|
||||
import eu.dnetlib.dhp.schema.oaf.Journal;
|
||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
|
||||
public class ConversionUtils {
|
||||
|
||||
public static Stream<Instance> oafInstanceToBrokerInstances(final eu.dnetlib.dhp.schema.oaf.Instance i) {
|
||||
private static final Logger log = LoggerFactory.getLogger(ConversionUtils.class);
|
||||
|
||||
public static List<eu.dnetlib.broker.objects.Instance> oafInstanceToBrokerInstances(final Instance i) {
|
||||
return i.getUrl().stream().map(url -> {
|
||||
final Instance r = new Instance();
|
||||
r.setUrl(url);
|
||||
r.setInstancetype(i.getInstancetype().getClassid());
|
||||
r.setLicense(BrokerConstants.OPEN_ACCESS);
|
||||
r.setHostedby(i.getHostedby().getValue());
|
||||
return r;
|
||||
});
|
||||
return new eu.dnetlib.broker.objects.Instance()
|
||||
.setUrl(url)
|
||||
.setInstancetype(i.getInstancetype().getClassid())
|
||||
.setLicense(BrokerConstants.OPEN_ACCESS)
|
||||
.setHostedby(i.getHostedby().getValue());
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static Pid oafPidToBrokerPid(final StructuredProperty sp) {
|
||||
final Pid pid = new Pid();
|
||||
pid.setValue(sp.getValue());
|
||||
pid.setType(sp.getQualifier().getClassid());
|
||||
return pid;
|
||||
return sp != null ? new Pid()
|
||||
.setValue(sp.getValue())
|
||||
.setType(sp.getQualifier().getClassid()) : null;
|
||||
}
|
||||
|
||||
public static final Pair<String, String> oafSubjectToPair(final StructuredProperty sp) {
|
||||
return Pair.of(sp.getQualifier().getClassid(), sp.getValue());
|
||||
return sp != null ? Pair.of(sp.getQualifier().getClassid(), sp.getValue()) : null;
|
||||
}
|
||||
|
||||
public static final eu.dnetlib.broker.objects.Dataset oafDatasetToBrokerDataset(final Dataset d) {
|
||||
final eu.dnetlib.broker.objects.Dataset res = new eu.dnetlib.broker.objects.Dataset();
|
||||
// TODO
|
||||
return d != null ? new eu.dnetlib.broker.objects.Dataset()
|
||||
.setOriginalId(d.getOriginalId().get(0))
|
||||
.setTitles(structPropList(d.getTitle()))
|
||||
.setPids(d.getPid().stream().map(ConversionUtils::oafPidToBrokerPid).collect(Collectors.toList()))
|
||||
.setInstances(
|
||||
d
|
||||
.getInstance()
|
||||
.stream()
|
||||
.map(ConversionUtils::oafInstanceToBrokerInstances)
|
||||
.flatMap(List::stream)
|
||||
.collect(Collectors.toList()))
|
||||
.setCollectedFrom(d.getCollectedfrom().stream().map(KeyValue::getValue).collect(Collectors.toList()))
|
||||
: null;
|
||||
}
|
||||
|
||||
public static final eu.dnetlib.broker.objects.Publication oafResultToBrokerPublication(final Result result) {
|
||||
|
||||
return result != null ? new eu.dnetlib.broker.objects.Publication()
|
||||
.setOriginalId(result.getOriginalId().get(0))
|
||||
.setTitles(structPropList(result.getTitle()))
|
||||
.setAbstracts(fieldList(result.getDescription()))
|
||||
.setLanguage(result.getLanguage().getClassid())
|
||||
.setSubjects(structPropList(result.getSubject()))
|
||||
.setCreators(result.getAuthor().stream().map(Author::getFullname).collect(Collectors.toList()))
|
||||
.setPublicationdate(result.getDateofcollection())
|
||||
.setPublisher(fieldValue(result.getPublisher()))
|
||||
.setEmbargoenddate(fieldValue(result.getEmbargoenddate()))
|
||||
.setContributor(fieldList(result.getContributor()))
|
||||
.setJournal(
|
||||
result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null)
|
||||
.setCollectedFrom(result.getCollectedfrom().stream().map(KeyValue::getValue).collect(Collectors.toList()))
|
||||
.setPids(result.getPid().stream().map(ConversionUtils::oafPidToBrokerPid).collect(Collectors.toList()))
|
||||
.setInstances(
|
||||
result
|
||||
.getInstance()
|
||||
.stream()
|
||||
.map(ConversionUtils::oafInstanceToBrokerInstances)
|
||||
.flatMap(List::stream)
|
||||
.collect(Collectors.toList()))
|
||||
.setExternalReferences(
|
||||
result
|
||||
.getExternalReference()
|
||||
.stream()
|
||||
.map(ConversionUtils::oafExtRefToBrokerExtRef)
|
||||
.collect(Collectors.toList()))
|
||||
: null;
|
||||
}
|
||||
|
||||
private static eu.dnetlib.broker.objects.Journal oafJournalToBrokerJournal(final Journal journal) {
|
||||
return journal != null ? new eu.dnetlib.broker.objects.Journal()
|
||||
.setName(journal.getName())
|
||||
.setIssn(journal.getIssnPrinted())
|
||||
.setEissn(journal.getIssnOnline())
|
||||
.setLissn(journal.getIssnLinking()) : null;
|
||||
}
|
||||
|
||||
private static eu.dnetlib.broker.objects.ExternalReference oafExtRefToBrokerExtRef(final ExternalReference ref) {
|
||||
return ref != null ? new eu.dnetlib.broker.objects.ExternalReference()
|
||||
.setRefidentifier(ref.getRefidentifier())
|
||||
.setSitename(ref.getSitename())
|
||||
.setType(ref.getQualifier().getClassid())
|
||||
.setUrl(ref.getUrl())
|
||||
: null;
|
||||
}
|
||||
|
||||
public static final eu.dnetlib.broker.objects.Project oafProjectToBrokerProject(final Project p) {
|
||||
if (p == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final eu.dnetlib.broker.objects.Project res = new eu.dnetlib.broker.objects.Project()
|
||||
.setTitle(fieldValue(p.getTitle()))
|
||||
.setAcronym(fieldValue(p.getAcronym()))
|
||||
.setCode(fieldValue(p.getCode()));
|
||||
|
||||
final String ftree = fieldValue(p.getFundingtree());
|
||||
if (StringUtils.isNotBlank(ftree)) {
|
||||
try {
|
||||
final Document fdoc = DocumentHelper.parseText(ftree);
|
||||
res.setFunder(fdoc.valueOf("/fundingtree/funder/shortname"));
|
||||
res.setJurisdiction(fdoc.valueOf("/fundingtree/funder/jurisdiction"));
|
||||
res.setFundingProgram(fdoc.valueOf("//funding_level_0/name"));
|
||||
} catch (final DocumentException e) {
|
||||
log.error("Error in record " + p.getId() + ": invalid fundingtree: " + ftree);
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
public static final eu.dnetlib.broker.objects.Publication oafPublicationToBrokerPublication(final Publication d) {
|
||||
final eu.dnetlib.broker.objects.Publication res = new eu.dnetlib.broker.objects.Publication();
|
||||
// TODO
|
||||
return res;
|
||||
public static final eu.dnetlib.broker.objects.Software oafSoftwareToBrokerSoftware(final Software sw) {
|
||||
return sw != null ? new eu.dnetlib.broker.objects.Software()
|
||||
.setName(structPropValue(sw.getTitle()))
|
||||
.setDescription(fieldValue(sw.getDescription()))
|
||||
.setRepository(fieldValue(sw.getCodeRepositoryUrl()))
|
||||
.setLandingPage(fieldValue(sw.getDocumentationUrl()))
|
||||
: null;
|
||||
}
|
||||
|
||||
private static String fieldValue(final Field<String> f) {
|
||||
return f != null ? f.getValue() : null;
|
||||
}
|
||||
|
||||
private static String fieldValue(final List<Field<String>> fl) {
|
||||
return fl != null ? fl.stream().map(Field::getValue).filter(StringUtils::isNotBlank).findFirst().orElse(null)
|
||||
: null;
|
||||
}
|
||||
|
||||
private static String structPropValue(final List<StructuredProperty> props) {
|
||||
return props != null
|
||||
? props.stream().map(StructuredProperty::getValue).filter(StringUtils::isNotBlank).findFirst().orElse(null)
|
||||
: null;
|
||||
}
|
||||
|
||||
private static List<String> fieldList(final List<Field<String>> fl) {
|
||||
return fl != null
|
||||
? fl.stream().map(Field::getValue).filter(StringUtils::isNotBlank).collect(Collectors.toList())
|
||||
: new ArrayList<>();
|
||||
}
|
||||
|
||||
private static List<String> structPropList(final List<StructuredProperty> props) {
|
||||
return props != null
|
||||
? props
|
||||
.stream()
|
||||
.map(StructuredProperty::getValue)
|
||||
.filter(StringUtils::isNotBlank)
|
||||
.collect(Collectors.toList())
|
||||
: new ArrayList<>();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,12 +1,16 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.util;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.function.BiConsumer;
|
||||
import java.util.function.Function;
|
||||
|
||||
import eu.dnetlib.broker.objects.OpenAireEventPayload;
|
||||
import eu.dnetlib.broker.objects.Provenance;
|
||||
import eu.dnetlib.broker.objects.Publication;
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.schema.oaf.Instance;
|
||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public final class UpdateInfo<T> {
|
||||
|
@ -66,12 +70,41 @@ public final class UpdateInfo<T> {
|
|||
return trust;
|
||||
}
|
||||
|
||||
public void compileHighlight(final OpenAireEventPayload payload) {
|
||||
compileHighlight.accept(payload.getHighlight(), getHighlightValue());
|
||||
}
|
||||
|
||||
public String getHighlightValueAsString() {
|
||||
return highlightToString.apply(getHighlightValue());
|
||||
}
|
||||
|
||||
public OpenAireEventPayload asBrokerPayload() {
|
||||
|
||||
final Publication p = ConversionUtils.oafResultToBrokerPublication(getSource());
|
||||
compileHighlight.accept(p, getHighlightValue());
|
||||
|
||||
final Publication hl = new Publication();
|
||||
compileHighlight.accept(hl, getHighlightValue());
|
||||
|
||||
final String provId = getSource().getOriginalId().stream().findFirst().orElse(null);
|
||||
final String provRepo = getSource()
|
||||
.getCollectedfrom()
|
||||
.stream()
|
||||
.map(KeyValue::getValue)
|
||||
.findFirst()
|
||||
.orElse(null);
|
||||
final String provUrl = getSource()
|
||||
.getInstance()
|
||||
.stream()
|
||||
.map(Instance::getUrl)
|
||||
.flatMap(List::stream)
|
||||
.findFirst()
|
||||
.orElse(null);
|
||||
;
|
||||
|
||||
final Provenance provenance = new Provenance().setId(provId).setRepositoryName(provRepo).setUrl(provUrl);
|
||||
|
||||
return new OpenAireEventPayload()
|
||||
.setPublication(p)
|
||||
.setHighlight(hl)
|
||||
.setTrust(trust)
|
||||
.setProvenance(provenance);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,93 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<artifactId>dhp-workflows</artifactId>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<version>1.2.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>dhp-doiboost</artifactId>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>net.alchim31.maven</groupId>
|
||||
<artifactId>scala-maven-plugin</artifactId>
|
||||
<version>4.0.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>scala-compile-first</id>
|
||||
<phase>initialize</phase>
|
||||
<goals>
|
||||
<goal>add-source</goal>
|
||||
<goal>compile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
<execution>
|
||||
<id>scala-test-compile</id>
|
||||
<phase>process-test-resources</phase>
|
||||
<goals>
|
||||
<goal>testCompile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<scalaVersion>${scala.version}</scalaVersion>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
|
||||
</build>
|
||||
|
||||
|
||||
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpclient</artifactId>
|
||||
<version>4.3.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.apache.cxf</groupId>
|
||||
<artifactId>cxf-rt-transports-http</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
<artifactId>dhp-schemas</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.jayway.jsonpath</groupId>
|
||||
<artifactId>json-path</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
</dependency>
|
||||
|
||||
|
||||
|
||||
</dependencies>
|
||||
|
||||
|
||||
</project>
|
|
@ -0,0 +1,374 @@
|
|||
package eu.dnetlib.doiboost
|
||||
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction
|
||||
import eu.dnetlib.dhp.schema.oaf.{DataInfo, Dataset, Field, Instance, KeyValue, Oaf, Organization, Publication, Qualifier, Relation, Result, StructuredProperty}
|
||||
import eu.dnetlib.dhp.utils.DHPUtils
|
||||
import org.apache.commons.lang3.StringUtils
|
||||
import org.codehaus.jackson.map.ObjectMapper
|
||||
import org.json4s
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.jackson.JsonMethods.parse
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.io.Source
|
||||
|
||||
|
||||
case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {}
|
||||
|
||||
case class DoiBoostAffiliation(PaperId:Long, AffiliationId:Long, GridId:Option[String], OfficialPage:Option[String], DisplayName:Option[String]){}
|
||||
|
||||
object DoiBoostMappingUtil {
|
||||
def getUnknownCountry(): Qualifier = {
|
||||
createQualifier("UNKNOWN","UNKNOWN","dnet:countries","dnet:countries")
|
||||
}
|
||||
|
||||
|
||||
|
||||
def generateMAGAffiliationId(affId: String): String = {
|
||||
s"20|microsoft___$SEPARATOR${DHPUtils.md5(affId)}"
|
||||
}
|
||||
|
||||
|
||||
val logger: Logger = LoggerFactory.getLogger(getClass)
|
||||
|
||||
//STATIC STRING
|
||||
val MAG = "microsoft"
|
||||
val MAG_NAME = "Microsoft Academic Graph"
|
||||
val ORCID = "ORCID"
|
||||
val CROSSREF = "Crossref"
|
||||
val UNPAYWALL = "UnpayWall"
|
||||
val GRID_AC = "grid.ac"
|
||||
val WIKPEDIA = "wikpedia"
|
||||
val doiBoostNSPREFIX = "doiboost____"
|
||||
val OPENAIRE_PREFIX = "openaire____"
|
||||
val SEPARATOR = "::"
|
||||
val DNET_LANGUAGES = "dnet:languages"
|
||||
val PID_TYPES = "dnet:pid_types"
|
||||
|
||||
val invalidName = List(",", "none none", "none, none", "none &na;", "(:null)", "test test test", "test test", "test", "&na; &na;")
|
||||
|
||||
/**
 * Wraps an OAF object into an AtomicAction and serializes the action to JSON.
 *
 * @param item a Dataset, Publication, Organization or Relation
 * @return the pair (canonical class name of the payload, JSON of the action),
 *         or null when the item is of any other type
 */
def toActionSet(item: Oaf): (String, String) = {
  val jsonMapper = new ObjectMapper()

  // Builds the action for one payload type and pairs its JSON with the payload's runtime class name
  def serialize[T <: Oaf](payload: T, payloadClass: Class[T]): (String, String) = {
    val action: AtomicAction[T] = new AtomicAction[T]
    action.setClazz(payloadClass)
    action.setPayload(payload)
    (payload.getClass.getCanonicalName, jsonMapper.writeValueAsString(action))
  }

  item match {
    case dataset: Dataset           => serialize[Dataset](dataset, classOf[Dataset])
    case publication: Publication   => serialize[Publication](publication, classOf[Publication])
    case organization: Organization => serialize[Organization](organization, classOf[Organization])
    case relation: Relation         => serialize[Relation](relation, classOf[Relation])
    case _                          => null
  }
}
|
||||
|
||||
|
||||
/**
 * Parses one JSON line of the hosted-by map, an object whose key maps to a
 * HostedByItemType, and returns its first (key, item) entry.
 */
def toHostedByItem(input: String): (String, HostedByItemType) = {
  implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats

  val parsed: json4s.JValue = parse(input)
  val entries: Map[String, HostedByItemType] = parsed.extract[Map[String, HostedByItemType]]
  val (key, hostedBy) = entries.head
  (key, hostedBy)
}
|
||||
|
||||
|
||||
/**
 * Pairs a publication with the key used to look it up in the hosted-by map:
 * the first non-empty value among printed, online and linking ISSN of its
 * journal, or the publication id when there is no journal or no ISSN.
 */
def toISSNPair(publication: Publication): (String, Publication) = {
  val journal = publication.getJournal
  val issnCandidates: List[String] =
    if (journal == null) List.empty[String]
    else List(journal.getIssnPrinted, journal.getIssnOnline, journal.getIssnLinking)

  val joinKey = issnCandidates
    .find(candidate => candidate != null && candidate.nonEmpty)
    .getOrElse(publication.getId)

  (joinKey, publication)
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
 * Builds the organization identifier for a GRID id: the "20|grid________::"
 * prefix followed by the md5 of the lower-cased, trimmed GRID id.
 */
def generateGridAffiliationId(gridId: String): String = {
  val normalizedGridId = gridId.toLowerCase().trim()
  "20|grid________::" + DHPUtils.md5(normalizedGridId)
}
|
||||
|
||||
|
||||
/**
 * Normalizes the instances of a dataset in place: the first instance type with a
 * non-empty class id (if any) is copied to all instances, and every instance is
 * marked as hosted by the unknown repository.
 *
 * @return the same (mutated) dataset
 */
def fixResult(result: Dataset): Dataset = {
  val instances = result.getInstance().asScala

  instances
    .find(inst => inst.getInstancetype != null && inst.getInstancetype.getClassid.nonEmpty)
    .foreach(typed => instances.foreach(inst => inst.setInstancetype(typed.getInstancetype)))

  instances.foreach(inst => inst.setHostedby(getUbknownHostedBy()))
  result
}
|
||||
|
||||
/** Returns a new KeyValue identifying the "Unknown Repository" datasource. */
def getUbknownHostedBy(): KeyValue = {
  val unknownRepository = new KeyValue
  unknownRepository.setKey("10|" + OPENAIRE_PREFIX + "::55045bd2a65019fd8e6741a755395c8c")
  unknownRepository.setValue("Unknown Repository")
  unknownRepository
}
|
||||
|
||||
|
||||
/** Returns the "OPEN" / "Open Access" qualifier of the dnet:access_modes scheme. */
def getOpenAccessQualifier(): Qualifier = {
  val accessModes = "dnet:access_modes"
  createQualifier("OPEN", "Open Access", accessModes, accessModes)
}
|
||||
|
||||
/** Returns the "RESTRICTED" / "Restricted" qualifier of the dnet:access_modes scheme. */
def getRestrictedQualifier(): Qualifier = {
  val accessModes = "dnet:access_modes"
  createQualifier("RESTRICTED", "Restricted", accessModes, accessModes)
}
|
||||
|
||||
/**
 * Completes a publication with hosted-by and access-right information.
 *
 * The input is one row of a left join between (key, publication) and
 * (key, hosted-by item); the right side is null when no item matched.
 *
 *  - The first instance type with a non-empty class id (if any) is copied to all instances.
 *  - With a matching item, every instance is hosted by that item's datasource; when the
 *    item is open access, the instance access right and the publication best access
 *    right are set to OPEN.
 *  - Without a matching item, every instance is hosted by the unknown repository.
 *  - Finally, when at least one typed instance declares an access right, the best access
 *    right becomes OPEN if any of them is OPEN, RESTRICTED otherwise.
 *
 * @return the same (mutated) publication
 */
def fixPublication(input: ((String, Publication), (String, HostedByItemType))): Publication = {

  val publication = input._1._2

  // Left join: the hosted-by side is null when the ISSN/id key had no match
  val item = if (input._2 != null) input._2._2 else null

  val instanceType = publication.getInstance().asScala.find(i => i.getInstancetype != null && i.getInstancetype.getClassid.nonEmpty)

  if (instanceType.isDefined) {
    publication.getInstance().asScala.foreach(i => i.setInstancetype(instanceType.get.getInstancetype))
  }

  publication.getInstance().asScala.foreach(i => {
    if (item != null) {
      val hb = new KeyValue
      hb.setValue(item.officialname)
      hb.setKey(generateDSId(item.id))
      // Both access rights are promoted to OPEN only for open-access journals.
      // (Previously the best access right was set to OPEN for every matched journal,
      // because the un-braced `if` guarded only the instance access right.)
      if (item.openAccess) {
        i.setAccessright(getOpenAccessQualifier())
        publication.setBestaccessright(getOpenAccessQualifier())
      }
      i.setHostedby(hb)
    }
    else {
      i.setHostedby(getUbknownHostedBy())
    }
  })

  // Class ids of the access rights declared by typed instances
  val ar = publication.getInstance().asScala
    .filter(i => i.getInstancetype != null && i.getAccessright != null && i.getAccessright.getClassid != null)
    .map(f => f.getAccessright.getClassid)

  if (ar.nonEmpty) {
    if (ar.contains("OPEN")) {
      publication.setBestaccessright(getOpenAccessQualifier())
    }
    else {
      publication.setBestaccessright(getRestrictedQualifier())
    }
  }
  publication
}
|
||||
|
||||
|
||||
/**
 * Builds a datasource identifier from a "prefix::name" string:
 * "10|" + prefix + "::" + md5(name), splitting on the first "::".
 */
def generateDSId(input: String): String = {
  val delimiter = "::"
  val nsPrefix = StringUtils.substringBefore(input, delimiter)
  val localPart = StringUtils.substringAfter(input, delimiter)
  "10|" + nsPrefix + "::" + DHPUtils.md5(localPart)
}
|
||||
|
||||
|
||||
/** Creates a DataInfo with the default trust value "0.9". */
def generateDataInfo(): DataInfo = {
  val defaultTrust = "0.9"
  generateDataInfo(defaultTrust)
}
|
||||
|
||||
|
||||
/**
 * Tells whether a publication should be kept.
 *
 * A publication is rejected when:
 *  - it is null;
 *  - it has no title, or every title is empty or "[NO TITLE AVAILABLE]";
 *  - its publisher is a known test account (#4360);
 *  - it has no author, or no author with a valid name (see isValidAuthorName);
 *  - it is an "Elsevier BV" record authored by "Addie Jackson" (#4368).
 *
 * @return true when the publication passes all the checks
 */
def filterPublication(publication: Publication): Boolean = {

  //Case empty publication
  if (publication == null)
    return false

  //Case publication with no title
  if (publication.getTitle == null || publication.getTitle.size == 0)
    return false

  val validTitles = publication.getTitle.asScala.count(p => p != null && p.getValue != null
    && p.getValue.nonEmpty && !p.getValue.equalsIgnoreCase("[NO TITLE AVAILABLE]"))

  if (validTitles == 0)
    return false

  // fixes #4360 (test publisher)
  val publisher = if (publication.getPublisher != null) publication.getPublisher.getValue else null

  if (publisher != null && (publisher.equalsIgnoreCase("Test accounts") || publisher.equalsIgnoreCase("CrossRef Test Account"))) {
    return false
  }

  //Publication with no Author
  if (publication.getAuthor == null || publication.getAuthor.size() == 0)
    return false

  //filter invalid author: use the full name when present, otherwise "name surname"
  val authors = publication.getAuthor.asScala.map(author => {
    val fullname = author.getFullname
    // a null full name must fall back to name/surname instead of throwing
    if (fullname != null && fullname.nonEmpty)
      fullname
    else
      s"${author.getName} ${author.getSurname}"
  })

  if (authors.count(isValidAuthorName) == 0)
    return false

  // fixes #4368
  // the null-safe `publisher` local is used here: the publisher may be missing
  if (authors.exists(a => a.equalsIgnoreCase("Addie Jackson")) && "Elsevier BV".equalsIgnoreCase(publisher))
    return false

  true
}
|
||||
|
||||
|
||||
/**
 * An author name is valid when it is neither null nor empty and its
 * lower-cased, trimmed form is not listed in invalidName.
 */
def isValidAuthorName(fullName: String): Boolean = {
  val hasText = fullName != null && !fullName.isEmpty
  hasText && !invalidName.contains(fullName.toLowerCase.trim)
}
|
||||
|
||||
|
||||
/**
 * Creates a DataInfo with the given trust, all flags (deleted by inference,
 * inferred, invisible) set to false and the "sysimport:actionset" provenance action.
 */
def generateDataInfo(trust: String): DataInfo = {
  val provenance = createQualifier("sysimport:actionset", "dnet:provenanceActions")

  val dataInfo = new DataInfo
  dataInfo.setTrust(trust)
  dataInfo.setProvenanceaction(provenance)
  dataInfo.setInvisible(false)
  dataInfo.setInferred(false)
  dataInfo.setDeletedbyinference(false)
  dataInfo
}
|
||||
|
||||
|
||||
/**
 * Creates a StructuredProperty holding `value`, qualified by `classId`
 * within the scheme `schemeId`.
 */
def createSP(value: String, classId: String, schemeId: String): StructuredProperty = {
  val qualifier = createQualifier(classId, schemeId)

  val property = new StructuredProperty
  property.setValue(value)
  property.setQualifier(qualifier)
  property
}
|
||||
|
||||
/**
 * Creates a StructuredProperty holding `value`, qualified by `classId`
 * within the scheme `schemeId`, and carrying the given DataInfo.
 */
def createSP(value: String, classId: String, schemeId: String, dataInfo: DataInfo): StructuredProperty = {
  val property = new StructuredProperty
  property.setValue(value)
  property.setQualifier(createQualifier(classId, schemeId))
  property.setDataInfo(dataInfo)
  property
}
|
||||
|
||||
/** Returns the collected-from KeyValue of Crossref (key built from the md5 of the lower-cased name). */
def createCrossrefCollectedFrom(): KeyValue = {
  val collectedFrom = new KeyValue
  collectedFrom.setKey(s"10|$OPENAIRE_PREFIX$SEPARATOR${DHPUtils.md5(CROSSREF.toLowerCase)}")
  collectedFrom.setValue(CROSSREF)
  collectedFrom
}
|
||||
|
||||
|
||||
/** Returns the collected-from KeyValue of UnpayWall (key built from the md5 of the lower-cased name). */
def createUnpayWallCollectedFrom(): KeyValue = {
  val collectedFrom = new KeyValue
  collectedFrom.setKey(s"10|$OPENAIRE_PREFIX$SEPARATOR${DHPUtils.md5(UNPAYWALL.toLowerCase)}")
  collectedFrom.setValue(UNPAYWALL)
  collectedFrom
}
|
||||
|
||||
/** Returns the collected-from KeyValue of ORCID (key built from the md5 of the lower-cased name). */
def createORIDCollectedFrom(): KeyValue = {
  val collectedFrom = new KeyValue
  collectedFrom.setKey(s"10|$OPENAIRE_PREFIX$SEPARATOR${DHPUtils.md5(ORCID.toLowerCase)}")
  collectedFrom.setValue(ORCID)
  collectedFrom
}
|
||||
|
||||
|
||||
/**
 * Builds the DOIBoost identifier of a result: the entity prefix ("60" for a
 * Dataset, "50" otherwise), "|", the doiboost namespace prefix, the separator
 * and the md5 of the lower-cased DOI.
 */
def generateIdentifier(oaf: Result, doi: String): String = {
  val doiDigest = DHPUtils.md5(doi.toLowerCase)
  val entityPrefix = oaf match {
    case _: Dataset => "60"
    case _          => "50"
  }
  s"$entityPrefix|$doiBoostNSPREFIX$SEPARATOR$doiDigest"
}
|
||||
|
||||
|
||||
|
||||
|
||||
/** Returns the collected-from KeyValue of Microsoft Academic Graph (key built from the md5 of MAG). */
def createMAGCollectedFrom(): KeyValue = {
  val collectedFrom = new KeyValue
  collectedFrom.setKey(s"10|$OPENAIRE_PREFIX$SEPARATOR${DHPUtils.md5(MAG)}")
  collectedFrom.setValue(MAG_NAME)
  collectedFrom
}
|
||||
|
||||
/**
 * Creates a Qualifier from its four parts.
 *
 * @param clsName  class id
 * @param clsValue class name
 * @param schName  scheme id
 * @param schValue scheme name
 */
def createQualifier(clsName: String, clsValue: String, schName: String, schValue: String): Qualifier = {
  val qualifier = new Qualifier
  qualifier.setSchemeid(schName)
  qualifier.setSchemename(schValue)
  qualifier.setClassid(clsName)
  qualifier.setClassname(clsValue)
  qualifier
}
|
||||
|
||||
/** Creates a Qualifier whose class id and name are both `cls` and whose scheme id and name are both `sch`. */
def createQualifier(cls: String, sch: String): Qualifier =
  createQualifier(clsName = cls, clsValue = cls, schName = sch, schValue = sch)
|
||||
|
||||
|
||||
/** Wraps a value into a new Field. */
def asField[T](value: T): Field[T] = {
  val field = new Field[T]
  field.setValue(value)
  field
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
package eu.dnetlib.doiboost
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction
|
||||
import eu.dnetlib.dhp.schema.oaf.{Organization, Publication, Relation, Dataset => OafDataset}
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.apache.hadoop.io.Text
|
||||
import org.apache.hadoop.io.compress.GzipCodec
|
||||
import org.apache.hadoop.mapred.SequenceFileOutputFormat
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
/**
 * Spark job that turns the DOIBoost outputs (datasets, publications, organizations
 * and relations) into serialized atomic actions and stores them as a gzip-compressed
 * sequence file.
 */
object SparkGenerateDOIBoostActionSet {
  val logger: Logger = LoggerFactory.getLogger(getClass)

  /**
   * Reads the job arguments, writes every entity type as (class name, action JSON)
   * pairs under `targetPath/actionSet`, then exports those pairs as a Text/Text
   * sequence file under `targetPath/rawset`.
   */
  def main(args: Array[String]): Unit = {

    val conf: SparkConf = new SparkConf()
    val argumentsDefinition = IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/generate_doiboost_as_params.json"))
    val parser = new ArgumentApplicationParser(argumentsDefinition)
    parser.parseArgument(args)

    val spark: SparkSession = SparkSession
      .builder()
      .config(conf)
      .appName(getClass.getSimpleName)
      .master(parser.get("master"))
      .getOrCreate()

    implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication]
    implicit val mapEncoderOrg: Encoder[Organization] = Encoders.kryo[Organization]
    implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset]
    implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation]
    implicit val mapEncoderAS: Encoder[(String, String)] = Encoders.tuple(Encoders.STRING, Encoders.STRING)

    implicit val mapEncoderAtomiAction: Encoder[AtomicAction[OafDataset]] = Encoders.kryo[AtomicAction[OafDataset]]

    val dbPublicationPath = parser.get("dbPublicationPath")
    val dbDatasetPath = parser.get("dbDatasetPath")
    val crossRefRelation = parser.get("crossRefRelation")
    val dbaffiliationRelationPath = parser.get("dbaffiliationRelationPath")
    val dbOrganizationPath = parser.get("dbOrganizationPath")
    val workingDirPath = parser.get("targetPath")

    val actionSetPath = s"$workingDirPath/actionSet"
    val rawSetPath = s"$workingDirPath/rawset"

    // Datasets go first and overwrite any previous action set; the others are appended
    val datasets = spark.read.load(dbDatasetPath).as[OafDataset]
    datasets
      .map(ds => DoiBoostMappingUtil.fixResult(ds))
      .map(ds => DoiBoostMappingUtil.toActionSet(ds))(mapEncoderAS)
      .write.mode(SaveMode.Overwrite).save(actionSetPath)

    val publications = spark.read.load(dbPublicationPath).as[Publication]
    publications
      .map(pub => DoiBoostMappingUtil.toActionSet(pub))(mapEncoderAS)
      .write.mode(SaveMode.Append).save(actionSetPath)

    val organizations = spark.read.load(dbOrganizationPath).as[Organization]
    organizations
      .map(org => DoiBoostMappingUtil.toActionSet(org))(mapEncoderAS)
      .write.mode(SaveMode.Append).save(actionSetPath)

    val crossrefRelations = spark.read.load(crossRefRelation).as[Relation]
    crossrefRelations
      .map(rel => DoiBoostMappingUtil.toActionSet(rel))(mapEncoderAS)
      .write.mode(SaveMode.Append).save(actionSetPath)

    val affiliationRelations = spark.read.load(dbaffiliationRelationPath).as[Relation]
    affiliationRelations
      .map(rel => DoiBoostMappingUtil.toActionSet(rel))(mapEncoderAS)
      .write.mode(SaveMode.Append).save(actionSetPath)

    // Re-read the complete action set and export it as a compressed sequence file
    val actions: Dataset[(String, String)] = spark.read.load(actionSetPath).as[(String, String)]

    actions.rdd
      .map(pair => (new Text(pair._1), new Text(pair._2)))
      .saveAsHadoopFile(rawSetPath, classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text, Text]], classOf[GzipCodec])
  }

}
|
|
@ -0,0 +1,140 @@
|
|||
package eu.dnetlib.doiboost
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Dataset => OafDataset, Organization}
|
||||
import eu.dnetlib.doiboost.mag.ConversionUtil
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.sql.functions.col
|
||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * Spark job that builds the DOIBoost publications by merging, in sequence, the
 * Crossref publications with the UnpayWall, ORCID and MAG ones, then attaches
 * hosted-by information and derives affiliation relations and organizations.
 */
object SparkGenerateDoiBoost {

  /**
   * Runs the job. All intermediate and final outputs are written under `workingDirPath`:
   * firstJoin, secondJoin, doiBoostPublication, doiBoostPublicationFiltered,
   * doiBoostPublicationAffiliation and doiBoostOrganization.
   */
  def main(args: Array[String]): Unit = {

    val logger: Logger = LoggerFactory.getLogger(getClass)
    val conf: SparkConf = new SparkConf()
    val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json")))
    parser.parseArgument(args)
    val spark: SparkSession =
      SparkSession
        .builder()
        .config(conf)
        .appName(getClass.getSimpleName)
        .master(parser.get("master")).getOrCreate()

    import spark.implicits._

    val hostedByMapPath = parser.get("hostedByMapPath")
    val workingDirPath = parser.get("workingDirPath")

    implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication]
    implicit val mapEncoderOrg: Encoder[Organization] = Encoders.kryo[Organization]
    implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset]
    implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPub)
    implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation]

    logger.info("Phase 2) Join Crossref with UnpayWall")

    val crossrefPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p))
    val uwPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/uwPublication").as[Publication].map(p => (p.getId, p))

    // Merges the right side of a left join (when present) into the left publication
    def applyMerge(item: ((String, Publication), (String, Publication))): Publication = {
      val crossrefPub = item._1._2
      if (item._2 != null) {
        val otherPub = item._2._2
        if (otherPub != null) {
          crossrefPub.mergeFrom(otherPub)
        }
      }
      crossrefPub
    }

    crossrefPublication.joinWith(uwPublication, crossrefPublication("_1").equalTo(uwPublication("_1")), "left").map(applyMerge).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/firstJoin")

    logger.info("Phase 3) Join Result with ORCID")
    val fj: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
    val orcidPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/orcidPublication").as[Publication].map(p => (p.getId, p))
    fj.joinWith(orcidPublication, fj("_1").equalTo(orcidPublication("_1")), "left").map(applyMerge).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/secondJoin")

    // The MAG join is the fourth phase; the message used to repeat "Phase 3)"
    logger.info("Phase 4) Join Result with MAG")
    val sj: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/secondJoin").as[Publication].map(p => (p.getId, p))

    val magPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p))
    sj.joinWith(magPublication, sj("_1").equalTo(magPublication("_1")), "left").map(applyMerge).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublication")

    // Drop invalid publications and key the remaining ones by ISSN (or id) for the hosted-by join
    val doiBoostPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/doiBoostPublication").as[Publication].filter(p => DoiBoostMappingUtil.filterPublication(p)).map(DoiBoostMappingUtil.toISSNPair)(tupleForJoinEncoder)

    val hostedByDataset: Dataset[(String, HostedByItemType)] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath).map(DoiBoostMappingUtil.toHostedByItem))

    doiBoostPublication.joinWith(hostedByDataset, doiBoostPublication("_1").equalTo(hostedByDataset("_1")), "left")
      .map(DoiBoostMappingUtil.fixPublication)
      .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationFiltered")

    val affiliationPath = parser.get("affiliationPath")
    val paperAffiliationPath = parser.get("paperAffiliationPath")

    val affiliation = spark.read.load(affiliationPath).select(col("AffiliationId"), col("GridId"), col("OfficialPage"), col("DisplayName"))

    val paperAffiliation = spark.read.load(paperAffiliationPath).select(col("AffiliationId").alias("affId"), col("PaperId"))

    // One row per (paper, affiliation) pair, with the affiliation details
    val a: Dataset[DoiBoostAffiliation] = paperAffiliation
      .joinWith(affiliation, paperAffiliation("affId").equalTo(affiliation("AffiliationId")))
      .select(col("_1.PaperId"), col("_2.AffiliationId"), col("_2.GridId"), col("_2.OfficialPage"), col("_2.DisplayName")).as[DoiBoostAffiliation]

    // Publications keyed by their MAG identifier; those without one are discarded
    val magPubs: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/doiBoostPublicationFiltered").as[Publication]
      .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p))(tupleForJoinEncoder).filter(s => s._1 != null)

    // For every (publication, affiliation) pair emit the relation in both directions
    magPubs.joinWith(a, magPubs("_1").equalTo(a("PaperId"))).flatMap(item => {
      val pub: Publication = item._1._2
      val affiliation = item._2
      val affId: String = if (affiliation.GridId.isDefined) DoiBoostMappingUtil.generateGridAffiliationId(affiliation.GridId.get) else DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString)
      val r: Relation = new Relation
      r.setSource(pub.getId)
      r.setTarget(affId)
      r.setRelType("resultOrganization")
      r.setRelClass("hasAuthorInstitution")
      r.setSubRelType("affiliation")
      r.setDataInfo(pub.getDataInfo)
      r.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava)
      val r1: Relation = new Relation
      r1.setTarget(pub.getId)
      r1.setSource(affId)
      r1.setRelType("resultOrganization")
      r1.setRelClass("isAuthorInstitutionOf")
      r1.setSubRelType("affiliation")
      r1.setDataInfo(pub.getDataInfo)
      r1.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava)
      List(r, r1)
    })(mapEncoderRel).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationAffiliation")

    // Organizations are created only for affiliations without a GRID id
    magPubs.joinWith(a, magPubs("_1").equalTo(a("PaperId"))).map(item => {
      val affiliation = item._2
      if (affiliation.GridId.isEmpty) {
        val o = new Organization
        o.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava)
        o.setDataInfo(DoiBoostMappingUtil.generateDataInfo())
        o.setId(DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString))
        o.setOriginalId(List(affiliation.AffiliationId.toString).asJava)
        if (affiliation.DisplayName.nonEmpty)
          o.setLegalname(DoiBoostMappingUtil.asField(affiliation.DisplayName.get))
        if (affiliation.OfficialPage.isDefined)
          o.setWebsiteurl(DoiBoostMappingUtil.asField(affiliation.OfficialPage.get))
        o.setCountry(DoiBoostMappingUtil.getUnknownCountry())
        o
      }
      else
        null
    }).filter(o => o != null).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostOrganization")
  }

}
|
|
@ -0,0 +1,440 @@
|
|||
package eu.dnetlib.doiboost.crossref
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf._
|
||||
import eu.dnetlib.dhp.utils.DHPUtils
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
|
||||
import org.apache.commons.lang.StringUtils
|
||||
import org.json4s
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.JsonAST._
|
||||
import org.json4s.jackson.JsonMethods._
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.collection.mutable
|
||||
import scala.util.matching.Regex
|
||||
|
||||
/** Shape of an affiliation entry extracted from the Crossref JSON. */
case class mappingAffiliation(name: String)
|
||||
|
||||
/** Shape of an author entry extracted from the Crossref JSON; only the family name is mandatory. */
case class mappingAuthor(
  given: Option[String],
  family: String,
  ORCID: Option[String],
  affiliation: Option[mappingAffiliation]
)
|
||||
|
||||
/** Shape of a funder entry extracted from the Crossref JSON; DOI and award list are optional. */
case class mappingFunder(
  name: String,
  DOI: Option[String],
  award: Option[List[String]]
)
|
||||
|
||||
|
||||
case object Crossref2Oaf {
|
||||
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
|
||||
|
||||
/** Maps a Crossref work type to the kind of result to create: "publication" or "dataset". */
val mappingCrossrefType = {
  val publicationTypes = List(
    "book-section",
    "book",
    "book-chapter",
    "book-part",
    "book-series",
    "book-set",
    "book-track",
    "edited-book",
    "reference-book",
    "monograph",
    "journal-article",
    "dissertation",
    "other",
    "peer-review",
    "proceedings",
    "proceedings-article",
    "reference-entry",
    "report",
    "report-series",
    "standard",
    "standard-series",
    "posted-content"
  )
  (publicationTypes.map(crossrefType => crossrefType -> "publication") :+ ("dataset" -> "dataset")).toMap
}
|
||||
|
||||
|
||||
/**
 * Maps a Crossref type or subtype to a category string made of a four-character
 * code, a space and a label (e.g. "0001 Article").
 * The key "report" used to be listed twice with the same value; the redundant
 * entry has been removed.
 */
val mappingCrossrefSubType = Map(
  "book-section" -> "0013 Part of book or chapter of book",
  "book" -> "0002 Book",
  "book-chapter" -> "0013 Part of book or chapter of book",
  "book-part" -> "0013 Part of book or chapter of book",
  "book-series" -> "0002 Book",
  "book-set" -> "0002 Book",
  "book-track" -> "0002 Book",
  "edited-book" -> "0002 Book",
  "reference-book" -> "0002 Book",
  "monograph" -> "0002 Book",
  "journal-article" -> "0001 Article",
  "dissertation" -> "0006 Doctoral thesis",
  "other" -> "0038 Other literature type",
  "peer-review" -> "0015 Review",
  "proceedings" -> "0004 Conference object",
  "proceedings-article" -> "0004 Conference object",
  "reference-entry" -> "0013 Part of book or chapter of book",
  "report" -> "0017 Report",
  "report-series" -> "0017 Report",
  "standard" -> "0038 Other literature type",
  "standard-series" -> "0038 Other literature type",
  "dataset" -> "0021 Dataset",
  "preprint" -> "0016 Preprint"
)
|
||||
|
||||
/**
 * Fills `result` with the metadata common to every Crossref record: pid, original
 * ids, identifier, data info, collection dates, publisher, titles, descriptions,
 * sources, relevant dates, subjects, authors and a single instance.
 *
 * @param result       the result to populate (mutated in place)
 * @param json         the parsed Crossref record
 * @param cobjCategory category string: its first four characters are used as the
 *                     class id and the text from index 5 as the class name
 * @return the same `result` instance
 */
def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = {
  implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats

  //MAPPING Crossref DOI into PID
  val doi: String = (json \ "DOI").extract[String]
  result.setPid(List(createSP(doi, "doi", PID_TYPES)).asJava)

  //MAPPING Crossref DOI into OriginalId
  //and other original identifiers of the record, like clinical-trial-number
  val clinicalTrialNumbers = for (JString(ctr) <- json \ "clinical-trial-number") yield ctr
  val alternativeIds = for (JString(ids) <- json \ "alternative-id") yield ids
  val tmp = clinicalTrialNumbers ::: alternativeIds ::: List(doi)

  result.setOriginalId(tmp.filter(id => id != null).asJava)

  //Set identifier as {50|60} | doiboost____::md5(DOI)
  result.setId(generateIdentifier(result, doi))

  // Add DataInfo
  result.setDataInfo(generateDataInfo())

  result.setLastupdatetimestamp((json \ "indexed" \ "timestamp").extract[Long])
  result.setDateofcollection((json \ "indexed" \ "date-time").extract[String])

  result.setCollectedfrom(List(createCrossrefCollectedFrom()).asJava)

  // Publisher ( Name of work's publisher mapped into Result/Publisher)
  val publisher = (json \ "publisher").extractOrElse[String](null)
  if (publisher != null && publisher.nonEmpty)
    result.setPublisher(asField(publisher))

  // TITLE
  val mainTitles = for {JString(title) <- json \ "title" if title.nonEmpty} yield createSP(title, "main title", "dnet:dataCite_title")
  val originalTitles = for {JString(title) <- json \ "original-title" if title.nonEmpty} yield createSP(title, "alternative title", "dnet:dataCite_title")
  val shortTitles = for {JString(title) <- json \ "short-title" if title.nonEmpty} yield createSP(title, "alternative title", "dnet:dataCite_title")
  val subtitles = for {JString(title) <- json \ "subtitle" if title.nonEmpty} yield createSP(title, "subtitle", "dnet:dataCite_title")
  result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava)

  // DESCRIPTION
  val descriptionList = for {JString(description) <- json \ "abstract"} yield asField(description)
  result.setDescription(descriptionList.asJava)

  // Source
  val sourceList = for {JString(source) <- json \ "source" if source != null && source.nonEmpty} yield asField(source)
  result.setSource(sourceList.asJava)

  //RELEVANT DATE Mapping
  val createdDate = generateDate((json \ "created" \ "date-time").extract[String], (json \ "created" \ "date-parts").extract[List[List[Int]]], "created", "dnet:dataCite_date")
  val postedDate = generateDate((json \ "posted" \ "date-time").extractOrElse[String](null), (json \ "posted" \ "date-parts").extract[List[List[Int]]], "available", "dnet:dataCite_date")
  val acceptedDate = generateDate((json \ "accepted" \ "date-time").extractOrElse[String](null), (json \ "accepted" \ "date-parts").extract[List[List[Int]]], "accepted", "dnet:dataCite_date")
  val publishedPrintDate = generateDate((json \ "published-print" \ "date-time").extractOrElse[String](null), (json \ "published-print" \ "date-parts").extract[List[List[Int]]], "published-print", "dnet:dataCite_date")
  val publishedOnlineDate = generateDate((json \ "published-online" \ "date-time").extractOrElse[String](null), (json \ "published-online" \ "date-parts").extract[List[List[Int]]], "published-online", "dnet:dataCite_date")

  // The issued date is preferred as date of acceptance; the created date is the fallback
  val issuedDate = extractDate((json \ "issued" \ "date-time").extractOrElse[String](null), (json \ "issued" \ "date-parts").extract[List[List[Int]]])
  if (StringUtils.isNotBlank(issuedDate)) {
    result.setDateofacceptance(asField(issuedDate))
  }
  else if (createdDate != null) {
    // createdDate is null-filtered below, so it is guarded here as well
    // NOTE(review): generateDate is defined outside this block; confirm it can return null
    result.setDateofacceptance(asField(createdDate.getValue))
  }
  result.setRelevantdate(List(createdDate, postedDate, acceptedDate, publishedOnlineDate, publishedPrintDate).filter(p => p != null).asJava)

  //Mapping Subject
  val subjectList: List[String] = (json \ "subject").extractOrElse[List[String]](List())

  if (subjectList.nonEmpty) {
    result.setSubject(subjectList.map(s => createSP(s, "keywords", "dnet:subject_classification_typologies")).asJava)
  }

  //Mapping Author
  val authorList: List[mappingAuthor] = (json \ "author").extractOrElse[List[mappingAuthor]](List())
  result.setAuthor(authorList.map(a => generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull)).asJava)

  // Mapping instance
  val instance = new Instance()
  val license = for {
    JString(lic) <- json \ "license" \ "URL"
  } yield asField(lic)
  val l = license.filter(d => StringUtils.isNotBlank(d.getValue))
  if (l.nonEmpty)
    instance.setLicense(l.head)

  // A "has-review" relation marks the instance as peer reviewed
  val has_review = (json \ "relation" \ "has-review" \ "id")

  if (has_review != JNothing)
    instance.setRefereed(asField("peerReviewed"))

  instance.setAccessright(getRestrictedQualifier())
  result.setInstance(List(instance).asJava)
  instance.setInstancetype(createQualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), "dnet:publication_resource", "dnet:publication_resource"))
  result.setResourcetype(createQualifier(cobjCategory.substring(0, 4), "dnet:dataCite_resource"))

  instance.setCollectedfrom(createCrossrefCollectedFrom())
  if (StringUtils.isNotBlank(issuedDate)) {
    instance.setDateofacceptance(asField(issuedDate))
  }

  // A missing "URL" yields null (dropped by the filter below) instead of an extraction error
  val s: String = (json \ "URL").extractOrElse[String](null)
  val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null).distinct
  if (links.nonEmpty)
    instance.setUrl(links.asJava)
  result
}
|
||||
|
||||
|
||||
/**
 * Creates an Author from the Crossref name parts.
 *
 * The full name is built only from the non-blank parts, so a missing given name
 * (passed as null by the caller) no longer produces "null Family".
 *
 * @param given  given name, may be null
 * @param family family name
 * @param orcid  ORCID identifier, may be null or blank; when present it is stored as a pid
 */
def generateAuhtor(given: String, family: String, orcid: String): Author = {
  val a = new Author
  a.setName(given)
  a.setSurname(family)

  // join only the parts that are actually present
  val fullname = List(given, family).filter(part => StringUtils.isNotBlank(part)).mkString(" ")
  a.setFullname(fullname)

  if (StringUtils.isNotBlank(orcid))
    a.setPid(List(createSP(orcid, ORCID, PID_TYPES)).asJava)

  a
}
|
||||
|
||||
/**
 * Converts one Crossref JSON record into OAF objects.
 *
 * @param input the Crossref record as a JSON string
 * @return an empty list when the record has no "type" or no result can be generated
 *         for it; otherwise the funder relations (if any) followed by the result
 */
def convert(input: String): List[Oaf] = {
  implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
  lazy val json: json4s.JValue = parse(input)

  val objectType = (json \ "type").extractOrElse[String](null)
  val objectSubType = (json \ "subtype").extractOrElse[String](null)
  if (objectType == null)
    return List()

  val result = generateItemFromType(objectType, objectSubType)
  if (result == null)
    return List()

  // Category looked up by type first, then by subtype, with a generic fallback
  val cOBJCategory = mappingCrossrefSubType
    .get(objectType)
    .orElse(mappingCrossrefSubType.get(objectSubType))
    .getOrElse("0038 Other literature type")
  mappingResult(result, json, cOBJCategory)

  // Funder relations are built before the type-specific conversion below
  val funderList: List[mappingFunder] = (json \ "funder").extractOrElse[List[mappingFunder]](List())
  val funderRelations: List[Oaf] =
    if (funderList.nonEmpty)
      mappingFunderToRelations(funderList, result.getId, createCrossrefCollectedFrom(), result.getDataInfo, result.getLastupdatetimestamp)
    else
      List()

  result match {
    case publication: Publication => convertPublication(publication, json, cOBJCategory)
    case dataset: Dataset => convertDataset(dataset)
  }

  funderRelations ::: List(result)
}
|
||||
|
||||
|
||||
/**
  * Creates the result -> project relations implied by the funders of a record.
  *
  * Funders are recognised by DOI when one is present, otherwise by name. Depending
  * on the funder, one relation is created per (normalised) award code, and/or one
  * relation towards a fixed, funder-level target id.
  *
  * @param funders  funders extracted from the record; may be null
  * @param sourceId identifier of the result the relations start from
  * @param cf       collectedFrom value set on every relation
  * @param di       data info set on every relation
  * @param ts       last-update timestamp set on every relation
  * @return the relations, in creation order
  */
def mappingFunderToRelations(funders: List[mappingFunder], sourceId: String, cf: KeyValue, di: DataInfo, ts: Long): List[Relation] = {

  val queue = new mutable.Queue[Relation]

  // Keeps what lies between the first "_" and the following "/" of the award code.
  def snfRule(award: String): String = {
    val tmp1 = StringUtils.substringAfter(award, "_")
    val tmp2 = StringUtils.substringBefore(tmp1, "/")
    logger.debug(s"From $award to $tmp2")
    tmp2
  }

  // Returns the greatest (by String ordering) run of 4 to 9 digits found in the
  // award, or null when there is none.
  def extractECAward(award: String): String = {
    val awardECRegex: Regex = "[0-9]{4,9}".r
    if (awardECRegex.findAllIn(award).hasNext)
      return awardECRegex.findAllIn(award).max
    null
  }

  // Builds a resultProject/outcome/isProducedBy relation towards nsPrefix::targetId.
  def generateRelation(sourceId: String, targetId: String, nsPrefix: String): Relation = {
    val r = new Relation
    r.setSource(sourceId)
    r.setTarget(s"$nsPrefix::$targetId")
    r.setRelType("resultProject")
    r.setRelClass("isProducedBy")
    r.setSubRelType("outcome")
    r.setCollectedfrom(List(cf).asJava)
    r.setDataInfo(di)
    r.setLastupdatetimestamp(ts)
    r
  }

  // Queues one relation per award of the funder; the target id is the md5 of the
  // award after extractField has been applied. Null/empty results are skipped.
  def generateSimpleRelationFromAward(funder: mappingFunder, nsPrefix: String, extractField: String => String): Unit = {
    if (funder.award.isDefined && funder.award.get.nonEmpty)
      funder.award.get.map(extractField).filter(a => a != null && a.nonEmpty).foreach(
        award => {
          val targetId = DHPUtils.md5(award)
          queue += generateRelation(sourceId, targetId, nsPrefix)
        }
      )
  }

  if (funders != null)
    funders.foreach(funder => {
      if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) {
        funder.DOI.get match {
          case "10.13039/100010663" |
               "10.13039/100010661" |
               "10.13039/501100007601" |
               "10.13039/501100000780" |
               "10.13039/100010665" => generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
          // NOTE(review): "10.13039/501100000780" used to be listed here as well, but it
          // is already matched by the case above so that alternative could never fire.
          // Confirm whether that funder should produce relations in both namespaces.
          case "10.13039/100011199" |
               "10.13039/100004431" |
               "10.13039/501100004963" => generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
          case "10.13039/501100000781" => generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
            generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
          case "10.13039/100000001" => generateSimpleRelationFromAward(funder, "nsf_________", a => a)
          case "10.13039/501100001665" => generateSimpleRelationFromAward(funder, "anr_________", a => a)
          case "10.13039/501100002341" => generateSimpleRelationFromAward(funder, "aka_________", a => a)
          case "10.13039/501100001602" => generateSimpleRelationFromAward(funder, "aka_________", a => a.replace("SFI", ""))
          case "10.13039/501100000923" => generateSimpleRelationFromAward(funder, "arc_________", a => a)
          case "10.13039/501100000038" => queue += generateRelation(sourceId, "1e5e62235d094afd01cd56e65112fc63", "nserc_______")
          case "10.13039/501100000155" => queue += generateRelation(sourceId, "1e5e62235d094afd01cd56e65112fc63", "sshrc_______")
          case "10.13039/501100000024" => queue += generateRelation(sourceId, "1e5e62235d094afd01cd56e65112fc63", "cihr________")
          case "10.13039/501100002848" => generateSimpleRelationFromAward(funder, "conicytf____", a => a)
          case "10.13039/501100003448" => generateSimpleRelationFromAward(funder, "gsrt________", extractECAward)
          case "10.13039/501100010198" => generateSimpleRelationFromAward(funder, "sgov________", a => a)
          case "10.13039/501100004564" => generateSimpleRelationFromAward(funder, "mestd_______", extractECAward)
          case "10.13039/501100003407" => generateSimpleRelationFromAward(funder, "miur________", a => a)
            queue += generateRelation(sourceId, "1e5e62235d094afd01cd56e65112fc63", "miur________")
          case "10.13039/501100006588" |
               "10.13039/501100004488" => generateSimpleRelationFromAward(funder, "irb_hr______", a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", ""))
          case "10.13039/501100006769" => generateSimpleRelationFromAward(funder, "rsf_________", a => a)
          case "10.13039/501100001711" => generateSimpleRelationFromAward(funder, "snsf________", snfRule)
          case "10.13039/501100004410" => generateSimpleRelationFromAward(funder, "tubitakf____", a => a)
          // The award-based rule was previously keyed on the malformed DOI
          // "10.10.13039/100004440" and therefore never applied; both rules now share
          // the one valid key.
          case "10.13039/100004440" => generateSimpleRelationFromAward(funder, "wt__________", a => a)
            queue += generateRelation(sourceId, "1e5e62235d094afd01cd56e65112fc63", "wt__________")
          case _ => logger.debug("no match for " + funder.DOI.get)
        }
      } else {
        funder.name match {
          case "European Union’s Horizon 2020 research and innovation program" => generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
          case "European Union's" =>
            generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
            generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
          case "The French National Research Agency (ANR)" |
               "The French National Research Agency" => generateSimpleRelationFromAward(funder, "anr_________", a => a)
          case "CONICYT, Programa de Formación de Capital Humano Avanzado" => generateSimpleRelationFromAward(funder, "conicytf____", extractECAward)
          case "Wellcome Trust Masters Fellowship" => queue += generateRelation(sourceId, "1e5e62235d094afd01cd56e65112fc63", "wt__________")
          case _ => logger.debug("no match for " + funder.name)
        }
      }
    }
    )
  queue.toList
}
|
||||
|
||||
// Type-specific conversion hook for Dataset results. It is currently a no-op:
// the dataset is left exactly as it was passed in.
def convertDataset(dataset: Dataset): Unit = {
|
||||
// TODO check if there are other info to map into the Dataset
|
||||
}
|
||||
|
||||
|
||||
/**
  * Adds the publication-specific information of a Crossref record.
  *
  * When the category contains "book", a "<container title> ISBN: <isbn>" entry is
  * appended to the publication sources (only if both values are present).
  * Otherwise, when a container title is present, a [[Journal]] is built from the
  * container title, the ISSNs in "issn-type", the volume and the page range.
  *
  * @param publication  the publication to enrich (modified in place)
  * @param json         the parsed Crossref record
  * @param cobjCategory the category previously resolved for the record
  */
def convertPublication(publication: Publication, json: JValue, cobjCategory: String): Unit = {
  implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
  val containerTitles = for {JString(ct) <- json \ "container-title"} yield ct

  //Mapping book
  if (cobjCategory.toLowerCase.contains("book")) {
    val ISBN = for {JString(isbn) <- json \ "ISBN"} yield isbn
    if (ISBN.nonEmpty && containerTitles.nonEmpty) {
      val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
      if (publication.getSource != null) {
        val l: List[Field[String]] = publication.getSource.asScala.toList
        val ll: List[Field[String]] = l ::: List(asField(source))
        publication.setSource(ll.asJava)
      }
      else
        publication.setSource(List(asField(source)).asJava)
    }
  } else {
    // Mapping Journal
    val issnInfos = for {JArray(issn_types) <- json \ "issn-type"
                         JObject(issn_type) <- issn_types
                         JField("type", JString(tp)) <- issn_type
                         JField("value", JString(vl)) <- issn_type
                         } yield Tuple2(tp, vl)

    val volume = (json \ "volume").extractOrElse[String](null)
    if (containerTitles.nonEmpty) {
      val journal = new Journal
      journal.setName(containerTitles.head)
      if (issnInfos.nonEmpty) {
        issnInfos.foreach(tp => {
          tp._1 match {
            case "electronic" => journal.setIssnOnline(tp._2)
            case "print" => journal.setIssnPrinted(tp._2)
            // Any other ISSN type is skipped instead of failing the whole record
            // with a MatchError.
            case _ =>
          }
        })
      }
      journal.setVol(volume)
      val page = (json \ "page").extractOrElse[String](null)
      if (page != null) {
        // "start-end": the first piece is the start page, the second the end page.
        val pp = page.split("-")
        if (pp.nonEmpty)
          journal.setSp(pp.head)
        if (pp.size > 1)
          journal.setEp(pp(1))
      }
      publication.setJournal(journal)
    }
  }
}
|
||||
|
||||
/**
  * Resolves a date either from its textual form or from its numeric parts.
  *
  * @param dt       the date as text; returned unchanged when it is not blank
  * @param datePart fallback: used only when it holds exactly one list of exactly
  *                 three numbers, which are formatted as "<first>-<second>-<third>"
  *                 with the last two zero-padded to two digits
  * @return `dt`, or the formatted date when it is exactly 10 characters long,
  *         or null when neither source yields a date
  */
def extractDate(dt: String, datePart: List[List[Int]]): String = {
  if (StringUtils.isNotBlank(dt)) {
    dt
  } else if (datePart == null || datePart.size != 1 || datePart.head.size != 3) {
    null
  } else {
    val List(first, second, third) = datePart.head
    val formatted = f"$first-$second%02d-$third%02d"
    if (formatted.length == 10) formatted else null
  }
}
|
||||
|
||||
/**
  * Wraps the date resolved by `extractDate` into a structured property.
  *
  * @param dt       the date as text (see `extractDate`)
  * @param datePart the date as numeric parts (see `extractDate`)
  * @param classId  class id passed to `createSP`
  * @param schemeId scheme id passed to `createSP`
  * @return the structured property, or null when no date could be resolved
  */
def generateDate(dt: String, datePart: List[List[Int]], classId: String, schemeId: String): StructuredProperty = {
  val resolvedDate = extractDate(dt, datePart)
  if (StringUtils.isBlank(resolvedDate))
    null
  else
    createSP(resolvedDate, classId, schemeId)
}
|
||||
|
||||
/**
  * Instantiates the result matching a Crossref type.
  *
  * The type is looked up in `mappingCrossrefType`; a mapped value of "publication"
  * or "dataset" (case-insensitive) selects the class to create.
  *
  * @param objectType    the Crossref "type" of the record
  * @param objectSubType the Crossref "subtype" of the record (not used here)
  * @return a new Publication or Dataset, or null when the type is not mapped to
  *         either of them
  */
def generateItemFromType(objectType: String, objectSubType: String): Result = {
  mappingCrossrefType.get(objectType) match {
    case Some(mapped) if mapped.equalsIgnoreCase("publication") => new Publication()
    case Some(mapped) if mapped.equalsIgnoreCase("dataset") => new Dataset()
    case _ => null
  }
}
|
||||
|
||||
}
|
|
@ -0,0 +1,103 @@
|
|||
|
||||
package eu.dnetlib.doiboost.crossref;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.util.zip.Inflater;
|
||||
|
||||
import org.apache.commons.codec.binary.Base64;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
|
||||
public class CrossrefImporter {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
CrossrefImporter.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/doiboost/import_from_es.json")));
|
||||
|
||||
parser.parseArgument(args);
|
||||
|
||||
final String hdfsuri = parser.get("namenode");
|
||||
System.out.println("HDFS URI" + hdfsuri);
|
||||
Path hdfswritepath = new Path(parser.get("targetPath"));
|
||||
System.out.println("TargetPath: " + hdfsuri);
|
||||
|
||||
final Long timestamp = StringUtils.isNotBlank(parser.get("timestamp"))
|
||||
? Long.parseLong(parser.get("timestamp"))
|
||||
: -1;
|
||||
|
||||
if (timestamp > 0)
|
||||
System.out.println("Timestamp added " + timestamp);
|
||||
|
||||
// ====== Init HDFS File System Object
|
||||
Configuration conf = new Configuration();
|
||||
// Set FileSystem URI
|
||||
conf.set("fs.defaultFS", hdfsuri);
|
||||
// Because of Maven
|
||||
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
|
||||
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
|
||||
|
||||
ESClient client = timestamp > 0
|
||||
? new ESClient("ip-90-147-167-25.ct1.garrservices.it", "crossref", timestamp)
|
||||
: new ESClient("ip-90-147-167-25.ct1.garrservices.it", "crossref");
|
||||
|
||||
try (SequenceFile.Writer writer = SequenceFile
|
||||
.createWriter(
|
||||
conf,
|
||||
SequenceFile.Writer.file(hdfswritepath),
|
||||
SequenceFile.Writer.keyClass(IntWritable.class),
|
||||
SequenceFile.Writer.valueClass(Text.class))) {
|
||||
|
||||
int i = 0;
|
||||
long start = System.currentTimeMillis();
|
||||
long end = 0;
|
||||
final IntWritable key = new IntWritable(i);
|
||||
final Text value = new Text();
|
||||
while (client.hasNext()) {
|
||||
key.set(i++);
|
||||
value.set(client.next());
|
||||
writer.append(key, value);
|
||||
if (i % 100000 == 0) {
|
||||
end = System.currentTimeMillis();
|
||||
final float time = (end - start) / 1000.0F;
|
||||
System.out
|
||||
.println(
|
||||
String.format("Imported %d records last 100000 imported in %f seconds", i, time));
|
||||
start = System.currentTimeMillis();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static String decompressBlob(final String blob) {
|
||||
try {
|
||||
byte[] byteArray = Base64.decodeBase64(blob.getBytes());
|
||||
final Inflater decompresser = new Inflater();
|
||||
decompresser.setInput(byteArray);
|
||||
final ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length);
|
||||
byte[] buffer = new byte[8192];
|
||||
while (!decompresser.finished()) {
|
||||
int size = decompresser.inflate(buffer);
|
||||
bos.write(buffer, 0, size);
|
||||
}
|
||||
byte[] unzippeddata = bos.toByteArray();
|
||||
decompresser.end();
|
||||
return new String(unzippeddata);
|
||||
} catch (Throwable e) {
|
||||
throw new RuntimeException("Wrong record:" + blob, e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,114 @@
|
|||
|
||||
package eu.dnetlib.doiboost.crossref;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.HttpPost;
|
||||
import org.apache.http.entity.StringEntity;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.jayway.jsonpath.JsonPath;
|
||||
|
||||
public class ESClient implements Iterator<String> {
|
||||
private static final Logger logger = LoggerFactory.getLogger(ESClient.class);
|
||||
|
||||
static final String blobPath = "$.hits[*].hits[*]._source.blob";
|
||||
static final String scrollIdPath = "$._scroll_id";
|
||||
static final String JSON_NO_TS = "{\"size\":1000}";
|
||||
static final String JSON_WITH_TS = "{\"size\":1000, \"query\":{\"range\":{\"timestamp\":{\"gte\":%d}}}}";
|
||||
static final String JSON_SCROLL = "{\"scroll_id\":\"%s\",\"scroll\" : \"1m\"}";
|
||||
|
||||
private final String scrollId;
|
||||
|
||||
private List<String> buffer;
|
||||
|
||||
private final String esHost;
|
||||
|
||||
public ESClient(final String esHost, final String esIndex) throws IOException {
|
||||
|
||||
this.esHost = esHost;
|
||||
final String body = getResponse(
|
||||
String.format("http://%s:9200/%s/_search?scroll=1m", esHost, esIndex), JSON_NO_TS);
|
||||
scrollId = getJPathString(scrollIdPath, body);
|
||||
buffer = getBlobs(body);
|
||||
}
|
||||
|
||||
public ESClient(final String esHost, final String esIndex, final long timestamp)
|
||||
throws IOException {
|
||||
this.esHost = esHost;
|
||||
final String body = getResponse(
|
||||
String.format("http://%s:9200/%s/_search?scroll=1m", esHost, esIndex),
|
||||
String.format(JSON_WITH_TS, timestamp));
|
||||
scrollId = getJPathString(scrollIdPath, body);
|
||||
buffer = getBlobs(body);
|
||||
}
|
||||
|
||||
private String getResponse(final String url, final String json) {
|
||||
CloseableHttpClient client = HttpClients.createDefault();
|
||||
try {
|
||||
|
||||
HttpPost httpPost = new HttpPost(url);
|
||||
if (json != null) {
|
||||
StringEntity entity = new StringEntity(json);
|
||||
httpPost.setEntity(entity);
|
||||
httpPost.setHeader("Accept", "application/json");
|
||||
httpPost.setHeader("Content-type", "application/json");
|
||||
}
|
||||
CloseableHttpResponse response = client.execute(httpPost);
|
||||
|
||||
return IOUtils.toString(response.getEntity().getContent());
|
||||
} catch (Throwable e) {
|
||||
throw new RuntimeException("Error on executing request ", e);
|
||||
} finally {
|
||||
try {
|
||||
client.close();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Unable to close client ", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private String getJPathString(final String jsonPath, final String json) {
|
||||
try {
|
||||
Object o = JsonPath.read(json, jsonPath);
|
||||
if (o instanceof String)
|
||||
return (String) o;
|
||||
return null;
|
||||
} catch (Exception e) {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
private List<String> getBlobs(final String body) {
|
||||
final List<String> res = JsonPath.read(body, "$.hits.hits[*]._source.blob");
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return (buffer != null && !buffer.isEmpty());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
final String nextItem = buffer.remove(0);
|
||||
if (buffer.isEmpty()) {
|
||||
|
||||
final String json_param = String.format(JSON_SCROLL, scrollId);
|
||||
final String body = getResponse(String.format("http://%s:9200/_search/scroll", esHost), json_param);
|
||||
try {
|
||||
buffer = getBlobs(body);
|
||||
} catch (Throwable e) {
|
||||
logger.error("Error on get next page: body:" + body);
|
||||
}
|
||||
}
|
||||
return nextItem;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,98 @@
|
|||
package eu.dnetlib.doiboost.crossref
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.dhp.schema.oaf
|
||||
import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Dataset => OafDataset}
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.apache.hadoop.io.{IntWritable, Text}
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
|
||||
/** Pair made of an author and a first page, both kept as plain strings. */
case class Reference(author: String, firstPage: String)
|
||||
|
||||
/**
  * Spark job that converts the Crossref dump (a sequence file of compressed records)
  * into OAF publications, datasets and relations, keeping one entry per identifier.
  */
object SparkMapDumpIntoOAF {

  /**
    * Chooses which of two results sharing the same id must be kept.
    *
    * A null argument loses against a non-null one. When both carry a last-update
    * timestamp the one with the strictly greater timestamp wins (ties keep `p1`);
    * when `p1` has no timestamp `p2` wins, otherwise `p1` is kept.
    */
  private def mostRecent[T <: oaf.Result](p1: T, p2: T): T = {
    if (p1 == null) p2
    else if (p2 == null) p1
    else if (p1.getLastupdatetimestamp == null) p2
    else if (p2.getLastupdatetimestamp == null) p1
    else if (p1.getLastupdatetimestamp < p2.getLastupdatetimestamp) p2
    else p1
  }

  /**
    * Entry point. Arguments are described by `convert_map_to_oaf_params.json`; the
    * values read here are `master`, `sourcePath` and `targetPath`.
    *
    * Output, all below `targetPath`: `mixObject` (every converted object),
    * `publication`, `dataset` and `relations`.
    */
  def main(args: Array[String]): Unit = {

    val conf: SparkConf = new SparkConf()
    val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json")))
    parser.parseArgument(args)
    val spark: SparkSession =
      SparkSession
        .builder()
        .config(conf)
        .appName(SparkMapDumpIntoOAF.getClass.getSimpleName)
        .master(parser.get("master")).getOrCreate()

    implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication]
    implicit val mapEncoderRelatons: Encoder[Relation] = Encoders.kryo[Relation]
    implicit val mapEncoderDatasets: Encoder[oaf.Dataset] = Encoders.kryo[OafDataset]

    val sc = spark.sparkContext
    val targetPath = parser.get("targetPath")

    // Step 1: decompress and convert every record, then persist the mixed objects.
    sc.sequenceFile(parser.get("sourcePath"), classOf[IntWritable], classOf[Text])
      .map(k => k._2.toString).map(CrossrefImporter.decompressBlob)
      .flatMap(k => Crossref2Oaf.convert(k)).saveAsObjectFile(s"${targetPath}/mixObject")

    // Step 2: read them back and split by type.
    val inputRDD = sc.objectFile[Oaf](s"${targetPath}/mixObject").filter(p => p != null)

    val distinctPubs: RDD[Publication] = inputRDD.filter(k => k != null && k.isInstanceOf[Publication])
      .map(k => k.asInstanceOf[Publication]).map { p: Publication => Tuple2(p.getId, p) }
      .reduceByKey { (p1, p2) => mostRecent(p1, p2) }
      .map(_._2)

    val pubs: Dataset[Publication] = spark.createDataset(distinctPubs)
    pubs.write.mode(SaveMode.Overwrite).save(s"${targetPath}/publication")

    val distincDatasets: RDD[OafDataset] = inputRDD.filter(k => k != null && k.isInstanceOf[OafDataset])
      .map(k => k.asInstanceOf[OafDataset]).map(p => Tuple2(p.getId, p))
      .reduceByKey { (p1, p2) => mostRecent(p1, p2) }
      .map(_._2)

    spark.createDataset(distincDatasets).write.mode(SaveMode.Overwrite).save(s"${targetPath}/dataset")

    // Relations are de-duplicated on the "source::target" pair; any non-null one is kept.
    val distinctRels = inputRDD.filter(k => k != null && k.isInstanceOf[Relation])
      .map(k => k.asInstanceOf[Relation]).map(r => (s"${r.getSource}::${r.getTarget}", r))
      .reduceByKey { case (p1: Relation, p2: Relation) =>
        if (p1 == null) p2 else p1
      }.map(_._2)

    val rels: Dataset[Relation] = spark.createDataset(distinctRels)

    rels.write.mode(SaveMode.Overwrite).save(s"${targetPath}/relations")
  }

}
|
|
@ -0,0 +1,314 @@
|
|||
package eu.dnetlib.doiboost.mag
|
||||
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.{Instance, Journal, Publication, StructuredProperty}
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||
import org.json4s
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.jackson.JsonMethods.parse
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.collection.mutable
|
||||
import scala.util.matching.Regex
|
||||
|
||||
|
||||
// Plain data holders for the MAG (Microsoft Academic Graph) records handled by
// ConversionUtil. NOTE(review): field names presumably mirror the column names of
// the corresponding MAG dump files - confirm against the import job.

/** A MAG paper. */
case class MagPapers(
  PaperId: Long,
  Rank: Integer,
  Doi: String,
  DocType: String,
  PaperTitle: String,
  OriginalTitle: String,
  BookTitle: String,
  Year: Option[Integer],
  Date: Option[java.sql.Timestamp],
  Publisher: String,
  JournalId: Option[Long],
  ConferenceSeriesId: Option[Long],
  ConferenceInstanceId: Option[Long],
  Volume: String,
  Issue: String,
  FirstPage: String,
  LastPage: String,
  ReferenceCount: Option[Long],
  CitationCount: Option[Long],
  EstimatedCitation: Option[Long],
  OriginalVenue: String,
  FamilyId: Option[Long],
  CreatedDate: java.sql.Timestamp
)

/** Abstract of a paper; `IndexedAbstract` is handled by `convertInvertedIndexString`. */
case class MagPaperAbstract(PaperId: Long, IndexedAbstract: String)

/** A MAG author. */
case class MagAuthor(
  AuthorId: Long,
  Rank: Option[Int],
  NormalizedName: Option[String],
  DisplayName: Option[String],
  LastKnownAffiliationId: Option[Long],
  PaperCount: Option[Long],
  CitationCount: Option[Long],
  CreatedDate: Option[java.sql.Timestamp]
)

/** A MAG affiliation. */
case class MagAffiliation(
  AffiliationId: Long,
  Rank: Int,
  NormalizedName: String,
  DisplayName: String,
  GridId: String,
  OfficialPage: String,
  WikiPage: String,
  PaperCount: Long,
  CitationCount: Long,
  Latitude: Option[Float],
  Longitude: Option[Float],
  CreatedDate: java.sql.Timestamp
)

/** Link between a paper, one of its authors and (optionally) an affiliation. */
case class MagPaperAuthorAffiliation(
  PaperId: Long,
  AuthorId: Long,
  AffiliationId: Option[Long],
  AuthorSequenceNumber: Int,
  OriginalAuthor: String,
  OriginalAffiliation: String
)

/** An author together with an affiliation string. */
case class MagAuthorAffiliation(author: MagAuthor, affiliation: String)

/** A paper id together with its authors. */
case class MagPaperWithAuthorList(PaperId: Long, authors: List[MagAuthorAffiliation])

/** One (paper, author, affiliation) triple. */
case class MagPaperAuthorDenormalized(PaperId: Long, author: MagAuthor, affiliation: String)

/** A URL known for a paper. */
case class MagPaperUrl(
  PaperId: Long,
  SourceType: Option[Int],
  SourceUrl: Option[String],
  LanguageCode: Option[String]
)

/** A single source URL. */
case class MagUrlInstance(SourceUrl: String)

/** A paper id together with its source URLs. */
case class MagUrl(PaperId: Long, instances: List[MagUrlInstance])

/** A subject (field of study) with its score. */
case class MagSubject(FieldOfStudyId: Long, DisplayName: String, MainType: Option[String], Score: Float)

/** A paper id together with its subjects. */
case class MagFieldOfStudy(PaperId: Long, subjects: List[MagSubject])

/** A MAG journal. */
case class MagJournal(
  JournalId: Long,
  Rank: Option[Int],
  NormalizedName: Option[String],
  DisplayName: Option[String],
  Issn: Option[String],
  Publisher: Option[String],
  Webpage: Option[String],
  PaperCount: Option[Long],
  CitationCount: Option[Long],
  CreatedDate: Option[java.sql.Timestamp]
)

/** A conference instance attached to a paper. */
case class MagConferenceInstance(
  ci: Long,
  DisplayName: Option[String],
  Location: Option[String],
  StartDate: Option[java.sql.Timestamp],
  EndDate: Option[java.sql.Timestamp],
  PaperId: Long
)
|
||||
|
||||
case object ConversionUtil {
|
||||
|
||||
/**
  * Picks the MAG identifier out of a list of identifiers.
  *
  * @param pids candidate identifiers
  * @return the first identifier made of digits only, or null when there is none
  */
def extractMagIdentifier(pids: mutable.Buffer[String]): String = {
  val digitsOnly: Regex = "^[0-9]+$".r
  pids
    .filter(candidate => digitsOnly.findFirstIn(candidate).isDefined)
    .headOption
    .orNull
}
|
||||
|
||||
|
||||
/**
  * Merges two publications.
  *
  * @return `a` after `a.mergeFrom(b)` when both are non-null; otherwise whichever
  *         of the two is non-null (null when both are)
  */
def mergePublication(a: Publication, b: Publication): Publication = {
  if (a == null) {
    b
  } else if (b == null) {
    a
  } else {
    a.mergeFrom(b)
    a
  }
}
|
||||
|
||||
/**
  * Chooses between two versions of a paper, preferring the most recently created.
  *
  * A null paper loses against a non-null one. When both have a `CreatedDate` the
  * one created strictly later wins (ties keep `p1`); when `p1` has no date `p2`
  * wins, otherwise `p1` is kept.
  */
def choiceLatestMagArtitcle(p1: MagPapers, p2: MagPapers): MagPapers = {
  if (p1 == null) p2
  else if (p2 == null) p1
  else if (p1.CreatedDate == null) p2
  else if (p2.CreatedDate == null) p1
  else if (p1.CreatedDate.before(p2.CreatedDate)) p2
  else p1
}
|
||||
|
||||
|
||||
/**
  * Sets the description of a publication from its abstract, when there is one.
  *
  * @param inputItem ((key, publication), abstract); the abstract may be null
  * @return the same publication instance, possibly updated
  */
def updatePubsWithDescription(inputItem: ((String, Publication), MagPaperAbstract)): Publication = {
  val publication = inputItem._1._2
  Option(inputItem._2).foreach { paperAbstract =>
    publication.setDescription(List(asField(paperAbstract.IndexedAbstract)).asJava)
  }
  publication
}
|
||||
|
||||
|
||||
/**
  * Attaches conference information to a publication as its journal.
  *
  * When a conference instance is given, a new [[Journal]] replaces the one of the
  * publication: place and name are set when available, the date (as
  * "yyyy-MM-dd - yyyy-MM-dd") only when both start and end dates are available.
  *
  * @param inputItem ((key, publication), conference instance); the instance may be null
  * @return the same publication instance, possibly updated
  */
def updatePubsWithConferenceInfo(inputItem: ((String, Publication), MagConferenceInstance)): Publication = {
  val publication: Publication = inputItem._1._2
  val ci: MagConferenceInstance = inputItem._2

  if (ci != null) {
    val j: Journal = new Journal
    if (ci.Location.isDefined)
      j.setConferenceplace(ci.Location.get)
    // The name is optional too: calling .get on a missing DisplayName used to throw.
    if (ci.DisplayName.isDefined)
      j.setName(ci.DisplayName.get)
    if (ci.StartDate.isDefined && ci.EndDate.isDefined) {
      // Timestamp.toString starts with the date as yyyy-MM-dd
      j.setConferencedate(s"${ci.StartDate.get.toString.substring(0, 10)} - ${ci.EndDate.get.toString.substring(0, 10)}")
    }

    publication.setJournal(j)
  }
  publication
}
|
||||
|
||||
/**
  * Sets the subjects of a publication from its MAG fields of study.
  *
  * Every field of study yields a keyword for its display name; when it has a main
  * type, a keyword for the main type and, if the main type contains a ".", one for
  * the part before the first "." are added. Only the main-type keywords carry the
  * data info generated from the score.
  *
  * @param item ((key, publication), fields of study); the latter may be null or empty
  * @return the same publication instance, possibly updated
  */
def updatePubsWithSubject(item: ((String, Publication), MagFieldOfStudy)): Publication = {
  val publication = item._1._2
  val fieldOfStudy = item._2

  val hasSubjects = fieldOfStudy != null && fieldOfStudy.subjects != null && fieldOfStudy.subjects.nonEmpty
  if (hasSubjects) {
    val keywords: List[StructuredProperty] = fieldOfStudy.subjects.flatMap { subject =>
      val nameKeyword = createSP(subject.DisplayName, "keywords", "dnet:subject_classification_typologies")
      val scoreInfo = DoiBoostMappingUtil.generateDataInfo(subject.Score.toString)

      val mainTypeKeywords: List[StructuredProperty] =
        if (subject.MainType.isDefined) {
          val mainType = subject.MainType.get
          val mainTypeKeyword = createSP(subject.MainType.get, "keywords", "dnet:subject_classification_typologies")
          mainTypeKeyword.setDataInfo(scoreInfo)
          if (mainType.contains(".")) {
            val topLevelKeyword = createSP(mainType.split("\\.").head, "keywords", "dnet:subject_classification_typologies")
            topLevelKeyword.setDataInfo(scoreInfo)
            List(mainTypeKeyword, topLevelKeyword)
          } else {
            List(mainTypeKeyword)
          }
        } else {
          Nil
        }

      nameKeyword :: mainTypeKeywords
    }
    publication.setSubject(keywords.asJava)
  }
  publication
}
|
||||
|
||||
|
||||
|
||||
/**
  * Replaces the instances of a publication with a single MAG instance.
  *
  * The instance URLs are the non-empty source URLs (when any are given) followed
  * by the Microsoft Academic page built from the MAG identifier found among the
  * original ids of the publication.
  *
  * @param a (publication, urls); the urls may be null
  * @return the same publication instance, updated
  */
def addInstances(a: (Publication, MagUrl)): Publication = {
  val pub = a._1
  val urls = a._2

  val instance = new Instance

  val sourceUrls: List[String] =
    if (urls == null) List()
    else urls.instances.filter(entry => entry.SourceUrl.nonEmpty).map(entry => entry.SourceUrl)
  val magPage = s"https://academic.microsoft.com/#/detail/${extractMagIdentifier(pub.getOriginalId.asScala)}"

  instance.setUrl((sourceUrls ::: List(magPage)).asJava)
  instance.setCollectedfrom(createMAGCollectedFrom())
  pub.setInstance(List(instance).asJava)
  pub
}
|
||||
|
||||
|
||||
/**
  * Returns a copy of the abstract whose `IndexedAbstract` has been rebuilt into
  * plain text by `convertInvertedIndexString`.
  */
def transformPaperAbstract(input: MagPaperAbstract): MagPaperAbstract = {
  val plainText = convertInvertedIndexString(input.IndexedAbstract)
  input.copy(IndexedAbstract = plainText)
}
|
||||
|
||||
|
||||
/**
  * Builds a [[Publication]] from a MAG paper, its journal and its authors.
  *
  * @param inputParams ((paper, journal), authors); the journal may be null
  * @return the new publication, with pid, original ids, identifier, titles, source,
  *         authors, acceptance date (when available), publisher, journal (when
  *         it has a display name), collectedFrom and data info
  */
def createOAFFromJournalAuthorPaper(inputParams: ((MagPapers, MagJournal), MagPaperWithAuthorList)): Publication = {
  val paper = inputParams._1._1
  val journal = inputParams._1._2
  val authors = inputParams._2

  val pub = new Publication
  pub.setPid(List(createSP(paper.Doi.toLowerCase, "doi", PID_TYPES)).asJava)
  pub.setOriginalId(List(paper.PaperId.toString, paper.Doi.toLowerCase).asJava)

  //Set identifier as {50|60} | doiboost____::md5(DOI)
  pub.setId(generateIdentifier(pub, paper.Doi.toLowerCase))

  val mainTitles = createSP(paper.PaperTitle, "main title", "dnet:dataCite_title")
  val originalTitles = createSP(paper.OriginalTitle, "alternative title", "dnet:dataCite_title")
  pub.setTitle(List(mainTitles, originalTitles).asJava)

  pub.setSource(List(asField(paper.BookTitle)).asJava)

  val authorsOAF = authors.authors.map { f: MagAuthorAffiliation =>

    val a: eu.dnetlib.dhp.schema.oaf.Author = new eu.dnetlib.dhp.schema.oaf.Author

    // DisplayName is optional: an author without it must not fail the conversion.
    if (f.author.DisplayName.isDefined)
      a.setFullname(f.author.DisplayName.get)

    if (f.affiliation != null)
      a.setAffiliation(List(asField(f.affiliation)).asJava)
    a.setPid(List(createSP(s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", "URL", PID_TYPES)).asJava)
    a
  }
  pub.setAuthor(authorsOAF.asJava)

  if (paper.Date != null && paper.Date.isDefined) {
    // Timestamp.toString starts with the date as yyyy-MM-dd
    pub.setDateofacceptance(asField(paper.Date.get.toString.substring(0, 10)))
  }
  pub.setPublisher(asField(paper.Publisher))

  if (journal != null && journal.DisplayName.isDefined) {
    val j = new Journal

    j.setName(journal.DisplayName.get)
    j.setSp(paper.FirstPage)
    j.setEp(paper.LastPage)
    // The publisher of the journal, when known, overrides the one of the paper.
    if (journal.Publisher.isDefined)
      pub.setPublisher(asField(journal.Publisher.get))
    if (journal.Issn.isDefined)
      j.setIssnPrinted(journal.Issn.get)
    pub.setJournal(j)
  }
  pub.setCollectedfrom(List(createMAGCollectedFrom()).asJava)
  pub.setDataInfo(generateDataInfo())
  pub
}
|
||||
|
||||
|
||||
/**
  * Builds a [[Publication]] from a MAG paper, its authors and its abstract.
  *
  * @param inputParams ((paper, authors), abstract); the abstract may be null
  * @return the new publication, with pid, original ids, identifier, titles, source,
  *         description (when an abstract is given), acceptance date (when
  *         available) and authors
  */
def createOAF(inputParams: ((MagPapers, MagPaperWithAuthorList), MagPaperAbstract)): Publication = {

  val paper = inputParams._1._1
  val authors = inputParams._1._2
  val description = inputParams._2

  val pub = new Publication
  pub.setPid(List(createSP(paper.Doi.toLowerCase, "doi", PID_TYPES)).asJava)
  pub.setOriginalId(List(paper.PaperId.toString, paper.Doi.toLowerCase).asJava)

  //Set identifier as {50|60} | doiboost____::md5(DOI)
  pub.setId(generateIdentifier(pub, paper.Doi.toLowerCase))

  val mainTitles = createSP(paper.PaperTitle, "main title", "dnet:dataCite_title")
  val originalTitles = createSP(paper.OriginalTitle, "alternative title", "dnet:dataCite_title")
  pub.setTitle(List(mainTitles, originalTitles).asJava)

  pub.setSource(List(asField(paper.BookTitle)).asJava)

  if (description != null) {
    pub.setDescription(List(asField(description.IndexedAbstract)).asJava)
  }

  val authorsOAF = authors.authors.map { f: MagAuthorAffiliation =>

    val a: eu.dnetlib.dhp.schema.oaf.Author = new eu.dnetlib.dhp.schema.oaf.Author

    // DisplayName is optional: an author without it must not fail the conversion.
    if (f.author.DisplayName.isDefined)
      a.setFullname(f.author.DisplayName.get)

    if (f.affiliation != null)
      a.setAffiliation(List(asField(f.affiliation)).asJava)

    a.setPid(List(createSP(s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", "URL", PID_TYPES)).asJava)

    a
  }

  // paper.Date is an Option: the date must be read from its content. Calling
  // toString on the Option itself produced "Some(2020-" or failed on "None".
  if (paper.Date != null && paper.Date.isDefined) {
    // Timestamp.toString starts with the date as yyyy-MM-dd
    pub.setDateofacceptance(asField(paper.Date.get.toString.substring(0, 10)))
  }

  pub.setAuthor(authorsOAF.asJava)

  pub
}
|
||||
|
||||
|
||||
def convertInvertedIndexString(json_input: String): String = {
|
||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
lazy val json: json4s.JValue = parse(json_input)
|
||||
val idl = (json \ "IndexLength").extract[Int]
|
||||
if (idl > 0) {
|
||||
val res = Array.ofDim[String](idl)
|
||||
|
||||
val iid = (json \ "InvertedIndex").extract[Map[String, List[Int]]]
|
||||
|
||||
for {(k: String, v: List[Int]) <- iid} {
|
||||
v.foreach(item => res(item) = k)
|
||||
}
|
||||
(0 until idl).foreach(i => {
|
||||
if (res(i) == null)
|
||||
res(i) = ""
|
||||
})
|
||||
return res.mkString(" ")
|
||||
}
|
||||
""
|
||||
}
|
||||
}
|
|
@ -0,0 +1,92 @@
|
|||
package eu.dnetlib.doiboost.mag
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.sql.{SaveMode, SparkSession}
|
||||
import org.apache.spark.sql.types._
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
import org.apache.spark.sql.functions._
|
||||
|
||||
/** Spark job that reads the tab-separated MAG dump files and re-saves each one as a Spark
  * dataset under the target path, using the column schemas declared in `stream`.
  */
object SparkImportMagIntoDataset {

  /** Maps the type names used in the column declarations to Spark SQL types. */
  val datatypedict = Map(
    "int" -> IntegerType,
    "uint" -> IntegerType,
    "long" -> LongType,
    "ulong" -> LongType,
    "float" -> FloatType,
    "string" -> StringType,
    "DateTime" -> DateType
  )

  /** For every stream name: the file path relative to the source path and the ordered column
    * declarations in the form `name:type`, where a trailing `?` marks the column as nullable.
    */
  val stream = Map(
    "Affiliations" -> Tuple2("mag/Affiliations.txt", Seq("AffiliationId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "GridId:string", "OfficialPage:string", "WikiPage:string", "PaperCount:long", "CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")),
    "Authors" -> Tuple2("mag/Authors.txt", Seq("AuthorId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "LastKnownAffiliationId:long?", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")),
    "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")),
    "ConferenceSeries" -> Tuple2("mag/ConferenceSeries.txt", Seq("ConferenceSeriesId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")),
    "EntityRelatedEntities" -> Tuple2("advanced/EntityRelatedEntities.txt", Seq("EntityId:long", "EntityType:string", "RelatedEntityId:long", "RelatedEntityType:string", "RelatedType:int", "Score:float")),
    "FieldOfStudyChildren" -> Tuple2("advanced/FieldOfStudyChildren.txt", Seq("FieldOfStudyId:long", "ChildFieldOfStudyId:long")),
    "FieldOfStudyExtendedAttributes" -> Tuple2("advanced/FieldOfStudyExtendedAttributes.txt", Seq("FieldOfStudyId:long", "AttributeType:int", "AttributeValue:string")),
    "FieldsOfStudy" -> Tuple2("advanced/FieldsOfStudy.txt", Seq("FieldOfStudyId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "MainType:string", "Level:int", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")),
    "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")),
    "PaperAbstractsInvertedIndex" -> Tuple2("nlp/PaperAbstractsInvertedIndex.txt.*", Seq("PaperId:long", "IndexedAbstract:string")),
    "PaperAuthorAffiliations" -> Tuple2("mag/PaperAuthorAffiliations.txt", Seq("PaperId:long", "AuthorId:long", "AffiliationId:long?", "AuthorSequenceNumber:uint", "OriginalAuthor:string", "OriginalAffiliation:string")),
    "PaperCitationContexts" -> Tuple2("nlp/PaperCitationContexts.txt", Seq("PaperId:long", "PaperReferenceId:long", "CitationContext:string")),
    "PaperExtendedAttributes" -> Tuple2("mag/PaperExtendedAttributes.txt", Seq("PaperId:long", "AttributeType:int", "AttributeValue:string")),
    "PaperFieldsOfStudy" -> Tuple2("advanced/PaperFieldsOfStudy.txt", Seq("PaperId:long", "FieldOfStudyId:long", "Score:float")),
    "PaperRecommendations" -> Tuple2("advanced/PaperRecommendations.txt", Seq("PaperId:long", "RecommendedPaperId:long", "Score:float")),
    "PaperReferences" -> Tuple2("mag/PaperReferences.txt", Seq("PaperId:long", "PaperReferenceId:long")),
    "PaperResources" -> Tuple2("mag/PaperResources.txt", Seq("PaperId:long", "ResourceType:int", "ResourceUrl:string", "SourceUrl:string", "RelationshipType:int")),
    "PaperUrls" -> Tuple2("mag/PaperUrls.txt", Seq("PaperId:long", "SourceType:int?", "SourceUrl:string", "LanguageCode:string")),
    "Papers" -> Tuple2("mag/Papers.txt", Seq("PaperId:long", "Rank:uint", "Doi:string", "DocType:string", "PaperTitle:string", "OriginalTitle:string", "BookTitle:string", "Year:int?", "Date:DateTime?", "Publisher:string", "JournalId:long?", "ConferenceSeriesId:long?", "ConferenceInstanceId:long?", "Volume:string", "Issue:string", "FirstPage:string", "LastPage:string", "ReferenceCount:long", "CitationCount:long", "EstimatedCitation:long", "OriginalVenue:string", "FamilyId:long?", "CreatedDate:DateTime")),
    "RelatedFieldOfStudy" -> Tuple2("advanced/RelatedFieldOfStudy.txt", Seq("FieldOfStudyId1:long", "Type1:string", "FieldOfStudyId2:long", "Type2:string", "Rank:float"))
  )

  /** Builds the Spark schema for one stream from its column declarations.
    *
    * @param streamName a key of `stream`
    * @return a `StructType` with one field per declared column, in declaration order
    */
  def getSchema(streamName: String): StructType = {
    val columns: Seq[String] = stream(streamName)._2
    columns.foldLeft(new StructType()) { (schema, column) =>
      val parts = column.split(":")
      val declaredType = parts.last
      val nullable = declaredType.endsWith("?")
      val typeName = if (nullable) declaredType.replace("?", "") else declaredType
      schema.add(StructField(parts.head, datatypedict(typeName), nullable))
    }
  }

  /** Entry point: parses the job arguments, then converts every stream declared in `stream`.
    *
    * @param args command-line arguments; `master`, `sourcePath` and `targetPath` are read
    */
  def main(args: Array[String]): Unit = {
    val logger: Logger = LoggerFactory.getLogger(getClass)
    val conf: SparkConf = new SparkConf()
    val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/mag/convert_mag_to_oaf_params.json")))
    parser.parseArgument(args)

    val spark: SparkSession = SparkSession
      .builder()
      .config(conf)
      .appName(getClass.getSimpleName)
      .master(parser.get("master"))
      .getOrCreate()

    stream.foreach { case (name, (relativePath, _)) =>
      val input = s"${parser.get("sourcePath")}/$relativePath"
      val output = s"${parser.get("targetPath")}/$name"

      val frame = spark.read
        .option("header", "false")
        .option("charset", "UTF8")
        .option("delimiter", "\t")
        .schema(getSchema(name))
        .csv(input)

      logger.info(s"Converting $name")
      frame.write.mode(SaveMode.Overwrite).save(output)
    }
  }
}
|
|
@ -0,0 +1,157 @@
|
|||
package eu.dnetlib.doiboost.mag
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.functions._
|
||||
import org.apache.spark.sql._
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/** Spark job that turns the imported MAG datasets into OAF [[Publication]] records.
  *
  * Each phase writes its result under the target path and the following phase reads it back,
  * so every phase works on a materialised dataset.
  */
object SparkPreProcessMAG {

  /** Runs all phases in sequence.
    *
    * @param args command-line arguments; `master`, `sourcePath` and `targetPath` are read
    */
  def main(args: Array[String]): Unit = {

    val logger: Logger = LoggerFactory.getLogger(getClass)
    val conf: SparkConf = new SparkConf()
    val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/mag/preprocess_mag_params.json")))
    parser.parseArgument(args)
    val spark: SparkSession =
      SparkSession
        .builder()
        .config(conf)
        .appName(getClass.getSimpleName)
        .master(parser.get("master")).getOrCreate()

    val sourcePath = parser.get("sourcePath")
    val targetPath = parser.get("targetPath")
    import spark.implicits._
    implicit val mapEncoderPubs: Encoder[Publication] = org.apache.spark.sql.Encoders.kryo[Publication]
    implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPubs)

    // Reloads the publications written by a previous step, keyed by the MAG identifier
    // extracted from their original ids, ready to be joined on column "_1".
    def loadKeyedPublications(stepName: String): Dataset[(String, Publication)] =
      spark.read.load(s"$targetPath/$stepName").as[Publication]
        .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)]

    logger.info("Phase 1) make unique DOI in Papers:")
    val d: Dataset[MagPapers] = spark.read.load(s"$sourcePath/Papers").as[MagPapers]

    // Filtering Papers with DOI, and since for the same DOI we have multiple version of item with different PapersId we get the last one
    val result: RDD[MagPapers] = d.where(col("Doi").isNotNull)
      .rdd
      .map{ p: MagPapers => Tuple2(p.Doi, p) }
      .reduceByKey((p1:MagPapers,p2:MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1,p2))
      .map(_._2)

    val distinctPaper: Dataset[MagPapers] = spark.createDataset(result)
    distinctPaper.write.mode(SaveMode.Overwrite).save(s"$targetPath/Papers_distinct")

    logger.info("Phase 2) Convert inverted-index abstracts")
    val pa = spark.read.load(s"$sourcePath/PaperAbstractsInvertedIndex").as[MagPaperAbstract]
    pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"$targetPath/PaperAbstract")

    logger.info("Phase 3) Group Author by PaperId")
    val authors = spark.read.load(s"$sourcePath/Authors").as[MagAuthor]

    val affiliation = spark.read.load(s"$sourcePath/Affiliations").as[MagAffiliation]
    val paperAuthorAffiliation = spark.read.load(s"$sourcePath/PaperAuthorAffiliations").as[MagPaperAuthorAffiliation]

    paperAuthorAffiliation.joinWith(authors, paperAuthorAffiliation("AuthorId").equalTo(authors("AuthorId")))
      .map { case (a: MagPaperAuthorAffiliation, b: MagAuthor) => (a.AffiliationId, MagPaperAuthorDenormalized(a.PaperId, b, null)) }
      .joinWith(affiliation, affiliation("AffiliationId").equalTo(col("_1")), "left")
      .map(s => {
        val mpa = s._1._2
        val af = s._2
        // Left join: the affiliation side is null when no affiliation matched.
        if (af != null) {
          MagPaperAuthorDenormalized(mpa.PaperId, mpa.author, af.DisplayName)
        } else
          mpa
      }).groupBy("PaperId").agg(collect_list(struct($"author", $"affiliation")).as("authors"))
      .write.mode(SaveMode.Overwrite).save(s"$targetPath/merge_step_1_paper_authors")

    logger.info("Phase 4) create First Version of publication Entity with Paper Journal and Authors")

    val journals = spark.read.load(s"$sourcePath/Journals").as[MagJournal]

    val papers = spark.read.load(s"$targetPath/Papers_distinct").as[MagPapers]

    val paperWithAuthors = spark.read.load(s"$targetPath/merge_step_1_paper_authors").as[MagPaperWithAuthorList]

    val firstJoin = papers.joinWith(journals, papers("JournalId").equalTo(journals("JournalId")), "left")
    firstJoin.joinWith(paperWithAuthors, firstJoin("_1.PaperId").equalTo(paperWithAuthors("PaperId")), "left")
      .map { a => ConversionUtil.createOAFFromJournalAuthorPaper(a) }
      .write.mode(SaveMode.Overwrite).save(s"$targetPath/merge_step_2")

    val conference = spark.read.load(s"$sourcePath/ConferenceInstances")
      .select($"ConferenceInstanceId".as("ci"), $"DisplayName", $"Location", $"StartDate",$"EndDate" )
    val conferenceInstance = conference.joinWith(papers, papers("ConferenceInstanceId").equalTo(conference("ci")))
      .select($"_1.ci", $"_1.DisplayName", $"_1.Location", $"_1.StartDate",$"_1.EndDate", $"_2.PaperId").as[MagConferenceInstance]

    loadKeyedPublications("merge_step_2")
      .joinWith(conferenceInstance, col("_1").equalTo(conferenceInstance("PaperId")), "left")
      .map(item => ConversionUtil.updatePubsWithConferenceInfo(item))
      .write
      .mode(SaveMode.Overwrite)
      .save(s"$targetPath/merge_step_2_conference")

    val paperUrlDataset = spark.read.load(s"$sourcePath/PaperUrls").as[MagPaperUrl].groupBy("PaperId").agg(collect_list(struct("sourceUrl")).as("instances")).as[MagUrl]

    logger.info("Phase 5) enrich publication with URL and Instances")
    loadKeyedPublications("merge_step_2_conference")
      .joinWith(paperUrlDataset, col("_1").equalTo(paperUrlDataset("PaperId")), "left")
      .map { a: ((String, Publication), MagUrl) => ConversionUtil.addInstances((a._1._2, a._2)) }
      .write.mode(SaveMode.Overwrite)
      .save(s"$targetPath/merge_step_3")

    logger.info("Phase 6) Enrich Publication with description")
    // The plain-text abstracts were materialised in phase 2.
    val paperAbstract = spark.read.load(s"$targetPath/PaperAbstract").as[MagPaperAbstract]

    loadKeyedPublications("merge_step_3")
      .joinWith(paperAbstract, col("_1").equalTo(paperAbstract("PaperId")), "left")
      .map(item => ConversionUtil.updatePubsWithDescription(item))
      .write.mode(SaveMode.Overwrite).save(s"$targetPath/merge_step_4")

    logger.info("Phase 7) Enrich Publication with FieldOfStudy")

    val fos = spark.read.load(s"$sourcePath/FieldsOfStudy").select($"FieldOfStudyId".alias("fos"), $"DisplayName", $"MainType")

    val pfos = spark.read.load(s"$sourcePath/PaperFieldsOfStudy")

    val paperField = pfos.joinWith(fos, fos("fos").equalTo(pfos("FieldOfStudyId")))
      .select($"_1.FieldOfStudyId", $"_2.DisplayName", $"_2.MainType", $"_1.PaperId", $"_1.Score")
      .groupBy($"PaperId").agg(collect_list(struct($"FieldOfStudyId", $"DisplayName", $"MainType", $"Score")).as("subjects"))
      .as[MagFieldOfStudy]

    loadKeyedPublications("merge_step_4")
      .joinWith(paperField, col("_1").equalTo(paperField("PaperId")), "left")
      .map(item => ConversionUtil.updatePubsWithSubject(item))
      .write.mode(SaveMode.Overwrite)
      .save(s"$targetPath/mag_publication")

    // Final pass: publications sharing the same id are merged into a single record.
    val s:RDD[Publication] = spark.read.load(s"$targetPath/mag_publication").as[Publication]
      .map(p=>Tuple2(p.getId, p)).rdd.reduceByKey((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b))
      .map(_._2)

    spark.createDataset(s).as[Publication].write.mode(SaveMode.Overwrite).save(s"$targetPath/mag_publication_u")

  }
}
|
|
@ -0,0 +1,146 @@
|
|||
|
||||
package eu.dnetlib.doiboost.orcid;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URI;
|
||||
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.IOUtils;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.io.compress.CompressionCodec;
|
||||
import org.apache.hadoop.io.compress.CompressionCodecFactory;
|
||||
import org.mortbay.log.Log;
|
||||
|
||||
import eu.dnetlib.doiboost.orcid.json.JsonWriter;
|
||||
import eu.dnetlib.doiboost.orcid.model.WorkData;
|
||||
import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser;
|
||||
|
||||
public class ActivitiesDecompressor {
|
||||
|
||||
private static final int MAX_XML_WORKS_PARSED = -1;
|
||||
private static final int XML_WORKS_PARSED_COUNTER_LOG_INTERVAL = 100000;
|
||||
|
||||
public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath)
|
||||
throws Exception {
|
||||
String uri = inputUri;
|
||||
FileSystem fs = FileSystem.get(URI.create(uri), conf);
|
||||
Path inputPath = new Path(uri);
|
||||
CompressionCodecFactory factory = new CompressionCodecFactory(conf);
|
||||
CompressionCodec codec = factory.getCodec(inputPath);
|
||||
if (codec == null) {
|
||||
System.err.println("No codec found for " + uri);
|
||||
System.exit(1);
|
||||
}
|
||||
CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension());
|
||||
InputStream gzipInputStream = null;
|
||||
try {
|
||||
gzipInputStream = codec.createInputStream(fs.open(inputPath));
|
||||
parseTarActivities(fs, conf, gzipInputStream, outputPath);
|
||||
|
||||
} finally {
|
||||
Log.debug("Closing gzip stream");
|
||||
IOUtils.closeStream(gzipInputStream);
|
||||
}
|
||||
}
|
||||
|
||||
private static void parseTarActivities(
|
||||
FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) {
|
||||
int counter = 0;
|
||||
int doiFound = 0;
|
||||
int errorFromOrcidFound = 0;
|
||||
int xmlParserErrorFound = 0;
|
||||
try (TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) {
|
||||
TarArchiveEntry entry = null;
|
||||
|
||||
try (SequenceFile.Writer writer = SequenceFile
|
||||
.createWriter(
|
||||
conf,
|
||||
SequenceFile.Writer.file(outputPath),
|
||||
SequenceFile.Writer.keyClass(Text.class),
|
||||
SequenceFile.Writer.valueClass(Text.class))) {
|
||||
while ((entry = tais.getNextTarEntry()) != null) {
|
||||
String filename = entry.getName();
|
||||
|
||||
try {
|
||||
if (entry.isDirectory() || !filename.contains("works")) {
|
||||
|
||||
} else {
|
||||
Log.debug("XML work entry name: " + entry.getName());
|
||||
counter++;
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from
|
||||
// tarInput
|
||||
String line;
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
while ((line = br.readLine()) != null) {
|
||||
buffer.append(line);
|
||||
}
|
||||
WorkData workData = XMLRecordParser.VTDParseWorkData(buffer.toString().getBytes());
|
||||
if (workData != null) {
|
||||
if (workData.getErrorCode() != null) {
|
||||
errorFromOrcidFound += 1;
|
||||
Log
|
||||
.debug(
|
||||
"error from Orcid with code "
|
||||
+ workData.getErrorCode()
|
||||
+ " for entry "
|
||||
+ entry.getName());
|
||||
continue;
|
||||
}
|
||||
if (workData.isDoiFound()) {
|
||||
String jsonData = JsonWriter.create(workData);
|
||||
Log.debug("oid: " + workData.getOid() + " data: " + jsonData);
|
||||
|
||||
final Text key = new Text(workData.getOid());
|
||||
final Text value = new Text(jsonData);
|
||||
|
||||
try {
|
||||
writer.append(key, value);
|
||||
} catch (IOException e) {
|
||||
Log.debug("Writing to sequence file: " + e.getMessage());
|
||||
Log.debug(e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
doiFound += 1;
|
||||
}
|
||||
|
||||
} else {
|
||||
Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer.toString());
|
||||
xmlParserErrorFound += 1;
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
Log
|
||||
.warn(
|
||||
"Parsing work from tar archive and xml work: " + filename + " " + e.getMessage());
|
||||
Log.warn(e);
|
||||
}
|
||||
|
||||
if ((counter % XML_WORKS_PARSED_COUNTER_LOG_INTERVAL) == 0) {
|
||||
Log.info("Current xml works parsed: " + counter);
|
||||
}
|
||||
|
||||
if ((MAX_XML_WORKS_PARSED > -1) && (counter > MAX_XML_WORKS_PARSED)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
Log.warn("Parsing work from gzip archive: " + e.getMessage());
|
||||
Log.warn(e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
Log.info("Activities parse completed");
|
||||
Log.info("Total XML works parsed: " + counter);
|
||||
Log.info("Total doi found: " + doiFound);
|
||||
Log.info("Error from Orcid found: " + errorFromOrcidFound);
|
||||
Log.info("Error parsing xml work found: " + xmlParserErrorFound);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
package eu.dnetlib.doiboost.orcid
|
||||
|
||||
import java.io.IOException
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.{Author, Publication}
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil.{ORCID, PID_TYPES, createSP, generateDataInfo, generateIdentifier}
|
||||
import eu.dnetlib.doiboost.crossref.Crossref2Oaf
|
||||
import org.apache.commons.lang.StringUtils
|
||||
import org.codehaus.jackson.map.ObjectMapper
|
||||
import org.json4s
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.jackson.JsonMethods.parse
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
|
||||
/** One ORCID author entry attached to a DOI. */
case class ORCIDItem(oid:String,name:String,surname:String,creditName:String,errorCode:String)

/** A DOI together with the ORCID authors associated with it. */
case class ORCIDElement(doi:String, authors:List[ORCIDItem])

/** Conversion helpers from ORCID records to OAF [[Publication]] objects. */
object ORCIDToOAF {
  import scala.util.control.NonFatal

  // Bound to this object so log lines are attributed to the ORCID mapping.
  val logger: Logger = LoggerFactory.getLogger(getClass)
  val mapper = new ObjectMapper

  /** Tells whether `inputStr` can be parsed as JSON by the shared mapper.
    *
    * @return true when parsing succeeds, false when it raises an `IOException`
    */
  def isJsonValid(inputStr: String): Boolean = {
    try {
      mapper.readTree(inputStr)
      true
    } catch {
      case _: IOException =>
        false
    }
  }

  /** Splits an input line into the DOI and the JSON list that follows it.
    *
    * The list starts at the first '[' and runs up to (excluding) the last character of the
    * input; the DOI is the text between the first character and the character before '['.
    *
    * @return `(doi, orcidList)`, or null when '[' is found before index 5 (or not at all) or
    *         the list is not valid JSON
    */
  def extractValueFromInputString(input: String): (String, String) = {
    val i = input.indexOf('[')
    if (i < 5) {
      return null
    }
    val orcidList = input.substring(i, input.length - 1)
    val doi = input.substring(1, i - 1)
    if (isJsonValid(orcidList)) {
      (doi, orcidList)
    } else null
  }

  /** Creates a publication carrying the DOI and the ORCID authors of `input`.
    *
    * @return the publication, or null when building the author list or the provenance fails
    */
  def convertTOOAF(input:ORCIDElement) :Publication = {
    val doi = input.doi
    val pub:Publication = new Publication
    pub.setPid(List(createSP(doi, "doi", PID_TYPES)).asJava)
    pub.setDataInfo(generateDataInfo())
    pub.setId(generateIdentifier(pub, doi.toLowerCase))
    try{
      pub.setAuthor(input.authors.map(a=> {
        generateAuhtor(a.name, a.surname, a.creditName, a.oid)
      }).asJava)
      pub.setCollectedfrom(List(DoiBoostMappingUtil.createORIDCollectedFrom()).asJava)
      // NOTE(review): data info is already set above; this second assignment looks redundant
      // but is kept because generateDataInfo is not visible from here.
      pub.setDataInfo(DoiBoostMappingUtil.generateDataInfo())
      pub
    } catch {
      // Fatal JVM errors are left to propagate; everything else is reported with its cause.
      case NonFatal(e) =>
        logger.error(s"ERROR ON GENERATE Publication from $input", e)
        null
    }
  }

  /** Creates an author from the ORCID name parts.
    *
    * @param given    given name
    * @param family   family name
    * @param fullName used as full name when non-empty, otherwise "given family" is used
    * @param orcid    ORCID identifier; added as pid only when not blank
    */
  def generateAuhtor(given: String, family: String, fullName:String, orcid: String): Author = {
    val a = new Author
    a.setName(given)
    a.setSurname(family)
    if (fullName!= null && fullName.nonEmpty)
      a.setFullname(fullName)
    else
      a.setFullname(s"$given $family")
    if (StringUtils.isNotBlank(orcid))
      a.setPid(List(createSP(orcid, ORCID, PID_TYPES)).asJava)

    a
  }

}
|
|
@ -0,0 +1,51 @@
|
|||
|
||||
package eu.dnetlib.doiboost.orcid;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.mortbay.log.Log;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
|
||||
public class OrcidAuthorsDOIsDataGen extends OrcidDSManager {
|
||||
|
||||
private String activitiesFileNameTarGz;
|
||||
private String outputAuthorsDOIsPath;
|
||||
|
||||
public static void main(String[] args) throws IOException, Exception {
|
||||
OrcidAuthorsDOIsDataGen orcidAuthorsDOIsDataGen = new OrcidAuthorsDOIsDataGen();
|
||||
orcidAuthorsDOIsDataGen.loadArgs(args);
|
||||
orcidAuthorsDOIsDataGen.generateAuthorsDOIsData();
|
||||
}
|
||||
|
||||
public void generateAuthorsDOIsData() throws Exception {
|
||||
Configuration conf = initConfigurationObject();
|
||||
FileSystem fs = initFileSystemObject(conf);
|
||||
String tarGzUri = hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(activitiesFileNameTarGz);
|
||||
Path outputPath = new Path(hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(outputAuthorsDOIsPath));
|
||||
ActivitiesDecompressor.parseGzActivities(conf, tarGzUri, outputPath);
|
||||
}
|
||||
|
||||
private void loadArgs(String[] args) throws IOException, Exception {
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
OrcidAuthorsDOIsDataGen.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/doiboost/create_orcid_authors_dois_data.json")));
|
||||
parser.parseArgument(args);
|
||||
|
||||
hdfsServerUri = parser.get("hdfsServerUri");
|
||||
Log.info("HDFS URI: " + hdfsServerUri);
|
||||
hdfsOrcidDefaultPath = parser.get("hdfsOrcidDefaultPath");
|
||||
Log.info("Default Path: " + hdfsOrcidDefaultPath);
|
||||
activitiesFileNameTarGz = parser.get("activitiesFileNameTarGz");
|
||||
Log.info("Activities File Name: " + activitiesFileNameTarGz);
|
||||
outputAuthorsDOIsPath = parser.get("outputAuthorsDOIsPath");
|
||||
Log.info("Output Authors DOIs Data: " + outputAuthorsDOIsPath);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
|
||||
package eu.dnetlib.doiboost.orcid;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.mortbay.log.Log;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
|
||||
public class OrcidDSManager {
|
||||
|
||||
protected String hdfsServerUri;
|
||||
protected String hdfsOrcidDefaultPath;
|
||||
private String summariesFileNameTarGz;
|
||||
private String outputAuthorsPath;
|
||||
|
||||
public static void main(String[] args) throws IOException, Exception {
|
||||
OrcidDSManager orcidDSManager = new OrcidDSManager();
|
||||
orcidDSManager.loadArgs(args);
|
||||
orcidDSManager.generateAuthors();
|
||||
}
|
||||
|
||||
public void generateAuthors() throws Exception {
|
||||
Configuration conf = initConfigurationObject();
|
||||
FileSystem fs = initFileSystemObject(conf);
|
||||
String tarGzUri = hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(summariesFileNameTarGz);
|
||||
Path outputPath = new Path(
|
||||
hdfsServerUri
|
||||
.concat(hdfsOrcidDefaultPath)
|
||||
.concat(outputAuthorsPath)
|
||||
.concat("authors.seq"));
|
||||
SummariesDecompressor.parseGzSummaries(conf, tarGzUri, outputPath);
|
||||
}
|
||||
|
||||
protected Configuration initConfigurationObject() {
|
||||
// ====== Init HDFS File System Object
|
||||
Configuration conf = new Configuration();
|
||||
// Set FileSystem URI
|
||||
conf.set("fs.defaultFS", hdfsServerUri.concat(hdfsOrcidDefaultPath));
|
||||
// Because of Maven
|
||||
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
|
||||
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
|
||||
return conf;
|
||||
}
|
||||
|
||||
protected FileSystem initFileSystemObject(Configuration conf) {
|
||||
// Get the filesystem - HDFS
|
||||
FileSystem fs = null;
|
||||
try {
|
||||
fs = FileSystem.get(URI.create(hdfsServerUri.concat(hdfsOrcidDefaultPath)), conf);
|
||||
} catch (IOException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
return fs;
|
||||
}
|
||||
|
||||
private void loadArgs(String[] args) throws IOException, Exception {
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
OrcidDSManager.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/doiboost/create_orcid_authors_data.json")));
|
||||
parser.parseArgument(args);
|
||||
|
||||
hdfsServerUri = parser.get("hdfsServerUri");
|
||||
Log.info("HDFS URI: " + hdfsServerUri);
|
||||
hdfsOrcidDefaultPath = parser.get("hdfsOrcidDefaultPath");
|
||||
Log.info("Default Path: " + hdfsOrcidDefaultPath);
|
||||
summariesFileNameTarGz = parser.get("summariesFileNameTarGz");
|
||||
Log.info("Summaries File Name: " + summariesFileNameTarGz);
|
||||
outputAuthorsPath = parser.get("outputAuthorsPath");
|
||||
Log.info("Output Authors Data: " + outputAuthorsPath);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,203 @@
|
|||
|
||||
package eu.dnetlib.doiboost.orcid;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
import org.mortbay.log.Log;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
|
||||
public class OrcidDownloader extends OrcidDSManager {
|
||||
|
||||
static final int REQ_LIMIT = 24;
|
||||
// static final int REQ_MAX_TEST = 100;
|
||||
static final int RECORD_PARSED_COUNTER_LOG_INTERVAL = 10000;
|
||||
static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
|
||||
static final String lastUpdate = "2019-09-30 00:00:00";
|
||||
private String lambdaFileName;
|
||||
private String outputPath;
|
||||
private String token;
|
||||
|
||||
public static void main(String[] args) throws IOException, Exception {
|
||||
OrcidDownloader orcidDownloader = new OrcidDownloader();
|
||||
orcidDownloader.loadArgs(args);
|
||||
orcidDownloader.parseLambdaFile();
|
||||
}
|
||||
|
||||
private String downloadRecord(String orcidId) {
|
||||
try (CloseableHttpClient client = HttpClients.createDefault()) {
|
||||
HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record");
|
||||
httpGet.addHeader("Accept", "application/vnd.orcid+xml");
|
||||
httpGet.addHeader("Authorization", String.format("Bearer %s", token));
|
||||
CloseableHttpResponse response = client.execute(httpGet);
|
||||
if (response.getStatusLine().getStatusCode() != 200) {
|
||||
Log
|
||||
.warn(
|
||||
"Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode());
|
||||
return new String("");
|
||||
}
|
||||
return IOUtils.toString(response.getEntity().getContent());
|
||||
|
||||
} catch (Throwable e) {
|
||||
Log.warn("Downloading " + orcidId, e.getMessage());
|
||||
|
||||
}
|
||||
return new String("");
|
||||
}
|
||||
|
||||
public void parseLambdaFile() throws Exception {
|
||||
int parsedRecordsCounter = 0;
|
||||
int downloadedRecordsCounter = 0;
|
||||
int savedRecordsCounter = 0;
|
||||
long startDownload = 0;
|
||||
Configuration conf = initConfigurationObject();
|
||||
FileSystem fs = initFileSystemObject(conf);
|
||||
String lambdaFileUri = hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(lambdaFileName);
|
||||
Path hdfsreadpath = new Path(lambdaFileUri);
|
||||
FSDataInputStream lambdaFileStream = fs.open(hdfsreadpath);
|
||||
Path hdfsoutputPath = new Path(
|
||||
hdfsServerUri
|
||||
.concat(hdfsOrcidDefaultPath)
|
||||
.concat(outputPath)
|
||||
.concat("orcid_records.seq"));
|
||||
|
||||
try (SequenceFile.Writer writer = SequenceFile
|
||||
.createWriter(
|
||||
conf,
|
||||
SequenceFile.Writer.file(hdfsoutputPath),
|
||||
SequenceFile.Writer.keyClass(Text.class),
|
||||
SequenceFile.Writer.valueClass(Text.class))) {
|
||||
|
||||
try (BufferedReader br = new BufferedReader(new InputStreamReader(lambdaFileStream))) {
|
||||
String line;
|
||||
int nReqTmp = 0;
|
||||
startDownload = System.currentTimeMillis();
|
||||
long startReqTmp = System.currentTimeMillis();
|
||||
while ((line = br.readLine()) != null) {
|
||||
parsedRecordsCounter++;
|
||||
// skip headers line
|
||||
if (parsedRecordsCounter == 1) {
|
||||
continue;
|
||||
}
|
||||
String[] values = line.split(",");
|
||||
List<String> recordInfo = Arrays.asList(values);
|
||||
String orcidId = recordInfo.get(0);
|
||||
if (isModified(orcidId, recordInfo.get(3))) {
|
||||
String record = downloadRecord(orcidId);
|
||||
downloadedRecordsCounter++;
|
||||
if (!record.isEmpty()) {
|
||||
String compressRecord = ArgumentApplicationParser.compressArgument(record);
|
||||
final Text key = new Text(recordInfo.get(0));
|
||||
final Text value = new Text(compressRecord);
|
||||
|
||||
try {
|
||||
writer.append(key, value);
|
||||
savedRecordsCounter++;
|
||||
} catch (IOException e) {
|
||||
Log.warn("Writing to sequence file: " + e.getMessage());
|
||||
Log.warn(e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
long endReq = System.currentTimeMillis();
|
||||
nReqTmp++;
|
||||
if (nReqTmp == REQ_LIMIT) {
|
||||
long reqSessionDuration = endReq - startReqTmp;
|
||||
if (reqSessionDuration <= 1000) {
|
||||
Log
|
||||
.warn(
|
||||
"\nreqSessionDuration: "
|
||||
+ reqSessionDuration
|
||||
+ " nReqTmp: "
|
||||
+ nReqTmp
|
||||
+ " wait ....");
|
||||
Thread.sleep(1000 - reqSessionDuration);
|
||||
} else {
|
||||
nReqTmp = 0;
|
||||
startReqTmp = System.currentTimeMillis();
|
||||
}
|
||||
}
|
||||
|
||||
// if (parsedRecordsCounter > REQ_MAX_TEST) {
|
||||
// break;
|
||||
// }
|
||||
if ((parsedRecordsCounter % RECORD_PARSED_COUNTER_LOG_INTERVAL) == 0) {
|
||||
Log
|
||||
.info(
|
||||
"Current parsed: "
|
||||
+ parsedRecordsCounter
|
||||
+ " downloaded: "
|
||||
+ downloadedRecordsCounter
|
||||
+ " saved: "
|
||||
+ savedRecordsCounter);
|
||||
// if (parsedRecordsCounter > REQ_MAX_TEST) {
|
||||
// break;
|
||||
// }
|
||||
}
|
||||
}
|
||||
long endDownload = System.currentTimeMillis();
|
||||
long downloadTime = endDownload - startDownload;
|
||||
Log.info("Download time: " + ((downloadTime / 1000) / 60) + " minutes");
|
||||
}
|
||||
}
|
||||
lambdaFileStream.close();
|
||||
Log.info("Download started at: " + new Date(startDownload).toString());
|
||||
Log.info("Parsed Records Counter: " + parsedRecordsCounter);
|
||||
Log.info("Downloaded Records Counter: " + downloadedRecordsCounter);
|
||||
Log.info("Saved Records Counter: " + savedRecordsCounter);
|
||||
}
|
||||
|
||||
private void loadArgs(String[] args) throws IOException, Exception {
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
OrcidDownloader.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/doiboost/download_orcid_data.json")));
|
||||
parser.parseArgument(args);
|
||||
|
||||
hdfsServerUri = parser.get("hdfsServerUri");
|
||||
Log.info("HDFS URI: " + hdfsServerUri);
|
||||
hdfsOrcidDefaultPath = parser.get("hdfsOrcidDefaultPath");
|
||||
Log.info("Default Path: " + hdfsOrcidDefaultPath);
|
||||
lambdaFileName = parser.get("lambdaFileName");
|
||||
Log.info("Lambda File Name: " + lambdaFileName);
|
||||
outputPath = parser.get("outputPath");
|
||||
Log.info("Output Data: " + outputPath);
|
||||
token = parser.get("token");
|
||||
}
|
||||
|
||||
private boolean isModified(String orcidId, String modifiedDate) {
|
||||
Date modifiedDateDt = null;
|
||||
Date lastUpdateDt = null;
|
||||
try {
|
||||
if (modifiedDate.length() != 19) {
|
||||
modifiedDate = modifiedDate.substring(0, 19);
|
||||
}
|
||||
modifiedDateDt = new SimpleDateFormat(DATE_FORMAT).parse(modifiedDate);
|
||||
lastUpdateDt = new SimpleDateFormat(DATE_FORMAT).parse(lastUpdate);
|
||||
} catch (Exception e) {
|
||||
Log.warn("[" + orcidId + "] Parsing date: ", e.getMessage());
|
||||
return true;
|
||||
}
|
||||
return modifiedDateDt.after(lastUpdateDt);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
package eu.dnetlib.doiboost.orcid
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication
|
||||
import eu.dnetlib.doiboost.mag.ConversionUtil
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
object SparkConvertORCIDToOAF {

  /**
   * Reads ORCID elements as JSON from `sourcePath`, converts each of them to a Publication,
   * merges the publications sharing the same id and overwrites `targetPath` with the result.
   *
   * @param args command line arguments described by `convert_map_to_oaf_params.json`
   */
  def main(args: Array[String]): Unit = {
    val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass)

    val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json")))
    parser.parseArgument(args)

    val conf: SparkConf = new SparkConf()
    val spark: SparkSession = SparkSession
      .builder()
      .config(conf)
      .appName(getClass.getSimpleName)
      .master(parser.get("master"))
      .getOrCreate()

    // encoders used by the typed map operations below
    implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication]
    implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPubs)
    import spark.implicits._

    val sourcePath = parser.get("sourcePath")
    val targetPath = parser.get("targetPath")
    val dataset: Dataset[ORCIDElement] = spark.read.json(sourcePath).as[ORCIDElement]

    logger.info("Converting ORCID to OAF")

    // elements that convert to null are dropped; the rest is keyed by publication id
    val keyedPublications: Dataset[(String, Publication)] =
      dataset
        .map(o => ORCIDToOAF.convertTOOAF(o))
        .filter(p => p != null)
        .map(p => (p.getId, p))

    // publications with the same id are collapsed into one
    val d: RDD[Publication] =
      keyedPublications.rdd
        .reduceByKey(ConversionUtil.mergePublication)
        .map(_._2)

    spark.createDataset(d).as[Publication].write.mode(SaveMode.Overwrite).save(targetPath)
  }

}
|
|
@ -0,0 +1,180 @@
|
|||
|
||||
package eu.dnetlib.doiboost.orcid;
|
||||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.io.compress.GzipCodec;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaPairRDD;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.Function;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.esotericsoftware.minlog.Log;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.gson.JsonElement;
|
||||
import com.google.gson.JsonParser;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.doiboost.orcid.model.AuthorData;
|
||||
import eu.dnetlib.doiboost.orcid.model.WorkData;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class SparkGenerateDoiAuthorList {
|
||||
|
||||
public static void main(String[] args) throws IOException, Exception {
|
||||
Logger logger = LoggerFactory.getLogger(SparkGenerateDoiAuthorList.class);
|
||||
logger.info("[ SparkGenerateDoiAuthorList STARTED]");
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
SparkGenerateDoiAuthorList.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/doiboost/gen_doi_author_list_orcid_parameters.json")));
|
||||
parser.parseArgument(args);
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
final String workingPath = parser.get("workingPath");
|
||||
logger.info("workingPath: ", workingPath);
|
||||
final String outputDoiAuthorListPath = parser.get("outputDoiAuthorListPath");
|
||||
logger.info("outputDoiAuthorListPath: ", outputDoiAuthorListPath);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaPairRDD<Text, Text> summariesRDD = sc
|
||||
.sequenceFile(workingPath + "../orcid_summaries/output/authors.seq", Text.class, Text.class);
|
||||
Dataset<AuthorData> summariesDataset = spark
|
||||
.createDataset(
|
||||
summariesRDD.map(seq -> loadAuthorFromJson(seq._1(), seq._2())).rdd(),
|
||||
Encoders.bean(AuthorData.class));
|
||||
|
||||
JavaPairRDD<Text, Text> activitiesRDD = sc
|
||||
.sequenceFile(workingPath + "/output/*.seq", Text.class, Text.class);
|
||||
Dataset<WorkData> activitiesDataset = spark
|
||||
.createDataset(
|
||||
activitiesRDD.map(seq -> loadWorkFromJson(seq._1(), seq._2())).rdd(),
|
||||
Encoders.bean(WorkData.class));
|
||||
|
||||
Function<Tuple2<String, AuthorData>, Tuple2<String, List<AuthorData>>> toAuthorListFunction = data -> {
|
||||
try {
|
||||
String doi = data._1();
|
||||
if (doi == null) {
|
||||
return null;
|
||||
}
|
||||
AuthorData author = data._2();
|
||||
if (author == null) {
|
||||
return null;
|
||||
}
|
||||
List<AuthorData> toAuthorList = Arrays.asList(author);
|
||||
return new Tuple2<>(doi, toAuthorList);
|
||||
} catch (Exception e) {
|
||||
Log.error("toAuthorListFunction ERROR", e);
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
JavaRDD<Tuple2<String, List<AuthorData>>> doisRDD = activitiesDataset
|
||||
.joinWith(
|
||||
summariesDataset,
|
||||
activitiesDataset.col("oid").equalTo(summariesDataset.col("oid")), "inner")
|
||||
.map(
|
||||
(MapFunction<Tuple2<WorkData, AuthorData>, Tuple2<String, AuthorData>>) value -> {
|
||||
WorkData w = value._1;
|
||||
AuthorData a = value._2;
|
||||
return new Tuple2<>(w.getDoi(), a);
|
||||
},
|
||||
Encoders.tuple(Encoders.STRING(), Encoders.bean(AuthorData.class)))
|
||||
.filter(Objects::nonNull)
|
||||
.toJavaRDD()
|
||||
.map(toAuthorListFunction);
|
||||
|
||||
JavaPairRDD
|
||||
.fromJavaRDD(doisRDD)
|
||||
.reduceByKey((d1, d2) -> {
|
||||
try {
|
||||
if (d1 != null && d2 != null) {
|
||||
Stream<AuthorData> mergedStream = Stream
|
||||
.concat(
|
||||
d1.stream(),
|
||||
d2.stream());
|
||||
List<AuthorData> mergedAuthors = mergedStream.collect(Collectors.toList());
|
||||
return mergedAuthors;
|
||||
}
|
||||
if (d1 != null) {
|
||||
return d1;
|
||||
}
|
||||
if (d2 != null) {
|
||||
return d2;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
Log.error("mergeAuthorsFunction ERROR", e);
|
||||
return null;
|
||||
}
|
||||
return null;
|
||||
})
|
||||
.mapToPair(
|
||||
s -> {
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
return new Tuple2<>(s._1(), mapper.writeValueAsString(s._2()));
|
||||
})
|
||||
.repartition(10)
|
||||
.saveAsTextFile(workingPath + outputDoiAuthorListPath);
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
private static AuthorData loadAuthorFromJson(Text orcidId, Text json) {
|
||||
AuthorData authorData = new AuthorData();
|
||||
authorData.setOid(orcidId.toString());
|
||||
JsonElement jElement = new JsonParser().parse(json.toString());
|
||||
authorData.setName(getJsonValue(jElement, "name"));
|
||||
authorData.setSurname(getJsonValue(jElement, "surname"));
|
||||
authorData.setCreditName(getJsonValue(jElement, "creditname"));
|
||||
return authorData;
|
||||
}
|
||||
|
||||
private static WorkData loadWorkFromJson(Text orcidId, Text json) {
|
||||
WorkData workData = new WorkData();
|
||||
workData.setOid(orcidId.toString());
|
||||
JsonElement jElement = new JsonParser().parse(json.toString());
|
||||
workData.setDoi(getJsonValue(jElement, "doi"));
|
||||
return workData;
|
||||
}
|
||||
|
||||
private static String getJsonValue(JsonElement jElement, String property) {
|
||||
if (jElement.getAsJsonObject().has(property)) {
|
||||
JsonElement name = null;
|
||||
name = jElement.getAsJsonObject().get(property);
|
||||
if (name != null && !name.isJsonNull()) {
|
||||
return name.getAsString();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,165 @@
|
|||
|
||||
package eu.dnetlib.doiboost.orcid;
|
||||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.Function;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.util.LongAccumulator;
|
||||
import org.mortbay.log.Log;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.doiboost.orcid.model.DownloadedRecordData;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class SparkOrcidGenerateAuthors {
|
||||
|
||||
static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
|
||||
static final String lastUpdate = "2019-09-30 00:00:00";
|
||||
|
||||
public static void main(String[] args) throws IOException, Exception {
|
||||
Logger logger = LoggerFactory.getLogger(SparkOrcidGenerateAuthors.class);
|
||||
logger.info("[ SparkOrcidGenerateAuthors STARTED]");
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
SparkOrcidGenerateAuthors.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/doiboost/gen_orcid_authors_parameters.json")));
|
||||
parser.parseArgument(args);
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
final String workingPath = parser.get("workingPath");
|
||||
logger.info("workingPath: ", workingPath);
|
||||
final String outputAuthorsPath = parser.get("outputAuthorsPath");
|
||||
logger.info("outputAuthorsPath: ", outputAuthorsPath);
|
||||
final String token = parser.get("token");
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
LongAccumulator parsedRecordsAcc = sc.sc().longAccumulator("parsedRecords");
|
||||
LongAccumulator modifiedRecordsAcc = sc.sc().longAccumulator("modifiedRecords");
|
||||
LongAccumulator downloadedRecordsAcc = sc.sc().longAccumulator("downloadedRecords");
|
||||
LongAccumulator alreadyDownloadedRecords = sc.sc().longAccumulator("alreadyDownloadedRecords");
|
||||
JavaRDD<String> lamdaFileRDD = sc.textFile(workingPath + "lamdafiles");
|
||||
|
||||
JavaRDD<String> downloadedRDD = sc.textFile(workingPath + "downloaded");
|
||||
Function<String, String> getOrcidIdFunction = line -> {
|
||||
try {
|
||||
String[] values = line.split(",");
|
||||
return values[0].substring(1);
|
||||
} catch (Exception e) {
|
||||
return new String("");
|
||||
}
|
||||
};
|
||||
List<String> downloadedRecords = downloadedRDD.map(getOrcidIdFunction).collect();
|
||||
|
||||
Function<String, Boolean> isModifiedAfterFilter = line -> {
|
||||
String[] values = line.split(",");
|
||||
String orcidId = values[0];
|
||||
parsedRecordsAcc.add(1);
|
||||
if (isModified(orcidId, values[3])) {
|
||||
modifiedRecordsAcc.add(1);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
Function<String, Boolean> isNotDownloadedFilter = line -> {
|
||||
String[] values = line.split(",");
|
||||
String orcidId = values[0];
|
||||
if (downloadedRecords.contains(orcidId)) {
|
||||
alreadyDownloadedRecords.add(1);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
Function<String, Tuple2<String, String>> downloadRecordFunction = line -> {
|
||||
String[] values = line.split(",");
|
||||
String orcidId = values[0];
|
||||
String modifiedDate = values[3];
|
||||
return downloadRecord(orcidId, modifiedDate, token, downloadedRecordsAcc);
|
||||
};
|
||||
|
||||
lamdaFileRDD
|
||||
.filter(isModifiedAfterFilter)
|
||||
.filter(isNotDownloadedFilter)
|
||||
.map(downloadRecordFunction)
|
||||
.rdd()
|
||||
.saveAsTextFile(workingPath.concat(outputAuthorsPath));
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
private static boolean isModified(String orcidId, String modifiedDate) {
|
||||
Date modifiedDateDt = null;
|
||||
Date lastUpdateDt = null;
|
||||
try {
|
||||
if (modifiedDate.length() != 19) {
|
||||
modifiedDate = modifiedDate.substring(0, 19);
|
||||
}
|
||||
modifiedDateDt = new SimpleDateFormat(DATE_FORMAT).parse(modifiedDate);
|
||||
lastUpdateDt = new SimpleDateFormat(DATE_FORMAT).parse(lastUpdate);
|
||||
} catch (Exception e) {
|
||||
Log.warn("[" + orcidId + "] Parsing date: ", e.getMessage());
|
||||
return true;
|
||||
}
|
||||
return modifiedDateDt.after(lastUpdateDt);
|
||||
}
|
||||
|
||||
private static Tuple2<String, String> downloadRecord(String orcidId, String modifiedDate, String token,
|
||||
LongAccumulator downloadedRecordsAcc) {
|
||||
final DownloadedRecordData data = new DownloadedRecordData();
|
||||
data.setOrcidId(orcidId);
|
||||
data.setModifiedDate(modifiedDate);
|
||||
try (CloseableHttpClient client = HttpClients.createDefault()) {
|
||||
HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record");
|
||||
httpGet.addHeader("Accept", "application/vnd.orcid+xml");
|
||||
httpGet.addHeader("Authorization", String.format("Bearer %s", token));
|
||||
CloseableHttpResponse response = client.execute(httpGet);
|
||||
int statusCode = response.getStatusLine().getStatusCode();
|
||||
data.setStatusCode(statusCode);
|
||||
if (statusCode != 200) {
|
||||
Log
|
||||
.warn(
|
||||
"Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode());
|
||||
return data.toTuple2();
|
||||
}
|
||||
downloadedRecordsAcc.add(1);
|
||||
data
|
||||
.setCompressedData(
|
||||
ArgumentApplicationParser.compressArgument(IOUtils.toString(response.getEntity().getContent())));
|
||||
} catch (Throwable e) {
|
||||
Log.warn("Downloading " + orcidId, e.getMessage());
|
||||
data.setErrorMessage(e.getMessage());
|
||||
return data.toTuple2();
|
||||
}
|
||||
return data.toTuple2();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
|
||||
package eu.dnetlib.doiboost.orcid;
|
||||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
|
||||
public class SparkPartitionLambdaFile {
|
||||
|
||||
public static void main(String[] args) throws IOException, Exception {
|
||||
Logger logger = LoggerFactory.getLogger(SparkOrcidGenerateAuthors.class);
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
SparkOrcidGenerateAuthors.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/doiboost/gen_orcid_authors_parameters.json")));
|
||||
parser.parseArgument(args);
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
final String workingPath = parser.get("workingPath");
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
JavaRDD<String> lamdaFileRDD = sc.textFile(workingPath + "last_modified.csv");
|
||||
|
||||
lamdaFileRDD
|
||||
.repartition(20)
|
||||
.saveAsTextFile(workingPath.concat("lamdafiles"));
|
||||
});
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,158 @@
|
|||
|
||||
package eu.dnetlib.doiboost.orcid;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URI;
|
||||
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.IOUtils;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.io.compress.CompressionCodec;
|
||||
import org.apache.hadoop.io.compress.CompressionCodecFactory;
|
||||
import org.mortbay.log.Log;
|
||||
|
||||
import eu.dnetlib.doiboost.orcid.json.JsonWriter;
|
||||
import eu.dnetlib.doiboost.orcid.model.AuthorData;
|
||||
import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser;
|
||||
|
||||
public class SummariesDecompressor {
|
||||
|
||||
private static final int MAX_XML_RECORDS_PARSED = -1;
|
||||
|
||||
public static void parseGzSummaries(Configuration conf, String inputUri, Path outputPath)
|
||||
throws Exception {
|
||||
String uri = inputUri;
|
||||
FileSystem fs = FileSystem.get(URI.create(uri), conf);
|
||||
Path inputPath = new Path(uri);
|
||||
CompressionCodecFactory factory = new CompressionCodecFactory(conf);
|
||||
CompressionCodec codec = factory.getCodec(inputPath);
|
||||
if (codec == null) {
|
||||
System.err.println("No codec found for " + uri);
|
||||
System.exit(1);
|
||||
}
|
||||
CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension());
|
||||
InputStream gzipInputStream = null;
|
||||
try {
|
||||
gzipInputStream = codec.createInputStream(fs.open(inputPath));
|
||||
parseTarSummaries(fs, conf, gzipInputStream, outputPath);
|
||||
|
||||
} finally {
|
||||
Log.debug("Closing gzip stream");
|
||||
IOUtils.closeStream(gzipInputStream);
|
||||
}
|
||||
}
|
||||
|
||||
private static void parseTarSummaries(
|
||||
FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) {
|
||||
int counter = 0;
|
||||
int nameFound = 0;
|
||||
int surnameFound = 0;
|
||||
int creditNameFound = 0;
|
||||
int errorFromOrcidFound = 0;
|
||||
int xmlParserErrorFound = 0;
|
||||
try (TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) {
|
||||
TarArchiveEntry entry = null;
|
||||
|
||||
try (SequenceFile.Writer writer = SequenceFile
|
||||
.createWriter(
|
||||
conf,
|
||||
SequenceFile.Writer.file(outputPath),
|
||||
SequenceFile.Writer.keyClass(Text.class),
|
||||
SequenceFile.Writer.valueClass(Text.class))) {
|
||||
while ((entry = tais.getNextTarEntry()) != null) {
|
||||
String filename = entry.getName();
|
||||
try {
|
||||
if (entry.isDirectory()) {
|
||||
Log.debug("Directory entry name: " + entry.getName());
|
||||
} else {
|
||||
Log.debug("XML record entry name: " + entry.getName());
|
||||
counter++;
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from
|
||||
// tarInput
|
||||
String line;
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
while ((line = br.readLine()) != null) {
|
||||
buffer.append(line);
|
||||
}
|
||||
AuthorData authorData = XMLRecordParser.VTDParseAuthorData(buffer.toString().getBytes());
|
||||
if (authorData != null) {
|
||||
if (authorData.getErrorCode() != null) {
|
||||
errorFromOrcidFound += 1;
|
||||
Log
|
||||
.debug(
|
||||
"error from Orcid with code "
|
||||
+ authorData.getErrorCode()
|
||||
+ " for oid "
|
||||
+ entry.getName());
|
||||
continue;
|
||||
}
|
||||
String jsonData = JsonWriter.create(authorData);
|
||||
Log.debug("oid: " + authorData.getOid() + " data: " + jsonData);
|
||||
|
||||
final Text key = new Text(authorData.getOid());
|
||||
final Text value = new Text(jsonData);
|
||||
|
||||
try {
|
||||
writer.append(key, value);
|
||||
} catch (IOException e) {
|
||||
Log.debug("Writing to sequence file: " + e.getMessage());
|
||||
Log.debug(e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
if (authorData.getName() != null) {
|
||||
nameFound += 1;
|
||||
}
|
||||
if (authorData.getSurname() != null) {
|
||||
surnameFound += 1;
|
||||
}
|
||||
if (authorData.getCreditName() != null) {
|
||||
creditNameFound += 1;
|
||||
}
|
||||
|
||||
} else {
|
||||
Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer.toString());
|
||||
xmlParserErrorFound += 1;
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
Log
|
||||
.warn(
|
||||
"Parsing record from tar archive and xml record: "
|
||||
+ filename
|
||||
+ " "
|
||||
+ e.getMessage());
|
||||
Log.warn(e);
|
||||
}
|
||||
|
||||
if ((counter % 100000) == 0) {
|
||||
Log.info("Current xml records parsed: " + counter);
|
||||
}
|
||||
|
||||
if ((MAX_XML_RECORDS_PARSED > -1) && (counter > MAX_XML_RECORDS_PARSED)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
Log.warn("Parsing record from gzip archive: " + e.getMessage());
|
||||
Log.warn(e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
Log.info("Summaries parse completed");
|
||||
Log.info("Total XML records parsed: " + counter);
|
||||
Log.info("Name found: " + nameFound);
|
||||
Log.info("Surname found: " + surnameFound);
|
||||
Log.info("Credit name found: " + creditNameFound);
|
||||
Log.info("Error from Orcid found: " + errorFromOrcidFound);
|
||||
Log.info("Error parsing xml record found: " + xmlParserErrorFound);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
|
||||
package eu.dnetlib.doiboost.orcid.json;
|
||||
|
||||
import com.google.gson.JsonObject;
|
||||
|
||||
import eu.dnetlib.doiboost.orcid.model.AuthorData;
|
||||
import eu.dnetlib.doiboost.orcid.model.WorkData;
|
||||
|
||||
public class JsonWriter {
|
||||
|
||||
public static String create(AuthorData authorData) {
|
||||
JsonObject author = new JsonObject();
|
||||
author.addProperty("oid", authorData.getOid());
|
||||
author.addProperty("name", authorData.getName());
|
||||
author.addProperty("surname", authorData.getSurname());
|
||||
if (authorData.getCreditName() != null) {
|
||||
author.addProperty("creditname", authorData.getCreditName());
|
||||
}
|
||||
return author.toString();
|
||||
}
|
||||
|
||||
public static String create(WorkData workData) {
|
||||
JsonObject work = new JsonObject();
|
||||
work.addProperty("oid", workData.getOid());
|
||||
work.addProperty("doi", workData.getDoi());
|
||||
return work.toString();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
|
||||
package eu.dnetlib.doiboost.orcid.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
public class AuthorData implements Serializable {
|
||||
|
||||
private String oid;
|
||||
private String name;
|
||||
private String surname;
|
||||
private String creditName;
|
||||
private String errorCode;
|
||||
|
||||
public String getErrorCode() {
|
||||
return errorCode;
|
||||
}
|
||||
|
||||
public void setErrorCode(String errorCode) {
|
||||
this.errorCode = errorCode;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public String getSurname() {
|
||||
return surname;
|
||||
}
|
||||
|
||||
public void setSurname(String surname) {
|
||||
this.surname = surname;
|
||||
}
|
||||
|
||||
public String getCreditName() {
|
||||
return creditName;
|
||||
}
|
||||
|
||||
public void setCreditName(String creditName) {
|
||||
this.creditName = creditName;
|
||||
}
|
||||
|
||||
public String getOid() {
|
||||
return oid;
|
||||
}
|
||||
|
||||
public void setOid(String oid) {
|
||||
this.oid = oid;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,76 @@
|
|||
|
||||
package eu.dnetlib.doiboost.orcid.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.hadoop.io.Text;
|
||||
|
||||
import com.google.gson.JsonObject;
|
||||
|
||||
import scala.Tuple2;
|
||||
|
||||
public class DownloadedRecordData implements Serializable {
|
||||
|
||||
private String orcidId;
|
||||
private String modifiedDate;
|
||||
private String statusCode;
|
||||
private String compressedData;
|
||||
private String errorMessage;
|
||||
|
||||
public Tuple2<String, String> toTuple2() {
|
||||
JsonObject data = new JsonObject();
|
||||
data.addProperty("statusCode", getStatusCode());
|
||||
data.addProperty("modifiedDate", getModifiedDate());
|
||||
if (getCompressedData() != null) {
|
||||
data.addProperty("compressedData", getCompressedData());
|
||||
}
|
||||
if (getErrorMessage() != null) {
|
||||
data.addProperty("errorMessage", getErrorMessage());
|
||||
}
|
||||
return new Tuple2<>(orcidId, data.toString());
|
||||
}
|
||||
|
||||
public String getErrorMessage() {
|
||||
return errorMessage;
|
||||
}
|
||||
|
||||
public void setErrorMessage(String errorMessage) {
|
||||
this.errorMessage = errorMessage;
|
||||
}
|
||||
|
||||
public String getOrcidId() {
|
||||
return orcidId;
|
||||
}
|
||||
|
||||
public void setOrcidId(String orcidId) {
|
||||
this.orcidId = orcidId;
|
||||
}
|
||||
|
||||
public int getStatusCode() {
|
||||
try {
|
||||
return Integer.parseInt(statusCode);
|
||||
} catch (Exception e) {
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
|
||||
public void setStatusCode(int statusCode) {
|
||||
this.statusCode = Integer.toString(statusCode);
|
||||
}
|
||||
|
||||
public String getCompressedData() {
|
||||
return compressedData;
|
||||
}
|
||||
|
||||
public void setCompressedData(String compressedData) {
|
||||
this.compressedData = compressedData;
|
||||
}
|
||||
|
||||
public String getModifiedDate() {
|
||||
return modifiedDate;
|
||||
}
|
||||
|
||||
public void setModifiedDate(String modifiedDate) {
|
||||
this.modifiedDate = modifiedDate;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
|
||||
package eu.dnetlib.doiboost.orcid.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Serializable bean carrying the values extracted for a single work: the owner identifier
 * ({@code oid}), an optional DOI with a flag telling whether one was found, and an optional
 * error code.
 */
public class WorkData implements Serializable {

	private String oid;
	private String doi;
	private String errorCode;

	/** Starts as {@code false}; only changed through {@link #setDoiFound(boolean)}. */
	private boolean doiFound;

	/** @return the identifier stored in {@code oid}, or {@code null} when unset */
	public String getOid() {
		return this.oid;
	}

	/** @param oid the identifier to store */
	public void setOid(String oid) {
		this.oid = oid;
	}

	/** @return the stored DOI, or {@code null} when unset */
	public String getDoi() {
		return this.doi;
	}

	/** @param doi the DOI to store; the {@code doiFound} flag is not touched */
	public void setDoi(String doi) {
		this.doi = doi;
	}

	/** @return the current value of the {@code doiFound} flag */
	public boolean isDoiFound() {
		return this.doiFound;
	}

	/** @param doiFound the new value of the {@code doiFound} flag */
	public void setDoiFound(boolean doiFound) {
		this.doiFound = doiFound;
	}

	/** @return the stored error code, or {@code null} when unset */
	public String getErrorCode() {
		return this.errorCode;
	}

	/** @param errorCode the error code to store */
	public void setErrorCode(String errorCode) {
		this.errorCode = errorCode;
	}
}
|
|
@ -0,0 +1,123 @@
|
|||
|
||||
package eu.dnetlib.doiboost.orcid.xml;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import com.ximpleware.AutoPilot;
|
||||
import com.ximpleware.EOFException;
|
||||
import com.ximpleware.EncodingException;
|
||||
import com.ximpleware.EntityException;
|
||||
import com.ximpleware.ParseException;
|
||||
import com.ximpleware.VTDGen;
|
||||
import com.ximpleware.VTDNav;
|
||||
|
||||
import eu.dnetlib.dhp.parser.utility.VtdException;
|
||||
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
|
||||
import eu.dnetlib.doiboost.orcid.model.AuthorData;
|
||||
import eu.dnetlib.doiboost.orcid.model.WorkData;
|
||||
|
||||
public class XMLRecordParser {
|
||||
|
||||
private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common";
|
||||
private static final String NS_COMMON = "common";
|
||||
private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person";
|
||||
private static final String NS_PERSON = "person";
|
||||
private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details";
|
||||
private static final String NS_DETAILS = "personal-details";
|
||||
private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name";
|
||||
private static final String NS_OTHER = "other-name";
|
||||
private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record";
|
||||
private static final String NS_RECORD = "record";
|
||||
private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error";
|
||||
|
||||
private static final String NS_WORK = "work";
|
||||
private static final String NS_WORK_URL = "http://www.orcid.org/ns/work";
|
||||
|
||||
private static final String NS_ERROR = "error";
|
||||
|
||||
public static AuthorData VTDParseAuthorData(byte[] bytes)
|
||||
throws VtdException, EncodingException, EOFException, EntityException, ParseException {
|
||||
final VTDGen vg = new VTDGen();
|
||||
vg.setDoc(bytes);
|
||||
vg.parse(true);
|
||||
final VTDNav vn = vg.getNav();
|
||||
final AutoPilot ap = new AutoPilot(vn);
|
||||
ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
|
||||
ap.declareXPathNameSpace(NS_PERSON, NS_PERSON_URL);
|
||||
ap.declareXPathNameSpace(NS_DETAILS, NS_DETAILS_URL);
|
||||
ap.declareXPathNameSpace(NS_OTHER, NS_OTHER_URL);
|
||||
ap.declareXPathNameSpace(NS_RECORD, NS_RECORD_URL);
|
||||
ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);
|
||||
|
||||
AuthorData authorData = new AuthorData();
|
||||
final List<String> errors = VtdUtilityParser.getTextValue(ap, vn, "//error:response-code");
|
||||
if (!errors.isEmpty()) {
|
||||
authorData.setErrorCode(errors.get(0));
|
||||
return authorData;
|
||||
}
|
||||
|
||||
List<VtdUtilityParser.Node> recordNodes = VtdUtilityParser
|
||||
.getTextValuesWithAttributes(
|
||||
ap, vn, "//record:record", Arrays.asList("path"));
|
||||
if (!recordNodes.isEmpty()) {
|
||||
final String oid = (recordNodes.get(0).getAttributes().get("path")).substring(1);
|
||||
authorData.setOid(oid);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
||||
final List<String> names = VtdUtilityParser.getTextValue(ap, vn, "//personal-details:given-names");
|
||||
if (!names.isEmpty()) {
|
||||
authorData.setName(names.get(0));
|
||||
}
|
||||
|
||||
final List<String> surnames = VtdUtilityParser.getTextValue(ap, vn, "//personal-details:family-name");
|
||||
if (!surnames.isEmpty()) {
|
||||
authorData.setSurname(surnames.get(0));
|
||||
}
|
||||
|
||||
final List<String> creditNames = VtdUtilityParser.getTextValue(ap, vn, "//personal-details:credit-name");
|
||||
if (!creditNames.isEmpty()) {
|
||||
authorData.setCreditName(creditNames.get(0));
|
||||
}
|
||||
return authorData;
|
||||
}
|
||||
|
||||
public static WorkData VTDParseWorkData(byte[] bytes)
|
||||
throws VtdException, EncodingException, EOFException, EntityException, ParseException {
|
||||
final VTDGen vg = new VTDGen();
|
||||
vg.setDoc(bytes);
|
||||
vg.parse(true);
|
||||
final VTDNav vn = vg.getNav();
|
||||
final AutoPilot ap = new AutoPilot(vn);
|
||||
ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
|
||||
ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL);
|
||||
ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);
|
||||
|
||||
WorkData workData = new WorkData();
|
||||
final List<String> errors = VtdUtilityParser.getTextValue(ap, vn, "//error:response-code");
|
||||
if (!errors.isEmpty()) {
|
||||
workData.setErrorCode(errors.get(0));
|
||||
return workData;
|
||||
}
|
||||
|
||||
List<VtdUtilityParser.Node> workNodes = VtdUtilityParser
|
||||
.getTextValuesWithAttributes(ap, vn, "//work:work", Arrays.asList("path"));
|
||||
if (!workNodes.isEmpty()) {
|
||||
final String oid = (workNodes.get(0).getAttributes().get("path")).split("/")[1];
|
||||
workData.setOid(oid);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
||||
final List<String> dois = VtdUtilityParser
|
||||
.getTextValue(
|
||||
ap, vn, "//common:external-id-type[text()=\"doi\"]/../common:external-id-value");
|
||||
if (!dois.isEmpty()) {
|
||||
workData.setDoi(dois.get(0));
|
||||
workData.setDoiFound(true);
|
||||
}
|
||||
return workData;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
package eu.dnetlib.doiboost.uw
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication
|
||||
import eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
|
||||
/**
  * Spark job that reads the text records found under `sourcePath`, converts each line with
  * [[UnpayWallToOAF.convertToOAF]], drops the lines for which the conversion returns null and
  * writes the resulting publications to `targetPath`, overwriting any previous content.
  *
  * Arguments read from the parser: `master`, `sourcePath`, `targetPath`.
  */
object SparkMapUnpayWallToOAF {

  def main(args: Array[String]): Unit = {

    // Bind the logger and the resource lookup to this job's own class, so that log lines are
    // attributed to it and the job does not depend on SparkMapDumpIntoOAF.
    val logger: Logger = LoggerFactory.getLogger(getClass)
    val conf: SparkConf = new SparkConf()
    val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json")))
    parser.parseArgument(args)
    val spark: SparkSession =
      SparkSession
        .builder()
        .config(conf)
        .appName(getClass.getSimpleName)
        .master(parser.get("master")).getOrCreate()

    // Publication has no built-in Spark encoder; serialize it with Kryo.
    implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication]

    val sourcePath = parser.get("sourcePath")
    val targetPath = parser.get("targetPath")
    val inputRDD: RDD[String] = spark.sparkContext.textFile(sourcePath)

    logger.info("Converting UnpayWall to OAF")

    // convertToOAF returns null for records that must be discarded.
    val d: Dataset[Publication] = spark.createDataset(inputRDD.map(UnpayWallToOAF.convertToOAF).filter(_ != null))
    d.write.mode(SaveMode.Overwrite).save(targetPath)
  }

}
|
|
@ -0,0 +1,63 @@
|
|||
package eu.dnetlib.doiboost.uw
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.{Instance, Publication}
|
||||
import org.json4s
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.jackson.JsonMethods.parse
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
|
||||
|
||||
|
||||
|
||||
/**
  * Shape of the `best_oa_location` JSON object as extracted by json4s; every attribute is optional.
  */
case class OALocation(
  evidence: Option[String],
  host_type: Option[String],
  is_best: Option[Boolean],
  license: Option[String],
  pmh_id: Option[String],
  updated: Option[String],
  url: Option[String],
  url_for_landing_page: Option[String],
  url_for_pdf: Option[String],
  version: Option[String]
)
|
||||
|
||||
|
||||
|
||||
|
||||
/**
  * Converts UnpayWall JSON records into OAF [[Publication]] objects.
  */
object UnpayWallToOAF {
  val logger: Logger = LoggerFactory.getLogger(getClass)

  /**
    * Builds a [[Publication]] from one UnpayWall JSON record.
    *
    * The `doi` and `is_oa` attributes are mandatory (json4s `extract` fails when they are missing);
    * `best_oa_location` is optional.
    *
    * @param input the JSON record as a string
    * @return a publication carrying the DOI as pid, the generated identifier and a single open access
    *         instance pointing to the best OA location url (plus its license when defined);
    *         null when the record is not open access or has no best OA location url
    */
  def convertToOAF(input: String): Publication = {

    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val json: json4s.JValue = parse(input)

    val doi = (json \ "doi").extract[String]
    val is_oa = (json \ "is_oa").extract[Boolean]
    val oaLocation: OALocation = (json \ "best_oa_location").extractOrElse[OALocation](null)

    // Decide whether the record is kept before allocating anything: discarded records
    // (not OA, or without a usable location) no longer pay for building a Publication.
    if (!is_oa || oaLocation == null || oaLocation.url.isEmpty)
      null
    else {
      val pub = new Publication
      pub.setPid(List(createSP(doi, "doi", PID_TYPES)).asJava)
      pub.setId(generateIdentifier(pub, doi.toLowerCase))
      pub.setCollectedfrom(List(createUnpayWallCollectedFrom()).asJava)
      pub.setDataInfo(generateDataInfo())

      val i: Instance = new Instance()
      i.setCollectedfrom(createUnpayWallCollectedFrom())
      i.setAccessright(getOpenAccessQualifier())
      i.setUrl(List(oaLocation.url.get).asJava)
      // The license is optional in the source record.
      oaLocation.license.foreach(l => i.setLicense(asField(l)))

      pub.setInstance(List(i).asJava)
      pub
    }
  }

}
|
|
@ -0,0 +1,6 @@
|
|||
[
|
||||
{"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the path of the sequential file to read", "paramRequired": true},
|
||||
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the working dir path", "paramRequired": true},
|
||||
{"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true}
|
||||
|
||||
]
|
|
@ -0,0 +1,6 @@
|
|||
[
|
||||
{"paramName":"n", "paramLongName":"hdfsServerUri", "paramDescription": "the server uri", "paramRequired": true},
|
||||
{"paramName":"d", "paramLongName":"hdfsOrcidDefaultPath", "paramDescription": "the default work path", "paramRequired": true},
|
||||
{"paramName":"f", "paramLongName":"summariesFileNameTarGz", "paramDescription": "the name of the summaries orcid file", "paramRequired": true},
|
||||
{"paramName":"o", "paramLongName":"outputAuthorsPath", "paramDescription": "the relative folder of the sequential file to write", "paramRequired": true}
|
||||
]
|
|
@ -0,0 +1,6 @@
|
|||
[
|
||||
{"paramName":"n", "paramLongName":"hdfsServerUri", "paramDescription": "the server uri", "paramRequired": true},
|
||||
{"paramName":"d", "paramLongName":"hdfsOrcidDefaultPath", "paramDescription": "the default work path", "paramRequired": true},
|
||||
{"paramName":"f", "paramLongName":"activitiesFileNameTarGz", "paramDescription": "the name of the activities orcid file", "paramRequired": true},
|
||||
{"paramName":"o", "paramLongName":"outputAuthorsDOIsPath", "paramDescription": "the relative folder of the sequential file to write", "paramRequired": true}
|
||||
]
|
|
@ -0,0 +1,42 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hive_metastore_uris</name>
|
||||
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<value>/user/spark/spark2ApplicationHistory</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>"com.cloudera.spark.lineage.NavigatorAppListener"</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,80 @@
|
|||
<workflow-app name="import Crossref from index into HDFS" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>workingPath</name>
|
||||
<description>the working dir base path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>timestamp</name>
|
||||
<description>Timestamp for incremental Harvesting</description>
|
||||
</property>
|
||||
|
||||
</parameters>
|
||||
|
||||
<start to="ExtractCrossrefToOAF"/>
|
||||
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="ResetWorkingPath">
|
||||
<fs>
|
||||
<delete path='${workingPath}/input/crossref/index_dump'/>
|
||||
<!-- <mkdir path='${workingPath}/input/crossref'/>-->
|
||||
</fs>
|
||||
<ok to="ImportCrossRef"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
|
||||
<action name="ImportCrossRef">
|
||||
<java>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<main-class>eu.dnetlib.doiboost.crossref.CrossrefImporter</main-class>
|
||||
<arg>-t</arg><arg>${workingPath}/input/crossref/index_dump</arg>
|
||||
<arg>-n</arg><arg>${nameNode}</arg>
|
||||
<arg>-ts</arg><arg>${timestamp}</arg>
|
||||
</java>
|
||||
<ok to="ExtractCrossrefToOAF"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
<action name="ExtractCrossrefToOAF">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>ExtractCrossrefToOAF</name>
|
||||
<class>eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF</class>
|
||||
<jar>dhp-doiboost-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
${sparkExtraOPT}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingPath}/input/crossref/index_dump,${workingPath}/crossref/index_dump</arg>
|
||||
<arg>--targetPath</arg><arg>${workingPath}/input/crossref</arg>
|
||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -0,0 +1,7 @@
|
|||
[
|
||||
{"paramName":"n", "paramLongName":"hdfsServerUri", "paramDescription": "the server uri", "paramRequired": true},
|
||||
{"paramName":"d", "paramLongName":"hdfsOrcidDefaultPath", "paramDescription": "the default work path", "paramRequired": true},
|
||||
{"paramName":"f", "paramLongName":"lambdaFileName", "paramDescription": "the name of the lambda file", "paramRequired": true},
|
||||
{"paramName":"o", "paramLongName":"outputPath", "paramDescription": "the relative folder of the sequential file to write", "paramRequired": true},
|
||||
{"paramName":"t", "paramLongName":"token", "paramDescription": "token to grant access", "paramRequired": true}
|
||||
]
|
|
@ -0,0 +1,3 @@
|
|||
[{"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the working path", "paramRequired": true},
|
||||
{"paramName":"o", "paramLongName":"outputDoiAuthorListPath", "paramDescription": "the relative folder of the sequential file to write the data", "paramRequired": true}
|
||||
]
|
|
@ -0,0 +1,4 @@
|
|||
[{"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the working path", "paramRequired": true},
|
||||
{"paramName":"t", "paramLongName":"token", "paramDescription": "token to grant access", "paramRequired": true},
|
||||
{"paramName":"o", "paramLongName":"outputAuthorsPath", "paramDescription": "the relative folder of the sequential file to write the authors data", "paramRequired": true}
|
||||
]
|
|
@ -0,0 +1,9 @@
|
|||
[
|
||||
{"paramName": "m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true},
|
||||
{"paramName": "dp", "paramLongName":"dbPublicationPath", "paramDescription": "the Crossref Publication Path", "paramRequired": true},
|
||||
{"paramName": "dd", "paramLongName":"dbDatasetPath", "paramDescription": "the Crossref Dataset Path", "paramRequired": true},
|
||||
{"paramName": "cr", "paramLongName":"crossRefRelation", "paramDescription": "the Crossref Relation Path", "paramRequired": true},
|
||||
{"paramName": "da", "paramLongName":"dbaffiliationRelationPath", "paramDescription": "the DOIBoost Publication Affiliation Relation Path", "paramRequired": true},
|
||||
{"paramName": "do", "paramLongName":"dbOrganizationPath", "paramDescription": "the DOIBoost Organization Path", "paramRequired": true},
|
||||
{"paramName": "w", "paramLongName":"targetPath", "paramDescription": "the Working Path", "paramRequired": true}
|
||||
]
|
|
@ -0,0 +1,7 @@
|
|||
[
|
||||
{"paramName": "m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true},
|
||||
{"paramName": "hb", "paramLongName":"hostedByMapPath", "paramDescription": "the hosted By Map Path", "paramRequired": true},
|
||||
{"paramName": "ap", "paramLongName":"affiliationPath", "paramDescription": "the Affiliation Path", "paramRequired": true},
|
||||
{"paramName": "pa", "paramLongName":"paperAffiliationPath", "paramDescription": "the paperAffiliation Path", "paramRequired": true},
|
||||
{"paramName": "w", "paramLongName":"workingDirPath", "paramDescription": "the Working Path", "paramRequired": true}
|
||||
]
|
|
@ -0,0 +1,5 @@
|
|||
[
|
||||
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the sequential file to write", "paramRequired": true},
|
||||
{"paramName":"n", "paramLongName":"namenode", "paramDescription": "the HDFS name node URI", "paramRequired": true},
|
||||
{"paramName":"ts", "paramLongName":"timestamp", "paramDescription": "timestamp", "paramRequired": false}
|
||||
]
|
|
@ -0,0 +1,38 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hive_metastore_uris</name>
|
||||
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<value>/user/spark/spark2ApplicationHistory</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>"com.cloudera.spark.lineage.NavigatorAppListener"</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,103 @@
|
|||
<workflow-app name="Create DOIBoostActionSet" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>hostedByMapPath</name>
|
||||
<description>the Hosted By Map Path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>affiliationPath</name>
|
||||
<description>the Affiliation Path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>paperAffiliationPath</name>
|
||||
<description>the paperAffiliation Path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>workingDirPath</name>
|
||||
<description>the Working Path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
|
||||
|
||||
<start to="GenerateActionSet"/>
|
||||
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="ResetWorkingPath">
|
||||
<fs>
|
||||
<delete path='${workingDirPath}'/>
|
||||
<mkdir path='${workingDirPath}'/>
|
||||
</fs>
|
||||
<ok to="CreateDOIBoost"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="CreateDOIBoost">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Create DOIBoost Infospace</name>
|
||||
<class>eu.dnetlib.doiboost.SparkGenerateDoiBoost</class>
|
||||
<jar>dhp-doiboost-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
${sparkExtraOPT}
|
||||
</spark-opts>
|
||||
<arg>--hostedByMapPath</arg><arg>${hostedByMapPath}</arg>
|
||||
<arg>--affiliationPath</arg><arg>${affiliationPath}</arg>
|
||||
<arg>--paperAffiliationPath</arg><arg>${paperAffiliationPath}</arg>
|
||||
<arg>--workingDirPath</arg><arg>${workingDirPath}</arg>
|
||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||
</spark>
|
||||
<ok to="GenerateActionSet"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
<action name="GenerateActionSet">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Generate DOIBoost ActionSet</name>
|
||||
<class>eu.dnetlib.doiboost.SparkGenerateDOIBoostActionSet</class>
|
||||
<jar>dhp-doiboost-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
${sparkExtraOPT}
|
||||
</spark-opts>
|
||||
<arg>--dbPublicationPath</arg><arg>${workingDirPath}/doiBoostPublicationFiltered</arg>
|
||||
<arg>--dbDatasetPath</arg><arg>${workingDirPath}/crossrefDataset</arg>
|
||||
<arg>--crossRefRelation</arg><arg>/data/doiboost/input/crossref/relations</arg>
|
||||
<arg>--dbaffiliationRelationPath</arg><arg>${workingDirPath}/doiBoostPublicationAffiliation</arg>
|
||||
<arg>-do</arg><arg>${workingDirPath}/doiBoostOrganization</arg>
|
||||
<arg>--targetPath</arg><arg>${workingDirPath}/actionDataSet</arg>
|
||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -0,0 +1,6 @@
|
|||
[
|
||||
{"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the base path of MAG input", "paramRequired": true},
|
||||
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the working dir path", "paramRequired": true},
|
||||
{"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true}
|
||||
|
||||
]
|
|
@ -0,0 +1,42 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.wf.rerun.failnodes</name>
|
||||
<value>false</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hive_metastore_uris</name>
|
||||
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<value>/user/spark/spark2ApplicationHistory</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>"com.cloudera.spark.lineage.NavigatorAppListener"</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,87 @@
|
|||
<workflow-app name="import MAG into HDFS" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<description>the base path of MAG input</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>targetPath</name>
|
||||
<description>the working dir base path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<start to="ResetWorkingPath"/>
|
||||
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="ResetWorkingPath">
|
||||
<fs>
|
||||
<delete path='${targetPath}'/>
|
||||
<mkdir path='${targetPath}'/>
|
||||
</fs>
|
||||
<ok to="PreprocessMag"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="ConvertMagToDataset">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Convert Mag to Dataset</name>
|
||||
<class>eu.dnetlib.doiboost.mag.SparkImportMagIntoDataset</class>
|
||||
<jar>dhp-doiboost-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
${sparkExtraOPT}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}</arg>
|
||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
|
||||
<action name="PreprocessMag">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Preprocess Mag</name>
|
||||
<class>eu.dnetlib.doiboost.mag.SparkPreProcessMAG</class>
|
||||
<jar>dhp-doiboost-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
${sparkExtraOPT}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}</arg>
|
||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -0,0 +1,6 @@
|
|||
[
|
||||
{"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the base path of MAG input", "paramRequired": true},
|
||||
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the working dir path", "paramRequired": true},
|
||||
{"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true}
|
||||
|
||||
]
|
|
@ -0,0 +1,22 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.java</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.map.java.opts</name>
|
||||
<value>-Xmx4g</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,41 @@
|
|||
<workflow-app name="import Orcid" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>workingPath</name>
|
||||
<description>the working dir base path</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<start to="ResetWorkingPath"/>
|
||||
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="ResetWorkingPath">
|
||||
<fs>
|
||||
<delete path='${workingPath}/output'/>
|
||||
<mkdir path='${workingPath}/output'/>
|
||||
</fs>
|
||||
<ok to="ImportOrcidSummary"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
|
||||
<action name="ImportOrcidSummary">
|
||||
<java>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<main-class>eu.dnetlib.doiboost.orcid.OrcidDSManager</main-class>
|
||||
<arg>-d</arg><arg>${workingPath}/</arg>
|
||||
<arg>-n</arg><arg>${nameNode}</arg>
|
||||
<arg>-f</arg><arg>ORCID_2019_summaries.tar.gz</arg>
|
||||
<arg>-o</arg><arg>output/</arg>
|
||||
</java>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -0,0 +1,22 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.java</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.map.java.opts</name>
|
||||
<value>-Xmx4g</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,505 @@
|
|||
<workflow-app name="Gen Orcid Authors DOIs" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>workingPath_activities</name>
|
||||
<description>the working dir base path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>shell_cmd_0</name>
|
||||
<!-- Download then upload; chained with && so a failed wget fails the action instead of uploading a partial file. NOTE(review): the HDFS target dir is hard-coded while the existence check uses ${workingPath_activities}; confirm they match. -->
<value>wget -O /tmp/ORCID_2019_activites_0.tar.gz https://orcid.figshare.com/ndownloader/files/18017660 &amp;&amp; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_0.tar.gz /data/orcid_activities/ORCID_2019_activites_0.tar.gz
|
||||
</value>
|
||||
<description>the shell command that downloads and puts to hdfs orcid activity file 0</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>shell_cmd_1</name>
|
||||
<!-- Download then upload; chained with && so a failed wget fails the action instead of uploading a partial file. NOTE(review): the HDFS target dir is hard-coded while the existence check uses ${workingPath_activities}; confirm they match. -->
<value>wget -O /tmp/ORCID_2019_activites_1.tar.gz https://orcid.figshare.com/ndownloader/files/18017675 &amp;&amp; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_1.tar.gz /data/orcid_activities/ORCID_2019_activites_1.tar.gz
|
||||
</value>
|
||||
<description>the shell command that downloads and puts to hdfs orcid activity file 1</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>shell_cmd_2</name>
|
||||
<!-- Download then upload; chained with && so a failed wget fails the action instead of uploading a partial file. NOTE(review): the HDFS target dir is hard-coded while the existence check uses ${workingPath_activities}; confirm they match. -->
<value>wget -O /tmp/ORCID_2019_activites_2.tar.gz https://orcid.figshare.com/ndownloader/files/18017717 &amp;&amp; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_2.tar.gz /data/orcid_activities/ORCID_2019_activites_2.tar.gz
|
||||
</value>
|
||||
<description>the shell command that downloads and puts to hdfs orcid activity file 2</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>shell_cmd_3</name>
|
||||
<!-- Download then upload; chained with && so a failed wget fails the action instead of uploading a partial file. NOTE(review): the HDFS target dir is hard-coded while the existence check uses ${workingPath_activities}; confirm they match. -->
<value>wget -O /tmp/ORCID_2019_activites_3.tar.gz https://orcid.figshare.com/ndownloader/files/18017765 &amp;&amp; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_3.tar.gz /data/orcid_activities/ORCID_2019_activites_3.tar.gz
|
||||
</value>
|
||||
<description>the shell command that downloads and puts to hdfs orcid activity file 3</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>shell_cmd_4</name>
|
||||
<!-- Download then upload; chained with && so a failed wget fails the action instead of uploading a partial file. NOTE(review): the HDFS target dir is hard-coded while the existence check uses ${workingPath_activities}; confirm they match. -->
<value>wget -O /tmp/ORCID_2019_activites_4.tar.gz https://orcid.figshare.com/ndownloader/files/18017831 &amp;&amp; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_4.tar.gz /data/orcid_activities/ORCID_2019_activites_4.tar.gz
|
||||
</value>
|
||||
<description>the shell command that downloads and puts to hdfs orcid activity file 4</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>shell_cmd_5</name>
|
||||
<!-- Download then upload; chained with && so a failed wget fails the action instead of uploading a partial file. NOTE(review): the HDFS target dir is hard-coded while the existence check uses ${workingPath_activities}; confirm they match. -->
<value>wget -O /tmp/ORCID_2019_activites_5.tar.gz https://orcid.figshare.com/ndownloader/files/18017987 &amp;&amp; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_5.tar.gz /data/orcid_activities/ORCID_2019_activites_5.tar.gz
|
||||
</value>
|
||||
<description>the shell command that downloads and puts to hdfs orcid activity file 5</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>shell_cmd_6</name>
|
||||
<!-- Download then upload; chained with && so a failed wget fails the action instead of uploading a partial file. NOTE(review): the HDFS target dir is hard-coded while the existence check uses ${workingPath_activities}; confirm they match. -->
<value>wget -O /tmp/ORCID_2019_activites_6.tar.gz https://orcid.figshare.com/ndownloader/files/18018053 &amp;&amp; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_6.tar.gz /data/orcid_activities/ORCID_2019_activites_6.tar.gz
|
||||
</value>
|
||||
<description>the shell command that downloads and puts to hdfs orcid activity file 6</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>shell_cmd_7</name>
|
||||
<!-- Download then upload; chained with && so a failed wget fails the action instead of uploading a partial file. NOTE(review): the HDFS target dir is hard-coded while the existence check uses ${workingPath_activities}; confirm they match. -->
<value>wget -O /tmp/ORCID_2019_activites_7.tar.gz https://orcid.figshare.com/ndownloader/files/18018023 &amp;&amp; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_7.tar.gz /data/orcid_activities/ORCID_2019_activites_7.tar.gz
|
||||
</value>
|
||||
<description>the shell command that downloads and puts to hdfs orcid activity file 7</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>shell_cmd_8</name>
|
||||
<!-- Download then upload; chained with && so a failed wget fails the action instead of uploading a partial file. NOTE(review): the HDFS target dir is hard-coded while the existence check uses ${workingPath_activities}; confirm they match. -->
<value>wget -O /tmp/ORCID_2019_activites_8.tar.gz https://orcid.figshare.com/ndownloader/files/18018248 &amp;&amp; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_8.tar.gz /data/orcid_activities/ORCID_2019_activites_8.tar.gz
|
||||
</value>
|
||||
<description>the shell command that downloads and puts to hdfs orcid activity file 8</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>shell_cmd_9</name>
|
||||
<!-- Download then upload; chained with && so a failed wget fails the action instead of uploading a partial file. NOTE(review): the HDFS target dir is hard-coded while the existence check uses ${workingPath_activities}; confirm they match. -->
<value>wget -O /tmp/ORCID_2019_activites_9.tar.gz https://orcid.figshare.com/ndownloader/files/18018029 &amp;&amp; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_9.tar.gz /data/orcid_activities/ORCID_2019_activites_9.tar.gz
|
||||
</value>
|
||||
<description>the shell command that downloads and puts to hdfs orcid activity file 9</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>shell_cmd_X</name>
|
||||
<!-- Download then upload; chained with && so a failed wget fails the action instead of uploading a partial file. NOTE(review): the HDFS target dir is hard-coded while the existence check uses ${workingPath_activities}; confirm they match. -->
<value>wget -O /tmp/ORCID_2019_activites_X.tar.gz https://orcid.figshare.com/ndownloader/files/18018182 &amp;&amp; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_X.tar.gz /data/orcid_activities/ORCID_2019_activites_X.tar.gz
|
||||
</value>
|
||||
<description>the shell command that downloads and puts to hdfs orcid activity file X</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<start to="ResetWorkingPath"/>
|
||||
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="ResetWorkingPath">
|
||||
<fs>
|
||||
<delete path='${workingPath_activities}/output'/>
|
||||
<mkdir path='${workingPath_activities}/output'/>
|
||||
</fs>
|
||||
<ok to="fork_gen_orcid_authors_dois"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name = "fork_gen_orcid_authors_dois">
|
||||
<path start = "check_exist_on_hdfs_activities_0"/>
|
||||
<path start = "check_exist_on_hdfs_activities_1"/>
|
||||
<path start = "check_exist_on_hdfs_activities_2"/>
|
||||
<path start = "check_exist_on_hdfs_activities_3"/>
|
||||
<path start = "check_exist_on_hdfs_activities_4"/>
|
||||
<path start = "check_exist_on_hdfs_activities_5"/>
|
||||
<path start = "check_exist_on_hdfs_activities_6"/>
|
||||
<path start = "check_exist_on_hdfs_activities_7"/>
|
||||
<path start = "check_exist_on_hdfs_activities_8"/>
|
||||
<path start = "check_exist_on_hdfs_activities_9"/>
|
||||
<path start = "check_exist_on_hdfs_activities_X"/>
|
||||
</fork>
|
||||
|
||||
<decision name="check_exist_on_hdfs_activities_0">
|
||||
<switch>
|
||||
<case to="Gen_Orcid_Authors_DOIs_0">
|
||||
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_0.tar.gz'))}
|
||||
</case>
|
||||
<default to="Download_0" />
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<action name="Download_0">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>bash</exec>
|
||||
<argument>-c</argument>
|
||||
<argument>${shell_cmd_0}</argument>
|
||||
<capture-output/>
|
||||
</shell>
|
||||
<ok to="Gen_Orcid_Authors_DOIs_0"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Gen_Orcid_Authors_DOIs_0">
|
||||
<java>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
|
||||
<arg>-d</arg><arg>${workingPath_activities}/</arg>
|
||||
<arg>-n</arg><arg>${nameNode}</arg>
|
||||
<arg>-f</arg><arg>ORCID_2019_activites_0.tar.gz</arg>
|
||||
<arg>-o</arg><arg>output/authors_dois_0.seq</arg>
|
||||
</java>
|
||||
<ok to="join_node"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<decision name="check_exist_on_hdfs_activities_1">
|
||||
<switch>
|
||||
<case to="Gen_Orcid_Authors_DOIs_1">
|
||||
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_1.tar.gz'))}
|
||||
</case>
|
||||
<default to="Download_1" />
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<action name="Download_1">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>bash</exec>
|
||||
<argument>-c</argument>
|
||||
<argument>${shell_cmd_1}</argument>
|
||||
<capture-output/>
|
||||
</shell>
|
||||
<ok to="Gen_Orcid_Authors_DOIs_1"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Gen_Orcid_Authors_DOIs_1">
|
||||
<java>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
|
||||
<arg>-d</arg><arg>${workingPath_activities}/</arg>
|
||||
<arg>-n</arg><arg>${nameNode}</arg>
|
||||
<arg>-f</arg><arg>ORCID_2019_activites_1.tar.gz</arg>
|
||||
<arg>-o</arg><arg>output/authors_dois_1.seq</arg>
|
||||
</java>
|
||||
<ok to="join_node"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<decision name="check_exist_on_hdfs_activities_2">
|
||||
<switch>
|
||||
<case to="Gen_Orcid_Authors_DOIs_2">
|
||||
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_2.tar.gz'))}
|
||||
</case>
|
||||
<default to="Download_2" />
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<action name="Download_2">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>bash</exec>
|
||||
<argument>-c</argument>
|
||||
<argument>${shell_cmd_2}</argument>
|
||||
<capture-output/>
|
||||
</shell>
|
||||
<ok to="Gen_Orcid_Authors_DOIs_2"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Gen_Orcid_Authors_DOIs_2">
|
||||
<java>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
|
||||
<arg>-d</arg><arg>${workingPath_activities}/</arg>
|
||||
<arg>-n</arg><arg>${nameNode}</arg>
|
||||
<arg>-f</arg><arg>ORCID_2019_activites_2.tar.gz</arg>
|
||||
<arg>-o</arg><arg>output/authors_dois_2.seq</arg>
|
||||
</java>
|
||||
<ok to="join_node"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<decision name="check_exist_on_hdfs_activities_3">
|
||||
<switch>
|
||||
<case to="Gen_Orcid_Authors_DOIs_3">
|
||||
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_3.tar.gz'))}
|
||||
</case>
|
||||
<default to="Download_3" />
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<action name="Download_3">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>bash</exec>
|
||||
<argument>-c</argument>
|
||||
<argument>${shell_cmd_3}</argument>
|
||||
<capture-output/>
|
||||
</shell>
|
||||
<ok to="Gen_Orcid_Authors_DOIs_3"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Gen_Orcid_Authors_DOIs_3">
|
||||
<java>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
|
||||
<arg>-d</arg><arg>${workingPath_activities}/</arg>
|
||||
<arg>-n</arg><arg>${nameNode}</arg>
|
||||
<arg>-f</arg><arg>ORCID_2019_activites_3.tar.gz</arg>
|
||||
<arg>-o</arg><arg>output/authors_dois_3.seq</arg>
|
||||
</java>
|
||||
<ok to="join_node"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<decision name="check_exist_on_hdfs_activities_4">
|
||||
<switch>
|
||||
<case to="Gen_Orcid_Authors_DOIs_4">
|
||||
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_4.tar.gz'))}
|
||||
</case>
|
||||
<default to="Download_4" />
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<action name="Download_4">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>bash</exec>
|
||||
<argument>-c</argument>
|
||||
<argument>${shell_cmd_4}</argument>
|
||||
<capture-output/>
|
||||
</shell>
|
||||
<ok to="Gen_Orcid_Authors_DOIs_4"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Gen_Orcid_Authors_DOIs_4">
|
||||
<java>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
|
||||
<arg>-d</arg><arg>${workingPath_activities}/</arg>
|
||||
<arg>-n</arg><arg>${nameNode}</arg>
|
||||
<arg>-f</arg><arg>ORCID_2019_activites_4.tar.gz</arg>
|
||||
<arg>-o</arg><arg>output/authors_dois_4.seq</arg>
|
||||
</java>
|
||||
<ok to="join_node"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<decision name="check_exist_on_hdfs_activities_5">
|
||||
<switch>
|
||||
<case to="Gen_Orcid_Authors_DOIs_5">
|
||||
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_5.tar.gz'))}
|
||||
</case>
|
||||
<default to="Download_5" />
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<action name="Download_5">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>bash</exec>
|
||||
<argument>-c</argument>
|
||||
<argument>${shell_cmd_5}</argument>
|
||||
<capture-output/>
|
||||
</shell>
|
||||
<ok to="Gen_Orcid_Authors_DOIs_5"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Gen_Orcid_Authors_DOIs_5">
|
||||
<java>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
|
||||
<arg>-d</arg><arg>${workingPath_activities}/</arg>
|
||||
<arg>-n</arg><arg>${nameNode}</arg>
|
||||
<arg>-f</arg><arg>ORCID_2019_activites_5.tar.gz</arg>
|
||||
<arg>-o</arg><arg>output/authors_dois_5.seq</arg>
|
||||
</java>
|
||||
<ok to="join_node"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<decision name="check_exist_on_hdfs_activities_6">
|
||||
<switch>
|
||||
<case to="Gen_Orcid_Authors_DOIs_6">
|
||||
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_6.tar.gz'))}
|
||||
</case>
|
||||
<default to="Download_6" />
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<action name="Download_6">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>bash</exec>
|
||||
<argument>-c</argument>
|
||||
<argument>${shell_cmd_6}</argument>
|
||||
<capture-output/>
|
||||
</shell>
|
||||
<ok to="Gen_Orcid_Authors_DOIs_6"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Gen_Orcid_Authors_DOIs_6">
|
||||
<java>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
|
||||
<arg>-d</arg><arg>${workingPath_activities}/</arg>
|
||||
<arg>-n</arg><arg>${nameNode}</arg>
|
||||
<arg>-f</arg><arg>ORCID_2019_activites_6.tar.gz</arg>
|
||||
<arg>-o</arg><arg>output/authors_dois_6.seq</arg>
|
||||
</java>
|
||||
<ok to="join_node"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
<decision name="check_exist_on_hdfs_activities_7">
|
||||
<switch>
|
||||
<case to="Gen_Orcid_Authors_DOIs_7">
|
||||
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_7.tar.gz'))}
|
||||
</case>
|
||||
<default to="Download_7" />
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<action name="Download_7">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>bash</exec>
|
||||
<argument>-c</argument>
|
||||
<argument>${shell_cmd_7}</argument>
|
||||
<capture-output/>
|
||||
</shell>
|
||||
<ok to="Gen_Orcid_Authors_DOIs_7"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Gen_Orcid_Authors_DOIs_7">
|
||||
<java>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
|
||||
<arg>-d</arg><arg>${workingPath_activities}/</arg>
|
||||
<arg>-n</arg><arg>${nameNode}</arg>
|
||||
<arg>-f</arg><arg>ORCID_2019_activites_7.tar.gz</arg>
|
||||
<arg>-o</arg><arg>output/authors_dois_7.seq</arg>
|
||||
</java>
|
||||
<ok to="join_node"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<decision name="check_exist_on_hdfs_activities_8">
|
||||
<switch>
|
||||
<case to="Gen_Orcid_Authors_DOIs_8">
|
||||
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_8.tar.gz'))}
|
||||
</case>
|
||||
<default to="Download_8" />
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<action name="Download_8">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>bash</exec>
|
||||
<argument>-c</argument>
|
||||
<argument>${shell_cmd_8}</argument>
|
||||
<capture-output/>
|
||||
</shell>
|
||||
<ok to="Gen_Orcid_Authors_DOIs_8"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Gen_Orcid_Authors_DOIs_8">
|
||||
<java>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
|
||||
<arg>-d</arg><arg>${workingPath_activities}/</arg>
|
||||
<arg>-n</arg><arg>${nameNode}</arg>
|
||||
<arg>-f</arg><arg>ORCID_2019_activites_8.tar.gz</arg>
|
||||
<arg>-o</arg><arg>output/authors_dois_8.seq</arg>
|
||||
</java>
|
||||
<ok to="join_node"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<decision name="check_exist_on_hdfs_activities_9">
|
||||
<switch>
|
||||
<case to="Gen_Orcid_Authors_DOIs_9">
|
||||
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_9.tar.gz'))}
|
||||
</case>
|
||||
<default to="Download_9" />
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<action name="Download_9">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>bash</exec>
|
||||
<argument>-c</argument>
|
||||
<argument>${shell_cmd_9}</argument>
|
||||
<capture-output/>
|
||||
</shell>
|
||||
<ok to="Gen_Orcid_Authors_DOIs_9"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Gen_Orcid_Authors_DOIs_9">
|
||||
<java>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
|
||||
<arg>-d</arg><arg>${workingPath_activities}/</arg>
|
||||
<arg>-n</arg><arg>${nameNode}</arg>
|
||||
<arg>-f</arg><arg>ORCID_2019_activites_9.tar.gz</arg>
|
||||
<arg>-o</arg><arg>output/authors_dois_9.seq</arg>
|
||||
</java>
|
||||
<ok to="join_node"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<decision name="check_exist_on_hdfs_activities_X">
|
||||
<switch>
|
||||
<case to="Gen_Orcid_Authors_DOIs_X">
|
||||
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_X.tar.gz'))}
|
||||
</case>
|
||||
<default to="Download_X" />
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<action name="Download_X">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>bash</exec>
|
||||
<argument>-c</argument>
|
||||
<argument>${shell_cmd_X}</argument>
|
||||
<capture-output/>
|
||||
</shell>
|
||||
<ok to="Gen_Orcid_Authors_DOIs_X"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Gen_Orcid_Authors_DOIs_X">
|
||||
<java>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
|
||||
<arg>-d</arg><arg>${workingPath_activities}/</arg>
|
||||
<arg>-n</arg><arg>${nameNode}</arg>
|
||||
<arg>-f</arg><arg>ORCID_2019_activites_X.tar.gz</arg>
|
||||
<arg>-o</arg><arg>output/authors_dois_X.seq</arg>
|
||||
</java>
|
||||
<ok to="join_node"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name = "join_node" to = "End"/>
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -0,0 +1,18 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>hadoop-rm3.garr-pa1.d4science.org:8032</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://hadoop-rm1.garr-pa1.d4science.org:8020</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>queueName</name>
|
||||
<value>default</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,55 @@
|
|||
<workflow-app name="Gen_Doi_Author_List_WF" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>workingPath</name>
|
||||
<description>the working dir base path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<start to="ResetWorkingPath"/>
|
||||
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="ResetWorkingPath">
|
||||
<fs>
|
||||
<!-- Clear the previous output before regenerating it. Uses ${workingPath}, the parameter this workflow declares and passes to the Spark job via -w (with -o doi_author_list/); ${workingPath_activities} is not declared in this workflow. -->
<delete path='${workingPath}/doi_author_list'/>
|
||||
</fs>
|
||||
<ok to="Gen_Doi_Author_List"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Gen_Doi_Author_List">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Gen_Doi_Author_List</name>
|
||||
<class>eu.dnetlib.doiboost.orcid.SparkGenerateDoiAuthorList</class>
|
||||
<jar>dhp-doiboost-1.2.1-SNAPSHOT.jar</jar>
|
||||
<spark-opts>--num-executors 10 --conf spark.yarn.jars="hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/oozie/share/lib/lib_20180405103059/spark2" --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory}
|
||||
</spark-opts>
|
||||
<arg>-w</arg><arg>${workingPath}/</arg>
|
||||
<arg>-o</arg><arg>doi_author_list/</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -0,0 +1,22 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.java</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.map.java.opts</name>
|
||||
<value>-Xmx4g</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,45 @@
|
|||
<workflow-app name="Orcid Download" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>workingPathOrcid</name>
|
||||
<description>the working dir base path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>token</name>
|
||||
<description>access token</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<start to="ResetWorkingPath"/>
|
||||
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="ResetWorkingPath">
|
||||
<fs>
|
||||
<delete path='${workingPathOrcid}/download'/>
|
||||
<mkdir path='${workingPathOrcid}/download'/>
|
||||
</fs>
|
||||
<ok to="DownloadOrcidData"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="DownloadOrcidData">
|
||||
<java>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<main-class>eu.dnetlib.doiboost.orcid.OrcidDownloader</main-class>
|
||||
<arg>-d</arg><arg>${workingPathOrcid}/</arg>
|
||||
<arg>-n</arg><arg>${nameNode}</arg>
|
||||
<arg>-f</arg><arg>last_modified.csv</arg>
|
||||
<arg>-o</arg><arg>download/</arg>
|
||||
<arg>-t</arg><arg>${token}</arg>
|
||||
</java>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -0,0 +1,22 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>hadoop-rm3.garr-pa1.d4science.org:8032</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://hadoop-rm1.garr-pa1.d4science.org:8020</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>queueName</name>
|
||||
<value>default</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,83 @@
|
|||
<workflow-app name="Gen Orcid Authors" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>workingPath</name>
|
||||
<description>the working dir base path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>token</name>
|
||||
<description>access token</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<description>the working dir base path</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<start to="ResetWorkingPath"/>
|
||||
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="ResetWorkingPath">
|
||||
<fs>
|
||||
<!-- Clear the previous output before regenerating it. Uses ${workingPath}, the parameter this workflow declares and passes to the Spark job via -w (with -o authors/); ${workingPath_activities} is not declared in this workflow. -->
<delete path='${workingPath}/authors'/>
|
||||
</fs>
|
||||
<ok to="Gen_Orcid_Authors"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<!-- Spark action running eu.dnetlib.doiboost.orcid.SparkPartitionLambdaFile with -w ${workingPath}/ -o authors/ -t ${token}. -->
<!-- NOTE(review): no start/ok/error transition in this workflow points to this action (start -> ResetWorkingPath -> Gen_Orcid_Authors -> End), so it is never executed as written. Confirm whether it should be wired into the flow or removed. -->
<action name="Split_Lambda_File">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Split_Lambda_File</name>
|
||||
<class>eu.dnetlib.doiboost.orcid.SparkPartitionLambdaFile</class>
|
||||
<jar>dhp-doiboost-1.2.1-SNAPSHOT.jar</jar>
|
||||
<spark-opts>--num-executors 24 --conf spark.yarn.jars="hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/oozie/share/lib/lib_20180405103059/spark2" --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory}
|
||||
</spark-opts>
|
||||
<arg>-w</arg><arg>${workingPath}/</arg>
|
||||
<arg>-o</arg><arg>authors/</arg>
|
||||
<arg>-t</arg><arg>${token}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="Gen_Orcid_Authors">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Gen_Orcid_Authors</name>
|
||||
<class>eu.dnetlib.doiboost.orcid.SparkOrcidGenerateAuthors</class>
|
||||
<jar>dhp-doiboost-1.2.1-SNAPSHOT.jar</jar>
|
||||
<spark-opts>--num-executors 20 --conf spark.yarn.jars="hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/oozie/share/lib/lib_20180405103059/spark2" --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory}
|
||||
</spark-opts>
|
||||
<arg>-w</arg><arg>${workingPath}/</arg>
|
||||
<arg>-o</arg><arg>authors/</arg>
|
||||
<arg>-t</arg><arg>${token}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -0,0 +1,38 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hive_metastore_uris</name>
|
||||
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<value>/user/spark/spark2ApplicationHistory</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>"com.cloudera.spark.lineage.NavigatorAppListener"</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,55 @@
|
|||
<workflow-app name="import ORCID into HDFS" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<description>the input path, passed to SparkConvertORCIDToOAF as --sourcePath</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>targetPath</name>
|
||||
<description>the output path, passed to SparkConvertORCIDToOAF as --targetPath</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<start to="PreprocessORCID"/>
|
||||
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="PreprocessORCID">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Convert ORCID to Dataset</name>
|
||||
<class>eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF</class>
|
||||
<jar>dhp-doiboost-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
${sparkExtraOPT}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}</arg>
|
||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -0,0 +1,38 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hive_metastore_uris</name>
|
||||
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<value>/user/spark/spark2ApplicationHistory</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>"com.cloudera.spark.lineage.NavigatorAppListener"</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,55 @@
|
|||
<workflow-app name="import UnpayWall into HDFS" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<description>the working dir base path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>targetPath</name>
|
||||
<description>the target path passed to the UnpayWall conversion job</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<start to="PreprocessUW"/>
|
||||
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="PreprocessUW">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Convert UnpayWall to Dataset</name>
|
||||
<class>eu.dnetlib.doiboost.uw.SparkMapUnpayWallToOAF</class>
|
||||
<jar>dhp-doiboost-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
${sparkExtraOPT}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/uw_extracted</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}</arg>
|
||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -0,0 +1,70 @@
|
|||
package eu.dnetlib.dhp.doiboost
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.{Publication, Dataset => OafDataset}
|
||||
import eu.dnetlib.doiboost.{DoiBoostMappingUtil, HostedByItemType}
|
||||
import eu.dnetlib.doiboost.SparkGenerateDoiBoost.getClass
|
||||
import eu.dnetlib.doiboost.mag.ConversionUtil
|
||||
import eu.dnetlib.doiboost.orcid.ORCIDElement
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
|
||||
import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
import scala.io.Source
|
||||
|
||||
class DoiBoostHostedByMapTest {
|
||||
|
||||
|
||||
|
||||
// @Test
|
||||
// def testMerge():Unit = {
|
||||
// val conf: SparkConf = new SparkConf()
|
||||
// val spark: SparkSession =
|
||||
// SparkSession
|
||||
// .builder()
|
||||
// .config(conf)
|
||||
// .appName(getClass.getSimpleName)
|
||||
// .master("local[*]").getOrCreate()
|
||||
//
|
||||
//
|
||||
//
|
||||
// implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication]
|
||||
// implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset]
|
||||
// implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPub)
|
||||
//
|
||||
//
|
||||
// import spark.implicits._
|
||||
// val dataset:RDD[String]= spark.sparkContext.textFile("/home/sandro/Downloads/hbMap.gz")
|
||||
//
|
||||
//
|
||||
// val hbMap:Dataset[(String, HostedByItemType)] =spark.createDataset(dataset.map(DoiBoostMappingUtil.toHostedByItem))
|
||||
//
|
||||
//
|
||||
// hbMap.show()
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
// }
|
||||
|
||||
|
||||
@Test
|
||||
def idDSGeneration():Unit = {
  // Smoke check only: prints the datasource id generated for a sample
  // identifier; nothing is asserted on the value.
  val sampleId = "doajarticles::0066-782X"
  val generatedId = DoiBoostMappingUtil.generateDSId(sampleId)
  println(generatedId)
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,365 @@
|
|||
package eu.dnetlib.doiboost.crossref
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf._
|
||||
import eu.dnetlib.dhp.utils.DHPUtils
|
||||
import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
|
||||
import org.junit.jupiter.api.Assertions._
|
||||
import org.junit.jupiter.api.Test
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.io.Source
|
||||
import scala.util.matching.Regex
|
||||
|
||||
|
||||
class CrossrefMappingTest {
|
||||
|
||||
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
|
||||
val mapper = new ObjectMapper()
|
||||
|
||||
|
||||
@Test
|
||||
def testFunderRelationshipsMapping(): Unit = {
  // Every line of the funder lists is substituted into the article template
  // and the resulting record must be converted into at least one well-formed
  // relation (see checkRelation).
  val template = Source.fromInputStream(getClass.getResourceAsStream("article_funder_template.json")).mkString
  val funder_doi = Source.fromInputStream(getClass.getResourceAsStream("funder_doi")).mkString
  // NOTE(review): this used to load "funder_doi" a second time, so the
  // funder-name mapping was never exercised. Assumes a "funder_name" resource
  // is available next to "funder_doi" -- confirm it is on the test classpath.
  val funder_name = Source.fromInputStream(getClass.getResourceAsStream("funder_name")).mkString

  // DOI-based funders are checked first, then name-based ones.
  for (funderList <- List(funder_doi, funder_name); line <- funderList.lines) {
    val json = template.replace("%s", line)
    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
    assertTrue(resultList.nonEmpty)
    checkRelation(resultList)
  }
}
|
||||
|
||||
def checkRelation(generatedOAF: List[Oaf]): Unit = {
  // Fails when the converted records contain no Relation, or when any
  // Relation has a null source/target or an empty target, relClass, relType
  // or subRelType. The serialized relation is included in failure messages.
  val relations: List[Relation] = generatedOAF.collect { case rel: Relation => rel }
  assertFalse(relations.isEmpty)

  for (rel <- relations) {
    val relJson = mapper.writeValueAsString(rel)

    assertNotNull(rel.getSource, s"Source of relation null $relJson")
    assertNotNull(rel.getTarget, s"Target of relation null $relJson")
    assertFalse(rel.getTarget.isEmpty, s"Target is empty: $relJson")
    assertFalse(rel.getRelClass.isEmpty, s"RelClass is empty: $relJson")
    assertFalse(rel.getRelType.isEmpty, s"RelType is empty: $relJson")
    assertFalse(rel.getSubRelType.isEmpty, s"SubRelType is empty: $relJson")
  }
}
|
||||
|
||||
|
||||
@Test
|
||||
def testEmptyTitle() :Unit = {
  // Converts the empty_title.json fixture and prints every Result produced,
  // pretty-printed as JSON. Only non-emptiness of the conversion is asserted.
  val fixture = Source.fromInputStream(getClass.getResourceAsStream("empty_title.json")).mkString
  assertNotNull(fixture)
  assertFalse(fixture.isEmpty)

  val converted: List[Oaf] = Crossref2Oaf.convert(fixture)
  assertTrue(converted.nonEmpty)

  val results = converted.filter(_.isInstanceOf[Result])

  mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
  for (r <- results) {
    println(mapper.writeValueAsString(r))
  }
}
|
||||
|
||||
|
||||
@Test
|
||||
def testPeerReviewed(): Unit = {
  // Converts the prwTest.json fixture and logs every Result produced,
  // pretty-printed as JSON. Only non-emptiness of the conversion is asserted.
  val fixture = Source.fromInputStream(getClass.getResourceAsStream("prwTest.json")).mkString
  mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)

  assertNotNull(fixture)
  assertFalse(fixture.isEmpty)

  val converted: List[Oaf] = Crossref2Oaf.convert(fixture)
  assertTrue(converted.nonEmpty)

  val results = converted.filter(_.isInstanceOf[Result])
  for (r <- results) {
    logger.info(mapper.writeValueAsString(r))
  }
}
|
||||
|
||||
def extractECAward(award: String): String = {
  // Collects every run of 4 to 9 digits in the award string and returns the
  // greatest one in String (lexicographic) ordering, or null when there is
  // no such run.
  val awardPattern: Regex = "[0-9]{4,9}".r
  val candidates = awardPattern.findAllIn(award).toList
  if (candidates.isEmpty) null else candidates.max
}
|
||||
|
||||
|
||||
@Test
|
||||
def extractECTest(): Unit = {
  // Prints the award code extracted from a sample funding string, followed
  // by its MD5 hash. Nothing is asserted.
  val fundingString = "FP7/2007-2013"
  val awardCode = extractECAward(fundingString)

  println(awardCode)
  println(DHPUtils.md5(awardCode))
}
|
||||
|
||||
@Test
|
||||
def testJournalRelation(): Unit = {
  // Converts the awardTest.json fixture and checks that exactly three
  // relations are produced; the target of each relation is logged.
  val json = Source.fromInputStream(getClass.getResourceAsStream("awardTest.json")).mkString
  assertNotNull(json)
  assertFalse(json.isEmpty)

  val resultList: List[Oaf] = Crossref2Oaf.convert(json)
  assertTrue(resultList.nonEmpty)

  val rels: List[Relation] = resultList.filter(p => p.isInstanceOf[Relation]).map(r => r.asInstanceOf[Relation])
  rels.foreach(s => logger.info(s.getTarget))

  // JUnit expects (expected, actual): the swapped order produced a misleading
  // "expected: <n> but was: <3>" message on failure.
  assertEquals(3, rels.size)
}
|
||||
|
||||
|
||||
@Test
|
||||
def testConvertBookFromCrossRef2Oaf(): Unit = {
  // Converts the book.json fixture and checks that exactly one Result is
  // produced, with populated provenance, a Crossref collectedFrom entry, the
  // expected relevant-date types, and no relations.
  val bookJson = Source.fromInputStream(getClass.getResourceAsStream("book.json")).mkString
  assertNotNull(bookJson)
  assertFalse(bookJson.isEmpty)

  val resultList: List[Oaf] = Crossref2Oaf.convert(bookJson)
  assertTrue(resultList.nonEmpty)

  val items = resultList.filter(_.isInstanceOf[Result])
  assert(items.nonEmpty)
  assert(items.size == 1)
  val result: Result = items.head.asInstanceOf[Result]
  assertNotNull(result)

  logger.info(mapper.writeValueAsString(result))

  // Provenance block of the data info must be fully populated.
  assertNotNull(result.getDataInfo, "Datainfo test not null Failed")
  val provenance = result.getDataInfo.getProvenanceaction
  assertNotNull(provenance, "DataInfo/Provenance test not null Failed")
  assertFalse(provenance.getClassid.isEmpty, "DataInfo/Provenance/classId test not null Failed")
  assertFalse(provenance.getClassname.isEmpty, "DataInfo/Provenance/className test not null Failed")
  assertFalse(provenance.getSchemeid.isEmpty, "DataInfo/Provenance/SchemeId test not null Failed")
  assertFalse(provenance.getSchemename.isEmpty, "DataInfo/Provenance/SchemeName test not null Failed")

  assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed")
  assertFalse(result.getCollectedfrom.isEmpty)

  val collectedFromList = result.getCollectedfrom.asScala
  assert(collectedFromList.exists(_.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), "Wrong collected from assertion")
  assert(collectedFromList.exists(_.getValue.equalsIgnoreCase("crossref")), "Wrong collected from assertion")

  // One assertion per expected date type, checked in this order.
  val relevantDates = result.getRelevantdate.asScala
  for (dateType <- List("created", "published-online", "published-print")) {
    assert(relevantDates.exists(_.getQualifier.getClassid.equalsIgnoreCase(dateType)), s"Missing relevant date of type $dateType")
  }

  val rels = resultList.filter(_.isInstanceOf[Relation])
  assert(rels.isEmpty)
}
|
||||
|
||||
|
||||
@Test
|
||||
def testConvertPreprintFromCrossRef2Oaf(): Unit = {
  // Converts the preprint.json fixture and checks that exactly one
  // Publication is produced, with populated provenance, a Crossref
  // collectedFrom entry, the expected relevant-date types, and no relations.
  val preprintJson = Source.fromInputStream(getClass.getResourceAsStream("preprint.json")).mkString
  assertNotNull(preprintJson)
  assertFalse(preprintJson.isEmpty)

  val resultList: List[Oaf] = Crossref2Oaf.convert(preprintJson)
  assertTrue(resultList.nonEmpty)

  val items = resultList.filter(_.isInstanceOf[Publication])
  assert(items.nonEmpty)
  assert(items.size == 1)
  val result: Result = items.head.asInstanceOf[Publication]
  assertNotNull(result)

  logger.info(mapper.writeValueAsString(result))

  // Provenance block of the data info must be fully populated.
  assertNotNull(result.getDataInfo, "Datainfo test not null Failed")
  val provenance = result.getDataInfo.getProvenanceaction
  assertNotNull(provenance, "DataInfo/Provenance test not null Failed")
  assertFalse(provenance.getClassid.isEmpty, "DataInfo/Provenance/classId test not null Failed")
  assertFalse(provenance.getClassname.isEmpty, "DataInfo/Provenance/className test not null Failed")
  assertFalse(provenance.getSchemeid.isEmpty, "DataInfo/Provenance/SchemeId test not null Failed")
  assertFalse(provenance.getSchemename.isEmpty, "DataInfo/Provenance/SchemeName test not null Failed")

  assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed")
  assertFalse(result.getCollectedfrom.isEmpty)

  val collectedFromList = result.getCollectedfrom.asScala
  assert(collectedFromList.exists(_.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), "Wrong collected from assertion")
  assert(collectedFromList.exists(_.getValue.equalsIgnoreCase("crossref")), "Wrong collected from assertion")

  // One assertion per expected date type, checked in this order.
  val relevantDates = result.getRelevantdate.asScala
  for (dateType <- List("created", "available", "accepted", "published-online", "published-print")) {
    assert(relevantDates.exists(_.getQualifier.getClassid.equalsIgnoreCase(dateType)), s"Missing relevant date of type $dateType")
  }

  val rels = resultList.filter(_.isInstanceOf[Relation])
  assert(rels.isEmpty)
}
|
||||
|
||||
|
||||
@Test
|
||||
def testConvertDatasetFromCrossRef2Oaf(): Unit = {
  // Converts the dataset.json fixture and checks that exactly one Dataset is
  // produced, with populated provenance and a non-empty collectedFrom list.
  val datasetJson = Source.fromInputStream(getClass.getResourceAsStream("dataset.json")).mkString
  assertNotNull(datasetJson)
  assertFalse(datasetJson.isEmpty)

  val resultList: List[Oaf] = Crossref2Oaf.convert(datasetJson)
  assertTrue(resultList.nonEmpty)

  val items = resultList.filter(_.isInstanceOf[Dataset])
  assert(items.nonEmpty)
  assert(items.size == 1)
  val result: Result = items.head.asInstanceOf[Dataset]
  assertNotNull(result)

  logger.info(mapper.writeValueAsString(result))

  // Provenance block of the data info must be fully populated.
  assertNotNull(result.getDataInfo, "Datainfo test not null Failed")
  val provenance = result.getDataInfo.getProvenanceaction
  assertNotNull(provenance, "DataInfo/Provenance test not null Failed")
  assertFalse(provenance.getClassid.isEmpty, "DataInfo/Provenance/classId test not null Failed")
  assertFalse(provenance.getClassname.isEmpty, "DataInfo/Provenance/className test not null Failed")
  assertFalse(provenance.getSchemeid.isEmpty, "DataInfo/Provenance/SchemeId test not null Failed")
  assertFalse(provenance.getSchemename.isEmpty, "DataInfo/Provenance/SchemeName test not null Failed")

  assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed")
  assertFalse(result.getCollectedfrom.isEmpty)
}
|
||||
|
||||
@Test
|
||||
def testConvertArticleFromCrossRef2Oaf(): Unit = {
  // Converts the article.json fixture and checks that exactly one Publication
  // is produced, with populated provenance, a Crossref collectedFrom entry
  // and a "created" relevant date, plus at least one relation whose fields
  // are all non-empty.
  val articleJson = Source.fromInputStream(getClass.getResourceAsStream("article.json")).mkString
  assertNotNull(articleJson)
  assertFalse(articleJson.isEmpty)

  val resultList: List[Oaf] = Crossref2Oaf.convert(articleJson)
  assertTrue(resultList.nonEmpty)

  val items = resultList.filter(_.isInstanceOf[Publication])
  assert(items.nonEmpty)
  assert(items.size == 1)
  val result: Result = items.head.asInstanceOf[Publication]
  assertNotNull(result)

  logger.info(mapper.writeValueAsString(result))

  // Provenance block of the data info must be fully populated.
  assertNotNull(result.getDataInfo, "Datainfo test not null Failed")
  val provenance = result.getDataInfo.getProvenanceaction
  assertNotNull(provenance, "DataInfo/Provenance test not null Failed")
  assertFalse(provenance.getClassid.isEmpty, "DataInfo/Provenance/classId test not null Failed")
  assertFalse(provenance.getClassname.isEmpty, "DataInfo/Provenance/className test not null Failed")
  assertFalse(provenance.getSchemeid.isEmpty, "DataInfo/Provenance/SchemeId test not null Failed")
  assertFalse(provenance.getSchemename.isEmpty, "DataInfo/Provenance/SchemeName test not null Failed")

  assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed")
  assertFalse(result.getCollectedfrom.isEmpty)

  val collectedFromList = result.getCollectedfrom.asScala
  assert(collectedFromList.exists(_.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), "Wrong collected from assertion")
  assert(collectedFromList.exists(_.getValue.equalsIgnoreCase("crossref")), "Wrong collected from assertion")

  val relevantDates = result.getRelevantdate.asScala
  assert(relevantDates.exists(_.getQualifier.getClassid.equalsIgnoreCase("created")), "Missing relevant date of type created")

  val relations: List[Relation] = resultList.collect { case rel: Relation => rel }
  assertFalse(relations.isEmpty)
  for (rel <- relations) {
    assertNotNull(rel)
    assertFalse(rel.getSource.isEmpty)
    assertFalse(rel.getTarget.isEmpty)
    assertFalse(rel.getRelClass.isEmpty)
    assertFalse(rel.getRelType.isEmpty)
    assertFalse(rel.getSubRelType.isEmpty)
  }
}
|
||||
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
package eu.dnetlib.doiboost.mag
|
||||
|
||||
import java.sql.Timestamp
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication
|
||||
import org.apache.htrace.fasterxml.jackson.databind.SerializationFeature
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.api.java.function.MapFunction
|
||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
|
||||
import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
|
||||
import org.junit.jupiter.api.Test
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
import org.junit.jupiter.api.Assertions._
|
||||
import org.apache.spark.sql.functions._
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.io.Source
|
||||
import scala.reflect.ClassTag
|
||||
import scala.util.matching.Regex
|
||||
|
||||
|
||||
|
||||
/** Tests for the MAG mapping utilities. */
class MAGMappingTest {

  val logger: Logger = LoggerFactory.getLogger(getClass)
  val mapper = new ObjectMapper()

  /** Prints the part of a dotted name that precedes the first dot. */
  @Test
  def testSplitter():Unit = {
    val s = "sports.team"

    if (s.contains(".")) {
      // Plain method call instead of postfix-operator notation, which needs
      // scala.language.postfixOps and otherwise triggers a feature warning.
      println(s.split("\\.").head)
    }
  }

  /** Prints the date part (first 10 characters) of a Timestamp's string form. */
  @Test
  def testDate() :Unit = {
    val p:Timestamp = Timestamp.valueOf("2011-10-02 00:00:00")

    println(p.toString.substring(0,10))
  }

  /** Checks that the invertedIndex.json fixture converts to a non-empty description. */
  @Test
  def buildInvertedIndexTest(): Unit = {
    val json_input = Source.fromInputStream(getClass.getResourceAsStream("invertedIndex.json")).mkString
    val description = ConversionUtil.convertInvertedIndexString(json_input)
    assertNotNull(description)
    assertTrue(description.nonEmpty)

    logger.debug(description)
  }

}
|
||||
|
||||
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,30 @@
|
|||
package eu.dnetlib.doiboost.orcid
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication
|
||||
import eu.dnetlib.doiboost.crossref.Crossref2Oaf
|
||||
import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
|
||||
import org.junit.jupiter.api.Test
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
import org.junit.jupiter.api.Assertions._
|
||||
|
||||
import scala.io.Source
|
||||
|
||||
/** Tests for the extraction of ORCID records from their serialized input form. */
class MappingORCIDToOAFTest {
  // The logger was created for Crossref2Oaf (copy-paste from the Crossref
  // test); it now belongs to this test class.
  val logger: Logger = LoggerFactory.getLogger(getClass)
  val mapper = new ObjectMapper()

  /** Every line of the dataOutput fixture must yield a non-null extracted value. */
  @Test
  def testExtractData():Unit ={
    val json = Source.fromInputStream(getClass.getResourceAsStream("dataOutput")).mkString
    assertNotNull(json)
    assertFalse(json.isEmpty)
    json.lines.foreach(s => {
      assertNotNull(ORCIDToOAF.extractValueFromInputString(s))
    })
  }

}
|
|
@ -0,0 +1,136 @@
|
|||
|
||||
package eu.dnetlib.doiboost.orcid;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
|
||||
public class OrcidClientTest {
|
||||
final String orcidId = "0000-0001-7291-3210";
|
||||
final int REQ_LIMIT = 24;
|
||||
final int REQ_MAX_TEST = 100;
|
||||
final int RECORD_DOWNLOADED_COUNTER_LOG_INTERVAL = 10;
|
||||
final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
|
||||
final String toRetrieveDate = "2020-05-06 23:59:46.031145";
|
||||
String toNotRetrieveDate = "2019-09-29 23:59:59.000000";
|
||||
String lastUpdate = "2019-09-30 00:00:00";
|
||||
String shortDate = "2020-05-06 16:06:11";
|
||||
|
||||
// curl -i -H "Accept: application/vnd.orcid+xml"
|
||||
// -H 'Authorization: Bearer 78fdb232-7105-4086-8570-e153f4198e3d'
|
||||
// 'https://api.orcid.org/v3.0/0000-0001-7291-3210/record'
|
||||
|
||||
public String testDownloadRecord(String orcidId) throws Exception {
|
||||
try (CloseableHttpClient client = HttpClients.createDefault()) {
|
||||
HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record");
|
||||
httpGet.addHeader("Accept", "application/vnd.orcid+xml");
|
||||
httpGet.addHeader("Authorization", "Bearer 78fdb232-7105-4086-8570-e153f4198e3d");
|
||||
CloseableHttpResponse response = client.execute(httpGet);
|
||||
if (response.getStatusLine().getStatusCode() != 200) {
|
||||
System.out
|
||||
.println("Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode());
|
||||
}
|
||||
return IOUtils.toString(response.getEntity().getContent());
|
||||
} catch (Throwable e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return new String("");
|
||||
}
|
||||
|
||||
// @Test
|
||||
public void testLambdaFileParser() throws Exception {
|
||||
try (BufferedReader br = new BufferedReader(
|
||||
new InputStreamReader(this.getClass().getResourceAsStream("last_modified.csv")))) {
|
||||
String line;
|
||||
int counter = 0;
|
||||
int nReqTmp = 0;
|
||||
long startDownload = System.currentTimeMillis();
|
||||
long startReqTmp = System.currentTimeMillis();
|
||||
while ((line = br.readLine()) != null) {
|
||||
counter++;
|
||||
// skip headers line
|
||||
if (counter == 1) {
|
||||
continue;
|
||||
}
|
||||
String[] values = line.split(",");
|
||||
List<String> recordInfo = Arrays.asList(values);
|
||||
testDownloadRecord(recordInfo.get(0));
|
||||
long endReq = System.currentTimeMillis();
|
||||
nReqTmp++;
|
||||
if (nReqTmp == REQ_LIMIT) {
|
||||
long reqSessionDuration = endReq - startReqTmp;
|
||||
if (reqSessionDuration <= 1000) {
|
||||
System.out
|
||||
.println(
|
||||
"\nreqSessionDuration: " + reqSessionDuration + " nReqTmp: " + nReqTmp + " wait ....");
|
||||
Thread.sleep(1000 - reqSessionDuration);
|
||||
} else {
|
||||
nReqTmp = 0;
|
||||
startReqTmp = System.currentTimeMillis();
|
||||
}
|
||||
}
|
||||
|
||||
if (counter > REQ_MAX_TEST) {
|
||||
break;
|
||||
}
|
||||
if ((counter % RECORD_DOWNLOADED_COUNTER_LOG_INTERVAL) == 0) {
|
||||
System.out.println("Current record downloaded: " + counter);
|
||||
}
|
||||
}
|
||||
long endDownload = System.currentTimeMillis();
|
||||
long downloadTime = endDownload - startDownload;
|
||||
System.out.println("Download time: " + ((downloadTime / 1000) / 60) + " minutes");
|
||||
}
|
||||
}
|
||||
|
||||
// @Test
|
||||
public void getRecordDatestamp() throws ParseException {
|
||||
Date toRetrieveDateDt = new SimpleDateFormat(DATE_FORMAT).parse(toRetrieveDate);
|
||||
Date toNotRetrieveDateDt = new SimpleDateFormat(DATE_FORMAT).parse(toNotRetrieveDate);
|
||||
Date lastUpdateDt = new SimpleDateFormat(DATE_FORMAT).parse(lastUpdate);
|
||||
assertTrue(toRetrieveDateDt.after(lastUpdateDt));
|
||||
assertTrue(!toNotRetrieveDateDt.after(lastUpdateDt));
|
||||
}
|
||||
|
||||
public void testDate(String value) throws ParseException {
|
||||
System.out.println(value.toString());
|
||||
if (value.length() != 19) {
|
||||
value = value.substring(0, 19);
|
||||
}
|
||||
Date valueDt = new SimpleDateFormat(DATE_FORMAT).parse(value);
|
||||
System.out.println(valueDt.toString());
|
||||
}
|
||||
|
||||
// @Test
|
||||
public void testModifiedDate() throws ParseException {
|
||||
testDate(toRetrieveDate);
|
||||
testDate(toNotRetrieveDate);
|
||||
testDate(shortDate);
|
||||
}
|
||||
|
||||
// @Test
|
||||
public void testReadBase64CompressedRecord() throws Exception {
|
||||
final String base64CompressedRecord = IOUtils
|
||||
.toString(getClass().getResourceAsStream("0000-0001-6645-509X.compressed.base64"));
|
||||
final String recordFromSeqFile = ArgumentApplicationParser.decompressValue(base64CompressedRecord);
|
||||
System.out.println(recordFromSeqFile);
|
||||
final String downloadedRecord = testDownloadRecord("0000-0001-6645-509X");
|
||||
assertTrue(recordFromSeqFile.equals(downloadedRecord));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
|
||||
package eu.dnetlib.doiboost.orcid.xml;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import eu.dnetlib.doiboost.orcid.model.AuthorData;
|
||||
import eu.dnetlib.doiboost.orcid.model.WorkData;
|
||||
|
||||
public class XMLRecordParserTest {
|
||||
|
||||
@Test
|
||||
public void testOrcidAuthorDataXMLParser() throws Exception {
|
||||
|
||||
String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_0000-0001-6828-479X.xml"));
|
||||
|
||||
XMLRecordParser p = new XMLRecordParser();
|
||||
|
||||
AuthorData authorData = p.VTDParseAuthorData(xml.getBytes());
|
||||
assertNotNull(authorData);
|
||||
assertNotNull(authorData.getName());
|
||||
System.out.println("name: " + authorData.getName());
|
||||
assertNotNull(authorData.getSurname());
|
||||
System.out.println("surname: " + authorData.getSurname());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOrcidXMLErrorRecordParser() throws Exception {
|
||||
|
||||
String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_error.xml"));
|
||||
|
||||
XMLRecordParser p = new XMLRecordParser();
|
||||
|
||||
AuthorData authorData = p.VTDParseAuthorData(xml.getBytes());
|
||||
assertNotNull(authorData);
|
||||
assertNotNull(authorData.getErrorCode());
|
||||
System.out.println("error: " + authorData.getErrorCode());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOrcidWorkDataXMLParser() throws Exception {
|
||||
|
||||
String xml = IOUtils
|
||||
.toString(
|
||||
this.getClass().getResourceAsStream("activity_work_0000-0002-5982-8983.xml"));
|
||||
|
||||
XMLRecordParser p = new XMLRecordParser();
|
||||
|
||||
WorkData workData = p.VTDParseWorkData(xml.getBytes());
|
||||
assertNotNull(workData);
|
||||
assertNotNull(workData.getOid());
|
||||
System.out.println("oid: " + workData.getOid());
|
||||
assertNotNull(workData.getDoi());
|
||||
System.out.println("doi: " + workData.getDoi());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
package eu.dnetlib.doiboost.uw
|
||||
|
||||
import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
import scala.io.Source
|
||||
import org.junit.jupiter.api.Assertions._
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
/** Tests for the conversion of UnpayWall records. */
class UnpayWallMappingTest {

  val logger: Logger = LoggerFactory.getLogger(getClass)
  val mapper = new ObjectMapper()

  /**
   * Converts every line of the input.json fixture; each non-null conversion
   * must carry exactly one pid. The conversion of the first line is then
   * logged as pretty-printed JSON.
   */
  @Test
  def testMappingToOAF():Unit ={
    val inputText = Source.fromInputStream(getClass.getResourceAsStream("input.json")).mkString

    inputText.lines.foreach { record =>
      val converted = UnpayWallToOAF.convertToOAF(record)

      if (converted != null) {
        assertTrue(converted.getPid.size()==1)
        logger.info(converted.getId)
      }
      assertNotNull(record)
      assertTrue(record.nonEmpty)
    }

    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)

    val firstRecord = inputText.lines.next()
    val firstConverted = UnpayWallToOAF.convertToOAF(firstRecord)
    logger.info(mapper.writeValueAsString(firstConverted))
  }

}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,175 @@
|
|||
{
|
||||
"DOI": "10.26850/1678-4618eqj.v35.1.2010.p41-46",
|
||||
"issued": {
|
||||
"date-parts": [
|
||||
[
|
||||
2018,
|
||||
1,
|
||||
15
|
||||
]
|
||||
]
|
||||
},
|
||||
"abstract": "<jats:p>A qualitative spot-test and tandem quantitative analysis of dipyrone in the bulk drugand in pharmaceutical preparations is proposed. The formation of a reddish-violet\u00a0 color indicates a positive result. In sequence a quantitative procedure can be performed in the same flask. The quantitative results obtained were statistically compared with those obtained with the method indicated by the Brazilian\u00a0 Pharmacopoeia, using the Student\u2019s t and the F tests. Considering the concentration in a 100 \u03bcL aliquot, the qualitative visual limit of detection is about 5\u00d710-6 g; instrumental LOD \u2245 1.4\u00d710-4 mol L-1 ; LOQ \u2245 4.5\u00d710-4 mol L-1.</jats:p>",
|
||||
"prefix": "10.26850",
|
||||
"author": [
|
||||
{
|
||||
"authenticated-orcid": false,
|
||||
"given": "Matthieu",
|
||||
"family": "Tubino",
|
||||
"sequence": "first",
|
||||
"affiliation": [],
|
||||
"ORCID": "http://orcid.org/0000-0002-1987-3907"
|
||||
},
|
||||
{
|
||||
"affiliation": [],
|
||||
"given": "A. C.",
|
||||
"family": "Biondo",
|
||||
"sequence": "additional"
|
||||
},
|
||||
{
|
||||
"authenticated-orcid": false,
|
||||
"given": "Marta Maria Duarte Carvalho",
|
||||
"family": "Vila",
|
||||
"sequence": "additional",
|
||||
"affiliation": [],
|
||||
"ORCID": "http://orcid.org/0000-0002-0198-7076"
|
||||
},
|
||||
{
|
||||
"authenticated-orcid": false,
|
||||
"given": "Leonardo",
|
||||
"family": "Pezza",
|
||||
"sequence": "additional",
|
||||
"affiliation": [],
|
||||
"ORCID": "http://orcid.org/0000-0003-0197-7369"
|
||||
},
|
||||
{
|
||||
"authenticated-orcid": false,
|
||||
"given": "Helena Redigolo",
|
||||
"family": "Pezza",
|
||||
"sequence": "additional",
|
||||
"affiliation": [],
|
||||
"ORCID": "http://orcid.org/0000-0001-5564-1639"
|
||||
}
|
||||
],
|
||||
"reference-count": 0,
|
||||
"ISSN": [
|
||||
"1678-4618"
|
||||
],
|
||||
"member": "11395",
|
||||
"source": "Crossref",
|
||||
"score": 1.0,
|
||||
"deposited": {
|
||||
"timestamp": 1540823529000,
|
||||
"date-time": "2018-10-29T14:32:09Z",
|
||||
"date-parts": [
|
||||
[
|
||||
2018,
|
||||
10,
|
||||
29
|
||||
]
|
||||
]
|
||||
},
|
||||
"indexed": {
|
||||
"timestamp": 1540825815212,
|
||||
"date-time": "2018-10-29T15:10:15Z",
|
||||
"date-parts": [
|
||||
[
|
||||
2018,
|
||||
10,
|
||||
29
|
||||
]
|
||||
]
|
||||
},
|
||||
"type": "journal-article",
|
||||
"published-online": {
|
||||
"date-parts": [
|
||||
[
|
||||
2018,
|
||||
1,
|
||||
15
|
||||
]
|
||||
]
|
||||
},
|
||||
"URL": "http://dx.doi.org/10.26850/1678-4618eqj.v35.1.2010.p41-46",
|
||||
"is-referenced-by-count": 0,
|
||||
"volume": "35",
|
||||
"issn-type": [
|
||||
{
|
||||
"type": "electronic",
|
||||
"value": "1678-4618"
|
||||
}
|
||||
],
|
||||
"link": [
|
||||
{
|
||||
"URL": "http://revista.iq.unesp.br/ojs/index.php/ecletica/article/viewFile/191/149",
|
||||
"intended-application": "text-mining",
|
||||
"content-version": "vor",
|
||||
"content-type": "application/pdf"
|
||||
},
|
||||
{
|
||||
"URL": "http://revista.iq.unesp.br/ojs/index.php/ecletica/article/viewFile/191/149",
|
||||
"intended-application": "similarity-checking",
|
||||
"content-version": "vor",
|
||||
"content-type": "unspecified"
|
||||
}
|
||||
],
|
||||
"journal-issue": {
|
||||
"issue": "1",
|
||||
"published-online": {
|
||||
"date-parts": [
|
||||
[
|
||||
2018,
|
||||
1,
|
||||
15
|
||||
]
|
||||
]
|
||||
}
|
||||
},
|
||||
"references-count": 0,
|
||||
"short-container-title": [
|
||||
"Eclet. Quim. J."
|
||||
],
|
||||
"publisher": "Ecletica Quimica Journal",
|
||||
"content-domain": {
|
||||
"domain": [],
|
||||
"crossmark-restriction": false
|
||||
},
|
||||
"license": [
|
||||
{
|
||||
"URL": "http://creativecommons.org/licenses/by/4.0",
|
||||
"start": {
|
||||
"timestamp": 1515974400000,
|
||||
"date-time": "2018-01-15T00:00:00Z",
|
||||
"date-parts": [
|
||||
[
|
||||
2018,
|
||||
1,
|
||||
15
|
||||
]
|
||||
]
|
||||
},
|
||||
"content-version": "unspecified",
|
||||
"delay-in-days": 0
|
||||
}
|
||||
],
|
||||
"created": {
|
||||
"timestamp": 1517590842000,
|
||||
"date-time": "2018-02-02T17:00:42Z",
|
||||
"date-parts": [
|
||||
[
|
||||
2018,
|
||||
2,
|
||||
2
|
||||
]
|
||||
]
|
||||
},
|
||||
"issue": "1",
|
||||
"title": [
|
||||
"Spot-test identification and rapid quantitative sequential analys is of dipyrone"
|
||||
],
|
||||
"container-title": [
|
||||
"Ecl\u00e9tica Qu\u00edmica Journal"
|
||||
],
|
||||
"page": "41-50",
|
||||
"funder": [{"DOI": "10.13039/100010663","name": "H2020 European Research Council","doi-asserted-by": "publisher","award": ["677749"]}]
|
||||
}
|
|
@ -0,0 +1,175 @@
|
|||
{
|
||||
"DOI": "10.26850/1678-4618eqj.v35.1.2010.p41-46",
|
||||
"issued": {
|
||||
"date-parts": [
|
||||
[
|
||||
2018,
|
||||
1,
|
||||
15
|
||||
]
|
||||
]
|
||||
},
|
||||
"abstract": "<jats:p>A qualitative spot-test and tandem quantitative analysis of dipyrone in the bulk drugand in pharmaceutical preparations is proposed. The formation of a reddish-violet\u00a0 color indicates a positive result. In sequence a quantitative procedure can be performed in the same flask. The quantitative results obtained were statistically compared with those obtained with the method indicated by the Brazilian\u00a0 Pharmacopoeia, using the Student\u2019s t and the F tests. Considering the concentration in a 100 \u03bcL aliquot, the qualitative visual limit of detection is about 5\u00d710-6 g; instrumental LOD \u2245 1.4\u00d710-4 mol L-1 ; LOQ \u2245 4.5\u00d710-4 mol L-1.</jats:p>",
|
||||
"prefix": "10.26850",
|
||||
"author": [
|
||||
{
|
||||
"authenticated-orcid": false,
|
||||
"given": "Matthieu",
|
||||
"family": "Tubino",
|
||||
"sequence": "first",
|
||||
"affiliation": [],
|
||||
"ORCID": "http://orcid.org/0000-0002-1987-3907"
|
||||
},
|
||||
{
|
||||
"affiliation": [],
|
||||
"given": "A. C.",
|
||||
"family": "Biondo",
|
||||
"sequence": "additional"
|
||||
},
|
||||
{
|
||||
"authenticated-orcid": false,
|
||||
"given": "Marta Maria Duarte Carvalho",
|
||||
"family": "Vila",
|
||||
"sequence": "additional",
|
||||
"affiliation": [],
|
||||
"ORCID": "http://orcid.org/0000-0002-0198-7076"
|
||||
},
|
||||
{
|
||||
"authenticated-orcid": false,
|
||||
"given": "Leonardo",
|
||||
"family": "Pezza",
|
||||
"sequence": "additional",
|
||||
"affiliation": [],
|
||||
"ORCID": "http://orcid.org/0000-0003-0197-7369"
|
||||
},
|
||||
{
|
||||
"authenticated-orcid": false,
|
||||
"given": "Helena Redigolo",
|
||||
"family": "Pezza",
|
||||
"sequence": "additional",
|
||||
"affiliation": [],
|
||||
"ORCID": "http://orcid.org/0000-0001-5564-1639"
|
||||
}
|
||||
],
|
||||
"reference-count": 0,
|
||||
"ISSN": [
|
||||
"1678-4618"
|
||||
],
|
||||
"member": "11395",
|
||||
"source": "Crossref",
|
||||
"score": 1.0,
|
||||
"deposited": {
|
||||
"timestamp": 1540823529000,
|
||||
"date-time": "2018-10-29T14:32:09Z",
|
||||
"date-parts": [
|
||||
[
|
||||
2018,
|
||||
10,
|
||||
29
|
||||
]
|
||||
]
|
||||
},
|
||||
"indexed": {
|
||||
"timestamp": 1540825815212,
|
||||
"date-time": "2018-10-29T15:10:15Z",
|
||||
"date-parts": [
|
||||
[
|
||||
2018,
|
||||
10,
|
||||
29
|
||||
]
|
||||
]
|
||||
},
|
||||
"type": "journal-article",
|
||||
"published-online": {
|
||||
"date-parts": [
|
||||
[
|
||||
2018,
|
||||
1,
|
||||
15
|
||||
]
|
||||
]
|
||||
},
|
||||
"URL": "http://dx.doi.org/10.26850/1678-4618eqj.v35.1.2010.p41-46",
|
||||
"is-referenced-by-count": 0,
|
||||
"volume": "35",
|
||||
"issn-type": [
|
||||
{
|
||||
"type": "electronic",
|
||||
"value": "1678-4618"
|
||||
}
|
||||
],
|
||||
"link": [
|
||||
{
|
||||
"URL": "http://revista.iq.unesp.br/ojs/index.php/ecletica/article/viewFile/191/149",
|
||||
"intended-application": "text-mining",
|
||||
"content-version": "vor",
|
||||
"content-type": "application/pdf"
|
||||
},
|
||||
{
|
||||
"URL": "http://revista.iq.unesp.br/ojs/index.php/ecletica/article/viewFile/191/149",
|
||||
"intended-application": "similarity-checking",
|
||||
"content-version": "vor",
|
||||
"content-type": "unspecified"
|
||||
}
|
||||
],
|
||||
"journal-issue": {
|
||||
"issue": "1",
|
||||
"published-online": {
|
||||
"date-parts": [
|
||||
[
|
||||
2018,
|
||||
1,
|
||||
15
|
||||
]
|
||||
]
|
||||
}
|
||||
},
|
||||
"references-count": 0,
|
||||
"short-container-title": [
|
||||
"Eclet. Quim. J."
|
||||
],
|
||||
"publisher": "Ecletica Quimica Journal",
|
||||
"content-domain": {
|
||||
"domain": [],
|
||||
"crossmark-restriction": false
|
||||
},
|
||||
"license": [
|
||||
{
|
||||
"URL": "http://creativecommons.org/licenses/by/4.0",
|
||||
"start": {
|
||||
"timestamp": 1515974400000,
|
||||
"date-time": "2018-01-15T00:00:00Z",
|
||||
"date-parts": [
|
||||
[
|
||||
2018,
|
||||
1,
|
||||
15
|
||||
]
|
||||
]
|
||||
},
|
||||
"content-version": "unspecified",
|
||||
"delay-in-days": 0
|
||||
}
|
||||
],
|
||||
"created": {
|
||||
"timestamp": 1517590842000,
|
||||
"date-time": "2018-02-02T17:00:42Z",
|
||||
"date-parts": [
|
||||
[
|
||||
2018,
|
||||
2,
|
||||
2
|
||||
]
|
||||
]
|
||||
},
|
||||
"issue": "1",
|
||||
"title": [
|
||||
"Spot-test identification and rapid quantitative sequential analys is of dipyrone"
|
||||
],
|
||||
"container-title": [
|
||||
"Ecl\u00e9tica Qu\u00edmica Journal"
|
||||
],
|
||||
%s
|
||||
"page": "41-50"
|
||||
}
|
|
@ -0,0 +1,193 @@
|
|||
{
|
||||
"DOI": "10.1016/j.infbeh.2016.11.001",
|
||||
"issued": {
|
||||
"date-parts": [
|
||||
[
|
||||
2017,
|
||||
8
|
||||
]
|
||||
]
|
||||
},
|
||||
"update-policy": "http://dx.doi.org/10.1016/elsevier_cm_policy",
|
||||
"prefix": "10.1016",
|
||||
"subject": [
|
||||
"Developmental and Educational Psychology"
|
||||
],
|
||||
"author": [
|
||||
{
|
||||
"affiliation": [],
|
||||
"given": "Dora",
|
||||
"family": "Kampis",
|
||||
"sequence": "first"
|
||||
},
|
||||
{
|
||||
"affiliation": [],
|
||||
"given": "D\u00f3ra",
|
||||
"family": "Fogd",
|
||||
"sequence": "additional"
|
||||
},
|
||||
{
|
||||
"affiliation": [],
|
||||
"given": "\u00c1gnes Melinda",
|
||||
"family": "Kov\u00e1cs",
|
||||
"sequence": "additional"
|
||||
}
|
||||
],
|
||||
"reference-count": 109,
|
||||
"ISSN": [
|
||||
"0163-6383"
|
||||
],
|
||||
"assertion": [
|
||||
{
|
||||
"name": "publisher",
|
||||
"value": "Elsevier",
|
||||
"label": "This article is maintained by"
|
||||
},
|
||||
{
|
||||
"name": "articletitle",
|
||||
"value": "Nonverbal components of Theory of Mind in typical and atypical development",
|
||||
"label": "Article Title"
|
||||
},
|
||||
{
|
||||
"name": "journaltitle",
|
||||
"value": "Infant Behavior and Development",
|
||||
"label": "Journal Title"
|
||||
},
|
||||
{
|
||||
"name": "articlelink",
|
||||
"value": "https://doi.org/10.1016/j.infbeh.2016.11.001",
|
||||
"label": "CrossRef DOI link to publisher maintained version"
|
||||
},
|
||||
{
|
||||
"name": "content_type",
|
||||
"value": "article",
|
||||
"label": "Content Type"
|
||||
},
|
||||
{
|
||||
"name": "copyright",
|
||||
"value": "\u00a9 2016 Elsevier Inc. All rights reserved.",
|
||||
"label": "Copyright"
|
||||
}
|
||||
],
|
||||
"member": "78",
|
||||
"source": "Crossref",
|
||||
"score": 1.0,
|
||||
"deposited": {
|
||||
"timestamp": 1565383284000,
|
||||
"date-parts": [
|
||||
[
|
||||
2019,
|
||||
8,
|
||||
9
|
||||
]
|
||||
],
|
||||
"date-time": "2019-08-09T20:41:24Z"
|
||||
},
|
||||
"indexed": {
|
||||
"timestamp": 1565385055278,
|
||||
"date-parts": [
|
||||
[
|
||||
2019,
|
||||
8,
|
||||
9
|
||||
]
|
||||
],
|
||||
"date-time": "2019-08-09T21:10:55Z"
|
||||
},
|
||||
"type": "journal-article",
|
||||
"URL": "http://dx.doi.org/10.1016/j.infbeh.2016.11.001",
|
||||
"is-referenced-by-count": 1,
|
||||
"volume": "48",
|
||||
"issn-type": [
|
||||
{
|
||||
"type": "print",
|
||||
"value": "0163-6383"
|
||||
}
|
||||
],
|
||||
"link": [
|
||||
{
|
||||
"URL": "https://api.elsevier.com/content/article/PII:S0163638315300059?httpAccept=text/xml",
|
||||
"intended-application": "text-mining",
|
||||
"content-version": "vor",
|
||||
"content-type": "text/xml"
|
||||
},
|
||||
{
|
||||
"URL": "https://api.elsevier.com/content/article/PII:S0163638315300059?httpAccept=text/plain",
|
||||
"intended-application": "text-mining",
|
||||
"content-version": "vor",
|
||||
"content-type": "text/plain"
|
||||
}
|
||||
],
|
||||
"published-print": {
|
||||
"date-parts": [
|
||||
[
|
||||
2017,
|
||||
8
|
||||
]
|
||||
]
|
||||
},
|
||||
"references-count": 109,
|
||||
"short-container-title": [
|
||||
"Infant Behavior and Development"
|
||||
],
|
||||
"publisher": "Elsevier BV",
|
||||
"content-domain": {
|
||||
"domain": [
|
||||
"elsevier.com",
|
||||
"sciencedirect.com"
|
||||
],
|
||||
"crossmark-restriction": true
|
||||
},
|
||||
"license": [
|
||||
{
|
||||
"URL": "https://www.elsevier.com/tdm/userlicense/1.0/",
|
||||
"start": {
|
||||
"timestamp": 1501545600000,
|
||||
"date-parts": [
|
||||
[
|
||||
2017,
|
||||
8,
|
||||
1
|
||||
]
|
||||
],
|
||||
"date-time": "2017-08-01T00:00:00Z"
|
||||
},
|
||||
"content-version": "tdm",
|
||||
"delay-in-days": 0
|
||||
}
|
||||
],
|
||||
"language": "en",
|
||||
"created": {
|
||||
"timestamp": 1479142046000,
|
||||
"date-parts": [
|
||||
[
|
||||
2016,
|
||||
11,
|
||||
14
|
||||
]
|
||||
],
|
||||
"date-time": "2016-11-14T16:47:26Z"
|
||||
},
|
||||
"title": [
|
||||
"Nonverbal components of Theory of Mind in typical and atypical development"
|
||||
],
|
||||
"alternative-id": [
|
||||
"S0163638315300059"
|
||||
],
|
||||
"container-title": [
|
||||
"Infant Behavior and Development"
|
||||
],
|
||||
"funder": [
|
||||
{
|
||||
"DOI": "10.13039/501100001711",
|
||||
"name": "Swiss National Science Foundation (Schweizerische Nationalfonds)",
|
||||
"doi-asserted-by": "publisher",
|
||||
"award": [
|
||||
"CR32I3_156724",
|
||||
"31003A_173281/1",
|
||||
"200021_165850"
|
||||
]
|
||||
}
|
||||
],
|
||||
"page": "54-62"
|
||||
}
|
|
@ -0,0 +1,104 @@
|
|||
{
|
||||
"DOI": "10.17848/9780880992299.vol1ch4",
|
||||
"ISBN": [
|
||||
"9780880992299"
|
||||
],
|
||||
"issued": {
|
||||
"date-parts": [
|
||||
[
|
||||
2001,
|
||||
12,
|
||||
1
|
||||
]
|
||||
]
|
||||
},
|
||||
"prefix": "10.17848",
|
||||
"author": [
|
||||
{
|
||||
"affiliation": [
|
||||
],
|
||||
"given": "William E.",
|
||||
"family": "Even",
|
||||
"authenticated-orcid": false
|
||||
},
|
||||
{
|
||||
"affiliation": [
|
||||
],
|
||||
"given": "David A.",
|
||||
"family": "Macpherson"
|
||||
}
|
||||
],
|
||||
"reference-count": 0,
|
||||
"member": "7312",
|
||||
"source": "Crossref",
|
||||
"score": 1.0,
|
||||
"deposited": {
|
||||
"timestamp": 1461687244000,
|
||||
"date-parts": [
|
||||
[
|
||||
2016,
|
||||
4,
|
||||
26
|
||||
]
|
||||
],
|
||||
"date-time": "2016-04-26T16:14:04Z"
|
||||
},
|
||||
"indexed": {
|
||||
"timestamp": 1502548826285,
|
||||
"date-parts": [
|
||||
[
|
||||
2017,
|
||||
8,
|
||||
12
|
||||
]
|
||||
],
|
||||
"date-time": "2017-08-12T14:40:26Z"
|
||||
},
|
||||
"type": "book-chapter",
|
||||
"published-online": {
|
||||
"date-parts": [
|
||||
[
|
||||
2010,
|
||||
5,
|
||||
27
|
||||
]
|
||||
]
|
||||
},
|
||||
"URL": "http://dx.doi.org/10.17848/9780880992299.vol1ch4",
|
||||
"is-referenced-by-count": 0,
|
||||
"download_ts": 1508079092.874343,
|
||||
"published-print": {
|
||||
"date-parts": [
|
||||
[
|
||||
2001,
|
||||
12,
|
||||
1
|
||||
]
|
||||
]
|
||||
},
|
||||
"references-count": 0,
|
||||
"publisher": "W.E. Upjohn Institute",
|
||||
"content-domain": {
|
||||
"domain": [
|
||||
],
|
||||
"crossmark-restriction": false
|
||||
},
|
||||
"created": {
|
||||
"timestamp": 1434034139000,
|
||||
"date-parts": [
|
||||
[
|
||||
2015,
|
||||
6,
|
||||
11
|
||||
]
|
||||
],
|
||||
"date-time": "2015-06-11T14:48:59Z"
|
||||
},
|
||||
"title": [
|
||||
"Children\\'s Effects on Women\\'s Labor Market Attachment and Earnings"
|
||||
],
|
||||
"container-title": [
|
||||
"Working Time in Comparative Perspective - Volume II: Life-Cycle Working Time and Nonstandard Hours"
|
||||
],
|
||||
"page": "99-128"
|
||||
}
|
|
@ -0,0 +1,105 @@
|
|||
{
|
||||
"DOI": "10.1037/e522512014-096",
|
||||
"subtitle": [
|
||||
"(522512014-096)"
|
||||
],
|
||||
"issued": {
|
||||
"date-parts": [
|
||||
[
|
||||
2012
|
||||
]
|
||||
]
|
||||
},
|
||||
"prefix": "10.1037",
|
||||
"author": [
|
||||
{
|
||||
"affiliation": [],
|
||||
"given": "Jessica",
|
||||
"family": "Trudeau",
|
||||
"sequence": "first"
|
||||
},
|
||||
{
|
||||
"affiliation": [],
|
||||
"given": "Amy",
|
||||
"family": "McShane",
|
||||
"sequence": "additional"
|
||||
},
|
||||
{
|
||||
"affiliation": [],
|
||||
"given": "Renee",
|
||||
"family": "McDonald",
|
||||
"sequence": "additional"
|
||||
}
|
||||
],
|
||||
"reference-count": 0,
|
||||
"member": "15",
|
||||
"source": "Crossref",
|
||||
"score": 1.0,
|
||||
"deposited": {
|
||||
"timestamp": 1413827035000,
|
||||
"date-parts": [
|
||||
[
|
||||
2014,
|
||||
10,
|
||||
20
|
||||
]
|
||||
],
|
||||
"date-time": "2014-10-20T17:43:55Z"
|
||||
},
|
||||
"indexed": {
|
||||
"timestamp": 1550142454710,
|
||||
"date-parts": [
|
||||
[
|
||||
2019,
|
||||
2,
|
||||
14
|
||||
]
|
||||
],
|
||||
"date-time": "2019-02-14T11:07:34Z"
|
||||
},
|
||||
"type": "dataset",
|
||||
"URL": "http://dx.doi.org/10.1037/e522512014-096",
|
||||
"is-referenced-by-count": 0,
|
||||
"published-print": {
|
||||
"date-parts": [
|
||||
[
|
||||
2012
|
||||
]
|
||||
]
|
||||
},
|
||||
"references-count": 0,
|
||||
"institution": {
|
||||
"acronym": [
|
||||
"APA"
|
||||
],
|
||||
"place": [
|
||||
"-"
|
||||
],
|
||||
"name": "American Psychological Association"
|
||||
},
|
||||
"publisher": "American Psychological Association (APA)",
|
||||
"content-domain": {
|
||||
"domain": [],
|
||||
"crossmark-restriction": false
|
||||
},
|
||||
"created": {
|
||||
"timestamp": 1413826121000,
|
||||
"date-parts": [
|
||||
[
|
||||
2014,
|
||||
10,
|
||||
20
|
||||
]
|
||||
],
|
||||
"date-time": "2014-10-20T17:28:41Z"
|
||||
},
|
||||
"title": [
|
||||
"Project Support: A Randomized Control Study to Evaluate the Translation of an Evidence- Based Program"
|
||||
],
|
||||
"alternative-id": [
|
||||
"522512014-096"
|
||||
],
|
||||
"container-title": [
|
||||
"PsycEXTRA Dataset"
|
||||
]
|
||||
}
|
|
@ -0,0 +1,121 @@
|
|||
{
|
||||
"indexed": {
|
||||
"date-parts": [
|
||||
[
|
||||
2020,
|
||||
4,
|
||||
7
|
||||
]
|
||||
],
|
||||
"date-time": "2020-04-07T15:54:28Z",
|
||||
"timestamp": 1586274868901
|
||||
},
|
||||
"reference-count": 0,
|
||||
"publisher": "Japan Society of Mechanical Engineers",
|
||||
"issue": "432",
|
||||
"content-domain": {
|
||||
"domain": [],
|
||||
"crossmark-restriction": false
|
||||
},
|
||||
"short-container-title": [
|
||||
"JSMET"
|
||||
],
|
||||
"published-print": {
|
||||
"date-parts": [
|
||||
[
|
||||
1982
|
||||
]
|
||||
]
|
||||
},
|
||||
"DOI": "10.1299\/kikaib.48.1474",
|
||||
"type": "journal-article",
|
||||
"created": {
|
||||
"date-parts": [
|
||||
[
|
||||
2011,
|
||||
9,
|
||||
13
|
||||
]
|
||||
],
|
||||
"date-time": "2011-09-13T05:59:01Z",
|
||||
"timestamp": 1315893541000
|
||||
},
|
||||
"page": "1474-1482",
|
||||
"source": "Crossref",
|
||||
"is-referenced-by-count": 0,
|
||||
"title": [
|
||||
""
|
||||
],
|
||||
"prefix": "10.1299",
|
||||
"volume": "48",
|
||||
"author": [
|
||||
{
|
||||
"given": "Hiroshi",
|
||||
"family": "KATO",
|
||||
"sequence": "first",
|
||||
"affiliation": []
|
||||
},
|
||||
{
|
||||
"given": "Yoshichika",
|
||||
"family": "MIZUNO",
|
||||
"sequence": "additional",
|
||||
"affiliation": []
|
||||
}
|
||||
],
|
||||
"member": "124",
|
||||
"container-title": [
|
||||
"Transactions of the Japan Society of Mechanical Engineers Series B"
|
||||
],
|
||||
"original-title": [
|
||||
"\u5e0c\u8584\u9ad8\u5206\u5b50\u6eb6\u6db2\u4e2d\u306e\u6709\u9650\u9577\u5186\u67f1\u306e\u62b5\u6297"
|
||||
],
|
||||
"language": "ja",
|
||||
"deposited": {
|
||||
"date-parts": [
|
||||
[
|
||||
2011,
|
||||
9,
|
||||
13
|
||||
]
|
||||
],
|
||||
"date-time": "2011-09-13T06:01:33Z",
|
||||
"timestamp": 1315893693000
|
||||
},
|
||||
"score": 1.0,
|
||||
"subtitle": [],
|
||||
"short-title": [],
|
||||
"issued": {
|
||||
"date-parts": [
|
||||
[
|
||||
1982
|
||||
]
|
||||
]
|
||||
},
|
||||
"references-count": 0,
|
||||
"journal-issue": {
|
||||
"published-print": {
|
||||
"date-parts": [
|
||||
[
|
||||
1982
|
||||
]
|
||||
]
|
||||
},
|
||||
"issue": "432"
|
||||
},
|
||||
"URL": "http:\/\/dx.doi.org\/10.1299\/kikaib.48.1474",
|
||||
"relation": {},
|
||||
"ISSN": [
|
||||
"0387-5016",
|
||||
"1884-8346"
|
||||
],
|
||||
"issn-type": [
|
||||
{
|
||||
"value": "0387-5016",
|
||||
"type": "print"
|
||||
},
|
||||
{
|
||||
"value": "1884-8346",
|
||||
"type": "electronic"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
"funder": [{"DOI": "10.13039/100010663","name": "H2020 European Research Council","doi-asserted-by": "publisher","award": ["677749"]}],
|
||||
"funder": [{"name": "European Union’s Horizon 2020 research and innovation program","award": ["296801","304995","675395"]}],
|
||||
"funder": [{"DOI": "10.13039/100010661","name": "Horizon 2020 Framework Programme","doi-asserted-by": "publisher","award": ["722467", "H2020-FETOPEN-2015-CSA 712689","773830 (2018–2022)"]}],
|
||||
"funder": [{"DOI": "10.13039/501100007601","name": "Horizon 2020","doi-asserted-by": "publisher","award": ["645119"]}],
|
||||
"funder": [{"DOI": "10.13039/100010665","name": "H2020 Marie Skłodowska-Curie Actions","doi-asserted-by": "publisher","award": ["840267"]}],
|
||||
"funder": [{"DOI": "10.13039/100011199","name": "FP7 Ideas: European Research Council","doi-asserted-by": "publisher","award": ["226438"]}],
|
||||
"funder": [{"DOI": "10.13039/100004431","name": "Directorate-General for Research and Innovation","doi-asserted-by": "publisher","award": ["321427"]}],
|
||||
"funder": [{"DOI": "10.13039/501100004963","name": "Seventh Framework Programme","doi-asserted-by": "publisher","award": ["287818","612538"]}],
|
||||
"funder": [{"DOI": "10.13039/501100000781","name": "European Research Council","doi-asserted-by": "publisher","award": ["340185"]}],
|
||||
"funder": [{"name": "European Union's","award": ["763909"]}],
|
||||
"funder": [{"DOI": "10.13039/501100000780","name": "European Commission","doi-asserted-by": "publisher","award": ["645119", "H2020-FETOPEN-2015-CSA_712689"]}],
|
||||
"funder": [{"DOI": "10.13039/100000001","name": "National Science Foundation","doi-asserted-by": "publisher","award": ["1639552,1634422","ID0EEMBI7182"]}],
|
||||
"funder": [{"name": "The French National Research Agency (ANR)","award": ["ID0E4QBI7183","ANR-11-JS56-01501","ID0E3VBI7184","ANR-13-BS06-0008"]}],
|
||||
"funder": [{"DOI": "10.13039/501100001665","name": "Agence Nationale de la Recherche","doi-asserted-by": "publisher","award": ["ANR-14-ASTR-0004-01"]}],
|
||||
"funder": [{"DOI": "10.13039/501100002341","name": "Academy of Finland","doi-asserted-by": "publisher","award": ["294337","292335","31444","250114","292482"]}],
|
||||
"funder": [{"DOI": "10.13039/501100001602","name": "Science Foundation Ireland","doi-asserted-by": "publisher","award": ["16/SP/3829","12/RC/2302_P2","SFI/09/IN.I/12974"]}],
|
||||
"funder": [{"DOI": "10.13039/501100000923","name": "Australian Research Council","doi-asserted-by": "publisher","award": ["LP110200134"]}],
|
||||
"funder": [{"DOI": "10.13039/501100000038","name": "NSERC","doi-asserted-by": "crossref","award": []}],
|
||||
"funder": [{"DOI": "10.13039/501100000155","name": "Social Sciences and Humanities Research Council of Canada","doi-asserted-by": "publisher","award": []}],
|
||||
"funder": [{"DOI": "10.13039/501100000024","name": "Canadian Institutes for Health Research","doi-asserted-by": "crossref","award": ["HIB-126784","HHP-111405"]}],
|
||||
"funder": [{"DOI": "10.13039/501100002848","name": "Comisión Nacional de Investigación Científica y Tecnológica","doi-asserted-by": "publisher","award": ["15130011"]}],
|
||||
"funder": [{"DOI": "10.13039/501100003448","name": "General Secretariat for Research and Technology","doi-asserted-by": "publisher","award": ["MIS: 380170"]}],
|
||||
"funder": [{"DOI": "10.13039/501100010198","name": "Ministerio de Economía, Industria y Competitividad, Gobierno de España","doi-asserted-by": "publisher","award": ["ECO2017-89715-P"]}],
|
||||
"funder": [{"DOI": "10.13039/501100004564","name": "Ministarstvo Prosvete, Nauke i Tehnološkog Razvoja","doi-asserted-by": "publisher","award": ["TR34008"]}],
|
||||
"funder": [{"DOI": "10.13039/501100003407","name": "MIUR","doi-asserted-by": "publisher","award": ["20158A9CBM"]}],
|
||||
"funder": [{"DOI": "10.13039/501100003407","name": "MIUR","doi-asserted-by": "publisher","award": []}],
|
||||
"funder": [{"DOI": "10.13039/501100006588","name": "Ministarstvo Znanosti, Obrazovanja i Sporta","doi-asserted-by": "publisher","award": ["037-0372790-2799", "Project No. 125-1253008-1350"]}],
|
||||
"funder": [{"DOI": "10.13039/501100006588","name": "Ministry of Science, Education and Sports","doi-asserted-by": "publisher","award": ["181-1811096-1093"]}],
|
||||
"funder": [{"DOI": "10.13039/501100004488","name": "Hrvatska Zaklada za Znanost","doi-asserted-by": "publisher","award": ["HRZZ-IP-2013-11-3013", "UIP-2014-09-4744"]}],
|
||||
"funder": [{"DOI": "10.13039/501100006769","name": "Russian Science Foundation","doi-asserted-by": "publisher","award": ["17-11-01027"]}],
|
||||
"funder": [{"DOI": "10.13039/501100001711","name": "Swiss National Science Foundation (Schweizerische Nationalfonds)","doi-asserted-by": "publisher","award": ["CR32I3_156724", "31003A_173281/1"]}],
|
||||
"funder": [{"DOI": "10.13039/501100004410","name": "Türkiye Bilimsel ve Teknolojik Araştirma Kurumu","doi-asserted-by": "publisher","award": ["113M552"]}],
|
||||
"funder": [{"DOI": "10.13039/100004440","name": "Wellcome Trust","doi-asserted-by": "crossref","award": ["095127","079080"]}],
|
||||
"funder": [{"DOI": "10.13039/100004440","name": "Wellcome Trust","doi-asserted-by": "crossref","award": []}],
|
|
@ -0,0 +1,5 @@
|
|||
"funder": [{"name": "Wellcome Trust Masters Fellowship","award": ["090633"]}],
|
||||
"funder": [{"name": "CONICYT, Programa de Formación de Capital Humano Avanzado","award": ["#72140079"]}],
|
||||
"funder": [{"name": "European Union's","award": ["763909"]}],
|
||||
"funder": [{"name": "European Union’s Horizon 2020 research and innovation program","award": ["296801","304995","675395"]}],
|
||||
"funder": [{"name": "The French National Research Agency (ANR)","award": ["ID0E4QBI7183","ANR-11-JS56-01501","ID0E3VBI7184","ANR-13-BS06-0008"]}],
|
|
@ -0,0 +1,146 @@
|
|||
{
|
||||
"DOI": "10.1101/030080",
|
||||
"issued": {
|
||||
"date-parts": [
|
||||
[
|
||||
2015,
|
||||
10,
|
||||
28
|
||||
]
|
||||
]
|
||||
},
|
||||
"abstract": "<jats:p>Abstract Key Message<jats:italic>Agrobacterium tumefaciens</jats:italic>was used to transform radiata pine shoots and to efficiently produce stable genetically modified pine plants. Abstract Micropropagated shoot explants from<jats:italic>Pinus radiata</jats:italic>D. Don were used to produce stable transgenic plants by<jats:italic>Agrobacterium tumefaciens</jats:italic>mediated transformation. Using this method any genotype that can be micropropagated could produce stable transgenic lines. As over 80% of<jats:italic>P. radiata</jats:italic>genotypes tested can be micropropagated, this effectively means that any line chosen for superior characteristics could be transformed. There are well established protocols for progressing such germplasm to field deployment. Here we used open and control pollinated seed lines and embryogenic clones. The method developed was faster than other methods previously developed using mature cotyledons. PCR positive shoots could be obtain within 6 months of<jats:italic>Agrobacterium</jats:italic>cocultivation compared with 12 months for cotyledon methods. Transformed shoots were obtained using either kanamycin or geneticin as the selectable marker gene. Shoots were recovered from selection, were tested and were not chimeric, indicating that the selection pressure was optimal for this explant type. GFP was used as a vital marker, and the bar gene, (for resistance to the herbicide Buster\\u00ae/Basta\\u00ae) was used to produce lines that could potentially be used in commercial application. As expected, a range of expression phenotypes were identified for both these reporter genes and the analyses for expression were relatively easy.</jats:p>",
|
||||
"prefix": "10.1101",
|
||||
"author": [
|
||||
{
|
||||
"affiliation": [],
|
||||
"given": "Jan E",
|
||||
"family": "Grant",
|
||||
"sequence": "first"
|
||||
},
|
||||
{
|
||||
"affiliation": [],
|
||||
"given": "Pauline A",
|
||||
"family": "Cooper",
|
||||
"sequence": "additional"
|
||||
},
|
||||
{
|
||||
"affiliation": [],
|
||||
"given": "Tracy M",
|
||||
"family": "Dale",
|
||||
"sequence": "additional"
|
||||
}
|
||||
],
|
||||
"reference-count": 0,
|
||||
"member": "246",
|
||||
"source": "Crossref",
|
||||
"score": 1.0,
|
||||
"deposited": {
|
||||
"timestamp": 1483495053000,
|
||||
"date-time": "2017-01-04T01:57:33Z",
|
||||
"date-parts": [
|
||||
[
|
||||
2017,
|
||||
1,
|
||||
4
|
||||
]
|
||||
]
|
||||
},
|
||||
"indexed": {
|
||||
"timestamp": 1550234353119,
|
||||
"date-time": "2019-02-15T12:39:13Z",
|
||||
"date-parts": [
|
||||
[
|
||||
2019,
|
||||
2,
|
||||
15
|
||||
]
|
||||
]
|
||||
},
|
||||
"type": "posted-content",
|
||||
"URL": "http://dx.doi.org/10.1101/030080",
|
||||
"is-referenced-by-count": 2,
|
||||
"link": [
|
||||
{
|
||||
"URL": "https://syndication.highwire.org/content/doi/10.1101/030080",
|
||||
"intended-application": "similarity-checking",
|
||||
"content-version": "vor",
|
||||
"content-type": "unspecified"
|
||||
}
|
||||
],
|
||||
"accepted": {
|
||||
"date-parts": [
|
||||
[
|
||||
2015,
|
||||
10,
|
||||
28
|
||||
]
|
||||
]
|
||||
},
|
||||
"references-count": 0,
|
||||
"institution": {
|
||||
"acronym": [
|
||||
"-"
|
||||
],
|
||||
"place": [
|
||||
"-"
|
||||
],
|
||||
"name": "bioRxiv"
|
||||
},
|
||||
"posted": {
|
||||
"date-parts": [
|
||||
[
|
||||
2015,
|
||||
10,
|
||||
28
|
||||
]
|
||||
]
|
||||
},
|
||||
"publisher": "Cold Spring Harbor Laboratory",
|
||||
"content-domain": {
|
||||
"domain": [],
|
||||
"crossmark-restriction": false
|
||||
},
|
||||
"created": {
|
||||
"timestamp": 1446095513000,
|
||||
"date-time": "2015-10-29T05:11:53Z",
|
||||
"date-parts": [
|
||||
[
|
||||
2015,
|
||||
10,
|
||||
29
|
||||
]
|
||||
]
|
||||
},
|
||||
"published-print": {
|
||||
"timestamp": 1446095513000,
|
||||
"date-time": "2015-10-29T05:11:53Z",
|
||||
"date-parts": [
|
||||
[
|
||||
2015,
|
||||
2,
|
||||
29
|
||||
]
|
||||
]
|
||||
},
|
||||
"published-online": {
|
||||
"date-parts": [
|
||||
[
|
||||
2015,
|
||||
2,
|
||||
2
|
||||
]
|
||||
]
|
||||
},
|
||||
"title": [
|
||||
"Genetic transformation of micropropagated shoots ofPinus radiataD.Don"
|
||||
],
|
||||
"original-title": [
|
||||
"OR TITLE"
|
||||
],
|
||||
"short-title": [
|
||||
"SHORT TITLE"
|
||||
],
|
||||
"group-title": "Plant Biology",
|
||||
"subtype": "preprint"
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,334 @@
|
|||
{
|
||||
"IndexLength": 139,
|
||||
"InvertedIndex": {
|
||||
"The": [
|
||||
0,
|
||||
23,
|
||||
47
|
||||
],
|
||||
"invention": [
|
||||
1,
|
||||
53
|
||||
],
|
||||
"discloses": [
|
||||
2
|
||||
],
|
||||
"a": [
|
||||
3,
|
||||
10,
|
||||
71,
|
||||
81,
|
||||
121
|
||||
],
|
||||
"treatment": [
|
||||
4,
|
||||
69,
|
||||
85,
|
||||
96
|
||||
],
|
||||
"method": [
|
||||
5,
|
||||
24,
|
||||
49
|
||||
],
|
||||
"of": [
|
||||
6,
|
||||
9,
|
||||
19,
|
||||
57,
|
||||
84,
|
||||
117,
|
||||
120
|
||||
],
|
||||
"waste": [
|
||||
7,
|
||||
118
|
||||
],
|
||||
"mash": [
|
||||
8,
|
||||
119
|
||||
],
|
||||
"cane": [
|
||||
11,
|
||||
122
|
||||
],
|
||||
"sugar": [
|
||||
12,
|
||||
123
|
||||
],
|
||||
"factory,": [
|
||||
13
|
||||
],
|
||||
"belonging": [
|
||||
14
|
||||
],
|
||||
"to": [
|
||||
15
|
||||
],
|
||||
"the": [
|
||||
16,
|
||||
26,
|
||||
52,
|
||||
55,
|
||||
66,
|
||||
93,
|
||||
115,
|
||||
135
|
||||
],
|
||||
"technical": [
|
||||
17,
|
||||
48
|
||||
],
|
||||
"field": [
|
||||
18
|
||||
],
|
||||
"industrial": [
|
||||
20
|
||||
],
|
||||
"wastewater": [
|
||||
21
|
||||
],
|
||||
"treatment.": [
|
||||
22
|
||||
],
|
||||
"comprises": [
|
||||
25
|
||||
],
|
||||
"following": [
|
||||
27
|
||||
],
|
||||
"steps": [
|
||||
28
|
||||
],
|
||||
"of:": [
|
||||
29
|
||||
],
|
||||
"(1)": [
|
||||
30
|
||||
],
|
||||
"pretreatment;": [
|
||||
31
|
||||
],
|
||||
"(2)": [
|
||||
32
|
||||
],
|
||||
"primary": [
|
||||
33
|
||||
],
|
||||
"concentration;": [
|
||||
34
|
||||
],
|
||||
"(3)": [
|
||||
35
|
||||
],
|
||||
"cooling": [
|
||||
36
|
||||
],
|
||||
"sedimentation": [
|
||||
37
|
||||
],
|
||||
"and": [
|
||||
38,
|
||||
45,
|
||||
62,
|
||||
80,
|
||||
86,
|
||||
114,
|
||||
134
|
||||
],
|
||||
"dense": [
|
||||
39
|
||||
],
|
||||
"slurry": [
|
||||
40
|
||||
],
|
||||
"drying;": [
|
||||
41
|
||||
],
|
||||
"(4)": [
|
||||
42
|
||||
],
|
||||
"secondary": [
|
||||
43
|
||||
],
|
||||
"concentration": [
|
||||
44
|
||||
],
|
||||
"drying.": [
|
||||
46
|
||||
],
|
||||
"disclosed": [
|
||||
50
|
||||
],
|
||||
"by": [
|
||||
51
|
||||
],
|
||||
"has": [
|
||||
54
|
||||
],
|
||||
"advantages": [
|
||||
56
|
||||
],
|
||||
"small": [
|
||||
58
|
||||
],
|
||||
"investment,": [
|
||||
59
|
||||
],
|
||||
"simple": [
|
||||
60
|
||||
],
|
||||
"equipment": [
|
||||
61
|
||||
],
|
||||
"easiness": [
|
||||
63
|
||||
],
|
||||
"in": [
|
||||
64,
|
||||
132
|
||||
],
|
||||
"popularization;": [
|
||||
65
|
||||
],
|
||||
"product": [
|
||||
67
|
||||
],
|
||||
"after": [
|
||||
68
|
||||
],
|
||||
"is": [
|
||||
70,
|
||||
91,
|
||||
98,
|
||||
102,
|
||||
112,
|
||||
130,
|
||||
137
|
||||
],
|
||||
"high-quality": [
|
||||
72
|
||||
],
|
||||
"high": [
|
||||
73
|
||||
],
|
||||
"value-added": [
|
||||
74
|
||||
],
|
||||
"(fully": [
|
||||
75
|
||||
],
|
||||
"water-soluble)": [
|
||||
76
|
||||
],
|
||||
"potassium": [
|
||||
77
|
||||
],
|
||||
"humate": [
|
||||
78
|
||||
],
|
||||
"product,": [
|
||||
79
|
||||
],
|
||||
"new": [
|
||||
82
|
||||
],
|
||||
"mode": [
|
||||
83
|
||||
],
|
||||
"profit": [
|
||||
87
|
||||
],
|
||||
"enabling": [
|
||||
88
|
||||
],
|
||||
"sustainable": [
|
||||
89
|
||||
],
|
||||
"development": [
|
||||
90
|
||||
],
|
||||
"realized;": [
|
||||
92
|
||||
],
|
||||
"environmental": [
|
||||
94
|
||||
],
|
||||
"protection": [
|
||||
95
|
||||
],
|
||||
"effect": [
|
||||
97
|
||||
],
|
||||
"good,": [
|
||||
99
|
||||
],
|
||||
"water": [
|
||||
100,
|
||||
106
|
||||
],
|
||||
"balance": [
|
||||
101
|
||||
],
|
||||
"realized": [
|
||||
103
|
||||
],
|
||||
"through": [
|
||||
104
|
||||
],
|
||||
"final": [
|
||||
105
|
||||
],
|
||||
"quality": [
|
||||
107
|
||||
],
|
||||
"treatment,": [
|
||||
108
|
||||
],
|
||||
"real": [
|
||||
109
|
||||
],
|
||||
"zero": [
|
||||
110
|
||||
],
|
||||
"emission": [
|
||||
111
|
||||
],
|
||||
"realized,": [
|
||||
113
|
||||
],
|
||||
"problem": [
|
||||
116
|
||||
],
|
||||
"factory": [
|
||||
124
|
||||
],
|
||||
"can": [
|
||||
125
|
||||
],
|
||||
"be": [
|
||||
126
|
||||
],
|
||||
"solved": [
|
||||
127
|
||||
],
|
||||
"fundamentally;": [
|
||||
128
|
||||
],
|
||||
"energy": [
|
||||
129
|
||||
],
|
||||
"saved": [
|
||||
131
|
||||
],
|
||||
"operation,": [
|
||||
133
|
||||
],
|
||||
"feasibility": [
|
||||
136
|
||||
],
|
||||
"high.": [
|
||||
138
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
H4sIAAAAAAAAAO1a227bOBB9z1cIepd18SW24aho0wTbAgEWjRdY9I2RaJtbSdSSkhP165eURIm6kHa2SbCLNkBiWDxzhhxyZg7tbN49xZFxhIQinFyZ7sQxDZgEOETJ/sr8Y3trLU2DZiAJQYQTeGUWkJrv/IsNgQEm4bp6MVKQHa5M22E/Fvt1rcViNrfmzupP02AOErpGSQZJAqIr85Bl6dq2Hx8fJ5gEKGR/93ZCbYEQFjDMA5CV01KZNBBhEyKaoSTQW0mgxg6mbCUgg6HGrMEIK5wdILESEEO1VYsRVjGMH1i8DyhVW7WYJhqEYKKJBB8W2ADHsS4A1bhAV1uoRlfjAp2yaWG2S1YIM4AiqrbrIwXDN1g8ah3WgGblMbPWrJwPN9in6gxZKIRJhnYI6mI2BAueXZ5UGaCyrQFNVAjcQcISB+oC0oKEHQhDAqnGpga0WXRE7ABaKaZIf8j7SMHAIvtNbcVHBfLA0gSTQg2uAe0+pREuYhZK3WYJjLD6OwcRC/2pTO/AhC2F5IgCTfLVgO7ZPXVim71hFYLFEOm2tMW02UQhIAFP+pxojm0X186QvSfwiOCjbpoNSNg95JFmV/lof36MgOKc6KI3gJr+hcF+NlX9WJdgKXmqURmRE+RzdsroW+qRLrGxJYsBDe8uvs6qBAzMDphmfuO2AZePq4XY2pVspISVM1zyJCMiHIAI+jDZ2COPa4dayk2dUSL1JEdiJCCwTAErhtkBh/5d2SiskonAcGOrgEMqmj/EiPK+b4Wsq/me464sZ2l53tadrmeLtXc58ZbLry1n32IQ8QjQzIqZeGBBDAWrx7Ztbrnu1puu59P11JksPfdrE/sRm5FlRwDFMPQzkkNpjfXTIZ4Jmoqv7A49s96gxjolKAak0LN0QfU+j+7kpiowdR3SiCZRieSTVplyIWEcEUUPKEIZK85p/hChwKzJxgRYSyJvVXk+2k0abv187rWb1EGP8o1u/QlW3dZLi24lxHqPjjAp1RT1twgkRb4Z6IwO6ATfDsQoKkqs/xmBETIZ0e6GLW2H9LgVe5I2pLqNlmCmLTF120Ovq2gZe9AOa3lEK0Gl5ag0lWxZ6xAhWPSLEqJFJqhFnVB/WnuB6c59qNbG5J5+XSN44aTZ0+qlftg2eEkPWDSPecprY9Aqg2fUyZnlTLfObD2brZ3pZHm5OLNOStOUbjfaWMi47la3XM39Sh/VBqXkaWTfiWPXwFRMte7W0giMiqMvjbVkA7CKtb2yafkkmIpJ0ndaKhmn4uroZi1bF6niG2jCs2pRi1bx1kpdyyYwKg5+edESlABFP3zplOxPbk9wnnaHX9u9zC9VPjpEKZDjQAXYyooU+iFGzfwGg8+iO4Ioh77rTFzXWdnvr69v7u8nPCYTb7X0PNcZ9VNZPctRgknMjv53GBoZAQlF5Q2Wiz2zcQ8Cdu7oafct1/PmwDp1c1FiISyvSc9dOud4llMCoyrZWTHyKYx2o7Qd1PjJGTEbOYkjqJGjuOFJWqZy22XzzApwyG6qly67kCxWjnkqy+0WOSaWWe9LI1BYKAnhE1PNpj4lelqZp+XUmjpbz1szYTt3JjP38hyt3Od9raSXfVR19/TBqHBWEPHjr8192Wr8gl+RSJuzWi5nlrtyp+P3fJ2H3t1/yNS9++uoTn4eMGpsPztAvZCWd4Rrgillt/Q+XfcCoXGsAJXZkqEsOmOLK9g9K1CR9ZFdnBN+kzdu2WnNCTTuQEbQk3HNMp3VvlIXGnflZwfGDhPjI6y+FDC+wBQyJnbHMm7Ze0iMO3yElba7JTg2biIYZATzzzXSA4jwnoDYuEd7lvK0WZRmyhv71KLOb2oK9Hnn5YWam4ryVRqcytlbNznVPF690akcv1SzK/nPangq5An99W8jpIxKXSP4Gf2LlRI+CUAyFERQZJry+DZFuOyb1eeJ6pYjWxRM95fNrJlf+UQfpPPcVOsRS6nKxKebmxvjfXl+60V1x0fU
yEBn9LS7rRfvP6rt64/GVlt3vnYXa8ebLJz5T6jt53ObB8OeLl2m2WZvJurP8fviav4cpz+BjF+4znzqzd3TMr5FvryMP5GBPyjjXyC/ZR+/ZPwvGd+Rzh8IQIl1jWOWVkyDf+L/PLMDATSuDyBJYGTdQ67DuYq/ZxUwg/vC+AAoq4fsyXuWtwVF1MA74+bIA/GFlwc2+BHSIgkOBCfoe1kvjC1OuYRPD4WBSi78DRq/szGu+H/p+ddqaiovb9bYVBN4veam8vj/l+6q0PwnNbu7OkOzy3bslxf3ZWNWPThpF4LC91or/va17gefq3e83v0GQZQdAkCgcZPsUQIhQcn+DW4NnbHyqwjxxaP2S0b/YmN3/tnSv/gH9+klwrUpAAA=
|
|
@ -0,0 +1,309 @@
|
|||
(10.1109/JPHOT.2018.2880319,[{"oid":"0000-0002-1473-2224","name":"zhong","surname":"chen","creditName":null,"errorCode":null},{"oid":"0000-0002-5739-2127","name":"Yue","surname":"Lin","creditName":null,"errorCode":null}])
|
||||
(10.1073/pnas.1816459115,[{"oid":"0000-0001-6260-4326","name":"Igor","surname":"Sokolov","creditName":null,"errorCode":null},{"oid":"0000-0001-6260-4326","name":"Igor","surname":"Sokolov","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.mjafi.2016.05.007,[{"oid":"0000-0002-4342-6656","name":"Anoop","surname":"Sharma","creditName":null,"errorCode":null}])
|
||||
(10.1152/japplphysiol.00476.2016,[{"oid":"0000-0001-9039-0302","name":"Graeme","surname":"Zosky","creditName":"Graeme R. Zosky","errorCode":null},{"oid":"0000-0001-8355-7362","name":"Stephen","surname":"Dubsky","creditName":"Stephen Eric Dubsky","errorCode":null}])
|
||||
(10.1111/j.1365-2621.1995.tb09798.x,[{"oid":"0000-0001-6457-4044","name":"jose martin","surname":"yañez limon","creditName":null,"errorCode":null},{"oid":"0000-0002-7108-8447","name":"Orlando","surname":"Zelaya-Angel","creditName":null,"errorCode":null},{"oid":"0000-0002-9267-4621","name":"Jean","surname":"Martinez","creditName":null,"errorCode":null},{"oid":"0000-0003-3060-2638","name":"Juan Jose","surname":"Alvarado-Gil","creditName":null,"errorCode":null}])
|
||||
(10.1007/s10461-019-02391-1,[{"oid":"0000-0003-4375-7452","name":"Cathy","surname":"Reback","creditName":null,"errorCode":null}])
|
||||
(10.1039/C4TA03030C,[{"oid":"0000-0003-1615-5034","name":"Jihong","surname":"Yu","creditName":null,"errorCode":null}])
|
||||
(10.1016/0306-4522(89)90381-3,[{"oid":"0000-0002-7272-8370","name":"PAOLA","surname":"D'ASCANIO","creditName":null,"errorCode":null}])
|
||||
(10.1016/S0921-4526(01)00945-0,[{"oid":"0000-0002-5360-1008","name":"Tomasz","surname":"Nowak","creditName":null,"errorCode":null},{"oid":"0000-0001-5554-8178","name":"Pawel","surname":"Olko","creditName":null,"errorCode":null},{"oid":"0000-0002-1993-9320","name":"Michael","surname":"Waligorski","creditName":null,"errorCode":null},{"oid":"0000-0002-1993-9320","name":"Michael","surname":"Waligorski","creditName":null,"errorCode":null}])
|
||||
(10.1074/jbc.m312875200,[{"oid":"0000-0003-3631-2252","name":"Isabelle","surname":"Dugail","creditName":null,"errorCode":null},{"oid":"0000-0001-6577-9009","name":"Bruno","surname":"Feve","creditName":null,"errorCode":null},{"oid":"0000-0002-4502-3543","name":"Martine","surname":"GLORIAN","creditName":"GLORIAN","errorCode":null},{"oid":"0000-0002-1275-9861","name":"KHADIJA","surname":"EL HADRI","creditName":null,"errorCode":null}])
|
||||
(10.1038/nrc3802,[{"oid":"0000-0001-7427-4651","name":"William","surname":"Foulkes","creditName":null,"errorCode":null},{"oid":"0000-0001-7427-4651","name":"William","surname":"Foulkes","creditName":null,"errorCode":null}])
|
||||
(10.1186/s12974-018-1125-5,[{"oid":"0000-0001-5270-9888","name":"I-Chen","surname":"Yu","creditName":null,"errorCode":null}])
|
||||
(10.4997/JRCPE.2016.310,[{"oid":"0000-0001-7658-1209","name":"David","surname":"Burn","creditName":null,"errorCode":null}])
|
||||
(10.13140/RG.2.2.20772.48006,[{"oid":"0000-0003-2825-5884","name":"Abdelkader","surname":"Mezghani","creditName":null,"errorCode":null}])
|
||||
(10.1039/c8ay00816g,[{"oid":"0000-0002-5444-7276","name":"Torbjörn","surname":"Pettersson","creditName":null,"errorCode":null}])
|
||||
(10.2147/dhps.s6226,[{"oid":"0000-0001-8558-3396","name":"Mario Francisco","surname":"Juruena","creditName":"JURUENA OR JURUENA M OR JURUENA M F OR OR JURUENA MARIO OR JURUENA MARIO F OR JURUENA MARIO FRANCISCO OR JURUENA MF","errorCode":null}])
|
||||
(10.1007/s00415-004-0351-1,[{"oid":"0000-0002-2737-3662","name":"Gian Domenico","surname":"Borasio","creditName":null,"errorCode":null}])
|
||||
(10.1088/0967-3334/23/1/318,[{"oid":"0000-0002-3549-4309","name":"Gavin","surname":"Screaton","creditName":null,"errorCode":null},{"oid":"0000-0002-0124-5072","name":"Carlos-Augusto","surname":"Gonzalez-Correa","creditName":null,"errorCode":null}])
|
||||
(10.1111/aogs.13316,[{"oid":"0000-0002-1241-9875","name":"Lena","surname":"Liljestrom","creditName":null,"errorCode":null}])
|
||||
(10.1038/sj.thj.6200111,[{"oid":"0000-0001-6115-8790","name":"Felipe","surname":"Prosper","creditName":null,"errorCode":null},{"oid":"0000-0001-6115-8790","name":"Felipe","surname":"Prosper","creditName":null,"errorCode":null},{"oid":"0000-0002-9467-932X","name":"Maria J","surname":"Terol","creditName":null,"errorCode":null},{"oid":"0000-0001-9622-1649","name":"María del Mar","surname":"Tormo Díaz","creditName":"Mar Tormo","errorCode":null},{"oid":"0000-0002-7938-3950","name":"Jose A","surname":"Martinez-Climent","creditName":null,"errorCode":null}])
|
||||
(10.1021/jo052299f,[{"oid":"0000-0002-2419-0705","name":"Per-Ola","surname":"Norrby","creditName":null,"errorCode":null},{"oid":"0000-0002-2548-7025","name":"Mogens","surname":"Johannsen","creditName":null,"errorCode":null},{"oid":"0000-0002-2548-7025","name":"Mogens","surname":"Johannsen","creditName":null,"errorCode":null}])
|
||||
(10.1182/blood-2014-04-569392,[{"oid":"0000-0003-1680-5295","name":"X. Long","surname":"Zheng","creditName":null,"errorCode":null},{"oid":"0000-0003-1680-5295","name":"X. Long","surname":"Zheng","creditName":null,"errorCode":null}])
|
||||
(10.1194/jlr.M030049,[{"oid":"0000-0003-4488-7734","name":"Sander","surname":"Kersten","creditName":null,"errorCode":null}])
|
||||
(10.1006/jcis.1998.6070,[{"oid":"0000-0003-1407-6498","name":"Anna Luisa","surname":"Costa","creditName":null,"errorCode":null},{"oid":"0000-0002-7892-2836","name":"Carmen","surname":"Galassi","creditName":null,"errorCode":null}])
|
||||
(10.1002/ange.201707070,[{"oid":"0000-0002-2516-084X","name":"tao","surname":"xiong","creditName":null,"errorCode":null}])
|
||||
(10.1056/NEJMc1502191,[{"oid":"0000-0003-0893-9015","name":"David","surname":"Stoltz","creditName":null,"errorCode":null},{"oid":"0000-0003-1552-3253","name":"David","surname":"Meyerholz","creditName":"David K. Meyerholz","errorCode":null}])
|
||||
(10.1300/J017v21n03_01,[{"oid":"0000-0002-6132-5883","name":"Stephen","surname":"Butler","creditName":null,"errorCode":null}])
|
||||
(10.4028/www.scientific.net/SSP.227.47,[{"oid":"0000-0001-5648-222X","name":"Pawel","surname":"Rokicki","creditName":null,"errorCode":null}])
|
||||
(10.1002/1521-3935(20000801)201:12<1329::AID-MACP1329>3.0.CO;2-8,[{"oid":"0000-0003-4175-4741","name":"Zhiyuan","surname":"Zhong","creditName":null,"errorCode":null}])
|
||||
(10.1111/j.1365-2966.2006.11355.x,[{"oid":"0000-0002-1967-2849","name":"Sunil","surname":"Maharaj","creditName":null,"errorCode":null}])
|
||||
(10.1021/acs.jpca.6b01563,[{"oid":"0000-0002-5301-6730","name":"Stefano","surname":"Falcinelli","creditName":null,"errorCode":null},{"oid":"0000-0002-5301-6730","name":"Stefano","surname":"Falcinelli","creditName":null,"errorCode":null},{"oid":"0000-0003-1934-7891","name":"Piergiorgio","surname":"Casavecchia","creditName":null,"errorCode":null},{"oid":"0000-0002-3961-5710","name":"Astrid","surname":"Bergeat","creditName":null,"errorCode":null},{"oid":"0000-0001-5121-5683","name":"Nadia","surname":"Balucani","creditName":null,"errorCode":null}])
|
||||
(10.1017/S1431927608089332,[{"oid":"0000-0003-4670-4516","name":"Sónia","surname":"Simões","creditName":"Sónia Simões","errorCode":null},{"oid":"0000-0003-4670-4516","name":"Sónia","surname":"Simões","creditName":"Sónia Simões","errorCode":null},{"oid":"0000-0002-2894-771X","name":"Filomena","surname":"Viana","creditName":"F. Viana","errorCode":null},{"oid":"0000-0002-8486-5436","name":"Ana Sofia","surname":"Ramos","creditName":"A.S. Ramos","errorCode":null},{"oid":"0000-0002-8486-5436","name":"Ana Sofia","surname":"Ramos","creditName":"A.S. Ramos","errorCode":null}])
|
||||
(10.1063/1.1452737,[{"oid":"0000-0002-4041-7631","name":"Thierry","surname":"Visart de Bocarmé","creditName":null,"errorCode":null}])
|
||||
(10.1056/nejmp1014255,[{"oid":"0000-0002-9354-5389","name":"Salal","surname":"Humair","creditName":null,"errorCode":null}])
|
||||
(10.1007/978-3-531-19249-9,[{"oid":"0000-0002-3497-6947","name":"Jan W.","surname":"van Deth","creditName":null,"errorCode":null}])
|
||||
(10.1080/00397910903291129,[{"oid":"0000-0003-3602-7400","name":"Hayreddin","surname":"Gezegen","creditName":null,"errorCode":null}])
|
||||
(10.1002/lite.201400037,[{"oid":"0000-0001-8347-8496","name":"James","surname":"Kenar","creditName":null,"errorCode":null},{"oid":"0000-0001-8347-8496","name":"James","surname":"Kenar","creditName":null,"errorCode":null}])
|
||||
(10.1109/ICACCS.2017.8014639,[{"oid":"0000-0001-9570-1077","name":"Gadadhar","surname":"Sahoo","creditName":null,"errorCode":null}])
|
||||
(10.18632/oncotarget.4840,[{"oid":"0000-0002-5664-7781","name":"Brian","surname":"McStay","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.palaeo.2010.12.015,[{"oid":"0000-0001-6088-3261","name":"Michael","surname":"Joachimski","creditName":null,"errorCode":null}])
|
||||
(10.1016/S0927-0256(97)00020-7,[{"oid":"0000-0002-0604-6590","name":"Ilja","surname":"Turek","creditName":null,"errorCode":null}])
|
||||
(10.1074/jbc.M111.222364,[{"oid":"0000-0002-5441-5709","name":"Khortnal","surname":"Delvecchio","creditName":null,"errorCode":null}])
|
||||
(10.1007/s00468-014-1099-6,[{"oid":"0000-0002-7441-2070","name":"Tingfa","surname":"Dong","creditName":"Tingfa Dong","errorCode":null}])
|
||||
(10.1055/s-0029-1241171,[{"oid":"0000-0003-2795-6013","name":"Alessandro","surname":"Mussa","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.conb.2013.01.010,[{"oid":"0000-0003-3044-9565","name":"Jonathan","surname":"Britt","creditName":null,"errorCode":null},{"oid":"0000-0003-3044-9565","name":"Jonathan","surname":"Britt","creditName":null,"errorCode":null},{"oid":"0000-0003-3044-9565","name":"Jonathan","surname":"Britt","creditName":null,"errorCode":null}])
|
||||
(10.1061/(ASCE)CC.1943-5614.0000977,[{"oid":"0000-0003-1663-7535","name":"CHRISTIAN","surname":"CARLONI","creditName":null,"errorCode":null}])
|
||||
(10.1177/0146167211399101,[{"oid":"0000-0001-6060-8083","name":"Kristof","surname":"Dhont","creditName":null,"errorCode":null},{"oid":"0000-0001-5814-1189","name":"Arne","surname":"Roets","creditName":null,"errorCode":null}])
|
||||
(10.1007/s00441-005-1096-6,[{"oid":"0000-0002-0240-7416","name":"Wee-Ming","surname":"Boon","creditName":null,"errorCode":null},{"oid":"0000-0002-8085-0034","name":"Karen","surname":"Moritz","creditName":null,"errorCode":null}])
|
||||
(10.1080/01468039308204227,[{"oid":"0000-0002-5148-6624","name":"Miguel A.","surname":"Muriel","creditName":"Miguel A. Muriel","errorCode":null},{"oid":"0000-0002-5148-6624","name":"Miguel A.","surname":"Muriel","creditName":"Miguel A. Muriel","errorCode":null}])
|
||||
(10.1111/ijfs.14302,[{"oid":"0000-0002-5053-6378","name":"Mariana","surname":"Blanco Massani","creditName":null,"errorCode":null},{"oid":"0000-0002-5053-6378","name":"Mariana","surname":"Blanco Massani","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.nanoen.2016.08.017,[{"oid":"0000-0002-4115-3287","name":"Xiaofeng","surname":"Li","creditName":null,"errorCode":null}])
|
||||
(10.1073/pnas.1402739111,[{"oid":"0000-0001-5933-6463","name":"Anselm","surname":"Enders","creditName":null,"errorCode":null},{"oid":"0000-0003-3922-6376","name":"Dan","surname":"Andrews","creditName":null,"errorCode":null}])
|
||||
(10.1128/JVI.01432-15,[{"oid":"0000-0002-9141-8001","name":"Markus","surname":"Cornberg","creditName":null,"errorCode":null},{"oid":"0000-0002-6993-4333","name":"Liisa","surname":"Selin","creditName":null,"errorCode":null}])
|
||||
(10.1097/PCC.0000000000001178,[{"oid":"0000-0003-3089-0318","name":"Sapna","surname":"Kudchadkar","creditName":"Sapna R. Kudchadkar","errorCode":null}])
|
||||
(10.3866/pku.whxb20001113,[{"oid":"0000-0002-6469-0376","name":"jingfang","surname":"zhou","creditName":null,"errorCode":null}])
|
||||
(10.1109/JSEN.2019.2912827,[{"oid":"0000-0003-4870-8473","name":"Fengde","surname":"Jia","creditName":null,"errorCode":null}])
|
||||
(10.1007/978-3-642-23430-9_65,[{"oid":"0000-0002-0663-3437","name":"Heppenstall","surname":"Alison","creditName":null,"errorCode":null},{"oid":"0000-0002-0650-6606","name":"Dianna","surname":"Smith","creditName":"Dianna M Smith","errorCode":null}])
|
||||
(10.3389/fenrg.2019.00056,[{"oid":"0000-0003-4222-6975","name":"andrea","surname":"saltelli","creditName":null,"errorCode":null},{"oid":"0000-0002-2625-483X","name":"Samuele","surname":"Lo Piano","creditName":null,"errorCode":null}])
|
||||
(10.1007/s00701-015-2639-6,[{"oid":"0000-0001-8914-5086","name":"Samuel","surname":"Lenell","creditName":null,"errorCode":null}])
|
||||
(10.4111/kju.2013.54.9.638,[{"oid":"0000-0002-7467-5954","name":"Jae Min","surname":"Chung","creditName":null,"errorCode":null}])
|
||||
(10.1136/eb-2015-102104,[{"oid":"0000-0001-6330-3314","name":"Paulo","surname":"Menezes","creditName":null,"errorCode":null},{"oid":"0000-0002-3403-5792","name":"Andrea","surname":"Silva","creditName":"Andréa Tenório C da Silva","errorCode":null}])
|
||||
(10.1016/j.compositesb.2016.03.046,[{"oid":"0000-0003-4867-1404","name":"Sergey","surname":"Vakhrushev","creditName":null,"errorCode":null},{"oid":"0000-0003-1929-032X","name":"Alexander","surname":"Naberezhnov","creditName":null,"errorCode":null},{"oid":"0000-0002-0830-272X","name":"Ewa","surname":"Rysiakiewicz-Pasek","creditName":null,"errorCode":null}])
|
||||
(10.1038/s41598-018-33547-z,[{"oid":"0000-0002-2080-1695","name":"Nadeem Ahmad","surname":"Sheikh","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.ecl.2012.04.016,[{"oid":"0000-0001-8107-7947","name":"Ignacio","surname":"Torres Aleman","creditName":null,"errorCode":null},{"oid":"0000-0001-8107-7947","name":"Ignacio","surname":"Torres Aleman","creditName":null,"errorCode":null}])
|
||||
(10.4102/hts.v64i1.33,[{"oid":"0000-0003-3810-4190","name":"Ernest","surname":"van Eck","creditName":null,"errorCode":null}])
|
||||
(10.3233/JAD-2010-100675,[{"oid":"0000-0003-2298-615X","name":"Monica","surname":"Di Luca","creditName":null,"errorCode":null},{"oid":"0000-0003-4598-5563","name":"Fabrizio","surname":"Gardoni","creditName":null,"errorCode":null}])
|
||||
(10.1039/c3dt51431e,[{"oid":"0000-0002-0675-2057","name":"Balasubramanian","surname":"Murugesapandian","creditName":null,"errorCode":null},{"oid":"0000-0001-5221-9459","name":"Mrituanjay D","surname":"Pandey","creditName":null,"errorCode":null}])
|
||||
(10.1159/000356772,[{"oid":"0000-0002-8803-4496","name":"Takashi","surname":"Matsukura","creditName":null,"errorCode":null}])
|
||||
(10.1111/j.1469-8137.2011.03973.x,[{"oid":"0000-0003-4801-4412","name":"Julianne","surname":"O'Reilly-Wapstra","creditName":null,"errorCode":null},{"oid":"0000-0003-4801-4412","name":"Julianne","surname":"O'Reilly-Wapstra","creditName":null,"errorCode":null},{"oid":"0000-0002-9383-667X","name":"Mark","surname":"Genung","creditName":null,"errorCode":null},{"oid":"0000-0001-8249-8057","name":"Jennifer","surname":"Rowntree","creditName":null,"errorCode":null},{"oid":"0000-0001-6244-289X","name":"Brad","surname":"Potts","creditName":"Brad M Potts","errorCode":null}])
|
||||
(10.1103/physreva.78.021401,[{"oid":"0000-0002-1228-5029","name":"Michael","surname":"Martins","creditName":null,"errorCode":null}])
|
||||
(10.1109/tencon.2011.6129079,[{"oid":"0000-0003-2519-0130","name":"Mustarum","surname":"Musaruddin","creditName":null,"errorCode":null}])
|
||||
(10.1016/s0956-053x(01)00047-2,[{"oid":"0000-0002-5719-8076","name":"Michael","surname":"Frisch","creditName":null,"errorCode":null}])
|
||||
(10.1200/JCO.2005.04.3216,[{"oid":"0000-0001-8530-780X","name":"Claus","surname":"Garbe","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.carbon.2018.04.084,[{"oid":"0000-0003-2149-4479","name":"Vincent","surname":"Chan","creditName":null,"errorCode":null}])
|
||||
(10.1037/0278-7393.28.3.497,[{"oid":"0000-0002-8868-7067","name":"Norbert","surname":"Schwarz","creditName":null,"errorCode":null},{"oid":"0000-0002-8868-7067","name":"Norbert","surname":"Schwarz","creditName":null,"errorCode":null}])
|
||||
(10.1039/c3ra47116k,[{"oid":"0000-0002-6626-0599","name":"Ying-Shi","surname":"Guan","creditName":null,"errorCode":null}])
|
||||
(10.1039/C8DT01469H,[{"oid":"0000-0002-2225-7072","name":"Hong-Bin","surname":"Luo","creditName":null,"errorCode":null}])
|
||||
(10.1088/1475-7516/2019/02/007,[{"oid":"0000-0002-4487-8742","name":"Miguel","surname":"Escudero","creditName":null,"errorCode":null}])
|
||||
(10.1109/DICTA.2009.27,[{"oid":"0000-0001-7782-1956","name":"Wojciech","surname":"Chojnacki","creditName":null,"errorCode":null},{"oid":"0000-0001-9612-5884","name":"Michael","surname":"Brooks","creditName":"M.J. Brooks","errorCode":null}])
|
||||
(10.1007/s11103-017-0629-1,[{"oid":"0000-0001-8226-0700","name":"Rose Adele","surname":"Monteiro","creditName":null,"errorCode":null},{"oid":"0000-0002-8553-0718","name":"Caroline","surname":"Kukolj","creditName":null,"errorCode":null},{"oid":"0000-0003-1882-1512","name":"Glaucio","surname":"Valdameri","creditName":null,"errorCode":null}])
|
||||
(10.1136/jnnp-2012-302993,[{"oid":"0000-0003-1258-5678","name":"Maeike","surname":"Zijlmans","creditName":null,"errorCode":null}])
|
||||
(10.1002/1521-3773(20000804)39:15<2707::aid-anie2707>3.0.co;2-m,[{"oid":"0000-0002-1443-8818","name":"Michael","surname":"Anderson","creditName":"Michael W Anderson","errorCode":null}])
|
||||
(10.1080/19401493.2017.1354070,[{"oid":"0000-0002-4248-6788","name":"Jérôme","surname":"Kämpf","creditName":null,"errorCode":null},{"oid":"0000-0002-1186-4299","name":"Clayton","surname":"Miller","creditName":null,"errorCode":null},{"oid":"0000-0002-1186-4299","name":"Clayton","surname":"Miller","creditName":null,"errorCode":null}])
|
||||
(10.1080/1062936x.2015.1032347,[{"oid":"0000-0003-4145-9590","name":"Ayako","surname":"Furuhama","creditName":null,"errorCode":null}])
|
||||
(10.1155/2016/9578308,[{"oid":"0000-0002-8874-1473","name":"Murat","surname":"Gunay","creditName":"Gunay M","errorCode":null}])
|
||||
(10.1016/j.gexplo.2015.09.011,[{"oid":"0000-0001-8503-4266","name":"Sérgio Benjamin","surname":"Baggio","creditName":null,"errorCode":null},{"oid":"0000-0001-7863-5071","name":"Léo","surname":"Hartmann","creditName":"Hartmann, L.A.","errorCode":null}])
|
||||
(10.1051/0004-6361:20053947,[{"oid":"0000-0003-3603-394X","name":"Christophe","surname":"Letellier","creditName":null,"errorCode":null},{"oid":"0000-0002-2746-5102","name":"Luis","surname":"Aguirre","creditName":null,"errorCode":null}])
|
||||
(10.1186/s12959-018-0180-6,[{"oid":"0000-0002-0936-1609","name":"Andrew","surname":"Kotaska","creditName":null,"errorCode":null}])
|
||||
(https://doi.org/10.1016/j.jct.2018.08.026,[{"oid":"0000-0001-8381-2466","name":"Jason","surname":"Calvin","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.ceb.2010.04.009,[{"oid":"0000-0002-3329-9032","name":"Steven","surname":"Kosak","creditName":null,"errorCode":null}])
|
||||
(10.1002/asi.20042,[{"oid":"0000-0002-9989-6681","name":"Aboul Ella","surname":"Hassanien","creditName":"Aboul Ella Hassanien","errorCode":null}])
|
||||
(10.2105/AJPH.92.6.897,[{"oid":"0000-0002-8214-1776","name":"Kenneth","surname":"Warner","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.foodhyd.2019.02.010,[{"oid":"0000-0002-4557-4580","name":"Diana","surname":"Gawkowska","creditName":null,"errorCode":null},{"oid":"0000-0002-8038-3050","name":"Jolanta","surname":"Cieśla","creditName":null,"errorCode":null},{"oid":"0000-0003-3323-4535","name":"Justyna","surname":"Cybulska","creditName":null,"errorCode":null},{"oid":"0000-0001-9395-1486","name":"Artur","surname":"Zdunek","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.jbiosc.2019.03.005,[{"oid":"0000-0002-8601-6657","name":"Sachiyo","surname":"Aburatani","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.idm.2017.06.004,[{"oid":"0000-0002-0946-2741","name":"Orou G.","surname":"Gaoue","creditName":null,"errorCode":null}])
|
||||
(10.1017/cbo9780511599194.007,[{"oid":"0000-0002-1683-4486","name":"Barry","surname":"Eichengreen","creditName":null,"errorCode":null}])
|
||||
(10.1134/S0006297915090060,[{"oid":"0000-0002-0530-4244","name":"Richard","surname":"Beckett","creditName":null,"errorCode":null},{"oid":"0000-0002-0530-4244","name":"Richard","surname":"Beckett","creditName":null,"errorCode":null},{"oid":"0000-0002-0313-7921","name":"Andrei","surname":"Chasov","creditName":null,"errorCode":null},{"oid":"0000-0002-0313-7921","name":"Andrei","surname":"Chasov","creditName":null,"errorCode":null}])
|
||||
(10.31229/osf.io/jdw7f,[{"oid":"0000-0001-7849-1282","name":"Adib Rifqi","surname":"Setiawan","creditName":"Alobatnic","errorCode":null}])
|
||||
(10.1016/j.proci.2018.06.150,[{"oid":"0000-0001-7058-6498","name":"Wolfgang","surname":"Polifke","creditName":null,"errorCode":null},{"oid":"0000-0001-5286-0756","name":"Matthias","surname":"Haeringer","creditName":null,"errorCode":null}])
|
||||
(10.3891/acta.chem.scand.39a-0411,[{"oid":"0000-0002-1061-7536","name":"Ingmar","surname":"Persson","creditName":null,"errorCode":null}])
|
||||
(10.1038/sj.ejcn.1601726,[{"oid":"0000-0001-8424-2864","name":"Agneta","surname":"Sjöberg","creditName":null,"errorCode":null}])
|
||||
(10.1093/brain/aww278,[{"oid":"0000-0001-8683-8741","name":"Arturo","surname":"Cardenas-Blanco","creditName":"Arturo Cardenas-Blanco","errorCode":null},{"oid":"0000-0003-1174-5983","name":"Julio","surname":"Acosta-Cabronero","creditName":"Julio Acosta-Cabronero","errorCode":null},{"oid":"0000-0003-1174-5983","name":"Julio","surname":"Acosta-Cabronero","creditName":"Julio Acosta-Cabronero","errorCode":null},{"oid":"0000-0002-2840-4678","name":"Matthew","surname":"Betts","creditName":null,"errorCode":null},{"oid":"0000-0002-5860-5921","name":"Peter","surname":"Nestor","creditName":null,"errorCode":null},{"oid":"0000-0002-5860-5921","name":"Peter","surname":"Nestor","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.drugpo.2015.05.017,[{"oid":"0000-0002-9402-8682","name":"Judith","surname":"Aldridge","creditName":null,"errorCode":null}])
|
||||
(10.1088/1742-6596/340/1/011001,[{"oid":"0000-0003-1298-2120","name":"Vladimir","surname":"Sechovsky","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.scitotenv.2019.06.168,[{"oid":"0000-0002-0543-2641","name":"Jordi","surname":"Garcia-Orellana","creditName":null,"errorCode":null}])
|
||||
(10.1007/978-3-319-05215-1_5,[{"oid":"0000-0002-6231-8415","name":"Siddhartha","surname":"Jana","creditName":null,"errorCode":null}])
|
||||
(10.1007/978-981-287-736-9_85,[{"oid":"0000-0002-7913-9712","name":"Oleg","surname":"Lupan","creditName":"Oleg LUPAN","errorCode":null},{"oid":"0000-0002-7913-9712","name":"Oleg","surname":"Lupan","creditName":"Oleg LUPAN","errorCode":null}])
|
||||
(10.1175/2009JCLI3061.1,[{"oid":"0000-0001-6935-4112","name":"David","surname":"Randall","creditName":null,"errorCode":null}])
|
||||
(10.3987/COM-05-10376,[{"oid":"0000-0003-3638-2517","name":"Tecla","surname":"Gasperi","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.bbrc.2009.07.098,[{"oid":"0000-0003-1537-0437","name":"A. M.","surname":"Stalcup","creditName":null,"errorCode":null}])
|
||||
(10.1179/1753555713Y.0000000065,[{"oid":"0000-0002-0233-1407","name":"Pimpa","surname":"Hormnirun","creditName":null,"errorCode":null}])
|
||||
(10.1109/OCEANS.2018.8604762,[{"oid":"0000-0002-5734-2699","name":"Gregory","surname":"Murad Reis","creditName":null,"errorCode":null}])
|
||||
(10.1063/1.4963346,[{"oid":"0000-0002-7504-031X","name":"Lin","surname":"Gu","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.jviromet.2014.06.023,[{"oid":"0000-0001-8276-1804","name":"MARGHERITA","surname":"PROFITI","creditName":null,"errorCode":null},{"oid":"0000-0002-8315-0316","name":"Sergio","surname":"Rosati","creditName":null,"errorCode":null},{"oid":"0000-0002-0344-3101","name":"Stefano","surname":"Nardelli","creditName":null,"errorCode":null},{"oid":"0000-0001-7883-1283","name":"Esterina","surname":"De Carlo","creditName":null,"errorCode":null}])
|
||||
(10.1590/S1678-58782011000500002,[{"oid":"0000-0003-1537-5430","name":"Mario Henrique","surname":"Macagnan","creditName":null,"errorCode":null}])
|
||||
(10.1080/17425255.2017.1290080,[{"oid":"0000-0003-4652-7089","name":"Michael","surname":"Durkin","creditName":null,"errorCode":null},{"oid":"0000-0003-4652-7089","name":"Michael","surname":"Durkin","creditName":null,"errorCode":null},{"oid":"0000-0003-4652-7089","name":"Michael","surname":"Durkin","creditName":null,"errorCode":null}])
|
||||
(10.1111/j.2047-6310.2013.00183.x,[{"oid":"0000-0002-0252-9923","name":"Thomas","surname":"Willis","creditName":"Thomas Andrew Willis","errorCode":null},{"oid":"0000-0002-0252-9923","name":"Thomas","surname":"Willis","creditName":"Thomas Andrew Willis","errorCode":null},{"oid":"0000-0002-4065-4397","name":"Charlotte","surname":"Evans","creditName":null,"errorCode":null}])
|
||||
(10.1098/rstb.2018.0138,[{"oid":"0000-0003-2308-2603","name":"Peter","surname":"Bossaerts","creditName":null,"errorCode":null}])
|
||||
(10.12660/GVCASOSV4N1N4,[{"oid":"0000-0003-2183-8112","name":"Pelayo Munhoz","surname":"Olea","creditName":"Pelayo Munhoz Olea","errorCode":null}])
|
||||
(10.1016/0306-4522(96)00157-1,[{"oid":"0000-0002-3525-4671","name":"Elek","surname":"Molnár","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.jclinane.2018.12.004,[{"oid":"0000-0002-4776-3211","name":"Hiroshi","surname":"Otake","creditName":null,"errorCode":null}])
|
||||
(10.1098/rsta.1996.0131,[{"oid":"0000-0002-8781-6154","name":"Ekhard","surname":"Salje","creditName":"Salje","errorCode":null}])
|
||||
(10.1074/jbc.M113.526178,[{"oid":"0000-0002-7945-4050","name":"Jaime","surname":"Nagy","creditName":null,"errorCode":null},{"oid":"0000-0002-8302-6905","name":"Xiaolan","surname":"Zhao","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.biomaterials.2009.08.014,[{"oid":"0000-0002-9774-7412","name":"Liam","surname":"Grover","creditName":null,"errorCode":null},{"oid":"0000-0001-6412-2371","name":"nicola","surname":"hunt","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.physb.2011.09.063,[{"oid":"0000-0001-5768-0244","name":"Anne","surname":"Henry","creditName":null,"errorCode":null},{"oid":"0000-0001-7721-5091","name":"Erik","surname":"Janzen","creditName":null,"errorCode":null},{"oid":"0000-0003-4579-3529","name":"Stefano","surname":"Leone","creditName":null,"errorCode":null},{"oid":"0000-0003-4579-3529","name":"Stefano","surname":"Leone","creditName":null,"errorCode":null},{"oid":"0000-0002-7171-5383","name":"Henrik","surname":"Pedersen","creditName":"Henrik Pedersen","errorCode":null}])
|
||||
(http://dx.doi.org/10.1080/00958970412331295219,[{"oid":"0000-0002-3226-2959","name":"Magdi","surname":"Iskander","creditName":null,"errorCode":null}])
|
||||
(10.1166/sam.2012.1300,[{"oid":"0000-0002-0260-1059","name":"Haibo","surname":"Zeng","creditName":null,"errorCode":null},{"oid":"0000-0002-0281-3617","name":"Haibo","surname":"Zeng","creditName":null,"errorCode":null},{"oid":"0000-0002-0281-3617","name":"Haibo","surname":"Zeng","creditName":null,"errorCode":null},{"oid":"0000-0002-0281-3617","name":"Haibo","surname":"Zeng","creditName":null,"errorCode":null}])
|
||||
(10.2217/fmb.11.70,[{"oid":"0000-0003-4042-7466","name":"Grzegorz","surname":"Wegrzyn","creditName":null,"errorCode":null}])
|
||||
(10.1109/SIU.2008.4632665,[{"oid":"0000-0002-6335-459X","name":"AHMET","surname":"SAYAR","creditName":null,"errorCode":null}])
|
||||
(10.1002/sim.910,[{"oid":"0000-0002-1829-8664","name":"Noah","surname":"Rosenberg","creditName":null,"errorCode":null}])
|
||||
(10.1007/S12603-011-0071-Z,[{"oid":"0000-0001-7593-3081","name":"Jean","surname":"Woo","creditName":null,"errorCode":null}])
|
||||
(10.1364/PHOTONICS.2016.Th3A.11,[{"oid":"0000-0003-2418-8418","name":"Narayan","surname":"Krishnaswamy","creditName":null,"errorCode":null}])
|
||||
(10.1134/S0020168510050109,[{"oid":"0000-0002-2562-6427","name":"Nikolay","surname":"Kuznetsov","creditName":null,"errorCode":null}])
|
||||
(10.1128/AEM.71.8.4539-4547.2005,[{"oid":"0000-0003-0501-2556","name":"Martin W.","surname":"Hahn","creditName":null,"errorCode":null},{"oid":"0000-0003-0501-2556","name":"Martin W.","surname":"Hahn","creditName":null,"errorCode":null}])
|
||||
(10.1162/artl_a_00211,[{"oid":"0000-0003-0910-743X","name":"Tim","surname":"Taylor","creditName":"Timothy John Taylor","errorCode":null}])
|
||||
(10.1080/10934529.2015.1047675,[{"oid":"0000-0003-2020-9824","name":"Bogdan","surname":"Skwarzec","creditName":null,"errorCode":null},{"oid":"0000-0001-7900-6517","name":"Dagmara","surname":"Struminska-Parulska","creditName":"Dagmara Struminska-Parulska","errorCode":null}])
|
||||
(10.3109/16066359.2011.588352,[{"oid":"0000-0002-6792-916X","name":"David","surname":"Best","creditName":null,"errorCode":null}])
|
||||
(10.1007/s001090050204,[{"oid":"0000-0003-3051-1285","name":"Thierry","surname":"Calandra","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.ajog.2007.10.519,[{"oid":"0000-0003-3159-6321","name":"Martin","surname":"Frasch","creditName":null,"errorCode":null}])
|
||||
(10.1111/phpr.12304,[{"oid":"0000-0001-6180-457X","name":"Douglas","surname":"Portmore","creditName":"Douglas W. Portmore","errorCode":null}])
|
||||
(10.2305/iucn.uk.2017-2.rlts.t22486174a22486850.en,[{"oid":"0000-0002-9847-2035","name":"Emma","surname":"Williams","creditName":null,"errorCode":null}])
|
||||
(10.3791/50878,[{"oid":"0000-0002-8699-6253","name":"Tessa","surname":"Durham Brooks","creditName":null,"errorCode":null},{"oid":"0000-0002-8699-6253","name":"Tessa","surname":"Durham Brooks","creditName":null,"errorCode":null}])
|
||||
(10.1007/s00340-009-3580-2,[{"oid":"0000-0002-2652-5134","name":"Ci-Ling","surname":"Pan","creditName":null,"errorCode":null},{"oid":"0000-0002-2652-5134","name":"Ci-Ling","surname":"Pan","creditName":null,"errorCode":null}])
|
||||
(10.1007/978-3-319-19216-1_38,[{"oid":"0000-0001-9922-0350","name":"Dariusz","surname":"Laskowski","creditName":null,"errorCode":null},{"oid":"0000-0002-1748-5070","name":"ŁUBKOWSKI","surname":"PIOTR","creditName":null,"errorCode":null}])
|
||||
(10.1007/s00192-017-3446-9,[{"oid":"0000-0002-9064-8454","name":"Katja","surname":"Stenström Bohlin","creditName":null,"errorCode":null}])
|
||||
(10.1088/0022-3700/13/6/001,[{"oid":"0000-0002-4357-6048","name":"Jacek","surname":"Migdalek","creditName":null,"errorCode":null}])
|
||||
(10.12776/qip.v16i2.67,[{"oid":"0000-0002-9155-189X","name":"Jostein","surname":"Langstrand","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.ecoenv.2018.07.109,[{"oid":"0000-0003-0926-4849","name":"Galia","surname":null,"creditName":null,"errorCode":null},{"oid":"0000-0002-8857-4353","name":"Vladimír","surname":"Žlábek","creditName":null,"errorCode":null},{"oid":"0000-0003-3091-7824","name":"Sidika","surname":"Sakalli","creditName":null,"errorCode":null}])
|
||||
(10.1080/09585192.2013.870290,[{"oid":"0000-0001-8766-3314","name":"Andreas","surname":"Hirschi","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.optcom.2010.07.033,[{"oid":"0000-0001-8387-8131","name":"Esteban","surname":"Vera","creditName":null,"errorCode":null},{"oid":"0000-0002-4834-7543","name":"Alberto","surname":"Lencina","creditName":null,"errorCode":null}])
|
||||
(10.1002/adma.201807383,[{"oid":"0000-0002-3042-0335","name":"Romain","surname":"Gautier","creditName":null,"errorCode":null},{"oid":"0000-0002-8671-0630","name":"Michael","surname":"PARIS","creditName":null,"errorCode":null}])
|
||||
(http://dx.doi.org/10.18561/2179-5746/biotaamazonia.v4n2p55-63,[{"oid":"0000-0003-2173-6968","name":"Marcelo","surname":"Moreira de Carvalho","creditName":"Carvalho M. M.","errorCode":null}])
|
||||
(10.1016/j.ehbc.2004.02.015,[{"oid":"0000-0001-7724-0668","name":"Eddy","surname":"Perez-Then","creditName":null,"errorCode":null},{"oid":"0000-0001-7724-0668","name":"Eddy","surname":"Perez-Then","creditName":null,"errorCode":null}])
|
||||
(10.1016/S0001-6519(08)75117-3,[{"oid":"0000-0001-5560-8097","name":"Francesc Xavier","surname":"Avilés-Jurado","creditName":null,"errorCode":null},{"oid":"0000-0001-5560-8097","name":"Francesc Xavier","surname":"Avilés-Jurado","creditName":null,"errorCode":null}])
|
||||
(10.1074/jbc.M409535200,[{"oid":"0000-0001-7811-7784","name":"Chihiro","surname":"Hisatsune","creditName":null,"errorCode":null},{"oid":"0000-0001-7713-9471","name":"Yukiko","surname":"Kuroda","creditName":"Yukiko Kuroda","errorCode":null}])
|
||||
(10.1063/1.98280,[{"oid":"0000-0002-1856-474X","name":"Brian","surname":"Skromme","creditName":null,"errorCode":null}])
|
||||
(10.1002/ajmg.a.37755,[{"oid":"0000-0003-3602-5704","name":null,"surname":null,"creditName":null,"errorCode":null},{"oid":"0000-0003-3602-5704","name":null,"surname":null,"creditName":null,"errorCode":null},{"oid":"0000-0003-4990-8784","name":"Anne","surname":"Trinh","creditName":null,"errorCode":null},{"oid":"0000-0003-4472-6893","name":"Stefano","surname":"Lise","creditName":null,"errorCode":null},{"oid":"0000-0002-1361-9174","name":"Caroline","surname":"Gorvin","creditName":null,"errorCode":null},{"oid":"0000-0002-1361-9174","name":"Caroline","surname":"Gorvin","creditName":null,"errorCode":null},{"oid":"0000-0002-1361-9174","name":"Caroline","surname":"Gorvin","creditName":null,"errorCode":null},{"oid":"0000-0002-3586-7654","name":"Sian","surname":"Piret","creditName":"Sian E Piret","errorCode":null}])
|
||||
(10.1002/jmor.10934,[{"oid":"0000-0002-5006-6679","name":"Miriam","surname":"Zelditch","creditName":null,"errorCode":null},{"oid":"0000-0002-5006-6679","name":"Miriam","surname":"Zelditch","creditName":null,"errorCode":null}])
|
||||
(http://dx.doi.org/10.1016/j.nucengdes.2010.12.007,[{"oid":"0000-0003-2653-3430","name":"Luca","surname":"Ammirabile","creditName":null,"errorCode":null}])
|
||||
(10.1016/S0009-2614(99)01368-8,[{"oid":"0000-0001-6672-2186","name":"Cleber","surname":"Mendonca","creditName":null,"errorCode":null},{"oid":"0000-0001-6624-8453","name":"Lino","surname":"Misoguti","creditName":null,"errorCode":null},{"oid":"0000-0003-2372-5509","name":"José Alberto","surname":"Giacometti","creditName":null,"errorCode":null}])
|
||||
(10.1038/ncomms2249,[{"oid":"0000-0002-1088-5565","name":"Santiago","surname":"Munne","creditName":null,"errorCode":null},{"oid":"0000-0002-8285-0222","name":"Shawn","surname":"Chavez","creditName":null,"errorCode":null},{"oid":"0000-0002-6487-1329","name":"Renee","surname":"Reijo Pera","creditName":null,"errorCode":null},{"oid":"0000-0002-6487-1329","name":"Renee","surname":"Reijo Pera","creditName":null,"errorCode":null}])
|
||||
(https://doi.org/10.1016/j.jclepro.2017.12.156,[{"oid":"0000-0003-4311-5185","name":"Hao","surname":"Zheng","creditName":null,"errorCode":null}])
|
||||
(10.1177/0022022112466590,[{"oid":"0000-0003-3764-3571","name":"Kenneth","surname":"Locke","creditName":null,"errorCode":null},{"oid":"0000-0002-4174-6955","name":"Guy","surname":"Curtis","creditName":null,"errorCode":null},{"oid":"0000-0002-4174-6955","name":"Guy","surname":"Curtis","creditName":null,"errorCode":null}])
|
||||
(10.3390/app8091453,[{"oid":"0000-0001-5102-0077","name":"Liu","surname":"Yuting","creditName":null,"errorCode":null},{"oid":"0000-0001-5102-0077","name":"Liu","surname":"Yuting","creditName":null,"errorCode":null},{"oid":"0000-0002-5578-408X","name":"Huanan","surname":"Liu","creditName":null,"errorCode":null}])
|
||||
(10.1063/1.3225247,[{"oid":"0000-0003-0574-1513","name":"Antonio","surname":"Garcia-Loureiro","creditName":null,"errorCode":null},{"oid":"0000-0002-6794-3893","name":"Manuel","surname":"Aldegunde","creditName":null,"errorCode":null},{"oid":"0000-0003-0973-461X","name":"Natalia","surname":"Seoane","creditName":null,"errorCode":null}])
|
||||
(10.1200/jco.2015.33.7_suppl.41,[{"oid":"0000-0002-2500-5030","name":"Julien","surname":"Boudon","creditName":null,"errorCode":null}])
|
||||
(10.1037/e592712011-023,[{"oid":"0000-0003-1574-585X","name":"Samuel","surname":"Posner","creditName":null,"errorCode":null}])
|
||||
(10.1007/s00605-013-0526-x,[{"oid":"0000-0003-2371-4818","name":"Ákos","surname":"G.Horváth","creditName":null,"errorCode":null}])
|
||||
(10.1371/journal.pone.0139754,[{"oid":"0000-0002-9109-0958","name":"Julien","surname":"Louis","creditName":null,"errorCode":null}])
|
||||
(10.1021/jp054948x,[{"oid":"0000-0002-2033-5284","name":"Victor","surname":"Climent","creditName":null,"errorCode":null},{"oid":"0000-0003-4751-3279","name":"Juan","surname":"Feliu","creditName":null,"errorCode":null},{"oid":"0000-0002-5231-8032","name":"Roberto","surname":"Gómez","creditName":"Roberto Gómez","errorCode":null}])
|
||||
(10.1155/2016/9828517,[{"oid":"0000-0002-2030-0356","name":"Ewa","surname":"Jasek-Gajda","creditName":null,"errorCode":null},{"oid":"0000-0002-4541-2864","name":"Ewa","surname":"Siucinska","creditName":null,"errorCode":null},{"oid":"0000-0002-8102-6039","name":"Malgorzata","surname":"Jasinska","creditName":null,"errorCode":null},{"oid":"0000-0002-8102-6039","name":"Malgorzata","surname":"Jasinska","creditName":null,"errorCode":null},{"oid":"0000-0001-7808-1101","name":"Malgorzata","surname":"Kossut","creditName":null,"errorCode":null}])
|
||||
(10.1002/pi.2097,[{"oid":"0000-0002-4375-5580","name":"Graeme","surname":"Moad","creditName":"Graeme Moad","errorCode":null},{"oid":"0000-0003-2849-229X","name":"John","surname":"Forsythe","creditName":null,"errorCode":null}])
|
||||
(10.1117/12.432956,[{"oid":"0000-0002-4209-1372","name":"Michael","surname":"Bakunov","creditName":null,"errorCode":null}])
|
||||
(10.2144/000114087,[{"oid":"0000-0001-8479-6269","name":"HERNAN GUILLERMO","surname":"HERNANDEZ","creditName":null,"errorCode":null},{"oid":"0000-0001-9175-3363","name":"Diego","surname":"Forero","creditName":"Forero DA","errorCode":null}])
|
||||
(10.1016/j.bcp.2015.08.003,[{"oid":"0000-0002-8247-9494","name":"Istvan","surname":"Czikora","creditName":null,"errorCode":null},{"oid":"0000-0003-3805-8868","name":"Rudolf","surname":"Lucas","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.foreco.2016.08.036,[{"oid":"0000-0001-7999-1692","name":"Dugald","surname":"Close","creditName":null,"errorCode":null}])
|
||||
(10.2903/j.efsa.2017.4782,[{"oid":"0000-0002-4720-2940","name":"Margaret","surname":"Good","creditName":null,"errorCode":null},{"oid":"0000-0002-2634-3138","name":"Antonio","surname":"Velarde","creditName":null,"errorCode":null},{"oid":"0000-0003-2417-0787","name":"Søren","surname":"Nielsen","creditName":"Søren Saxmose Nielsen","errorCode":null},{"oid":"0000-0002-5904-8397","name":"Andrew","surname":"Butterworth","creditName":"Andrew Butterworth","errorCode":null}])
|
||||
(10.1002/ejoc.201300745,[{"oid":"0000-0003-0168-9295","name":"Emanuela","surname":"Licandro","creditName":null,"errorCode":null},{"oid":"0000-0001-9252-6218","name":"Alberto","surname":"Bossi","creditName":null,"errorCode":null},{"oid":"0000-0001-9252-6218","name":"Alberto","surname":"Bossi","creditName":null,"errorCode":null},{"oid":"0000-0001-9252-6218","name":"Alberto","surname":"Bossi","creditName":null,"errorCode":null},{"oid":"0000-0003-4063-1563","name":"Patrizia Romana","surname":"Mussini","creditName":null,"errorCode":null},{"oid":"0000-0003-4063-1563","name":"Patrizia Romana","surname":"Mussini","creditName":null,"errorCode":null},{"oid":"0000-0002-9540-9073","name":"SILVIA","surname":"CAUTERUCCIO","creditName":null,"errorCode":null},{"oid":"0000-0002-9540-9073","name":"SILVIA","surname":"CAUTERUCCIO","creditName":null,"errorCode":null}])
|
||||
(10.1038/s41431-018-0183-6,[{"oid":"0000-0002-4475-4117","name":"Simone","surname":"Baldovino","creditName":null,"errorCode":null},{"oid":"0000-0001-6133-9831","name":"Marcella","surname":"Neri","creditName":null,"errorCode":null},{"oid":"0000-0003-4187-8816","name":"GIOVANNA","surname":"FLORIDIA","creditName":null,"errorCode":null},{"oid":"0000-0001-9406-5734","name":"Paraskevas","surname":"Iatropoulos","creditName":null,"errorCode":null}])
|
||||
(10.1038/sj.ijo.0800401,[{"oid":"0000-0002-7916-4619","name":"Frederic","surname":"Fumeron","creditName":null,"errorCode":null}])
|
||||
(10.1080/10934529.2012.668029,[{"oid":"0000-0002-6255-1153","name":"David","surname":"Stuckey","creditName":null,"errorCode":null},{"oid":"0000-0002-6255-1153","name":"David","surname":"Stuckey","creditName":null,"errorCode":null},{"oid":"0000-0002-2880-708X","name":"Antoine","surname":"Trzcinski","creditName":null,"errorCode":null}])
|
||||
(10.1371/journal.pmed.1001427,[{"oid":"0000-0001-6857-8931","name":"Etheldreda","surname":"Nakimuli-Mpungu","creditName":null,"errorCode":null}])
|
||||
(10.1186/S13660-015-0787-0,[{"oid":"0000-0003-1769-2800","name":"Ke","surname":"Wang","creditName":"Ke Wang","errorCode":null}])
|
||||
(10.2174/157018011794578204,[{"oid":"0000-0002-8902-2876","name":"Gajendra","surname":"Raghava","creditName":"G. P. S. Raghava","errorCode":null}])
|
||||
(10.1016/j.jtho.2016.11.002,[{"oid":"0000-0002-3832-8744","name":"Chandana","surname":"Reddy","creditName":null,"errorCode":null}])
|
||||
(10.26502/jesph.96120058,[{"oid":"0000-0002-1942-7384","name":"Anahi","surname":"Aguilera","creditName":null,"errorCode":null}])
|
||||
(10.1007/BF00749305,[{"oid":"0000-0003-4546-002X","name":"Oleg","surname":"Ponomarev","creditName":null,"errorCode":null},{"oid":"0000-0002-0362-4121","name":"Farid","surname":"Enikeev","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.engfracmech.2015.01.008,[{"oid":"0000-0003-1117-9619","name":"Xian-Fang","surname":"Li","creditName":"Xian-Fang Li","errorCode":null}])
|
||||
(10.1007/s10898-015-0341-0,[{"oid":"0000-0001-6340-385X","name":"Semu Mitiku","surname":"Kassa","creditName":null,"errorCode":null},{"oid":"0000-0001-5494-040X","name":"Semu Mitiku","surname":"Kassa","creditName":null,"errorCode":null}])
|
||||
(10.1177/0091270010395591,[{"oid":"0000-0002-4227-5817","name":"David","surname":"Le Couteur","creditName":null,"errorCode":null},{"oid":"0000-0002-5970-1501","name":"Sarah","surname":"Hilmer","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.cellimm.2016.09.003,[{"oid":"0000-0002-7412-9507","name":"Joseph","surname":"Mattapallil","creditName":null,"errorCode":null}])
|
||||
(10.1158/0008-5472.CAN-09-1091,[{"oid":"0000-0002-6093-0395","name":"Rajiv","surname":"Kumar","creditName":null,"errorCode":null},{"oid":"0000-0003-0674-7757","name":"Giuseppe","surname":"MATULLO","creditName":null,"errorCode":null},{"oid":"0000-0001-9599-309X","name":"Alessandra","surname":"Allione","creditName":null,"errorCode":null},{"oid":"0000-0003-1901-9513","name":"Heather","surname":"Nelson","creditName":null,"errorCode":null},{"oid":"0000-0002-8752-8785","name":"Tim","surname":"Bishop","creditName":"D T Bishop","errorCode":null},{"oid":"0000-0002-0787-3969","name":"Gunnar","surname":"Steineck","creditName":null,"errorCode":null},{"oid":"0000-0001-7208-2912","name":"Anne","surname":"Kiltie","creditName":null,"errorCode":null},{"oid":"0000-0002-4669-3995","name":"Zuo-Feng","surname":"Zhang","creditName":null,"errorCode":null},{"oid":"0000-0002-4620-3108","name":"Jian-Min","surname":"Yuan","creditName":null,"errorCode":null},{"oid":"0000-0002-3561-6580","name":"Ananya","surname":"Choudhury","creditName":null,"errorCode":null},{"oid":"0000-0002-3561-6580","name":"Ananya","surname":"Choudhury","creditName":null,"errorCode":null},{"oid":"0000-0002-1720-7724","name":"Jenny","surname":"Barrett","creditName":null,"errorCode":null},{"oid":"0000-0002-5277-8477","name":"Marcello","surname":"Campagna","creditName":null,"errorCode":null},{"oid":"0000-0003-2538-3784","name":"Nuria","surname":"Malats","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.intell.2007.04.003,[{"oid":"0000-0003-3159-9370","name":"Alasdair","surname":"MacLullich","creditName":null,"errorCode":null}])
|
||||
(10.1021/acs.orglett.7b00261,[{"oid":"0000-0002-7178-2981","name":"Jun Yong","surname":"Kang","creditName":null,"errorCode":null},{"oid":"0000-0002-9732-1454","name":"Hai","surname":"Huang","creditName":null,"errorCode":null}])
|
||||
(10.1088/0004-637X/778/2/175,[{"oid":"0000-0003-0489-0920","name":"Alexei","surname":"Pevtsov","creditName":null,"errorCode":null},{"oid":"0000-0003-0489-0920","name":"Alexei","surname":"Pevtsov","creditName":null,"errorCode":null}])
|
||||
(10.22201/iis.01882503p.2018.4.57796,[{"oid":"0000-0002-7334-1936","name":"Yanina","surname":"Welp","creditName":null,"errorCode":null},{"oid":"0000-0001-9905-0777","name":"Flavia","surname":"Freidenberg","creditName":null,"errorCode":null},{"oid":"0000-0001-9905-0777","name":"Flavia","surname":"Freidenberg","creditName":null,"errorCode":null}])
|
||||
(10.1109/IWAGPR.2017.7996109,[{"oid":"0000-0003-1698-0800","name":"Miro","surname":"Govedarica","creditName":null,"errorCode":null}])
|
||||
(10.1207/s15328015tlm1603_6,[{"oid":"0000-0002-0577-1440","name":"Theodor","surname":"Adla","creditName":null,"errorCode":null}])
|
||||
(10.1109/CBMI.2009.10,[{"oid":"0000-0002-1364-218X","name":"Marco","surname":"Bertini","creditName":"Marco Bertini","errorCode":null},{"oid":"0000-0002-4269-4501","name":"Giuseppe","surname":"Serra","creditName":null,"errorCode":null},{"oid":"0000-0002-1052-8322","name":"ALBERTO","surname":"DEL BIMBO","creditName":null,"errorCode":null},{"oid":"0000-0003-0819-851X","name":"Lamberto","surname":"Ballan","creditName":null,"errorCode":null}])
|
||||
(10.1017/S003329171200147X,[{"oid":"0000-0001-8198-9022","name":"Amanda","surname":"Baxter","creditName":null,"errorCode":null},{"oid":"0000-0003-4667-6623","name":"Harvey","surname":"Whiteford","creditName":null,"errorCode":null},{"oid":"0000-0003-4667-6623","name":"Harvey","surname":"Whiteford","creditName":null,"errorCode":null}])
|
||||
(10.1088/0305-4470/30/2/021,[{"oid":"0000-0001-6097-1202","name":"Andrés","surname":"Riaguas","creditName":null,"errorCode":null}])
|
||||
(10.4103/1658-354X.169476,[{"oid":"0000-0002-1009-0869","name":"KUSAI","surname":"BAROUDI","creditName":null,"errorCode":null},{"oid":"0000-0002-1009-0869","name":"KUSAI","surname":"BAROUDI","creditName":null,"errorCode":null}])
|
||||
(10.1007/s12414-015-0042-0,[{"oid":"0000-0003-3752-7837","name":"Matthijs","surname":"Janssen","creditName":null,"errorCode":null}])
|
||||
(10.1007/978-3-319-08491-6_5,[{"oid":"0000-0002-4728-689X","name":"Agnieszka","surname":"Landowska","creditName":null,"errorCode":null},{"oid":"0000-0002-1117-903X","name":"Michał","surname":"Wróbel","creditName":null,"errorCode":null}])
|
||||
(10.3390/inorganics7010008,[{"oid":"0000-0003-4502-5212","name":"Ian","surname":"Dance","creditName":null,"errorCode":null}])
|
||||
(10.4028/www.scientific.net/DDF.297-301.814,[{"oid":"0000-0002-6970-0406","name":"Jaroslav","surname":"Kováčik","creditName":null,"errorCode":null}])
|
||||
(10.1117/12.2257557,[{"oid":"0000-0002-7409-3305","name":"Jan","surname":"Kulawik","creditName":null,"errorCode":null}])
|
||||
(10.1109/SIBGRAPI.2011.20,[{"oid":"0000-0001-8612-5805","name":"Fabio","surname":"Miranda","creditName":null,"errorCode":null}])
|
||||
(10.1089/fpd.2012.1448,[{"oid":"0000-0001-7995-7427","name":"Beilei","surname":"Ge","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.lungcan.2013.04.006,[{"oid":"0000-0001-8100-7914","name":"Ruey-Long","surname":"Hong","creditName":null,"errorCode":null}])
|
||||
(10.1109/IROS.2005.1545338,[{"oid":"0000-0002-4612-3554","name":"Rui P.","surname":"Rocha","creditName":"Rui P. Rocha","errorCode":null},{"oid":"0000-0003-3729-5263","name":"Adriano","surname":"Carvalho","creditName":null,"errorCode":null},{"oid":"0000-0002-2725-8867","name":"Jorge","surname":"Dias","creditName":"Jorge Dias","errorCode":null}])
|
||||
(10.3182/20100901-3-IT-2016.00036,[{"oid":"0000-0001-8247-7790","name":"Hélène","surname":"Piet-Lahanier","creditName":null,"errorCode":null}])
|
||||
(10.1016/S0925-9635(99)00257-5,[{"oid":"0000-0002-2400-1759","name":"BRAMBILLA","surname":"Andrea","creditName":null,"errorCode":null},{"oid":"0000-0002-0647-9134","name":"Philippe","surname":"Bergonzo","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.jsg.2011.08.008,[{"oid":"0000-0002-4647-8224","name":"Bernhard","surname":"Grasemann","creditName":null,"errorCode":null},{"oid":"0000-0002-4647-8224","name":"Bernhard","surname":"Grasemann","creditName":null,"errorCode":null}])
|
||||
(10.1016/S1387-6473(03)00072-1,[{"oid":"0000-0001-5273-9817","name":"James","surname":"Lovell","creditName":null,"errorCode":null}])
|
||||
(10.1039/C9TA02405K,[{"oid":"0000-0003-3847-2620","name":"Ch Venkata","surname":"Surya Kumar","creditName":null,"errorCode":null},{"oid":"0000-0001-6353-8877","name":"Ki Tae","surname":"Nam","creditName":null,"errorCode":null}])
|
||||
(10.1007/s00216-008-2520-z,[{"oid":"0000-0002-5134-7130","name":"Helmut","surname":"Ehrenberg","creditName":null,"errorCode":null},{"oid":"0000-0002-5134-7130","name":"Helmut","surname":"Ehrenberg","creditName":null,"errorCode":null},{"oid":"0000-0002-5106-9214","name":"Kristian","surname":"Nikolowski","creditName":null,"errorCode":null}])
|
||||
(10.1007/s13760-018-0889-9,[{"oid":"0000-0002-6644-9148","name":"Ahmet","surname":"yildirim","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.enpol.2012.10.037,[{"oid":"0000-0002-0476-2315","name":"Anu","surname":"Ramaswami","creditName":null,"errorCode":null}])
|
||||
(10.1007/978-981-13-2517-5_65,[{"oid":"0000-0002-1182-0552","name":"Tiago","surname":"Benetti","creditName":null,"errorCode":null}])
|
||||
(10.1179/1973947814Y.0000000172,[{"oid":"0000-0001-5610-7745","name":"Fernanda","surname":"Ruiz Larrea","creditName":null,"errorCode":null},{"oid":"0000-0003-0023-3069","name":"Vanesa","surname":"Estepa","creditName":null,"errorCode":null},{"oid":"0000-0003-3709-1690","name":"Carmen","surname":"Torres","creditName":null,"errorCode":null}])
|
||||
(10.1016/s0378-1119(00)00530-8,[{"oid":"0000-0001-7584-3721","name":"Tzung-Fu","surname":"Hsieh","creditName":null,"errorCode":null}])
|
||||
(10.1149/1.3567401,[{"oid":"0000-0002-9655-6155","name":"Mohammad Reza","surname":"Hantehzadeh","creditName":null,"errorCode":null}])
|
||||
(10.1029/2012jd018226,[{"oid":"0000-0002-1520-4386","name":"永红","surname":"胡","creditName":"胡永红","errorCode":null},{"oid":"0000-0001-5950-9555","name":"Gensuo","surname":"Jia","creditName":null,"errorCode":null}])
|
||||
(10.1186/1477-7517-11-31,[{"oid":"0000-0002-2191-7833","name":"Tim","surname":"Mackey","creditName":null,"errorCode":null},{"oid":"0000-0003-0726-0676","name":"Leo","surname":"Beletsky","creditName":null,"errorCode":null},{"oid":"0000-0003-3376-152X","name":"Maria Gudelia","surname":"Rangel","creditName":null,"errorCode":null},{"oid":"0000-0002-7724-691X","name":"Steffanie","surname":"Strathdee","creditName":null,"errorCode":null}])
|
||||
(10.1023/A:1022686622752,[{"oid":"0000-0002-0534-7797","name":"Stéphane","surname":"Le Crom","creditName":null,"errorCode":null},{"oid":"0000-0001-9327-5166","name":"Marika","surname":"Kapsimali","creditName":null,"errorCode":null}])
|
||||
(10.1524/zpch.2012.0224,[{"oid":"0000-0003-1563-9176","name":"Paul","surname":"Heitjans","creditName":null,"errorCode":null},{"oid":"0000-0001-9706-4892","name":"H. Martin R.","surname":"Wilkening","creditName":"Prof. Dr. Martin Wilkening","errorCode":null},{"oid":"0000-0001-9706-4892","name":"H. Martin R.","surname":"Wilkening","creditName":"Prof. Dr. Martin Wilkening","errorCode":null},{"oid":"0000-0001-9706-4892","name":"H. Martin R.","surname":"Wilkening","creditName":"Prof. Dr. Martin Wilkening","errorCode":null}])
|
||||
(10.1080/03155986.2017.1393730,[{"oid":"0000-0002-8203-0689","name":"Linyan","surname":"Zhang","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.jbiotec.2011.06.040,[{"oid":"0000-0001-7635-1075","name":"Cecilia","surname":"Faraloni","creditName":null,"errorCode":null},{"oid":"0000-0001-7981-0847","name":"Alberto","surname":"Scoma","creditName":"Alberto Scoma","errorCode":null}])
|
||||
(10.1055/s-0033-1361119,[{"oid":"0000-0001-9369-5136","name":"Farnaz","surname":"Monajjemzadeh","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.ijporl.2013.03.025,[{"oid":"0000-0003-4084-2301","name":"Chul Ho","surname":"Jang","creditName":null,"errorCode":null}])
|
||||
(10.3758/s13423-011-0203-9,[{"oid":"0000-0002-7584-2275","name":"Patrik","surname":"Sörqvist","creditName":null,"errorCode":null}])
|
||||
(10.1016/s0306-4522(02)00200-2,[{"oid":"0000-0002-8323-8211","name":"BS","surname":"Shankaranarayana Rao","creditName":null,"errorCode":null}])
|
||||
(10.1021/tx030036f,[{"oid":"0000-0003-4194-6401","name":"Nico P.E.","surname":"Vermeulen","creditName":"N.P.E. Vermeulen","errorCode":null},{"oid":"0000-0002-8358-9425","name":"Angel","surname":"Messeguer","creditName":null,"errorCode":null}])
|
||||
(10.1063/1.4819076,[{"oid":"0000-0003-2367-3825","name":"Timothy","surname":"Hele","creditName":null,"errorCode":null}])
|
||||
(10.1049/el.2014.2651,[{"oid":"0000-0002-2826-6367","name":"Ivan","surname":"Lee","creditName":null,"errorCode":null}])
|
||||
(10.1300/J137v15n02_02,[{"oid":"0000-0001-8374-2277","name":"Donna","surname":"Ryan","creditName":null,"errorCode":null}])
|
||||
(10.1590/S1413-78522013000100003,[{"oid":"0000-0003-3584-3342","name":"Aldo José","surname":"Fontes Pereira","creditName":"Pereira, A.J.F","errorCode":null}])
|
||||
(10.1039/c5lc01356a,[{"oid":"0000-0002-3084-0823","name":"Anna","surname":"Marsano","creditName":null,"errorCode":null},{"oid":"0000-0001-7115-0151","name":"Emiliano","surname":"Votta","creditName":null,"errorCode":null}])
|
||||
(10.1007/s10903-018-0840-4,[{"oid":"0000-0003-1710-3578","name":"Ahsan","surname":"Saleem","creditName":null,"errorCode":null}])
|
||||
(10.1007/s00216-015-8936-3,[{"oid":"0000-0003-4741-8650","name":"Nina","surname":"Kroepfl","creditName":null,"errorCode":null}])
|
||||
(10.1109/ISCAS.2018.8351770,[{"oid":"0000-0002-7158-1426","name":"Panu","surname":"Sjövall","creditName":null,"errorCode":null}])
|
||||
(10.1021/jm100732t,[{"oid":"0000-0002-1767-7072","name":"Matthew","surname":"Fuchter","creditName":null,"errorCode":null},{"oid":"0000-0002-1767-7072","name":"Matthew","surname":"Fuchter","creditName":null,"errorCode":null},{"oid":"0000-0002-5658-8486","name":"Paul","surname":"Freemont","creditName":null,"errorCode":null},{"oid":"0000-0002-5658-8486","name":"Paul","surname":"Freemont","creditName":null,"errorCode":null}])
|
||||
(10.11588/BJB.2004.0.29842,[{"oid":"0000-0002-0555-3451","name":"Frank","surname":"Siegmund","creditName":null,"errorCode":null}])
|
||||
(10.1006/jctb.1999.1911,[{"oid":"0000-0002-2692-9198","name":"John","surname":"Caughman","creditName":null,"errorCode":null},{"oid":"0000-0002-2692-9198","name":"John","surname":"Caughman","creditName":null,"errorCode":null}])
|
||||
(10.1007/BF02125350,[{"oid":"0000-0002-4281-1843","name":"Gábor","surname":"Tardos","creditName":null,"errorCode":null}])
|
||||
(10.1039/C9SC01502G,[{"oid":"0000-0003-3527-7379","name":"Andrzej","surname":"Sienkiewicz","creditName":null,"errorCode":null},{"oid":"0000-0002-7515-7593","name":"Farzaneh","surname":"Fadaei Tirani","creditName":"Farzaneh Fadaei Tirani","errorCode":null}])
|
||||
(10.1063/1.119082,[{"oid":"0000-0001-8187-7469","name":"Hans","surname":"Christen","creditName":null,"errorCode":null}])
|
||||
(10.1109/OCEANSAP.2016.7485389,[{"oid":"0000-0001-7619-8015","name":"Horst","surname":"Hellbrück","creditName":null,"errorCode":null}])
|
||||
(10.1071/AN11006,[{"oid":"0000-0001-7750-0570","name":"Michael","surname":"Friend","creditName":null,"errorCode":null},{"oid":"0000-0001-5129-2216","name":"Susan","surname":"Robertson","creditName":null,"errorCode":null},{"oid":"0000-0001-5639-9581","name":"John","surname":"Broster","creditName":null,"errorCode":null}])
|
||||
(10.1186/1746-4811-4-1,[{"oid":"0000-0002-1872-2998","name":"Wilhelm","surname":"Gruissem","creditName":null,"errorCode":null},{"oid":"0000-0002-6645-1862","name":"Lars","surname":"Hennig","creditName":null,"errorCode":null},{"oid":"0000-0002-6645-1862","name":"Lars","surname":"Hennig","creditName":null,"errorCode":null},{"oid":"0000-0002-4802-754X","name":"Matthias","surname":"Hirsch-Hoffmann","creditName":null,"errorCode":null}])
|
||||
(10.1109/WCNC.2014.6951929,[{"oid":"0000-0003-1802-5022","name":"Michael","surname":"Heimlich","creditName":null,"errorCode":null},{"oid":"0000-0003-1802-5022","name":"Michael","surname":"Heimlich","creditName":null,"errorCode":null}])
|
||||
(10.1210/jc.2006-0075,[{"oid":"0000-0003-0637-1577","name":"Raj","surname":"Vuppalanchi","creditName":null,"errorCode":null}])
|
||||
(10.1590/S1678-91992005000300012,[{"oid":"0000-0001-9319-7516","name":"Rodrigo","surname":"Silva","creditName":"Da Silva, Rodrigo Costa","errorCode":null}])
|
||||
(10.1088/0953-2048/24/7/075025,[{"oid":"0000-0001-7488-066X","name":"Qing-Ping","surname":"Ding","creditName":null,"errorCode":null},{"oid":"0000-0003-1796-6840","name":"Toshihiro","surname":"Taen","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.apenergy.2015.11.058,[{"oid":"0000-0003-2692-8299","name":"Zia","surname":"Wadud","creditName":null,"errorCode":null}])
|
||||
(10.1002/ange.201106310,[{"oid":"0000-0002-7898-317X","name":"Belén","surname":"Martín-Matute","creditName":null,"errorCode":null},{"oid":"0000-0002-1534-2690","name":"Agnieszka","surname":"Bartoszewicz","creditName":null,"errorCode":null}])
|
||||
(10.1016/s0002-9440(10)64486-0,[{"oid":"0000-0001-6151-4257","name":"Markus","surname":"Britschgi","creditName":"Markus Britschgi","errorCode":null}])
|
||||
(10.1021/ac0715672,[{"oid":"0000-0002-3664-5711","name":"Maria Jose","surname":"Gomez Ramos","creditName":null,"errorCode":null},{"oid":"0000-0002-3664-5711","name":"Maria Jose","surname":"Gomez Ramos","creditName":null,"errorCode":null},{"oid":"0000-0003-4321-6795","name":"Maria Jesus","surname":"Martinez Bueno","creditName":null,"errorCode":null},{"oid":"0000-0002-1963-8106","name":"Dolores","surname":"Hernando","creditName":null,"errorCode":null},{"oid":"0000-0003-2649-6772","name":"Ana","surname":"Agüera","creditName":null,"errorCode":null},{"oid":"0000-0001-9158-0271","name":"JUAN F","surname":"GARCÍA-REYES","creditName":null,"errorCode":null}])
|
||||
(10.5020/18061230.2014.319,[{"oid":"0000-0002-9554-1736","name":"Thiago","surname":"Vasconcelos","creditName":null,"errorCode":null}])
|
||||
(10.1109/ICoAC.2017.7951738,[{"oid":"0000-0001-8438-0175","name":"Nedunchezhian","surname":"Raju","creditName":null,"errorCode":null},{"oid":"0000-0002-3769-657X","name":"Vijayakumar","surname":"Velusamy","creditName":null,"errorCode":null},{"oid":"0000-0002-3769-657X","name":"Vijayakumar","surname":"Velusamy","creditName":null,"errorCode":null}])
|
||||
(10.1126/scitranslmed.3002785,[{"oid":"0000-0003-3619-1657","name":"Christian","surname":"Ottensmeier","creditName":null,"errorCode":null}])
|
||||
(10.1007/s00245-010-9130-9,[{"oid":"0000-0002-8510-7477","name":"Catherine","surname":"Donnelly","creditName":null,"errorCode":null}])
|
||||
(10.1530/endoabs.37.ep899,[{"oid":"0000-0002-9205-9530","name":"Volodymyr","surname":"Pankiv","creditName":null,"errorCode":null}])
|
||||
(10.1029/2010GL045928,[{"oid":"0000-0003-1378-8434","name":"Shari","surname":"Yvon-Lewis","creditName":"Shari A Yvon-Lewis","errorCode":null},{"oid":"0000-0003-1097-6800","name":"John","surname":"Kessler","creditName":null,"errorCode":null},{"oid":"0000-0003-1097-6800","name":"John","surname":"Kessler","creditName":null,"errorCode":null}])
|
||||
(10.1016/S0952-7915(00)00127-8,[{"oid":"0000-0001-6479-5330","name":"Gunther","surname":"Eysenbach","creditName":null,"errorCode":null}])
|
||||
(10.1103/PhysRevB.83.064508,[{"oid":"0000-0001-7991-3918","name":"Jianxin","surname":"Zhu","creditName":null,"errorCode":null}])
|
||||
(10.1002/ETT.2664,[{"oid":"0000-0001-6868-6860","name":"Qianbin","surname":null,"creditName":null,"errorCode":null}])
|
||||
(10.1252/jcej.12we249,[{"oid":"0000-0002-7423-5561","name":"Takayoshi","surname":"Ishimoto","creditName":null,"errorCode":null},{"oid":"0000-0003-4347-9923","name":"Michihisa","surname":"Koyama","creditName":null,"errorCode":null}])
|
||||
(10.1088/0305-4470/37/13/006,[{"oid":"0000-0002-2295-8055","name":"Epifanio Guido","surname":"Virga","creditName":null,"errorCode":null}])
|
||||
(10.14419/IJET.V7I4.44.26872,[{"oid":"0000-0002-3164-5157","name":"Edy","surname":"Budiman","creditName":null,"errorCode":null}])
|
||||
(10.1117/12.667839,[{"oid":"0000-0002-1551-6646","name":"Mikhail","surname":"Bryushinin","creditName":null,"errorCode":null}])
|
||||
(10.1021/acs.jafc.6b03279,[{"oid":"0000-0002-4145-5031","name":"Manuel","surname":"Mota","creditName":"Mota, MM","errorCode":null},{"oid":"0000-0002-4145-5031","name":"Manuel","surname":"Mota","creditName":"Mota, MM","errorCode":null},{"oid":"0000-0003-2817-7943","name":"Jorge","surname":"Faria","creditName":"Jorge M. S. Faria","errorCode":null},{"oid":"0000-0003-2817-7943","name":"Jorge","surname":"Faria","creditName":"Jorge M. S. Faria","errorCode":null},{"oid":"0000-0003-3257-9777","name":"Ana M.","surname":"Rodrigues","creditName":null,"errorCode":null}])
|
||||
(10.1117/12.2228432,[{"oid":"0000-0003-2590-034X","name":"Peppino","surname":"Fazio","creditName":null,"errorCode":null},{"oid":"0000-0003-0593-5254","name":"Mauro","surname":"Tropea","creditName":null,"errorCode":null}])
|
||||
(10.1186/s40349-016-0049-8,[{"oid":"0000-0002-4271-7670","name":"Marc Nicola","surname":"Gallay","creditName":null,"errorCode":null}])
|
||||
(10.1055/s-0029-1214397,[{"oid":"0000-0002-5286-7322","name":"Eva","surname":"Münster","creditName":null,"errorCode":null},{"oid":"0000-0002-5286-7322","name":"Eva","surname":"Münster","creditName":null,"errorCode":null}])
|
||||
(10.1108/00197850610671991,[{"oid":"0000-0003-1239-8733","name":"viki","surname":"holton","creditName":null,"errorCode":null},{"oid":"0000-0003-1239-8733","name":"viki","surname":"holton","creditName":null,"errorCode":null}])
|
||||
(10.1186/s12863-017-0480-z,[{"oid":"0000-0001-7747-0930","name":"Puthen Veettil","surname":"Jithesh","creditName":null,"errorCode":null}])
|
||||
(10.1109/jsen.2011.2106156,[{"oid":"0000-0002-4213-9575","name":"Yael","surname":"Hanein","creditName":null,"errorCode":null}])
|
||||
(10.1115/FEDSM2012-72228,[{"oid":"0000-0001-6692-858X","name":"Yogesh","surname":"Joshi","creditName":null,"errorCode":null}])
|
||||
(10.1039/c5cp03696h,[{"oid":"0000-0002-0664-2536","name":"François","surname":"LIQUE","creditName":null,"errorCode":null},{"oid":"0000-0002-7407-303X","name":"Jacek","surname":"Klos","creditName":null,"errorCode":null}])
|
||||
(10.1016/0308-8146(95)00112-3,[{"oid":"0000-0002-3438-0852","name":"José María","surname":"Fresno-Baro","creditName":null,"errorCode":null},{"oid":"0000-0002-3438-0852","name":"José María","surname":"Fresno-Baro","creditName":null,"errorCode":null},{"oid":"0000-0002-3737-9830","name":"Javier","surname":"Carballo","creditName":null,"errorCode":null},{"oid":"0000-0002-1697-178X","name":"María Josefa","surname":"González Prieto","creditName":null,"errorCode":null},{"oid":"0000-0002-5658-6720","name":"Ana","surname":"Bernardo Álvarez","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.jmaa.2005.05.047,[{"oid":"0000-0002-8320-3596","name":"Kazimierz","surname":"Wlodarczyk","creditName":null,"errorCode":null},{"oid":"0000-0001-6635-0781","name":"Dariusz","surname":"Wardowski","creditName":null,"errorCode":null}])
|
||||
(10.1056/NEJM199312233292602,[{"oid":"0000-0002-9631-1254","name":"Martin","surname":"Larson","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.apergo.2008.01.001,[{"oid":"0000-0001-7844-4881","name":"Vilhelm F","surname":"Koefoed","creditName":null,"errorCode":null}])
|
||||
(10.1007/s00421-011-2121-y,[{"oid":"0000-0001-6864-7706","name":"Taku","surname":"Wakahara","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.agrformet.2007.05.012,[{"oid":"0000-0001-5779-2764","name":"Rachid","surname":"Hadria","creditName":null,"errorCode":null},{"oid":"0000-0002-3905-7560","name":"Gilles","surname":"Boulet","creditName":null,"errorCode":null}])
|
||||
(10.1393/ncb/i2007-10061-0,[{"oid":"0000-0002-7349-1109","name":"dafne","surname":"guetta","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.ajog.2015.11.023,[{"oid":"0000-0001-7122-6187","name":"Katie","surname":"Propst","creditName":null,"errorCode":null},{"oid":"0000-0001-7122-6187","name":"Katie","surname":"Propst","creditName":null,"errorCode":null},{"oid":"0000-0002-2044-1693","name":"Tyler","surname":"Muffly","creditName":null,"errorCode":null},{"oid":"0000-0002-2044-1693","name":"Tyler","surname":"Muffly","creditName":null,"errorCode":null}])
|
||||
(doi:10.1166/jnn.2018.15274,[{"oid":"0000-0003-0582-575X","name":"David","surname":"Rojas","creditName":"D. Rojas","errorCode":null}])
|
||||
(10.21061/cc.v2i1.a.10,[{"oid":"0000-0001-7242-6762","name":"Jacob","surname":"Bruggeman","creditName":null,"errorCode":null}])
|
||||
(10.3390/nu10050622,[{"oid":"0000-0001-6890-5250","name":"Marjory","surname":"Moodie","creditName":null,"errorCode":null},{"oid":"0000-0002-5957-6931","name":"Jaithri","surname":"Ananthapavan","creditName":null,"errorCode":null},{"oid":"0000-0001-9736-1539","name":"Gary","surname":"Sacks","creditName":null,"errorCode":null},{"oid":"0000-0001-9736-1539","name":"Gary","surname":"Sacks","creditName":null,"errorCode":null},{"oid":"0000-0001-9736-1539","name":"Gary","surname":"Sacks","creditName":null,"errorCode":null},{"oid":"0000-0002-3206-8232","name":"Lennert","surname":"Veerman","creditName":null,"errorCode":null},{"oid":"0000-0002-3323-575X","name":"kathryn","surname":"backholer","creditName":null,"errorCode":null},{"oid":"0000-0003-2891-9476","name":"Vicki","surname":"Brown","creditName":null,"errorCode":null}])
|
||||
(10.5194/bg-2018-344,[{"oid":"0000-0001-6469-7167","name":"Neus","surname":"Garcias-Bonet","creditName":null,"errorCode":null}])
|
||||
(10.1038/srep01933,[{"oid":"0000-0001-8948-466X","name":"Yichi","surname":"Zhang","creditName":null,"errorCode":null}])
|
||||
(10.1306/05131615170,[{"oid":"0000-0003-0397-8744","name":"Sarada","surname":"Mohanty","creditName":"Sarada P Mohanty","errorCode":null}])
|
||||
(10.1080/14658011.2017.1399531,[{"oid":"0000-0003-2296-5623","name":"Janak","surname":"Sapkota","creditName":null,"errorCode":null},{"oid":"0000-0003-2296-5623","name":"Janak","surname":"Sapkota","creditName":null,"errorCode":null},{"oid":"0000-0003-4737-9823","name":"Joamin","surname":"Gonzalez-Gutierrez","creditName":null,"errorCode":null},{"oid":"0000-0001-5149-7895","name":"Clemens","surname":"Holzer","creditName":null,"errorCode":null},{"oid":"0000-0001-5149-7895","name":"Clemens","surname":"Holzer","creditName":null,"errorCode":null}])
|
||||
(10.4322/acr.2015.007,[{"oid":"0000-0002-8414-4161","name":"Jussara Bianchi","surname":"Castelli","creditName":null,"errorCode":null},{"oid":"0000-0002-5092-0505","name":"Benoit","surname":"Bibas","creditName":null,"errorCode":null}])
|
||||
(10.1186/s12888-017-1560-3,[{"oid":"0000-0002-9742-1359","name":"Alexandra","surname":"Pitman","creditName":null,"errorCode":null},{"oid":"0000-0003-2519-1539","name":"David","surname":"Osborn","creditName":null,"errorCode":null}])
|
||||
(10.1007/s10741-019-09829-7,[{"oid":"0000-0001-7175-0464","name":"Pierpaolo","surname":"Pellicori","creditName":null,"errorCode":null}])
|
||||
(10.1007/s00542-012-1613-y,[{"oid":"0000-0001-5535-9473","name":"Shota","surname":"Yabui","creditName":null,"errorCode":null},{"oid":"0000-0003-0623-5938","name":"Takenori","surname":"Atsumi","creditName":null,"errorCode":null}])
|
||||
(10.1039/C9SC02399B,[{"oid":"0000-0002-1034-0856","name":"Marvin","surname":"Vega","creditName":"Marvin M Vega","errorCode":null},{"oid":"0000-0003-0994-7789","name":"Li","surname":"Fu","creditName":null,"errorCode":null}])
|
||||
(10.1039/C8DT04698K,[{"oid":"0000-0002-3213-1350","name":"Likun","surname":"Tan","creditName":null,"errorCode":null}])
|
||||
(10.1080/09595230802093802,[{"oid":"0000-0001-9580-1545","name":"Simon","surname":"Moyes","creditName":null,"errorCode":null}])
|
||||
(10.1371/journal.pone.0136586,[{"oid":"0000-0002-6296-1427","name":"Scot","surname":"Dowd","creditName":null,"errorCode":null},{"oid":"0000-0002-2176-6932","name":"Jan","surname":"Suchodolski","creditName":null,"errorCode":null},{"oid":"0000-0002-9557-3068","name":"Cole","surname":"Mcqueen","creditName":null,"errorCode":null}])
|
||||
(10.3233/RNN-2009-0473,[{"oid":"0000-0002-6746-1034","name":"Iris-Katharina","surname":"Penner","creditName":null,"errorCode":null}])
|
||||
(10.1016/j.jcpa.2015.04.004,[{"oid":"0000-0001-5281-0521","name":"Antonio","surname":"Fernandez","creditName":null,"errorCode":null},{"oid":"0000-0003-3749-8845","name":"Eva","surname":"Sierra","creditName":null,"errorCode":null},{"oid":"0000-0002-1623-5010","name":"Manuel","surname":"Arbelo","creditName":"marbelo","errorCode":null}])
|
||||
(10.1007/s11270-011-1042-z,[{"oid":"0000-0003-2811-1979","name":"Irvan","surname":"Dahlan","creditName":null,"errorCode":null}])
|
||||
(10.1054/bjoc.2001.2096,[{"oid":"0000-0001-9484-9977","name":"Sasiwarang","surname":"Wannamethee","creditName":null,"errorCode":null}])
|
||||
(10.1137/140988541,[{"oid":"0000-0001-5645-5854","name":"Ilse","surname":"Ipsen","creditName":null,"errorCode":null}])
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue