1
0
Fork 0

Merge branch 'master' into graph_cleaning

This commit is contained in:
Claudio Atzori 2020-06-08 15:23:24 +02:00
commit 3d871c6651
125 changed files with 10073 additions and 412 deletions

View File

@ -58,6 +58,18 @@ public class ModelSupport {
oafTypes.put("relation", Relation.class);
}
public static final Map<Class, String> idPrefixMap = Maps.newHashMap();
static {
idPrefixMap.put(Datasource.class, "10");
idPrefixMap.put(Organization.class, "20");
idPrefixMap.put(Project.class, "40");
idPrefixMap.put(Dataset.class, "50");
idPrefixMap.put(OtherResearchProduct.class, "50");
idPrefixMap.put(Software.class, "50");
idPrefixMap.put(Publication.class, "50");
}
public static final Map<String, String> entityIdPrefix = Maps.newHashMap();
static {
@ -289,6 +301,10 @@ public class ModelSupport {
private ModelSupport() {
}
public static <E extends OafEntity> String getIdPrefix(Class<E> clazz) {
return idPrefixMap.get(clazz);
}
/**
* Checks subclass-superclass relationship.
*

View File

@ -10,6 +10,7 @@ public class Dataset extends Result implements Serializable {
private Field<String> storagedate;
// candidate for removal
private Field<String> device;
private Field<String> size;

View File

@ -2,8 +2,10 @@
package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
public class Result extends OafEntity implements Serializable {
@ -248,13 +250,24 @@ public class Result extends OafEntity implements Serializable {
StructuredProperty baseMainTitle = null;
if (title != null) {
baseMainTitle = getMainTitle(title);
title.remove(baseMainTitle);
if (baseMainTitle != null) {
final StructuredProperty p = baseMainTitle;
title = title.stream().filter(t -> t != p).collect(Collectors.toList());
}
//
//
// title.remove(baseMainTitle);
}
StructuredProperty newMainTitle = null;
if (r.getTitle() != null) {
newMainTitle = getMainTitle(r.getTitle());
r.getTitle().remove(newMainTitle);
if (newMainTitle != null) {
final StructuredProperty p = newMainTitle;
title = title.stream().filter(t -> t != p).collect(Collectors.toList());
}
// r.getTitle().remove(newMainTitle);
}
if (newMainTitle != null && compareTrust(this, r) < 0)

View File

@ -10,8 +10,10 @@ public class Software extends Result implements Serializable {
private List<Field<String>> documentationUrl;
// candidate for removal
private List<StructuredProperty> license;
// candidate for removal
private Field<String> codeRepositoryUrl;
private Qualifier programmingLanguage;

View File

@ -57,7 +57,7 @@
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-openaire-broker-common</artifactId>
<version>[2.0.0,3.0.0)</version>
<version>[2.0.1,3.0.0)</version>
</dependency>
</dependencies>

View File

@ -12,7 +12,6 @@ import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.DateUtils;
import eu.dnetlib.broker.objects.OpenAireEventPayload;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
@ -37,14 +36,13 @@ public class EventFactory {
final Map<String, Object> map = createMapFromResult(updateInfo);
final String payload = createPayload(updateInfo);
final String eventId =
calculateEventId(updateInfo.getTopicPath(), updateInfo.getTarget().getOriginalId().get(0), updateInfo.getHighlightValueAsString());
final String eventId = calculateEventId(
updateInfo.getTopicPath(), updateInfo.getTarget().getOriginalId().get(0),
updateInfo.getHighlightValueAsString());
res.setEventId(eventId);
res.setProducerId(PRODUCER_ID);
res.setPayload(payload);
res.setPayload(updateInfo.asBrokerPayload().toJSON());
res.setMap(map);
res.setTopic(updateInfo.getTopicPath());
res.setCreationDate(now);
@ -53,15 +51,6 @@ public class EventFactory {
return res;
}
private static String createPayload(final UpdateInfo<?> updateInfo) {
final OpenAireEventPayload payload = new OpenAireEventPayload();
// TODO : use ConversionUtils
updateInfo.compileHighlight(payload);
return payload.toJSON();
}
private static Map<String, Object> createMapFromResult(final UpdateInfo<?> updateInfo) {
final Map<String, Object> map = new HashMap<>();
@ -92,13 +81,17 @@ public class EventFactory {
final List<StructuredProperty> subjects = target.getSubject();
if (subjects.size() > 0) {
map
.put("target_publication_subject_list", subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList()));
.put(
"target_publication_subject_list",
subjects.stream().map(StructuredProperty::getValue).collect(Collectors.toList()));
}
final List<Author> authors = target.getAuthor();
if (authors.size() > 0) {
map
.put("target_publication_author_list", authors.stream().map(Author::getFullname).collect(Collectors.toList()));
.put(
"target_publication_author_list",
authors.stream().map(Author::getFullname).collect(Collectors.toList()));
}
// PROVENANCE INFO
@ -125,7 +118,9 @@ public class EventFactory {
}
private static long parseDateTolong(final String date) {
if (StringUtils.isBlank(date)) { return -1; }
if (StringUtils.isBlank(date)) {
return -1;
}
try {
return DateUtils.parseDate(date, DATE_PATTERNS).getTime();
} catch (final ParseException e) {

View File

@ -87,32 +87,25 @@ public class GenerateEventsApplication {
private static final UpdateMatcher<Pair<Result, List<Software>>, ?> enrichMoreSoftware = new EnrichMoreSoftware();
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMisissingPublicationIsRelatedTo = new EnrichMissingPublicationIsRelatedTo();
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsReferencedBy =
new EnrichMissingPublicationIsReferencedBy();
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsReferencedBy = new EnrichMissingPublicationIsReferencedBy();
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationReferences = new EnrichMissingPublicationReferences();
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsSupplementedTo =
new EnrichMissingPublicationIsSupplementedTo();
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsSupplementedBy =
new EnrichMissingPublicationIsSupplementedBy();
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsSupplementedTo = new EnrichMissingPublicationIsSupplementedTo();
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsSupplementedBy = new EnrichMissingPublicationIsSupplementedBy();
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMisissingDatasetIsRelatedTo =
new EnrichMissingDatasetIsRelatedTo();
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsReferencedBy =
new EnrichMissingDatasetIsReferencedBy();
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetReferences =
new EnrichMissingDatasetReferences();
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsSupplementedTo =
new EnrichMissingDatasetIsSupplementedTo();
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsSupplementedBy =
new EnrichMissingDatasetIsSupplementedBy();
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMisissingDatasetIsRelatedTo = new EnrichMissingDatasetIsRelatedTo();
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsReferencedBy = new EnrichMissingDatasetIsReferencedBy();
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetReferences = new EnrichMissingDatasetReferences();
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsSupplementedTo = new EnrichMissingDatasetIsSupplementedTo();
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsSupplementedBy = new EnrichMissingDatasetIsSupplementedBy();
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(GenerateEventsApplication.class
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")));
.toString(
GenerateEventsApplication.class
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")));
parser.parseArgument(args);
final Boolean isSparkSessionManaged = Optional
@ -156,8 +149,9 @@ public class GenerateEventsApplication {
final String graphPath,
final Class<R> resultClazz) {
final Dataset<R> results = readPath(spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz)
.filter(r -> r.getDataInfo().getDeletedbyinference());
final Dataset<R> results = readPath(
spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz)
.filter(r -> r.getDataInfo().getDeletedbyinference());
final Dataset<Relation> rels = readPath(spark, graphPath + "/relation", Relation.class)
.filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS));
@ -196,15 +190,18 @@ public class GenerateEventsApplication {
return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList());
}
private static <SRC extends Result, TRG extends OafEntity> JavaRDD<Event> generateRelationEvents(final SparkSession spark,
private static <SRC extends Result, TRG extends OafEntity> JavaRDD<Event> generateRelationEvents(
final SparkSession spark,
final String graphPath,
final Class<SRC> sourceClass,
final Class<TRG> targetClass) {
final Dataset<SRC> sources = readPath(spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), sourceClass)
.filter(r -> r.getDataInfo().getDeletedbyinference());
final Dataset<SRC> sources = readPath(
spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), sourceClass)
.filter(r -> r.getDataInfo().getDeletedbyinference());
final Dataset<TRG> targets = readPath(spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), targetClass);
final Dataset<TRG> targets = readPath(
spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), targetClass);
final Dataset<Relation> mergedRels = readPath(spark, graphPath + "/relation", Relation.class)
.filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS));

View File

@ -28,7 +28,8 @@ public class EnrichMissingProject
if (source.getRight().isEmpty()) {
return Arrays.asList();
} else {
return target.getRight()
return target
.getRight()
.stream()
.map(ConversionUtils::oafProjectToBrokerProject)
.map(p -> generateUpdateInfo(p, source, target))

View File

@ -24,12 +24,14 @@ public class EnrichMoreProject extends UpdateMatcher<Pair<Result, List<Project>>
protected List<UpdateInfo<eu.dnetlib.broker.objects.Project>> findUpdates(final Pair<Result, List<Project>> source,
final Pair<Result, List<Project>> target) {
final Set<String> existingProjects = source.getRight()
final Set<String> existingProjects = source
.getRight()
.stream()
.map(Project::getId)
.collect(Collectors.toSet());
return target.getRight()
return target
.getRight()
.stream()
.filter(p -> !existingProjects.contains(p.getId()))
.map(ConversionUtils::oafProjectToBrokerProject)

View File

@ -39,7 +39,7 @@ public abstract class AbstractEnrichMissingPublication
.getRight()
.stream()
.filter(d -> !existingPublications.contains(d.getId()))
.map(ConversionUtils::oafPublicationToBrokerPublication)
.map(ConversionUtils::oafResultToBrokerPublication)
.map(i -> generateUpdateInfo(i, source, target))
.collect(Collectors.toList());

View File

@ -29,7 +29,8 @@ public class EnrichMissingSoftware
if (source.getRight().isEmpty()) {
return Arrays.asList();
} else {
return target.getRight()
return target
.getRight()
.stream()
.map(ConversionUtils::oafSoftwareToBrokerSoftware)
.map(p -> generateUpdateInfo(p, source, target))

View File

@ -26,12 +26,14 @@ public class EnrichMoreSoftware
final Pair<Result, List<Software>> source,
final Pair<Result, List<Software>> target) {
final Set<String> existingSoftwares = source.getRight()
final Set<String> existingSoftwares = source
.getRight()
.stream()
.map(Software::getId)
.collect(Collectors.toSet());
return target.getRight()
return target
.getRight()
.stream()
.filter(p -> !existingSoftwares.contains(p.getId()))
.map(ConversionUtils::oafSoftwareToBrokerSoftware)

View File

@ -28,7 +28,9 @@ public class EnrichMissingOpenAccess extends UpdateMatcher<Result, Instance> {
.filter(right -> right.equals(BrokerConstants.OPEN_ACCESS))
.count();
if (count > 0) { return Arrays.asList(); }
if (count > 0) {
return Arrays.asList();
}
return source
.getInstance()

View File

@ -15,7 +15,7 @@ public class BrokerConstants {
public static final String OPEN_ACCESS = "OPEN";
public static final String IS_MERGED_IN_CLASS = "isMergedIn";
public static final List<Class<? extends Result>> RESULT_CLASSES =
Arrays.asList(Publication.class, Dataset.class, Software.class, OtherResearchProduct.class);
public static final List<Class<? extends Result>> RESULT_CLASSES = Arrays
.asList(Publication.class, Dataset.class, Software.class, OtherResearchProduct.class);
}

View File

@ -5,83 +5,150 @@ import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.broker.objects.Instance;
import eu.dnetlib.broker.objects.Pid;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.ExternalReference;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class ConversionUtils {
public static List<Instance> oafInstanceToBrokerInstances(final eu.dnetlib.dhp.schema.oaf.Instance i) {
private static final Logger log = LoggerFactory.getLogger(ConversionUtils.class);
public static List<eu.dnetlib.broker.objects.Instance> oafInstanceToBrokerInstances(final Instance i) {
return i.getUrl().stream().map(url -> {
final Instance r = new Instance();
r.setUrl(url);
r.setInstancetype(i.getInstancetype().getClassid());
r.setLicense(BrokerConstants.OPEN_ACCESS);
r.setHostedby(i.getHostedby().getValue());
return r;
return new eu.dnetlib.broker.objects.Instance()
.setUrl(url)
.setInstancetype(i.getInstancetype().getClassid())
.setLicense(BrokerConstants.OPEN_ACCESS)
.setHostedby(i.getHostedby().getValue());
}).collect(Collectors.toList());
}
public static Pid oafPidToBrokerPid(final StructuredProperty sp) {
final Pid pid = new Pid();
pid.setValue(sp.getValue());
pid.setType(sp.getQualifier().getClassid());
return pid;
return sp != null ? new Pid()
.setValue(sp.getValue())
.setType(sp.getQualifier().getClassid()) : null;
}
public static final Pair<String, String> oafSubjectToPair(final StructuredProperty sp) {
return Pair.of(sp.getQualifier().getClassid(), sp.getValue());
return sp != null ? Pair.of(sp.getQualifier().getClassid(), sp.getValue()) : null;
}
public static final eu.dnetlib.broker.objects.Dataset oafDatasetToBrokerDataset(final Dataset d) {
final eu.dnetlib.broker.objects.Dataset res = new eu.dnetlib.broker.objects.Dataset();
res.setOriginalId(d.getOriginalId().get(0));
res.setTitles(structPropList(d.getTitle()));
res.setPids(d.getPid().stream().map(ConversionUtils::structeredPropertyToPid).collect(Collectors.toList()));
res.setInstances(d.getInstance().stream().map(ConversionUtils::oafInstanceToBrokerInstances).flatMap(List::stream).collect(Collectors.toList()));
res.setCollectedFrom(d.getCollectedfrom().stream().map(KeyValue::getValue).collect(Collectors.toList()));
return res;
return d != null ? new eu.dnetlib.broker.objects.Dataset()
.setOriginalId(d.getOriginalId().get(0))
.setTitles(structPropList(d.getTitle()))
.setPids(d.getPid().stream().map(ConversionUtils::oafPidToBrokerPid).collect(Collectors.toList()))
.setInstances(
d
.getInstance()
.stream()
.map(ConversionUtils::oafInstanceToBrokerInstances)
.flatMap(List::stream)
.collect(Collectors.toList()))
.setCollectedFrom(d.getCollectedfrom().stream().map(KeyValue::getValue).collect(Collectors.toList()))
: null;
}
public static final eu.dnetlib.broker.objects.Publication oafPublicationToBrokerPublication(final Publication d) {
final eu.dnetlib.broker.objects.Publication res = new eu.dnetlib.broker.objects.Publication();
// TODO This should be reusable
return res;
public static final eu.dnetlib.broker.objects.Publication oafResultToBrokerPublication(final Result result) {
return result != null ? new eu.dnetlib.broker.objects.Publication()
.setOriginalId(result.getOriginalId().get(0))
.setTitles(structPropList(result.getTitle()))
.setAbstracts(fieldList(result.getDescription()))
.setLanguage(result.getLanguage().getClassid())
.setSubjects(structPropList(result.getSubject()))
.setCreators(result.getAuthor().stream().map(Author::getFullname).collect(Collectors.toList()))
.setPublicationdate(result.getDateofcollection())
.setPublisher(fieldValue(result.getPublisher()))
.setEmbargoenddate(fieldValue(result.getEmbargoenddate()))
.setContributor(fieldList(result.getContributor()))
.setJournal(
result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null)
.setCollectedFrom(result.getCollectedfrom().stream().map(KeyValue::getValue).collect(Collectors.toList()))
.setPids(result.getPid().stream().map(ConversionUtils::oafPidToBrokerPid).collect(Collectors.toList()))
.setInstances(
result
.getInstance()
.stream()
.map(ConversionUtils::oafInstanceToBrokerInstances)
.flatMap(List::stream)
.collect(Collectors.toList()))
.setExternalReferences(
result
.getExternalReference()
.stream()
.map(ConversionUtils::oafExtRefToBrokerExtRef)
.collect(Collectors.toList()))
: null;
}
private static eu.dnetlib.broker.objects.Journal oafJournalToBrokerJournal(final Journal journal) {
return journal != null ? new eu.dnetlib.broker.objects.Journal()
.setName(journal.getName())
.setIssn(journal.getIssnPrinted())
.setEissn(journal.getIssnOnline())
.setLissn(journal.getIssnLinking()) : null;
}
private static eu.dnetlib.broker.objects.ExternalReference oafExtRefToBrokerExtRef(final ExternalReference ref) {
return ref != null ? new eu.dnetlib.broker.objects.ExternalReference()
.setRefidentifier(ref.getRefidentifier())
.setSitename(ref.getSitename())
.setType(ref.getQualifier().getClassid())
.setUrl(ref.getUrl())
: null;
}
public static final eu.dnetlib.broker.objects.Project oafProjectToBrokerProject(final Project p) {
final eu.dnetlib.broker.objects.Project res = new eu.dnetlib.broker.objects.Project();
res.setTitle(fieldValue(p.getTitle()));
res.setAcronym(fieldValue(p.getAcronym()));
res.setCode(fieldValue(p.getCode()));
res.setFunder(null); // TODO
res.setFundingProgram(null); // TODO
res.setJurisdiction(null); // TODO
if (p == null) {
return null;
}
final eu.dnetlib.broker.objects.Project res = new eu.dnetlib.broker.objects.Project()
.setTitle(fieldValue(p.getTitle()))
.setAcronym(fieldValue(p.getAcronym()))
.setCode(fieldValue(p.getCode()));
final String ftree = fieldValue(p.getFundingtree());
if (StringUtils.isNotBlank(ftree)) {
try {
final Document fdoc = DocumentHelper.parseText(ftree);
res.setFunder(fdoc.valueOf("/fundingtree/funder/shortname"));
res.setJurisdiction(fdoc.valueOf("/fundingtree/funder/jurisdiction"));
res.setFundingProgram(fdoc.valueOf("//funding_level_0/name"));
} catch (final DocumentException e) {
log.error("Error in record " + p.getId() + ": invalid fundingtree: " + ftree);
}
}
return res;
}
public static final eu.dnetlib.broker.objects.Software oafSoftwareToBrokerSoftware(final Software sw) {
final eu.dnetlib.broker.objects.Software res = new eu.dnetlib.broker.objects.Software();
res.setName(structPropValue(sw.getTitle()));
res.setDescription(fieldValue(sw.getDescription()));
res.setRepository(fieldValue(sw.getCodeRepositoryUrl()));
res.setLandingPage(fieldValue(sw.getDocumentationUrl()));
return res;
}
private static Pid structeredPropertyToPid(final StructuredProperty sp) {
final Pid pid = new Pid();
pid.setValue(sp.getValue());
pid.setType(sp.getQualifier().getClassid());
return pid;
return sw != null ? new eu.dnetlib.broker.objects.Software()
.setName(structPropValue(sw.getTitle()))
.setDescription(fieldValue(sw.getDescription()))
.setRepository(fieldValue(sw.getCodeRepositoryUrl()))
.setLandingPage(fieldValue(sw.getDocumentationUrl()))
: null;
}
private static String fieldValue(final Field<String> f) {
@ -89,14 +156,29 @@ public class ConversionUtils {
}
private static String fieldValue(final List<Field<String>> fl) {
return fl != null && !fl.isEmpty() && fl.get(0) != null ? fl.get(0).getValue() : null;
return fl != null ? fl.stream().map(Field::getValue).filter(StringUtils::isNotBlank).findFirst().orElse(null)
: null;
}
private static String structPropValue(final List<StructuredProperty> props) {
return props != null && !props.isEmpty() && props.get(0) != null ? props.get(0).getValue() : null;
return props != null
? props.stream().map(StructuredProperty::getValue).filter(StringUtils::isNotBlank).findFirst().orElse(null)
: null;
}
private static List<String> fieldList(final List<Field<String>> fl) {
return fl != null
? fl.stream().map(Field::getValue).filter(StringUtils::isNotBlank).collect(Collectors.toList())
: new ArrayList<>();
}
private static List<String> structPropList(final List<StructuredProperty> props) {
return props != null ? props.stream().map(StructuredProperty::getValue).collect(Collectors.toList()) : new ArrayList<>();
return props != null
? props
.stream()
.map(StructuredProperty::getValue)
.filter(StringUtils::isNotBlank)
.collect(Collectors.toList())
: new ArrayList<>();
}
}

View File

@ -1,12 +1,16 @@
package eu.dnetlib.dhp.broker.oa.util;
import java.util.List;
import java.util.function.BiConsumer;
import java.util.function.Function;
import eu.dnetlib.broker.objects.OpenAireEventPayload;
import eu.dnetlib.broker.objects.Provenance;
import eu.dnetlib.broker.objects.Publication;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Result;
public final class UpdateInfo<T> {
@ -66,12 +70,41 @@ public final class UpdateInfo<T> {
return trust;
}
public void compileHighlight(final OpenAireEventPayload payload) {
compileHighlight.accept(payload.getHighlight(), getHighlightValue());
}
public String getHighlightValueAsString() {
return highlightToString.apply(getHighlightValue());
}
public OpenAireEventPayload asBrokerPayload() {
final Publication p = ConversionUtils.oafResultToBrokerPublication(getSource());
compileHighlight.accept(p, getHighlightValue());
final Publication hl = new Publication();
compileHighlight.accept(hl, getHighlightValue());
final String provId = getSource().getOriginalId().stream().findFirst().orElse(null);
final String provRepo = getSource()
.getCollectedfrom()
.stream()
.map(KeyValue::getValue)
.findFirst()
.orElse(null);
final String provUrl = getSource()
.getInstance()
.stream()
.map(Instance::getUrl)
.flatMap(List::stream)
.findFirst()
.orElse(null);
;
final Provenance provenance = new Provenance().setId(provId).setRepositoryName(provRepo).setUrl(provUrl);
return new OpenAireEventPayload()
.setPublication(p)
.setHighlight(hl)
.setTrust(trust)
.setProvenance(provenance);
}
}

View File

@ -0,0 +1,93 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.2-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-doiboost</artifactId>
<build>
<plugins>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>4.0.1</version>
<executions>
<execution>
<id>scala-compile-first</id>
<phase>initialize</phase>
<goals>
<goal>add-source</goal>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>scala-test-compile</id>
<phase>process-test-resources</phase>
<goals>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
<configuration>
<scalaVersion>${scala.version}</scalaVersion>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.3.4</version>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-common</artifactId>
<version>${project.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.cxf</groupId>
<artifactId>cxf-rt-transports-http</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-schemas</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.jayway.jsonpath</groupId>
<artifactId>json-path</artifactId>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,374 @@
package eu.dnetlib.doiboost
import eu.dnetlib.dhp.schema.action.AtomicAction
import eu.dnetlib.dhp.schema.oaf.{DataInfo, Dataset, Field, Instance, KeyValue, Oaf, Organization, Publication, Qualifier, Relation, Result, StructuredProperty}
import eu.dnetlib.dhp.utils.DHPUtils
import org.apache.commons.lang3.StringUtils
import org.codehaus.jackson.map.ObjectMapper
import org.json4s
import org.json4s.DefaultFormats
import org.json4s.jackson.JsonMethods.parse
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._
import scala.io.Source
case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {}
case class DoiBoostAffiliation(PaperId:Long, AffiliationId:Long, GridId:Option[String], OfficialPage:Option[String], DisplayName:Option[String]){}
object DoiBoostMappingUtil {
def getUnknownCountry(): Qualifier = {
createQualifier("UNKNOWN","UNKNOWN","dnet:countries","dnet:countries")
}
def generateMAGAffiliationId(affId: String): String = {
s"20|microsoft___$SEPARATOR${DHPUtils.md5(affId)}"
}
val logger: Logger = LoggerFactory.getLogger(getClass)
//STATIC STRING
val MAG = "microsoft"
val MAG_NAME = "Microsoft Academic Graph"
val ORCID = "ORCID"
val CROSSREF = "Crossref"
val UNPAYWALL = "UnpayWall"
val GRID_AC = "grid.ac"
val WIKPEDIA = "wikpedia"
val doiBoostNSPREFIX = "doiboost____"
val OPENAIRE_PREFIX = "openaire____"
val SEPARATOR = "::"
val DNET_LANGUAGES = "dnet:languages"
val PID_TYPES = "dnet:pid_types"
val invalidName = List(",", "none none", "none, none", "none &na;", "(:null)", "test test test", "test test", "test", "&na; &na;")
def toActionSet(item:Oaf) :(String, String) = {
val mapper = new ObjectMapper()
item match {
case dataset: Dataset =>
val a: AtomicAction[Dataset] = new AtomicAction[Dataset]
a.setClazz(classOf[Dataset])
a.setPayload(dataset)
(dataset.getClass.getCanonicalName, mapper.writeValueAsString(a))
case publication: Publication =>
val a: AtomicAction[Publication] = new AtomicAction[Publication]
a.setClazz(classOf[Publication])
a.setPayload(publication)
(publication.getClass.getCanonicalName, mapper.writeValueAsString(a))
case organization: Organization =>
val a: AtomicAction[Organization] = new AtomicAction[Organization]
a.setClazz(classOf[Organization])
a.setPayload(organization)
(organization.getClass.getCanonicalName, mapper.writeValueAsString(a))
case relation: Relation =>
val a: AtomicAction[Relation] = new AtomicAction[Relation]
a.setClazz(classOf[Relation])
a.setPayload(relation)
(relation.getClass.getCanonicalName, mapper.writeValueAsString(a))
case _ =>
null
}
}
def toHostedByItem(input:String): (String, HostedByItemType) = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = parse(input)
val c :Map[String,HostedByItemType] = json.extract[Map[String, HostedByItemType]]
(c.keys.head, c.values.head)
}
def toISSNPair(publication: Publication) : (String, Publication) = {
val issn = if (publication.getJournal == null) null else publication.getJournal.getIssnPrinted
val eissn =if (publication.getJournal == null) null else publication.getJournal.getIssnOnline
val lissn =if (publication.getJournal == null) null else publication.getJournal.getIssnLinking
if (issn!= null && issn.nonEmpty)
(issn, publication)
else if(eissn!= null && eissn.nonEmpty)
(eissn, publication)
else if(lissn!= null && lissn.nonEmpty)
(lissn, publication)
else
(publication.getId, publication)
}
def generateGridAffiliationId(gridId:String) :String = {
s"20|grid________::${DHPUtils.md5(gridId.toLowerCase().trim())}"
}
def fixResult(result: Dataset) :Dataset = {
val instanceType = result.getInstance().asScala.find(i => i.getInstancetype != null && i.getInstancetype.getClassid.nonEmpty)
if (instanceType.isDefined) {
result.getInstance().asScala.foreach(i => i.setInstancetype(instanceType.get.getInstancetype))
}
result.getInstance().asScala.foreach(i => {
i.setHostedby(getUbknownHostedBy())
})
result
}
def getUbknownHostedBy():KeyValue = {
val hb = new KeyValue
hb.setValue("Unknown Repository")
hb.setKey(s"10|$OPENAIRE_PREFIX::55045bd2a65019fd8e6741a755395c8c")
hb
}
def getOpenAccessQualifier():Qualifier = {
createQualifier("OPEN","Open Access","dnet:access_modes", "dnet:access_modes")
}
def getRestrictedQualifier():Qualifier = {
createQualifier("RESTRICTED","Restricted","dnet:access_modes", "dnet:access_modes")
}
def fixPublication(input:((String,Publication), (String,HostedByItemType))): Publication = {
val publication = input._1._2
val item = if (input._2 != null) input._2._2 else null
val instanceType = publication.getInstance().asScala.find(i => i.getInstancetype != null && i.getInstancetype.getClassid.nonEmpty)
if (instanceType.isDefined) {
publication.getInstance().asScala.foreach(i => i.setInstancetype(instanceType.get.getInstancetype))
}
publication.getInstance().asScala.foreach(i => {
val hb = new KeyValue
if (item != null) {
hb.setValue(item.officialname)
hb.setKey(generateDSId(item.id))
if (item.openAccess)
i.setAccessright(getOpenAccessQualifier())
publication.setBestaccessright(getOpenAccessQualifier())
}
else {
hb.setValue("Unknown Repository")
hb.setKey(s"10|$OPENAIRE_PREFIX::55045bd2a65019fd8e6741a755395c8c")
}
i.setHostedby(hb)
})
val ar = publication.getInstance().asScala.filter(i => i.getInstancetype != null && i.getAccessright!= null && i.getAccessright.getClassid!= null).map(f=> f.getAccessright.getClassid)
if (ar.nonEmpty) {
if(ar.contains("OPEN")){
publication.setBestaccessright(getOpenAccessQualifier())
}
else {
publication.setBestaccessright(getRestrictedQualifier())
}
}
publication
}
def generateDSId(input: String): String = {
val b = StringUtils.substringBefore(input, "::")
val a = StringUtils.substringAfter(input, "::")
s"10|${b}::${DHPUtils.md5(a)}"
}
def generateDataInfo(): DataInfo = {
generateDataInfo("0.9")
}
def filterPublication(publication: Publication): Boolean = {
//Case empty publication
if (publication == null)
return false
//Case publication with no title
if (publication.getTitle == null || publication.getTitle.size == 0)
return false
val s = publication.getTitle.asScala.count(p => p.getValue != null
&& p.getValue.nonEmpty && !p.getValue.equalsIgnoreCase("[NO TITLE AVAILABLE]"))
if (s == 0)
return false
// fixes #4360 (test publisher)
val publisher = if (publication.getPublisher != null) publication.getPublisher.getValue else null
if (publisher != null && (publisher.equalsIgnoreCase("Test accounts") || publisher.equalsIgnoreCase("CrossRef Test Account"))) {
return false;
}
//Publication with no Author
if (publication.getAuthor == null || publication.getAuthor.size() == 0)
return false
//filter invalid author
val authors = publication.getAuthor.asScala.map(s => {
if (s.getFullname.nonEmpty) {
s.getFullname
}
else
s"${
s.getName
} ${
s.getSurname
}"
})
val c = authors.count(isValidAuthorName)
if (c == 0)
return false
// fixes #4368
if (authors.count(s => s.equalsIgnoreCase("Addie Jackson")) > 0 && "Elsevier BV".equalsIgnoreCase(publication.getPublisher.getValue))
return false
true
}
def isValidAuthorName(fullName: String): Boolean = {
if (fullName == null || fullName.isEmpty)
return false
if (invalidName.contains(fullName.toLowerCase.trim))
return false
true
}
def generateDataInfo(trust: String): DataInfo = {
val di = new DataInfo
di.setDeletedbyinference(false)
di.setInferred(false)
di.setInvisible(false)
di.setTrust(trust)
di.setProvenanceaction(createQualifier("sysimport:actionset", "dnet:provenanceActions"))
di
}
def createSP(value: String, classId: String, schemeId: String): StructuredProperty = {
val sp = new StructuredProperty
sp.setQualifier(createQualifier(classId, schemeId))
sp.setValue(value)
sp
}
def createSP(value: String, classId: String, schemeId: String, dataInfo: DataInfo): StructuredProperty = {
val sp = new StructuredProperty
sp.setQualifier(createQualifier(classId, schemeId))
sp.setValue(value)
sp.setDataInfo(dataInfo)
sp
}
def createCrossrefCollectedFrom(): KeyValue = {
val cf = new KeyValue
cf.setValue(CROSSREF)
cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + DHPUtils.md5(CROSSREF.toLowerCase))
cf
}
def createUnpayWallCollectedFrom(): KeyValue = {
val cf = new KeyValue
cf.setValue(UNPAYWALL)
cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + DHPUtils.md5(UNPAYWALL.toLowerCase))
cf
}
def createORIDCollectedFrom(): KeyValue = {
val cf = new KeyValue
cf.setValue(ORCID)
cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + DHPUtils.md5(ORCID.toLowerCase))
cf
}
def generateIdentifier (oaf: Result, doi: String): String = {
val id = DHPUtils.md5 (doi.toLowerCase)
if (oaf.isInstanceOf[Dataset] )
return s"60|${
doiBoostNSPREFIX
}${
SEPARATOR
}${
id
}"
s"50|${
doiBoostNSPREFIX
}${
SEPARATOR
}${
id
}"
}
def createMAGCollectedFrom(): KeyValue = {
val cf = new KeyValue
cf.setValue(MAG_NAME)
cf.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + DHPUtils.md5(MAG))
cf
}
def createQualifier(clsName: String, clsValue: String, schName: String, schValue: String): Qualifier = {
val q = new Qualifier
q.setClassid(clsName)
q.setClassname(clsValue)
q.setSchemeid(schName)
q.setSchemename(schValue)
q
}
def createQualifier(cls: String, sch: String): Qualifier = {
createQualifier(cls, cls, sch, sch)
}
def asField[T](value: T): Field[T] = {
val tmp = new Field[T]
tmp.setValue(value)
tmp
}
}

View File

@ -0,0 +1,80 @@
package eu.dnetlib.doiboost
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.action.AtomicAction
import eu.dnetlib.dhp.schema.oaf.{Organization, Publication, Relation, Dataset => OafDataset}
import org.apache.commons.io.IOUtils
import org.apache.hadoop.io.Text
import org.apache.hadoop.io.compress.GzipCodec
import org.apache.hadoop.mapred.SequenceFileOutputFormat
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
import org.slf4j.{Logger, LoggerFactory}
object SparkGenerateDOIBoostActionSet {
val logger: Logger = LoggerFactory.getLogger(getClass)
def main(args: Array[String]): Unit = {
val conf: SparkConf = new SparkConf()
val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/generate_doiboost_as_params.json")))
parser.parseArgument(args)
val spark: SparkSession =
SparkSession
.builder()
.config(conf)
.appName(getClass.getSimpleName)
.master(parser.get("master")).getOrCreate()
implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication]
implicit val mapEncoderOrg: Encoder[Organization] = Encoders.kryo[Organization]
implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset]
implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation]
implicit val mapEncoderAS: Encoder[(String, String)] = Encoders.tuple(Encoders.STRING, Encoders.STRING)
implicit val mapEncoderAtomiAction: Encoder[AtomicAction[OafDataset]] = Encoders.kryo[AtomicAction[OafDataset]]
val dbPublicationPath = parser.get("dbPublicationPath")
val dbDatasetPath = parser.get("dbDatasetPath")
val crossRefRelation = parser.get("crossRefRelation")
val dbaffiliationRelationPath = parser.get("dbaffiliationRelationPath")
val dbOrganizationPath = parser.get("dbOrganizationPath")
val workingDirPath = parser.get("targetPath")
spark.read.load(dbDatasetPath).as[OafDataset]
.map(d =>DoiBoostMappingUtil.fixResult(d))
.map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
.write.mode(SaveMode.Overwrite).save(s"$workingDirPath/actionSet")
spark.read.load(dbPublicationPath).as[Publication]
.map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
.write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet")
spark.read.load(dbOrganizationPath).as[Organization]
.map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
.write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet")
spark.read.load(crossRefRelation).as[Relation]
.map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
.write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet")
spark.read.load(dbaffiliationRelationPath).as[Relation]
.map(d=>DoiBoostMappingUtil.toActionSet(d))(Encoders.tuple(Encoders.STRING, Encoders.STRING))
.write.mode(SaveMode.Append).save(s"$workingDirPath/actionSet")
val d: Dataset[(String, String)] =spark.read.load(s"$workingDirPath/actionSet").as[(String,String)]
d.rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$workingDirPath/rawset", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text,Text]], classOf[GzipCodec])
}
}

View File

@ -0,0 +1,140 @@
package eu.dnetlib.doiboost
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Dataset => OafDataset, Organization}
import eu.dnetlib.doiboost.mag.ConversionUtil
import org.apache.commons.io.IOUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._
object SparkGenerateDoiBoost {
def main(args: Array[String]): Unit = {
val logger: Logger = LoggerFactory.getLogger(getClass)
val conf: SparkConf = new SparkConf()
val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json")))
parser.parseArgument(args)
val spark: SparkSession =
SparkSession
.builder()
.config(conf)
.appName(getClass.getSimpleName)
.master(parser.get("master")).getOrCreate()
import spark.implicits._
val hostedByMapPath = parser.get("hostedByMapPath")
val workingDirPath = parser.get("workingDirPath")
implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication]
implicit val mapEncoderOrg: Encoder[Organization] = Encoders.kryo[Organization]
implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset]
implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPub)
implicit val mapEncoderRel: Encoder[Relation] = Encoders.kryo[Relation]
logger.info("Phase 2) Join Crossref with UnpayWall")
val crossrefPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/crossrefPublication").as[Publication].map(p => (p.getId, p))
val uwPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/uwPublication").as[Publication].map(p => (p.getId, p))
def applyMerge(item:((String, Publication), (String, Publication))) : Publication =
{
val crossrefPub = item._1._2
if (item._2!= null) {
val otherPub = item._2._2
if (otherPub != null) {
crossrefPub.mergeFrom(otherPub)
}
}
crossrefPub
}
crossrefPublication.joinWith(uwPublication, crossrefPublication("_1").equalTo(uwPublication("_1")), "left").map(applyMerge).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/firstJoin")
logger.info("Phase 3) Join Result with ORCID")
val fj: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
val orcidPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/orcidPublication").as[Publication].map(p => (p.getId, p))
fj.joinWith(orcidPublication, fj("_1").equalTo(orcidPublication("_1")), "left").map(applyMerge).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/secondJoin")
logger.info("Phase 3) Join Result with MAG")
val sj: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/secondJoin").as[Publication].map(p => (p.getId, p))
val magPublication: Dataset[(String, Publication)] = spark.read.load(s"$workingDirPath/magPublication").as[Publication].map(p => (p.getId, p))
sj.joinWith(magPublication, sj("_1").equalTo(magPublication("_1")), "left").map(applyMerge).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublication")
val doiBoostPublication: Dataset[(String,Publication)] = spark.read.load(s"$workingDirPath/doiBoostPublication").as[Publication].filter(p=>DoiBoostMappingUtil.filterPublication(p)).map(DoiBoostMappingUtil.toISSNPair)(tupleForJoinEncoder)
val hostedByDataset : Dataset[(String, HostedByItemType)] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath).map(DoiBoostMappingUtil.toHostedByItem))
doiBoostPublication.joinWith(hostedByDataset, doiBoostPublication("_1").equalTo(hostedByDataset("_1")), "left")
.map(DoiBoostMappingUtil.fixPublication)
.write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationFiltered")
val affiliationPath = parser.get("affiliationPath")
val paperAffiliationPath = parser.get("paperAffiliationPath")
val affiliation = spark.read.load(affiliationPath).select(col("AffiliationId"), col("GridId"), col("OfficialPage"), col("DisplayName"))
val paperAffiliation = spark.read.load(paperAffiliationPath).select(col("AffiliationId").alias("affId"), col("PaperId"))
val a:Dataset[DoiBoostAffiliation] = paperAffiliation
.joinWith(affiliation, paperAffiliation("affId").equalTo(affiliation("AffiliationId")))
.select(col("_1.PaperId"), col("_2.AffiliationId"), col("_2.GridId"), col("_2.OfficialPage"), col("_2.DisplayName")).as[DoiBoostAffiliation]
val magPubs:Dataset[(String,Publication)]= spark.read.load(s"$workingDirPath/doiBoostPublicationFiltered").as[Publication]
.map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p))(tupleForJoinEncoder).filter(s =>s._1!= null )
magPubs.joinWith(a,magPubs("_1").equalTo(a("PaperId"))).flatMap(item => {
val pub:Publication = item._1._2
val affiliation = item._2
val affId:String = if (affiliation.GridId.isDefined) DoiBoostMappingUtil.generateGridAffiliationId(affiliation.GridId.get) else DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString)
val r:Relation = new Relation
r.setSource(pub.getId)
r.setTarget(affId)
r.setRelType("resultOrganization")
r.setRelClass("hasAuthorInstitution")
r.setSubRelType("affiliation")
r.setDataInfo(pub.getDataInfo)
r.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava)
val r1:Relation = new Relation
r1.setTarget(pub.getId)
r1.setSource(affId)
r1.setRelType("resultOrganization")
r1.setRelClass("isAuthorInstitutionOf")
r1.setSubRelType("affiliation")
r1.setDataInfo(pub.getDataInfo)
r1.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava)
List(r, r1)
})(mapEncoderRel).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationAffiliation")
magPubs.joinWith(a,magPubs("_1").equalTo(a("PaperId"))).map( item => {
val affiliation = item._2
if (affiliation.GridId.isEmpty) {
val o = new Organization
o.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava)
o.setDataInfo(DoiBoostMappingUtil.generateDataInfo())
o.setId(DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString))
o.setOriginalId(List(affiliation.AffiliationId.toString).asJava)
if (affiliation.DisplayName.nonEmpty)
o.setLegalname(DoiBoostMappingUtil.asField(affiliation.DisplayName.get))
if (affiliation.OfficialPage.isDefined)
o.setWebsiteurl(DoiBoostMappingUtil.asField(affiliation.OfficialPage.get))
o.setCountry(DoiBoostMappingUtil.getUnknownCountry())
o
}
else
null
}).filter(o=> o!=null).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostOrganization")
}
}

View File

@ -0,0 +1,440 @@
package eu.dnetlib.doiboost.crossref
import eu.dnetlib.dhp.schema.oaf._
import eu.dnetlib.dhp.utils.DHPUtils
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
import org.apache.commons.lang.StringUtils
import org.json4s
import org.json4s.DefaultFormats
import org.json4s.JsonAST._
import org.json4s.jackson.JsonMethods._
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.util.matching.Regex
case class mappingAffiliation(name: String) {}
case class mappingAuthor(given: Option[String], family: String, ORCID: Option[String], affiliation: Option[mappingAffiliation]) {}
case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}
case object Crossref2Oaf {
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
val mappingCrossrefType = Map(
"book-section" -> "publication",
"book" -> "publication",
"book-chapter" -> "publication",
"book-part" -> "publication",
"book-series" -> "publication",
"book-set" -> "publication",
"book-track" -> "publication",
"edited-book" -> "publication",
"reference-book" -> "publication",
"monograph" -> "publication",
"journal-article" -> "publication",
"dissertation" -> "publication",
"other" -> "publication",
"peer-review" -> "publication",
"proceedings" -> "publication",
"proceedings-article" -> "publication",
"reference-entry" -> "publication",
"report" -> "publication",
"report-series" -> "publication",
"standard" -> "publication",
"standard-series" -> "publication",
"posted-content" -> "publication",
"dataset" -> "dataset"
)
val mappingCrossrefSubType = Map(
"book-section" -> "0013 Part of book or chapter of book",
"book" -> "0002 Book",
"book-chapter" -> "0013 Part of book or chapter of book",
"book-part" -> "0013 Part of book or chapter of book",
"book-series" -> "0002 Book",
"book-set" -> "0002 Book",
"book-track" -> "0002 Book",
"edited-book" -> "0002 Book",
"reference-book" -> "0002 Book",
"monograph" -> "0002 Book",
"journal-article" -> "0001 Article",
"dissertation" -> "0006 Doctoral thesis",
"other" -> "0038 Other literature type",
"peer-review" -> "0015 Review",
"proceedings" -> "0004 Conference object",
"proceedings-article" -> "0004 Conference object",
"reference-entry" -> "0013 Part of book or chapter of book",
"report" -> "0017 Report",
"report-series" -> "0017 Report",
"standard" -> "0038 Other literature type",
"standard-series" -> "0038 Other literature type",
"dataset" -> "0021 Dataset",
"preprint" -> "0016 Preprint",
"report" -> "0017 Report"
)
def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
//MAPPING Crossref DOI into PID
val doi: String = (json \ "DOI").extract[String]
result.setPid(List(createSP(doi, "doi", PID_TYPES)).asJava)
//MAPPING Crossref DOI into OriginalId
//and Other Original Identifier of dataset like clinical-trial-number
val clinicalTrialNumbers = for (JString(ctr) <- json \ "clinical-trial-number") yield ctr
val alternativeIds = for (JString(ids) <- json \ "alternative-id") yield ids
val tmp = clinicalTrialNumbers ::: alternativeIds ::: List(doi)
result.setOriginalId(tmp.filter(id => id != null).asJava)
//Set identifier as {50|60} | doiboost____::md5(DOI)
result.setId(generateIdentifier(result, doi))
// Add DataInfo
result.setDataInfo(generateDataInfo())
result.setLastupdatetimestamp((json \ "indexed" \ "timestamp").extract[Long])
result.setDateofcollection((json \ "indexed" \ "date-time").extract[String])
result.setCollectedfrom(List(createCrossrefCollectedFrom()).asJava)
// Publisher ( Name of work's publisher mapped into Result/Publisher)
val publisher = (json \ "publisher").extractOrElse[String](null)
if (publisher!= null && publisher.nonEmpty)
result.setPublisher(asField(publisher))
// TITLE
val mainTitles = for {JString(title) <- json \ "title" if title.nonEmpty} yield createSP(title, "main title", "dnet:dataCite_title")
val originalTitles = for {JString(title) <- json \ "original-title" if title.nonEmpty} yield createSP(title, "alternative title", "dnet:dataCite_title")
val shortTitles = for {JString(title) <- json \ "short-title" if title.nonEmpty} yield createSP(title, "alternative title", "dnet:dataCite_title")
val subtitles = for {JString(title) <- json \ "subtitle" if title.nonEmpty} yield createSP(title, "subtitle", "dnet:dataCite_title")
result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava)
// DESCRIPTION
val descriptionList = for {JString(description) <- json \ "abstract"} yield asField(description)
result.setDescription(descriptionList.asJava)
// Source
val sourceList = for {JString(source) <- json \ "source" if source!= null && source.nonEmpty} yield asField(source)
result.setSource(sourceList.asJava)
//RELEVANT DATE Mapping
val createdDate = generateDate((json \ "created" \ "date-time").extract[String], (json \ "created" \ "date-parts").extract[List[List[Int]]], "created", "dnet:dataCite_date")
val postedDate = generateDate((json \ "posted" \ "date-time").extractOrElse[String](null), (json \ "posted" \ "date-parts").extract[List[List[Int]]], "available", "dnet:dataCite_date")
val acceptedDate = generateDate((json \ "accepted" \ "date-time").extractOrElse[String](null), (json \ "accepted" \ "date-parts").extract[List[List[Int]]], "accepted", "dnet:dataCite_date")
val publishedPrintDate = generateDate((json \ "published-print" \ "date-time").extractOrElse[String](null), (json \ "published-print" \ "date-parts").extract[List[List[Int]]], "published-print", "dnet:dataCite_date")
val publishedOnlineDate = generateDate((json \ "published-online" \ "date-time").extractOrElse[String](null), (json \ "published-online" \ "date-parts").extract[List[List[Int]]], "published-online", "dnet:dataCite_date")
val issuedDate = extractDate((json \ "issued" \ "date-time").extractOrElse[String](null), (json \ "issued" \ "date-parts").extract[List[List[Int]]])
if (StringUtils.isNotBlank(issuedDate)) {
result.setDateofacceptance(asField(issuedDate))
}
else {
result.setDateofacceptance(asField(createdDate.getValue))
}
result.setRelevantdate(List(createdDate, postedDate, acceptedDate, publishedOnlineDate, publishedPrintDate).filter(p => p != null).asJava)
//Mapping Subject
val subjectList:List[String] = (json \ "subject").extractOrElse[List[String]](List())
if (subjectList.nonEmpty) {
result.setSubject(subjectList.map(s=> createSP(s, "keywords", "dnet:subject_classification_typologies")).asJava)
}
//Mapping Author
val authorList: List[mappingAuthor] = (json \ "author").extractOrElse[List[mappingAuthor]](List())
result.setAuthor(authorList.map(a => generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull)).asJava)
// Mapping instance
val instance = new Instance()
val license = for {
JString(lic) <- json \ "license" \ "URL"
} yield asField(lic)
val l = license.filter(d => StringUtils.isNotBlank(d.getValue))
if (l.nonEmpty)
instance.setLicense(l.head)
val has_review = (json \ "relation" \"has-review" \ "id")
if(has_review != JNothing)
instance.setRefereed(asField("peerReviewed"))
instance.setAccessright(getRestrictedQualifier())
result.setInstance(List(instance).asJava)
instance.setInstancetype(createQualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), "dnet:publication_resource", "dnet:publication_resource"))
result.setResourcetype(createQualifier(cobjCategory.substring(0, 4),"dnet:dataCite_resource"))
instance.setCollectedfrom(createCrossrefCollectedFrom())
if (StringUtils.isNotBlank(issuedDate)) {
instance.setDateofacceptance(asField(issuedDate))
}
val s: String = (json \ "URL").extract[String]
val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null).distinct
if (links.nonEmpty)
instance.setUrl(links.asJava)
result
}
def generateAuhtor(given: String, family: String, orcid: String): Author = {
val a = new Author
a.setName(given)
a.setSurname(family)
a.setFullname(s"$given $family")
if (StringUtils.isNotBlank(orcid))
a.setPid(List(createSP(orcid, ORCID, PID_TYPES)).asJava)
a
}
def convert(input: String): List[Oaf] = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = parse(input)
var resultList: List[Oaf] = List()
val objectType = (json \ "type").extractOrElse[String](null)
val objectSubType = (json \ "subtype").extractOrElse[String](null)
if (objectType == null)
return resultList
val result = generateItemFromType(objectType, objectSubType)
if (result == null)
return List()
val cOBJCategory = mappingCrossrefSubType.getOrElse(objectType, mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type"));
mappingResult(result, json, cOBJCategory)
val funderList: List[mappingFunder] = (json \ "funder").extractOrElse[List[mappingFunder]](List())
if (funderList.nonEmpty) {
resultList = resultList ::: mappingFunderToRelations(funderList, result.getId, createCrossrefCollectedFrom(), result.getDataInfo, result.getLastupdatetimestamp)
}
result match {
case publication: Publication => convertPublication(publication, json, cOBJCategory)
case dataset: Dataset => convertDataset(dataset)
}
resultList = resultList ::: List(result)
resultList
}
def mappingFunderToRelations(funders: List[mappingFunder], sourceId: String, cf: KeyValue, di: DataInfo, ts: Long): List[Relation] = {
val queue = new mutable.Queue[Relation]
def snfRule(award:String): String = {
var tmp1 = StringUtils.substringAfter(award,"_")
val tmp2 = StringUtils.substringBefore(tmp1,"/")
logger.debug(s"From $award to $tmp2")
tmp2
}
def extractECAward(award: String): String = {
val awardECRegex: Regex = "[0-9]{4,9}".r
if (awardECRegex.findAllIn(award).hasNext)
return awardECRegex.findAllIn(award).max
null
}
def generateRelation(sourceId:String, targetId:String, nsPrefix:String) :Relation = {
val r = new Relation
r.setSource(sourceId)
r.setTarget(s"$nsPrefix::$targetId")
r.setRelType("resultProject")
r.setRelClass("isProducedBy")
r.setSubRelType("outcome")
r.setCollectedfrom(List(cf).asJava)
r.setDataInfo(di)
r.setLastupdatetimestamp(ts)
r
}
def generateSimpleRelationFromAward(funder: mappingFunder, nsPrefix: String, extractField: String => String): Unit = {
if (funder.award.isDefined && funder.award.get.nonEmpty)
funder.award.get.map(extractField).filter(a => a!= null && a.nonEmpty).foreach(
award => {
val targetId = DHPUtils.md5(award)
queue += generateRelation(sourceId, targetId, nsPrefix)
}
)
}
if (funders != null)
funders.foreach(funder => {
if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) {
funder.DOI.get match {
case "10.13039/100010663" |
"10.13039/100010661" |
"10.13039/501100007601" |
"10.13039/501100000780" |
"10.13039/100010665" => generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
case "10.13039/100011199" |
"10.13039/100004431" |
"10.13039/501100004963" |
"10.13039/501100000780" => generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
case "10.13039/501100000781" => generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
case "10.13039/100000001" => generateSimpleRelationFromAward(funder, "nsf_________", a => a)
case "10.13039/501100001665" => generateSimpleRelationFromAward(funder, "anr_________", a => a)
case "10.13039/501100002341" => generateSimpleRelationFromAward(funder, "aka_________", a => a)
case "10.13039/501100001602" => generateSimpleRelationFromAward(funder, "aka_________", a => a.replace("SFI", ""))
case "10.13039/501100000923" => generateSimpleRelationFromAward(funder, "arc_________", a => a)
case "10.13039/501100000038"=> queue += generateRelation(sourceId,"1e5e62235d094afd01cd56e65112fc63", "nserc_______" )
case "10.13039/501100000155"=> queue += generateRelation(sourceId,"1e5e62235d094afd01cd56e65112fc63", "sshrc_______" )
case "10.13039/501100000024"=> queue += generateRelation(sourceId,"1e5e62235d094afd01cd56e65112fc63", "cihr________" )
case "10.13039/501100002848" => generateSimpleRelationFromAward(funder, "conicytf____", a => a)
case "10.13039/501100003448" => generateSimpleRelationFromAward(funder, "gsrt________", extractECAward)
case "10.13039/501100010198" => generateSimpleRelationFromAward(funder, "sgov________", a=>a)
case "10.13039/501100004564" => generateSimpleRelationFromAward(funder, "mestd_______", extractECAward)
case "10.13039/501100003407" => generateSimpleRelationFromAward(funder, "miur________", a=>a)
queue += generateRelation(sourceId,"1e5e62235d094afd01cd56e65112fc63", "miur________" )
case "10.13039/501100006588" |
"10.13039/501100004488" => generateSimpleRelationFromAward(funder, "irb_hr______", a=>a.replaceAll("Project No.", "").replaceAll("HRZZ-","") )
case "10.13039/501100006769"=> generateSimpleRelationFromAward(funder, "rsf_________", a=>a)
case "10.13039/501100001711"=> generateSimpleRelationFromAward(funder, "snsf________", snfRule)
case "10.13039/501100004410"=> generateSimpleRelationFromAward(funder, "tubitakf____", a =>a)
case "10.10.13039/100004440"=> generateSimpleRelationFromAward(funder, "wt__________", a =>a)
case "10.13039/100004440"=> queue += generateRelation(sourceId,"1e5e62235d094afd01cd56e65112fc63", "wt__________" )
case _ => logger.debug("no match for "+funder.DOI.get )
}
} else {
funder.name match {
case "European Unions Horizon 2020 research and innovation program" => generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
case "European Union's" =>
generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
case "The French National Research Agency (ANR)" |
"The French National Research Agency" => generateSimpleRelationFromAward(funder, "anr_________", a => a)
case "CONICYT, Programa de Formación de Capital Humano Avanzado" => generateSimpleRelationFromAward(funder, "conicytf____", extractECAward)
case "Wellcome Trust Masters Fellowship" => queue += generateRelation(sourceId,"1e5e62235d094afd01cd56e65112fc63", "wt__________" )
case _ => logger.debug("no match for "+funder.name )
}
}
}
)
queue.toList
}
def convertDataset(dataset: Dataset): Unit = {
// TODO check if there are other info to map into the Dataset
}
def convertPublication(publication: Publication, json: JValue, cobjCategory: String): Unit = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
val containerTitles = for {JString(ct) <- json \ "container-title"} yield ct
//Mapping book
if (cobjCategory.toLowerCase.contains("book")) {
val ISBN = for {JString(isbn) <- json \ "ISBN"} yield isbn
if (ISBN.nonEmpty && containerTitles.nonEmpty) {
val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
if (publication.getSource != null) {
val l: List[Field[String]] = publication.getSource.asScala.toList
val ll: List[Field[String]] = l ::: List(asField(source))
publication.setSource(ll.asJava)
}
else
publication.setSource(List(asField(source)).asJava)
}
} else {
// Mapping Journal
val issnInfos = for {JArray(issn_types) <- json \ "issn-type"
JObject(issn_type) <- issn_types
JField("type", JString(tp)) <- issn_type
JField("value", JString(vl)) <- issn_type
} yield Tuple2(tp, vl)
val volume = (json \ "volume").extractOrElse[String](null)
if (containerTitles.nonEmpty) {
val journal = new Journal
journal.setName(containerTitles.head)
if (issnInfos.nonEmpty) {
issnInfos.foreach(tp => {
tp._1 match {
case "electronic" => journal.setIssnOnline(tp._2)
case "print" => journal.setIssnPrinted(tp._2)
}
})
}
journal.setVol(volume)
val page = (json \ "page").extractOrElse[String](null)
if (page != null) {
val pp = page.split("-")
if (pp.nonEmpty)
journal.setSp(pp.head)
if (pp.size > 1)
journal.setEp(pp(1))
}
publication.setJournal(journal)
}
}
}
def extractDate(dt: String, datePart: List[List[Int]]): String = {
if (StringUtils.isNotBlank(dt))
return dt
if (datePart != null && datePart.size == 1) {
val res = datePart.head
if (res.size == 3) {
val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d"
if (dp.length == 10) {
return dp
}
}
}
null
}
def generateDate(dt: String, datePart: List[List[Int]], classId: String, schemeId: String): StructuredProperty = {
val dp = extractDate(dt, datePart)
if (StringUtils.isNotBlank(dp))
return createSP(dp, classId, schemeId)
null
}
def generateItemFromType(objectType: String, objectSubType: String): Result = {
if (mappingCrossrefType.contains(objectType)) {
if (mappingCrossrefType(objectType).equalsIgnoreCase("publication"))
return new Publication()
if (mappingCrossrefType(objectType).equalsIgnoreCase("dataset"))
return new Dataset()
}
null
}
}

View File

@ -0,0 +1,103 @@
package eu.dnetlib.doiboost.crossref;
import java.io.ByteArrayOutputStream;
import java.util.zip.Inflater;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
public class CrossrefImporter {
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(
CrossrefImporter.class
.getResourceAsStream(
"/eu/dnetlib/dhp/doiboost/import_from_es.json")));
parser.parseArgument(args);
final String hdfsuri = parser.get("namenode");
System.out.println("HDFS URI" + hdfsuri);
Path hdfswritepath = new Path(parser.get("targetPath"));
System.out.println("TargetPath: " + hdfsuri);
final Long timestamp = StringUtils.isNotBlank(parser.get("timestamp"))
? Long.parseLong(parser.get("timestamp"))
: -1;
if (timestamp > 0)
System.out.println("Timestamp added " + timestamp);
// ====== Init HDFS File System Object
Configuration conf = new Configuration();
// Set FileSystem URI
conf.set("fs.defaultFS", hdfsuri);
// Because of Maven
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
ESClient client = timestamp > 0
? new ESClient("ip-90-147-167-25.ct1.garrservices.it", "crossref", timestamp)
: new ESClient("ip-90-147-167-25.ct1.garrservices.it", "crossref");
try (SequenceFile.Writer writer = SequenceFile
.createWriter(
conf,
SequenceFile.Writer.file(hdfswritepath),
SequenceFile.Writer.keyClass(IntWritable.class),
SequenceFile.Writer.valueClass(Text.class))) {
int i = 0;
long start = System.currentTimeMillis();
long end = 0;
final IntWritable key = new IntWritable(i);
final Text value = new Text();
while (client.hasNext()) {
key.set(i++);
value.set(client.next());
writer.append(key, value);
if (i % 100000 == 0) {
end = System.currentTimeMillis();
final float time = (end - start) / 1000.0F;
System.out
.println(
String.format("Imported %d records last 100000 imported in %f seconds", i, time));
start = System.currentTimeMillis();
}
}
}
}
public static String decompressBlob(final String blob) {
try {
byte[] byteArray = Base64.decodeBase64(blob.getBytes());
final Inflater decompresser = new Inflater();
decompresser.setInput(byteArray);
final ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length);
byte[] buffer = new byte[8192];
while (!decompresser.finished()) {
int size = decompresser.inflate(buffer);
bos.write(buffer, 0, size);
}
byte[] unzippeddata = bos.toByteArray();
decompresser.end();
return new String(unzippeddata);
} catch (Throwable e) {
throw new RuntimeException("Wrong record:" + blob, e);
}
}
}

View File

@ -0,0 +1,114 @@
package eu.dnetlib.doiboost.crossref;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.jayway.jsonpath.JsonPath;
public class ESClient implements Iterator<String> {
private static final Logger logger = LoggerFactory.getLogger(ESClient.class);
static final String blobPath = "$.hits[*].hits[*]._source.blob";
static final String scrollIdPath = "$._scroll_id";
static final String JSON_NO_TS = "{\"size\":1000}";
static final String JSON_WITH_TS = "{\"size\":1000, \"query\":{\"range\":{\"timestamp\":{\"gte\":%d}}}}";
static final String JSON_SCROLL = "{\"scroll_id\":\"%s\",\"scroll\" : \"1m\"}";
private final String scrollId;
private List<String> buffer;
private final String esHost;
public ESClient(final String esHost, final String esIndex) throws IOException {
this.esHost = esHost;
final String body = getResponse(
String.format("http://%s:9200/%s/_search?scroll=1m", esHost, esIndex), JSON_NO_TS);
scrollId = getJPathString(scrollIdPath, body);
buffer = getBlobs(body);
}
public ESClient(final String esHost, final String esIndex, final long timestamp)
throws IOException {
this.esHost = esHost;
final String body = getResponse(
String.format("http://%s:9200/%s/_search?scroll=1m", esHost, esIndex),
String.format(JSON_WITH_TS, timestamp));
scrollId = getJPathString(scrollIdPath, body);
buffer = getBlobs(body);
}
private String getResponse(final String url, final String json) {
CloseableHttpClient client = HttpClients.createDefault();
try {
HttpPost httpPost = new HttpPost(url);
if (json != null) {
StringEntity entity = new StringEntity(json);
httpPost.setEntity(entity);
httpPost.setHeader("Accept", "application/json");
httpPost.setHeader("Content-type", "application/json");
}
CloseableHttpResponse response = client.execute(httpPost);
return IOUtils.toString(response.getEntity().getContent());
} catch (Throwable e) {
throw new RuntimeException("Error on executing request ", e);
} finally {
try {
client.close();
} catch (IOException e) {
throw new RuntimeException("Unable to close client ", e);
}
}
}
private String getJPathString(final String jsonPath, final String json) {
try {
Object o = JsonPath.read(json, jsonPath);
if (o instanceof String)
return (String) o;
return null;
} catch (Exception e) {
return "";
}
}
private List<String> getBlobs(final String body) {
final List<String> res = JsonPath.read(body, "$.hits.hits[*]._source.blob");
return res;
}
@Override
public boolean hasNext() {
return (buffer != null && !buffer.isEmpty());
}
@Override
public String next() {
final String nextItem = buffer.remove(0);
if (buffer.isEmpty()) {
final String json_param = String.format(JSON_SCROLL, scrollId);
final String body = getResponse(String.format("http://%s:9200/_search/scroll", esHost), json_param);
try {
buffer = getBlobs(body);
} catch (Throwable e) {
logger.error("Error on get next page: body:" + body);
}
}
return nextItem;
}
}

View File

@ -0,0 +1,98 @@
package eu.dnetlib.doiboost.crossref
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.oaf
import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Dataset => OafDataset}
import org.apache.commons.io.IOUtils
import org.apache.hadoop.io.{IntWritable, Text}
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
import org.slf4j.{Logger, LoggerFactory}
case class Reference(author: String, firstPage: String) {}
object SparkMapDumpIntoOAF {
def main(args: Array[String]): Unit = {
val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass)
val conf: SparkConf = new SparkConf()
val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json")))
parser.parseArgument(args)
val spark: SparkSession =
SparkSession
.builder()
.config(conf)
.appName(SparkMapDumpIntoOAF.getClass.getSimpleName)
.master(parser.get("master")).getOrCreate()
implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication]
implicit val mapEncoderRelatons: Encoder[Relation] = Encoders.kryo[Relation]
implicit val mapEncoderDatasets: Encoder[oaf.Dataset] = Encoders.kryo[OafDataset]
val sc = spark.sparkContext
val targetPath = parser.get("targetPath")
sc.sequenceFile(parser.get("sourcePath"), classOf[IntWritable], classOf[Text])
.map(k => k._2.toString).map(CrossrefImporter.decompressBlob)
.flatMap(k => Crossref2Oaf.convert(k)).saveAsObjectFile(s"${targetPath}/mixObject")
val inputRDD = sc.objectFile[Oaf](s"${targetPath}/mixObject").filter(p=> p!= null)
val distinctPubs:RDD[Publication] = inputRDD.filter(k => k != null && k.isInstanceOf[Publication])
.map(k => k.asInstanceOf[Publication]).map { p: Publication => Tuple2(p.getId, p) }.reduceByKey { case (p1: Publication, p2: Publication) =>
var r = if (p1 == null) p2 else p1
if (p1 != null && p2 != null) {
if (p1.getLastupdatetimestamp != null && p2.getLastupdatetimestamp != null) {
if (p1.getLastupdatetimestamp < p2.getLastupdatetimestamp)
r = p2
else
r = p1
} else {
r = if (p1.getLastupdatetimestamp == null) p2 else p1
}
}
r
}.map(_._2)
val pubs:Dataset[Publication] = spark.createDataset(distinctPubs)
pubs.write.mode(SaveMode.Overwrite).save(s"${targetPath}/publication")
val distincDatasets:RDD[OafDataset] = inputRDD.filter(k => k != null && k.isInstanceOf[OafDataset])
.map(k => k.asInstanceOf[OafDataset]).map(p => Tuple2(p.getId, p)).reduceByKey { case (p1: OafDataset, p2: OafDataset) =>
var r = if (p1 == null) p2 else p1
if (p1 != null && p2 != null) {
if (p1.getLastupdatetimestamp != null && p2.getLastupdatetimestamp != null) {
if (p1.getLastupdatetimestamp < p2.getLastupdatetimestamp)
r = p2
else
r = p1
} else {
r = if (p1.getLastupdatetimestamp == null) p2 else p1
}
}
r
}.map(_._2)
spark.createDataset(distincDatasets).write.mode(SaveMode.Overwrite).save(s"${targetPath}/dataset")
val distinctRels =inputRDD.filter(k => k != null && k.isInstanceOf[Relation])
.map(k => k.asInstanceOf[Relation]).map(r=> (s"${r.getSource}::${r.getTarget}",r))
.reduceByKey { case (p1: Relation, p2: Relation) =>
if (p1 == null) p2 else p1
}.map(_._2)
val rels: Dataset[Relation] = spark.createDataset(distinctRels)
rels.write.mode(SaveMode.Overwrite).save(s"${targetPath}/relations")
}
}

View File

@ -0,0 +1,314 @@
package eu.dnetlib.doiboost.mag
import eu.dnetlib.dhp.schema.oaf.{Instance, Journal, Publication, StructuredProperty}
import eu.dnetlib.doiboost.DoiBoostMappingUtil
import org.json4s
import org.json4s.DefaultFormats
import org.json4s.jackson.JsonMethods.parse
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.util.matching.Regex
case class MagPapers(PaperId: Long, Rank: Integer, Doi: String,
DocType: String, PaperTitle: String, OriginalTitle: String,
BookTitle: String, Year: Option[Integer], Date: Option[java.sql.Timestamp], Publisher: String,
JournalId: Option[Long], ConferenceSeriesId: Option[Long], ConferenceInstanceId: Option[Long],
Volume: String, Issue: String, FirstPage: String, LastPage: String,
ReferenceCount: Option[Long], CitationCount: Option[Long], EstimatedCitation: Option[Long],
OriginalVenue: String, FamilyId: Option[Long], CreatedDate: java.sql.Timestamp) {}
case class MagPaperAbstract(PaperId: Long, IndexedAbstract: String) {}
case class MagAuthor(AuthorId: Long, Rank: Option[Int], NormalizedName: Option[String], DisplayName: Option[String], LastKnownAffiliationId: Option[Long], PaperCount: Option[Long], CitationCount: Option[Long], CreatedDate: Option[java.sql.Timestamp]) {}
case class MagAffiliation(AffiliationId: Long, Rank: Int, NormalizedName: String, DisplayName: String, GridId: String, OfficialPage: String, WikiPage: String, PaperCount: Long, CitationCount: Long, Latitude: Option[Float], Longitude: Option[Float], CreatedDate: java.sql.Timestamp) {}
case class MagPaperAuthorAffiliation(PaperId: Long, AuthorId: Long, AffiliationId: Option[Long], AuthorSequenceNumber: Int, OriginalAuthor: String, OriginalAffiliation: String) {}
case class MagAuthorAffiliation(author: MagAuthor, affiliation:String)
case class MagPaperWithAuthorList(PaperId: Long, authors: List[MagAuthorAffiliation]) {}
case class MagPaperAuthorDenormalized(PaperId: Long, author: MagAuthor, affiliation:String) {}
case class MagPaperUrl(PaperId: Long, SourceType: Option[Int], SourceUrl: Option[String], LanguageCode: Option[String]) {}
case class MagUrlInstance(SourceUrl:String){}
case class MagUrl(PaperId: Long, instances: List[MagUrlInstance])
case class MagSubject(FieldOfStudyId:Long, DisplayName:String, MainType:Option[String], Score:Float){}
case class MagFieldOfStudy(PaperId:Long, subjects:List[MagSubject]) {}
case class MagJournal(JournalId: Long, Rank: Option[Int], NormalizedName: Option[String], DisplayName: Option[String], Issn: Option[String], Publisher: Option[String], Webpage: Option[String], PaperCount: Option[Long], CitationCount: Option[Long], CreatedDate: Option[java.sql.Timestamp]) {}
case class MagConferenceInstance(ci:Long, DisplayName:Option[String], Location:Option[String], StartDate:Option[java.sql.Timestamp], EndDate:Option[java.sql.Timestamp], PaperId:Long){}
case object ConversionUtil {
def extractMagIdentifier(pids:mutable.Buffer[String]) :String ={
val magIDRegex: Regex = "^[0-9]+$".r
val s =pids.filter(p=> magIDRegex.findAllIn(p).hasNext)
if (s.nonEmpty)
return s.head
null
}
def mergePublication(a: Publication, b:Publication) : Publication = {
if ((a != null) && (b != null)) {
a.mergeFrom(b)
a
} else {
if (a == null) b else a
}
}
def choiceLatestMagArtitcle(p1: MagPapers, p2:MagPapers) :MagPapers = {
var r = if (p1 == null) p2 else p1
if (p1 != null && p2 != null) {
if (p1.CreatedDate != null && p2.CreatedDate != null) {
if (p1.CreatedDate.before(p2.CreatedDate))
r = p2
else
r = p1
} else {
r = if (p1.CreatedDate == null) p2 else p1
}
}
r
}
def updatePubsWithDescription(inputItem:((String, Publication), MagPaperAbstract)) : Publication = {
val pub = inputItem._1._2
val abst = inputItem._2
if (abst != null) {
pub.setDescription(List(asField(abst.IndexedAbstract)).asJava)
}
pub
}
def updatePubsWithConferenceInfo(inputItem:((String, Publication), MagConferenceInstance)) : Publication = {
val publication:Publication= inputItem._1._2
val ci:MagConferenceInstance = inputItem._2
if (ci!= null){
val j:Journal = new Journal
if (ci.Location.isDefined)
j.setConferenceplace(ci.Location.get)
j.setName(ci.DisplayName.get)
if (ci.StartDate.isDefined && ci.EndDate.isDefined)
{
j.setConferencedate(s"${ci.StartDate.get.toString.substring(0,10)} - ${ci.EndDate.get.toString.substring(0,10)}")
}
publication.setJournal(j)
}
publication
}
def updatePubsWithSubject(item:((String, Publication), MagFieldOfStudy)) : Publication = {
val publication = item._1._2
val fieldOfStudy = item._2
if (fieldOfStudy != null && fieldOfStudy.subjects != null && fieldOfStudy.subjects.nonEmpty) {
val p: List[StructuredProperty] = fieldOfStudy.subjects.flatMap(s => {
val s1 = createSP(s.DisplayName, "keywords", "dnet:subject_classification_typologies")
val di = DoiBoostMappingUtil.generateDataInfo(s.Score.toString)
var resList: List[StructuredProperty] = List(s1)
if (s.MainType.isDefined) {
val maintp = s.MainType.get
val s2 = createSP(s.MainType.get, "keywords", "dnet:subject_classification_typologies")
s2.setDataInfo(di)
resList = resList ::: List(s2)
if (maintp.contains(".")) {
val s3 = createSP(maintp.split("\\.").head, "keywords", "dnet:subject_classification_typologies")
s3.setDataInfo(di)
resList = resList ::: List(s3)
}
}
resList
})
publication.setSubject(p.asJava)
}
publication
}
def addInstances(a: (Publication, MagUrl)): Publication = {
val pub = a._1
val urls = a._2
val i = new Instance
if (urls!= null) {
val l:List[String] = urls.instances.filter(k=>k.SourceUrl.nonEmpty).map(k=>k.SourceUrl):::List(s"https://academic.microsoft.com/#/detail/${extractMagIdentifier(pub.getOriginalId.asScala)}")
i.setUrl(l.asJava)
}
else
i.setUrl(List(s"https://academic.microsoft.com/#/detail/${extractMagIdentifier(pub.getOriginalId.asScala)}").asJava)
i.setCollectedfrom(createMAGCollectedFrom())
pub.setInstance(List(i).asJava)
pub
}
def transformPaperAbstract(input: MagPaperAbstract): MagPaperAbstract = {
MagPaperAbstract(input.PaperId, convertInvertedIndexString(input.IndexedAbstract))
}
def createOAFFromJournalAuthorPaper(inputParams: ((MagPapers, MagJournal), MagPaperWithAuthorList)): Publication = {
val paper = inputParams._1._1
val journal = inputParams._1._2
val authors = inputParams._2
val pub = new Publication
pub.setPid(List(createSP(paper.Doi.toLowerCase, "doi", PID_TYPES)).asJava)
pub.setOriginalId(List(paper.PaperId.toString, paper.Doi.toLowerCase).asJava)
//Set identifier as {50|60} | doiboost____::md5(DOI)
pub.setId(generateIdentifier(pub, paper.Doi.toLowerCase))
val mainTitles = createSP(paper.PaperTitle, "main title", "dnet:dataCite_title")
val originalTitles = createSP(paper.OriginalTitle, "alternative title", "dnet:dataCite_title")
pub.setTitle(List(mainTitles, originalTitles).asJava)
pub.setSource(List(asField(paper.BookTitle)).asJava)
val authorsOAF = authors.authors.map { f: MagAuthorAffiliation =>
val a: eu.dnetlib.dhp.schema.oaf.Author = new eu.dnetlib.dhp.schema.oaf.Author
a.setFullname(f.author.DisplayName.get)
if(f.affiliation!= null)
a.setAffiliation(List(asField(f.affiliation)).asJava)
a.setPid(List(createSP(s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", "URL", PID_TYPES)).asJava)
a
}
pub.setAuthor(authorsOAF.asJava)
if (paper.Date != null && paper.Date.isDefined) {
pub.setDateofacceptance(asField(paper.Date.get.toString.substring(0,10)))
}
pub.setPublisher(asField(paper.Publisher))
if (journal != null && journal.DisplayName.isDefined) {
val j = new Journal
j.setName(journal.DisplayName.get)
j.setSp(paper.FirstPage)
j.setEp(paper.LastPage)
if (journal.Publisher.isDefined)
pub.setPublisher(asField(journal.Publisher.get))
if (journal.Issn.isDefined)
j.setIssnPrinted(journal.Issn.get)
pub.setJournal(j)
}
pub.setCollectedfrom(List(createMAGCollectedFrom()).asJava)
pub.setDataInfo(generateDataInfo())
pub
}
def createOAF(inputParams: ((MagPapers, MagPaperWithAuthorList), MagPaperAbstract)): Publication = {
val paper = inputParams._1._1
val authors = inputParams._1._2
val description = inputParams._2
val pub = new Publication
pub.setPid(List(createSP(paper.Doi.toLowerCase, "doi", PID_TYPES)).asJava)
pub.setOriginalId(List(paper.PaperId.toString, paper.Doi.toLowerCase).asJava)
//Set identifier as {50|60} | doiboost____::md5(DOI)
pub.setId(generateIdentifier(pub, paper.Doi.toLowerCase))
val mainTitles = createSP(paper.PaperTitle, "main title", "dnet:dataCite_title")
val originalTitles = createSP(paper.OriginalTitle, "alternative title", "dnet:dataCite_title")
pub.setTitle(List(mainTitles, originalTitles).asJava)
pub.setSource(List(asField(paper.BookTitle)).asJava)
if (description != null) {
pub.setDescription(List(asField(description.IndexedAbstract)).asJava)
}
val authorsOAF = authors.authors.map { f: MagAuthorAffiliation =>
val a: eu.dnetlib.dhp.schema.oaf.Author = new eu.dnetlib.dhp.schema.oaf.Author
a.setFullname(f.author.DisplayName.get)
if(f.affiliation!= null)
a.setAffiliation(List(asField(f.affiliation)).asJava)
a.setPid(List(createSP(s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", "URL", PID_TYPES)).asJava)
a
}
if (paper.Date != null) {
pub.setDateofacceptance(asField(paper.Date.toString.substring(0,10)))
}
pub.setAuthor(authorsOAF.asJava)
pub
}
def convertInvertedIndexString(json_input: String): String = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = parse(json_input)
val idl = (json \ "IndexLength").extract[Int]
if (idl > 0) {
val res = Array.ofDim[String](idl)
val iid = (json \ "InvertedIndex").extract[Map[String, List[Int]]]
for {(k: String, v: List[Int]) <- iid} {
v.foreach(item => res(item) = k)
}
(0 until idl).foreach(i => {
if (res(i) == null)
res(i) = ""
})
return res.mkString(" ")
}
""
}
}

View File

@ -0,0 +1,92 @@
package eu.dnetlib.doiboost.mag
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import org.apache.commons.io.IOUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.{SaveMode, SparkSession}
import org.apache.spark.sql.types._
import org.slf4j.{Logger, LoggerFactory}
import org.apache.spark.sql.functions._
object SparkImportMagIntoDataset {
val datatypedict = Map(
"int" -> IntegerType,
"uint" -> IntegerType,
"long" -> LongType,
"ulong" -> LongType,
"float" -> FloatType,
"string" -> StringType,
"DateTime" -> DateType
)
val stream = Map(
"Affiliations" -> Tuple2("mag/Affiliations.txt", Seq("AffiliationId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "GridId:string", "OfficialPage:string", "WikiPage:string", "PaperCount:long", "CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")),
"Authors" -> Tuple2("mag/Authors.txt", Seq("AuthorId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "LastKnownAffiliationId:long?", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")),
"ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", "EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")),
"ConferenceSeries" -> Tuple2("mag/ConferenceSeries.txt", Seq("ConferenceSeriesId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")),
"EntityRelatedEntities" -> Tuple2("advanced/EntityRelatedEntities.txt", Seq("EntityId:long", "EntityType:string", "RelatedEntityId:long", "RelatedEntityType:string", "RelatedType:int", "Score:float")),
"FieldOfStudyChildren" -> Tuple2("advanced/FieldOfStudyChildren.txt", Seq("FieldOfStudyId:long", "ChildFieldOfStudyId:long")),
"FieldOfStudyExtendedAttributes" -> Tuple2("advanced/FieldOfStudyExtendedAttributes.txt", Seq("FieldOfStudyId:long", "AttributeType:int", "AttributeValue:string")),
"FieldsOfStudy" -> Tuple2("advanced/FieldsOfStudy.txt", Seq("FieldOfStudyId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "MainType:string", "Level:int", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")),
"Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")),
"PaperAbstractsInvertedIndex" -> Tuple2("nlp/PaperAbstractsInvertedIndex.txt.*", Seq("PaperId:long", "IndexedAbstract:string")),
"PaperAuthorAffiliations" -> Tuple2("mag/PaperAuthorAffiliations.txt", Seq("PaperId:long", "AuthorId:long", "AffiliationId:long?", "AuthorSequenceNumber:uint", "OriginalAuthor:string", "OriginalAffiliation:string")),
"PaperCitationContexts" -> Tuple2("nlp/PaperCitationContexts.txt", Seq("PaperId:long", "PaperReferenceId:long", "CitationContext:string")),
"PaperExtendedAttributes" -> Tuple2("mag/PaperExtendedAttributes.txt", Seq("PaperId:long", "AttributeType:int", "AttributeValue:string")),
"PaperFieldsOfStudy" -> Tuple2("advanced/PaperFieldsOfStudy.txt", Seq("PaperId:long", "FieldOfStudyId:long", "Score:float")),
"PaperRecommendations" -> Tuple2("advanced/PaperRecommendations.txt", Seq("PaperId:long", "RecommendedPaperId:long", "Score:float")),
"PaperReferences" -> Tuple2("mag/PaperReferences.txt", Seq("PaperId:long", "PaperReferenceId:long")),
"PaperResources" -> Tuple2("mag/PaperResources.txt", Seq("PaperId:long", "ResourceType:int", "ResourceUrl:string", "SourceUrl:string", "RelationshipType:int")),
"PaperUrls" -> Tuple2("mag/PaperUrls.txt", Seq("PaperId:long", "SourceType:int?", "SourceUrl:string", "LanguageCode:string")),
"Papers" -> Tuple2("mag/Papers.txt", Seq("PaperId:long", "Rank:uint", "Doi:string", "DocType:string", "PaperTitle:string", "OriginalTitle:string", "BookTitle:string", "Year:int?", "Date:DateTime?", "Publisher:string", "JournalId:long?", "ConferenceSeriesId:long?", "ConferenceInstanceId:long?", "Volume:string", "Issue:string", "FirstPage:string", "LastPage:string", "ReferenceCount:long", "CitationCount:long", "EstimatedCitation:long", "OriginalVenue:string", "FamilyId:long?", "CreatedDate:DateTime")),
"RelatedFieldOfStudy" -> Tuple2("advanced/RelatedFieldOfStudy.txt", Seq("FieldOfStudyId1:long", "Type1:string", "FieldOfStudyId2:long", "Type2:string", "Rank:float"))
)
def getSchema(streamName: String): StructType = {
var schema = new StructType()
val d: Seq[String] = stream(streamName)._2
d.foreach { case t =>
val currentType = t.split(":")
val fieldName: String = currentType.head
var fieldType: String = currentType.last
val nullable: Boolean = fieldType.endsWith("?")
if (nullable)
fieldType = fieldType.replace("?", "")
schema = schema.add(StructField(fieldName, datatypedict(fieldType), nullable))
}
schema
}
def main(args: Array[String]): Unit = {
val logger: Logger = LoggerFactory.getLogger(getClass)
val conf: SparkConf = new SparkConf()
val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/mag/convert_mag_to_oaf_params.json")))
parser.parseArgument(args)
val spark: SparkSession =
SparkSession
.builder()
.config(conf)
.appName(getClass.getSimpleName)
.master(parser.get("master")).getOrCreate()
stream.foreach { case (k, v) =>
val s: StructType = getSchema(k)
val df = spark.read
.option("header", "false")
.option("charset", "UTF8")
.option("delimiter", "\t")
.schema(s)
.csv(s"${parser.get("sourcePath")}/${v._1}")
logger.info(s"Converting $k")
df.write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/$k")
}
}
}

View File

@ -0,0 +1,157 @@
package eu.dnetlib.doiboost.mag
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.oaf.Publication
import org.apache.commons.io.IOUtils
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.functions._
import org.apache.spark.sql._
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._
object SparkPreProcessMAG {
def main(args: Array[String]): Unit = {
val logger: Logger = LoggerFactory.getLogger(getClass)
val conf: SparkConf = new SparkConf()
val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/mag/preprocess_mag_params.json")))
parser.parseArgument(args)
val spark: SparkSession =
SparkSession
.builder()
.config(conf)
.appName(getClass.getSimpleName)
.master(parser.get("master")).getOrCreate()
val sourcePath = parser.get("sourcePath")
import spark.implicits._
implicit val mapEncoderPubs: Encoder[Publication] = org.apache.spark.sql.Encoders.kryo[Publication]
implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPubs)
logger.info("Phase 1) make uninque DOI in Papers:")
val d: Dataset[MagPapers] = spark.read.load(s"${parser.get("sourcePath")}/Papers").as[MagPapers]
// Filtering Papers with DOI, and since for the same DOI we have multiple version of item with different PapersId we get the last one
val result: RDD[MagPapers] = d.where(col("Doi").isNotNull)
.rdd
.map{ p: MagPapers => Tuple2(p.Doi, p) }
.reduceByKey((p1:MagPapers,p2:MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1,p2))
.map(_._2)
val distinctPaper: Dataset[MagPapers] = spark.createDataset(result)
distinctPaper.write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/Papers_distinct")
logger.info("Phase 6) Enrich Publication with description")
val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[MagPaperAbstract]
pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/PaperAbstract")
logger.info("Phase 3) Group Author by PaperId")
val authors = spark.read.load(s"$sourcePath/Authors").as[MagAuthor]
val affiliation = spark.read.load(s"$sourcePath/Affiliations").as[MagAffiliation]
val paperAuthorAffiliation = spark.read.load(s"$sourcePath/PaperAuthorAffiliations").as[MagPaperAuthorAffiliation]
paperAuthorAffiliation.joinWith(authors, paperAuthorAffiliation("AuthorId").equalTo(authors("AuthorId")))
.map { case (a: MagPaperAuthorAffiliation, b: MagAuthor) => (a.AffiliationId, MagPaperAuthorDenormalized(a.PaperId, b, null)) }
.joinWith(affiliation, affiliation("AffiliationId").equalTo(col("_1")), "left")
.map(s => {
val mpa = s._1._2
val af = s._2
if (af != null) {
MagPaperAuthorDenormalized(mpa.PaperId, mpa.author, af.DisplayName)
} else
mpa
}).groupBy("PaperId").agg(collect_list(struct($"author", $"affiliation")).as("authors"))
.write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/merge_step_1_paper_authors")
logger.info("Phase 4) create First Version of publication Entity with Paper Journal and Authors")
val journals = spark.read.load(s"$sourcePath/Journals").as[MagJournal]
val papers = spark.read.load((s"${parser.get("targetPath")}/Papers_distinct")).as[MagPapers]
val paperWithAuthors = spark.read.load(s"${parser.get("targetPath")}/merge_step_1_paper_authors").as[MagPaperWithAuthorList]
val firstJoin = papers.joinWith(journals, papers("JournalId").equalTo(journals("JournalId")), "left")
firstJoin.joinWith(paperWithAuthors, firstJoin("_1.PaperId").equalTo(paperWithAuthors("PaperId")), "left")
.map { a => ConversionUtil.createOAFFromJournalAuthorPaper(a) }
.write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/merge_step_2")
var magPubs: Dataset[(String, Publication)] =
spark.read.load(s"${parser.get("targetPath")}/merge_step_2").as[Publication]
.map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)]
val conference = spark.read.load(s"$sourcePath/ConferenceInstances")
.select($"ConferenceInstanceId".as("ci"), $"DisplayName", $"Location", $"StartDate",$"EndDate" )
val conferenceInstance = conference.joinWith(papers, papers("ConferenceInstanceId").equalTo(conference("ci")))
.select($"_1.ci", $"_1.DisplayName", $"_1.Location", $"_1.StartDate",$"_1.EndDate", $"_2.PaperId").as[MagConferenceInstance]
magPubs.joinWith(conferenceInstance, col("_1").equalTo(conferenceInstance("PaperId")), "left")
.map(item => ConversionUtil.updatePubsWithConferenceInfo(item))
.write
.mode(SaveMode.Overwrite)
.save(s"${parser.get("targetPath")}/merge_step_2_conference")
magPubs= spark.read.load(s"${parser.get("targetPath")}/merge_step_2_conference").as[Publication]
.map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)]
val paperUrlDataset = spark.read.load(s"$sourcePath/PaperUrls").as[MagPaperUrl].groupBy("PaperId").agg(collect_list(struct("sourceUrl")).as("instances")).as[MagUrl]
logger.info("Phase 5) enrich publication with URL and Instances")
magPubs.joinWith(paperUrlDataset, col("_1").equalTo(paperUrlDataset("PaperId")), "left")
.map { a: ((String, Publication), MagUrl) => ConversionUtil.addInstances((a._1._2, a._2)) }
.write.mode(SaveMode.Overwrite)
.save(s"${parser.get("targetPath")}/merge_step_3")
// logger.info("Phase 6) Enrich Publication with description")
// val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[MagPaperAbstract]
// pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/PaperAbstract")
val paperAbstract = spark.read.load((s"${parser.get("targetPath")}/PaperAbstract")).as[MagPaperAbstract]
magPubs = spark.read.load(s"${parser.get("targetPath")}/merge_step_3").as[Publication]
.map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)]
magPubs.joinWith(paperAbstract, col("_1").equalTo(paperAbstract("PaperId")), "left")
.map(item => ConversionUtil.updatePubsWithDescription(item)
).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/merge_step_4")
logger.info("Phase 7) Enrich Publication with FieldOfStudy")
magPubs = spark.read.load(s"${parser.get("targetPath")}/merge_step_4").as[Publication]
.map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)]
val fos = spark.read.load(s"$sourcePath/FieldsOfStudy").select($"FieldOfStudyId".alias("fos"), $"DisplayName", $"MainType")
val pfos = spark.read.load(s"$sourcePath/PaperFieldsOfStudy")
val paperField = pfos.joinWith(fos, fos("fos").equalTo(pfos("FieldOfStudyId")))
.select($"_1.FieldOfStudyId", $"_2.DisplayName", $"_2.MainType", $"_1.PaperId", $"_1.Score")
.groupBy($"PaperId").agg(collect_list(struct($"FieldOfStudyId", $"DisplayName", $"MainType", $"Score")).as("subjects"))
.as[MagFieldOfStudy]
magPubs.joinWith(paperField, col("_1")
.equalTo(paperField("PaperId")), "left")
.map(item => ConversionUtil.updatePubsWithSubject(item))
.write.mode(SaveMode.Overwrite)
.save(s"${parser.get("targetPath")}/mag_publication")
val s:RDD[Publication] = spark.read.load(s"${parser.get("targetPath")}/mag_publication").as[Publication]
.map(p=>Tuple2(p.getId, p)).rdd.reduceByKey((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b))
.map(_._2)
spark.createDataset(s).as[Publication].write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/mag_publication_u")
}
}

View File

@ -0,0 +1,146 @@
package eu.dnetlib.doiboost.orcid;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URI;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.mortbay.log.Log;
import eu.dnetlib.doiboost.orcid.json.JsonWriter;
import eu.dnetlib.doiboost.orcid.model.WorkData;
import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser;
public class ActivitiesDecompressor {
private static final int MAX_XML_WORKS_PARSED = -1;
private static final int XML_WORKS_PARSED_COUNTER_LOG_INTERVAL = 100000;
public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath)
throws Exception {
String uri = inputUri;
FileSystem fs = FileSystem.get(URI.create(uri), conf);
Path inputPath = new Path(uri);
CompressionCodecFactory factory = new CompressionCodecFactory(conf);
CompressionCodec codec = factory.getCodec(inputPath);
if (codec == null) {
System.err.println("No codec found for " + uri);
System.exit(1);
}
CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension());
InputStream gzipInputStream = null;
try {
gzipInputStream = codec.createInputStream(fs.open(inputPath));
parseTarActivities(fs, conf, gzipInputStream, outputPath);
} finally {
Log.debug("Closing gzip stream");
IOUtils.closeStream(gzipInputStream);
}
}
private static void parseTarActivities(
FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) {
int counter = 0;
int doiFound = 0;
int errorFromOrcidFound = 0;
int xmlParserErrorFound = 0;
try (TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) {
TarArchiveEntry entry = null;
try (SequenceFile.Writer writer = SequenceFile
.createWriter(
conf,
SequenceFile.Writer.file(outputPath),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(Text.class))) {
while ((entry = tais.getNextTarEntry()) != null) {
String filename = entry.getName();
try {
if (entry.isDirectory() || !filename.contains("works")) {
} else {
Log.debug("XML work entry name: " + entry.getName());
counter++;
BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from
// tarInput
String line;
StringBuffer buffer = new StringBuffer();
while ((line = br.readLine()) != null) {
buffer.append(line);
}
WorkData workData = XMLRecordParser.VTDParseWorkData(buffer.toString().getBytes());
if (workData != null) {
if (workData.getErrorCode() != null) {
errorFromOrcidFound += 1;
Log
.debug(
"error from Orcid with code "
+ workData.getErrorCode()
+ " for entry "
+ entry.getName());
continue;
}
if (workData.isDoiFound()) {
String jsonData = JsonWriter.create(workData);
Log.debug("oid: " + workData.getOid() + " data: " + jsonData);
final Text key = new Text(workData.getOid());
final Text value = new Text(jsonData);
try {
writer.append(key, value);
} catch (IOException e) {
Log.debug("Writing to sequence file: " + e.getMessage());
Log.debug(e);
throw new RuntimeException(e);
}
doiFound += 1;
}
} else {
Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer.toString());
xmlParserErrorFound += 1;
}
}
} catch (Exception e) {
Log
.warn(
"Parsing work from tar archive and xml work: " + filename + " " + e.getMessage());
Log.warn(e);
}
if ((counter % XML_WORKS_PARSED_COUNTER_LOG_INTERVAL) == 0) {
Log.info("Current xml works parsed: " + counter);
}
if ((MAX_XML_WORKS_PARSED > -1) && (counter > MAX_XML_WORKS_PARSED)) {
break;
}
}
}
} catch (IOException e) {
Log.warn("Parsing work from gzip archive: " + e.getMessage());
Log.warn(e);
throw new RuntimeException(e);
}
Log.info("Activities parse completed");
Log.info("Total XML works parsed: " + counter);
Log.info("Total doi found: " + doiFound);
Log.info("Error from Orcid found: " + errorFromOrcidFound);
Log.info("Error parsing xml work found: " + xmlParserErrorFound);
}
}

View File

@ -0,0 +1,87 @@
package eu.dnetlib.doiboost.orcid
import java.io.IOException
import eu.dnetlib.dhp.schema.oaf.{Author, Publication}
import eu.dnetlib.doiboost.DoiBoostMappingUtil
import eu.dnetlib.doiboost.DoiBoostMappingUtil.{ORCID, PID_TYPES, createSP, generateDataInfo, generateIdentifier}
import eu.dnetlib.doiboost.crossref.Crossref2Oaf
import org.apache.commons.lang.StringUtils
import org.codehaus.jackson.map.ObjectMapper
import org.json4s
import org.json4s.DefaultFormats
import org.json4s.jackson.JsonMethods.parse
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._
case class ORCIDItem(oid:String,name:String,surname:String,creditName:String,errorCode:String){}
case class ORCIDElement(doi:String, authors:List[ORCIDItem]) {}
object ORCIDToOAF {
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
val mapper = new ObjectMapper
def isJsonValid(inputStr: String): Boolean = {
import java.io.IOException
try {
mapper.readTree(inputStr)
true
} catch {
case e: IOException =>
false
}
}
def extractValueFromInputString(input: String): (String, String) = {
val i = input.indexOf('[')
if (i <5) {
return null
}
val orcidList = input.substring(i, input.length - 1)
val doi = input.substring(1, i - 1)
if (isJsonValid(orcidList)) {
(doi, orcidList)
} else null
}
def convertTOOAF(input:ORCIDElement) :Publication = {
val doi = input.doi
val pub:Publication = new Publication
pub.setPid(List(createSP(doi, "doi", PID_TYPES)).asJava)
pub.setDataInfo(generateDataInfo())
pub.setId(generateIdentifier(pub, doi.toLowerCase))
try{
pub.setAuthor(input.authors.map(a=> {
generateAuhtor(a.name, a.surname, a.creditName, a.oid)
}).asJava)
pub.setCollectedfrom(List(DoiBoostMappingUtil.createORIDCollectedFrom()).asJava)
pub.setDataInfo(DoiBoostMappingUtil.generateDataInfo())
pub
} catch {
case e: Throwable =>
logger.info(s"ERROR ON GENERATE Publication from $input")
null
}
}
def generateAuhtor(given: String, family: String, fullName:String, orcid: String): Author = {
val a = new Author
a.setName(given)
a.setSurname(family)
if (fullName!= null && fullName.nonEmpty)
a.setFullname(fullName)
else
a.setFullname(s"$given $family")
if (StringUtils.isNotBlank(orcid))
a.setPid(List(createSP(orcid, ORCID, PID_TYPES)).asJava)
a
}
}

View File

@ -0,0 +1,51 @@
package eu.dnetlib.doiboost.orcid;
import java.io.IOException;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.mortbay.log.Log;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
public class OrcidAuthorsDOIsDataGen extends OrcidDSManager {
private String activitiesFileNameTarGz;
private String outputAuthorsDOIsPath;
public static void main(String[] args) throws IOException, Exception {
OrcidAuthorsDOIsDataGen orcidAuthorsDOIsDataGen = new OrcidAuthorsDOIsDataGen();
orcidAuthorsDOIsDataGen.loadArgs(args);
orcidAuthorsDOIsDataGen.generateAuthorsDOIsData();
}
public void generateAuthorsDOIsData() throws Exception {
Configuration conf = initConfigurationObject();
FileSystem fs = initFileSystemObject(conf);
String tarGzUri = hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(activitiesFileNameTarGz);
Path outputPath = new Path(hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(outputAuthorsDOIsPath));
ActivitiesDecompressor.parseGzActivities(conf, tarGzUri, outputPath);
}
private void loadArgs(String[] args) throws IOException, Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(
OrcidAuthorsDOIsDataGen.class
.getResourceAsStream(
"/eu/dnetlib/dhp/doiboost/create_orcid_authors_dois_data.json")));
parser.parseArgument(args);
hdfsServerUri = parser.get("hdfsServerUri");
Log.info("HDFS URI: " + hdfsServerUri);
hdfsOrcidDefaultPath = parser.get("hdfsOrcidDefaultPath");
Log.info("Default Path: " + hdfsOrcidDefaultPath);
activitiesFileNameTarGz = parser.get("activitiesFileNameTarGz");
Log.info("Activities File Name: " + activitiesFileNameTarGz);
outputAuthorsDOIsPath = parser.get("outputAuthorsDOIsPath");
Log.info("Output Authors DOIs Data: " + outputAuthorsDOIsPath);
}
}

View File

@ -0,0 +1,81 @@
package eu.dnetlib.doiboost.orcid;
import java.io.IOException;
import java.net.URI;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.mortbay.log.Log;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
public class OrcidDSManager {
protected String hdfsServerUri;
protected String hdfsOrcidDefaultPath;
private String summariesFileNameTarGz;
private String outputAuthorsPath;
public static void main(String[] args) throws IOException, Exception {
OrcidDSManager orcidDSManager = new OrcidDSManager();
orcidDSManager.loadArgs(args);
orcidDSManager.generateAuthors();
}
public void generateAuthors() throws Exception {
Configuration conf = initConfigurationObject();
FileSystem fs = initFileSystemObject(conf);
String tarGzUri = hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(summariesFileNameTarGz);
Path outputPath = new Path(
hdfsServerUri
.concat(hdfsOrcidDefaultPath)
.concat(outputAuthorsPath)
.concat("authors.seq"));
SummariesDecompressor.parseGzSummaries(conf, tarGzUri, outputPath);
}
protected Configuration initConfigurationObject() {
// ====== Init HDFS File System Object
Configuration conf = new Configuration();
// Set FileSystem URI
conf.set("fs.defaultFS", hdfsServerUri.concat(hdfsOrcidDefaultPath));
// Because of Maven
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
return conf;
}
protected FileSystem initFileSystemObject(Configuration conf) {
// Get the filesystem - HDFS
FileSystem fs = null;
try {
fs = FileSystem.get(URI.create(hdfsServerUri.concat(hdfsOrcidDefaultPath)), conf);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return fs;
}
private void loadArgs(String[] args) throws IOException, Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(
OrcidDSManager.class
.getResourceAsStream(
"/eu/dnetlib/dhp/doiboost/create_orcid_authors_data.json")));
parser.parseArgument(args);
hdfsServerUri = parser.get("hdfsServerUri");
Log.info("HDFS URI: " + hdfsServerUri);
hdfsOrcidDefaultPath = parser.get("hdfsOrcidDefaultPath");
Log.info("Default Path: " + hdfsOrcidDefaultPath);
summariesFileNameTarGz = parser.get("summariesFileNameTarGz");
Log.info("Summaries File Name: " + summariesFileNameTarGz);
outputAuthorsPath = parser.get("outputAuthorsPath");
Log.info("Output Authors Data: " + outputAuthorsPath);
}
}

View File

@ -0,0 +1,203 @@
package eu.dnetlib.doiboost.orcid;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.mortbay.log.Log;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
public class OrcidDownloader extends OrcidDSManager {
static final int REQ_LIMIT = 24;
// static final int REQ_MAX_TEST = 100;
static final int RECORD_PARSED_COUNTER_LOG_INTERVAL = 10000;
static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
static final String lastUpdate = "2019-09-30 00:00:00";
private String lambdaFileName;
private String outputPath;
private String token;
public static void main(String[] args) throws IOException, Exception {
OrcidDownloader orcidDownloader = new OrcidDownloader();
orcidDownloader.loadArgs(args);
orcidDownloader.parseLambdaFile();
}
private String downloadRecord(String orcidId) {
try (CloseableHttpClient client = HttpClients.createDefault()) {
HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record");
httpGet.addHeader("Accept", "application/vnd.orcid+xml");
httpGet.addHeader("Authorization", String.format("Bearer %s", token));
CloseableHttpResponse response = client.execute(httpGet);
if (response.getStatusLine().getStatusCode() != 200) {
Log
.warn(
"Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode());
return new String("");
}
return IOUtils.toString(response.getEntity().getContent());
} catch (Throwable e) {
Log.warn("Downloading " + orcidId, e.getMessage());
}
return new String("");
}
public void parseLambdaFile() throws Exception {
int parsedRecordsCounter = 0;
int downloadedRecordsCounter = 0;
int savedRecordsCounter = 0;
long startDownload = 0;
Configuration conf = initConfigurationObject();
FileSystem fs = initFileSystemObject(conf);
String lambdaFileUri = hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(lambdaFileName);
Path hdfsreadpath = new Path(lambdaFileUri);
FSDataInputStream lambdaFileStream = fs.open(hdfsreadpath);
Path hdfsoutputPath = new Path(
hdfsServerUri
.concat(hdfsOrcidDefaultPath)
.concat(outputPath)
.concat("orcid_records.seq"));
try (SequenceFile.Writer writer = SequenceFile
.createWriter(
conf,
SequenceFile.Writer.file(hdfsoutputPath),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(Text.class))) {
try (BufferedReader br = new BufferedReader(new InputStreamReader(lambdaFileStream))) {
String line;
int nReqTmp = 0;
startDownload = System.currentTimeMillis();
long startReqTmp = System.currentTimeMillis();
while ((line = br.readLine()) != null) {
parsedRecordsCounter++;
// skip headers line
if (parsedRecordsCounter == 1) {
continue;
}
String[] values = line.split(",");
List<String> recordInfo = Arrays.asList(values);
String orcidId = recordInfo.get(0);
if (isModified(orcidId, recordInfo.get(3))) {
String record = downloadRecord(orcidId);
downloadedRecordsCounter++;
if (!record.isEmpty()) {
String compressRecord = ArgumentApplicationParser.compressArgument(record);
final Text key = new Text(recordInfo.get(0));
final Text value = new Text(compressRecord);
try {
writer.append(key, value);
savedRecordsCounter++;
} catch (IOException e) {
Log.warn("Writing to sequence file: " + e.getMessage());
Log.warn(e);
throw new RuntimeException(e);
}
}
}
long endReq = System.currentTimeMillis();
nReqTmp++;
if (nReqTmp == REQ_LIMIT) {
long reqSessionDuration = endReq - startReqTmp;
if (reqSessionDuration <= 1000) {
Log
.warn(
"\nreqSessionDuration: "
+ reqSessionDuration
+ " nReqTmp: "
+ nReqTmp
+ " wait ....");
Thread.sleep(1000 - reqSessionDuration);
} else {
nReqTmp = 0;
startReqTmp = System.currentTimeMillis();
}
}
// if (parsedRecordsCounter > REQ_MAX_TEST) {
// break;
// }
if ((parsedRecordsCounter % RECORD_PARSED_COUNTER_LOG_INTERVAL) == 0) {
Log
.info(
"Current parsed: "
+ parsedRecordsCounter
+ " downloaded: "
+ downloadedRecordsCounter
+ " saved: "
+ savedRecordsCounter);
// if (parsedRecordsCounter > REQ_MAX_TEST) {
// break;
// }
}
}
long endDownload = System.currentTimeMillis();
long downloadTime = endDownload - startDownload;
Log.info("Download time: " + ((downloadTime / 1000) / 60) + " minutes");
}
}
lambdaFileStream.close();
Log.info("Download started at: " + new Date(startDownload).toString());
Log.info("Parsed Records Counter: " + parsedRecordsCounter);
Log.info("Downloaded Records Counter: " + downloadedRecordsCounter);
Log.info("Saved Records Counter: " + savedRecordsCounter);
}
private void loadArgs(String[] args) throws IOException, Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(
OrcidDownloader.class
.getResourceAsStream(
"/eu/dnetlib/dhp/doiboost/download_orcid_data.json")));
parser.parseArgument(args);
hdfsServerUri = parser.get("hdfsServerUri");
Log.info("HDFS URI: " + hdfsServerUri);
hdfsOrcidDefaultPath = parser.get("hdfsOrcidDefaultPath");
Log.info("Default Path: " + hdfsOrcidDefaultPath);
lambdaFileName = parser.get("lambdaFileName");
Log.info("Lambda File Name: " + lambdaFileName);
outputPath = parser.get("outputPath");
Log.info("Output Data: " + outputPath);
token = parser.get("token");
}
private boolean isModified(String orcidId, String modifiedDate) {
Date modifiedDateDt = null;
Date lastUpdateDt = null;
try {
if (modifiedDate.length() != 19) {
modifiedDate = modifiedDate.substring(0, 19);
}
modifiedDateDt = new SimpleDateFormat(DATE_FORMAT).parse(modifiedDate);
lastUpdateDt = new SimpleDateFormat(DATE_FORMAT).parse(lastUpdate);
} catch (Exception e) {
Log.warn("[" + orcidId + "] Parsing date: ", e.getMessage());
return true;
}
return modifiedDateDt.after(lastUpdateDt);
}
}

View File

@ -0,0 +1,44 @@
package eu.dnetlib.doiboost.orcid
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.oaf.Publication
import eu.dnetlib.doiboost.mag.ConversionUtil
import org.apache.commons.io.IOUtils
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
import org.slf4j.{Logger, LoggerFactory}
object SparkConvertORCIDToOAF {
def main(args: Array[String]): Unit = {
val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass)
val conf: SparkConf = new SparkConf()
val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json")))
parser.parseArgument(args)
val spark: SparkSession =
SparkSession
.builder()
.config(conf)
.appName(getClass.getSimpleName)
.master(parser.get("master")).getOrCreate()
implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication]
implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPubs)
import spark.implicits._
val sourcePath = parser.get("sourcePath")
val targetPath = parser.get("targetPath")
val dataset:Dataset[ORCIDElement] = spark.read.json(sourcePath).as[ORCIDElement]
logger.info("Converting ORCID to OAF")
val d:RDD[Publication] = dataset.map(o => ORCIDToOAF.convertTOOAF(o)).filter(p=>p!=null).map(p=>(p.getId,p)).rdd.reduceByKey(ConversionUtil.mergePublication)
.map(_._2)
spark.createDataset(d).as[Publication].write.mode(SaveMode.Overwrite).save(targetPath)
}
}

View File

@ -0,0 +1,180 @@
package eu.dnetlib.doiboost.orcid;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.esotericsoftware.minlog.Log;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.JsonElement;
import com.google.gson.JsonParser;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.doiboost.orcid.model.AuthorData;
import eu.dnetlib.doiboost.orcid.model.WorkData;
import scala.Tuple2;
public class SparkGenerateDoiAuthorList {
public static void main(String[] args) throws IOException, Exception {
Logger logger = LoggerFactory.getLogger(SparkGenerateDoiAuthorList.class);
logger.info("[ SparkGenerateDoiAuthorList STARTED]");
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(
SparkGenerateDoiAuthorList.class
.getResourceAsStream(
"/eu/dnetlib/dhp/doiboost/gen_doi_author_list_orcid_parameters.json")));
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String workingPath = parser.get("workingPath");
logger.info("workingPath: ", workingPath);
final String outputDoiAuthorListPath = parser.get("outputDoiAuthorListPath");
logger.info("outputDoiAuthorListPath: ", outputDoiAuthorListPath);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaPairRDD<Text, Text> summariesRDD = sc
.sequenceFile(workingPath + "../orcid_summaries/output/authors.seq", Text.class, Text.class);
Dataset<AuthorData> summariesDataset = spark
.createDataset(
summariesRDD.map(seq -> loadAuthorFromJson(seq._1(), seq._2())).rdd(),
Encoders.bean(AuthorData.class));
JavaPairRDD<Text, Text> activitiesRDD = sc
.sequenceFile(workingPath + "/output/*.seq", Text.class, Text.class);
Dataset<WorkData> activitiesDataset = spark
.createDataset(
activitiesRDD.map(seq -> loadWorkFromJson(seq._1(), seq._2())).rdd(),
Encoders.bean(WorkData.class));
Function<Tuple2<String, AuthorData>, Tuple2<String, List<AuthorData>>> toAuthorListFunction = data -> {
try {
String doi = data._1();
if (doi == null) {
return null;
}
AuthorData author = data._2();
if (author == null) {
return null;
}
List<AuthorData> toAuthorList = Arrays.asList(author);
return new Tuple2<>(doi, toAuthorList);
} catch (Exception e) {
Log.error("toAuthorListFunction ERROR", e);
return null;
}
};
JavaRDD<Tuple2<String, List<AuthorData>>> doisRDD = activitiesDataset
.joinWith(
summariesDataset,
activitiesDataset.col("oid").equalTo(summariesDataset.col("oid")), "inner")
.map(
(MapFunction<Tuple2<WorkData, AuthorData>, Tuple2<String, AuthorData>>) value -> {
WorkData w = value._1;
AuthorData a = value._2;
return new Tuple2<>(w.getDoi(), a);
},
Encoders.tuple(Encoders.STRING(), Encoders.bean(AuthorData.class)))
.filter(Objects::nonNull)
.toJavaRDD()
.map(toAuthorListFunction);
JavaPairRDD
.fromJavaRDD(doisRDD)
.reduceByKey((d1, d2) -> {
try {
if (d1 != null && d2 != null) {
Stream<AuthorData> mergedStream = Stream
.concat(
d1.stream(),
d2.stream());
List<AuthorData> mergedAuthors = mergedStream.collect(Collectors.toList());
return mergedAuthors;
}
if (d1 != null) {
return d1;
}
if (d2 != null) {
return d2;
}
} catch (Exception e) {
Log.error("mergeAuthorsFunction ERROR", e);
return null;
}
return null;
})
.mapToPair(
s -> {
ObjectMapper mapper = new ObjectMapper();
return new Tuple2<>(s._1(), mapper.writeValueAsString(s._2()));
})
.repartition(10)
.saveAsTextFile(workingPath + outputDoiAuthorListPath);
});
}
private static AuthorData loadAuthorFromJson(Text orcidId, Text json) {
AuthorData authorData = new AuthorData();
authorData.setOid(orcidId.toString());
JsonElement jElement = new JsonParser().parse(json.toString());
authorData.setName(getJsonValue(jElement, "name"));
authorData.setSurname(getJsonValue(jElement, "surname"));
authorData.setCreditName(getJsonValue(jElement, "creditname"));
return authorData;
}
private static WorkData loadWorkFromJson(Text orcidId, Text json) {
WorkData workData = new WorkData();
workData.setOid(orcidId.toString());
JsonElement jElement = new JsonParser().parse(json.toString());
workData.setDoi(getJsonValue(jElement, "doi"));
return workData;
}
private static String getJsonValue(JsonElement jElement, String property) {
if (jElement.getAsJsonObject().has(property)) {
JsonElement name = null;
name = jElement.getAsJsonObject().get(property);
if (name != null && !name.isJsonNull()) {
return name.getAsString();
}
}
return null;
}
}

View File

@ -0,0 +1,165 @@
package eu.dnetlib.doiboost.orcid;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.util.LongAccumulator;
import org.mortbay.log.Log;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.doiboost.orcid.model.DownloadedRecordData;
import scala.Tuple2;
public class SparkOrcidGenerateAuthors {
static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
static final String lastUpdate = "2019-09-30 00:00:00";
public static void main(String[] args) throws IOException, Exception {
Logger logger = LoggerFactory.getLogger(SparkOrcidGenerateAuthors.class);
logger.info("[ SparkOrcidGenerateAuthors STARTED]");
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(
SparkOrcidGenerateAuthors.class
.getResourceAsStream(
"/eu/dnetlib/dhp/doiboost/gen_orcid_authors_parameters.json")));
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String workingPath = parser.get("workingPath");
logger.info("workingPath: ", workingPath);
final String outputAuthorsPath = parser.get("outputAuthorsPath");
logger.info("outputAuthorsPath: ", outputAuthorsPath);
final String token = parser.get("token");
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
LongAccumulator parsedRecordsAcc = sc.sc().longAccumulator("parsedRecords");
LongAccumulator modifiedRecordsAcc = sc.sc().longAccumulator("modifiedRecords");
LongAccumulator downloadedRecordsAcc = sc.sc().longAccumulator("downloadedRecords");
LongAccumulator alreadyDownloadedRecords = sc.sc().longAccumulator("alreadyDownloadedRecords");
JavaRDD<String> lamdaFileRDD = sc.textFile(workingPath + "lamdafiles");
JavaRDD<String> downloadedRDD = sc.textFile(workingPath + "downloaded");
Function<String, String> getOrcidIdFunction = line -> {
try {
String[] values = line.split(",");
return values[0].substring(1);
} catch (Exception e) {
return new String("");
}
};
List<String> downloadedRecords = downloadedRDD.map(getOrcidIdFunction).collect();
Function<String, Boolean> isModifiedAfterFilter = line -> {
String[] values = line.split(",");
String orcidId = values[0];
parsedRecordsAcc.add(1);
if (isModified(orcidId, values[3])) {
modifiedRecordsAcc.add(1);
return true;
}
return false;
};
Function<String, Boolean> isNotDownloadedFilter = line -> {
String[] values = line.split(",");
String orcidId = values[0];
if (downloadedRecords.contains(orcidId)) {
alreadyDownloadedRecords.add(1);
return false;
}
return true;
};
Function<String, Tuple2<String, String>> downloadRecordFunction = line -> {
String[] values = line.split(",");
String orcidId = values[0];
String modifiedDate = values[3];
return downloadRecord(orcidId, modifiedDate, token, downloadedRecordsAcc);
};
lamdaFileRDD
.filter(isModifiedAfterFilter)
.filter(isNotDownloadedFilter)
.map(downloadRecordFunction)
.rdd()
.saveAsTextFile(workingPath.concat(outputAuthorsPath));
});
}
private static boolean isModified(String orcidId, String modifiedDate) {
Date modifiedDateDt = null;
Date lastUpdateDt = null;
try {
if (modifiedDate.length() != 19) {
modifiedDate = modifiedDate.substring(0, 19);
}
modifiedDateDt = new SimpleDateFormat(DATE_FORMAT).parse(modifiedDate);
lastUpdateDt = new SimpleDateFormat(DATE_FORMAT).parse(lastUpdate);
} catch (Exception e) {
Log.warn("[" + orcidId + "] Parsing date: ", e.getMessage());
return true;
}
return modifiedDateDt.after(lastUpdateDt);
}
private static Tuple2<String, String> downloadRecord(String orcidId, String modifiedDate, String token,
LongAccumulator downloadedRecordsAcc) {
final DownloadedRecordData data = new DownloadedRecordData();
data.setOrcidId(orcidId);
data.setModifiedDate(modifiedDate);
try (CloseableHttpClient client = HttpClients.createDefault()) {
HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record");
httpGet.addHeader("Accept", "application/vnd.orcid+xml");
httpGet.addHeader("Authorization", String.format("Bearer %s", token));
CloseableHttpResponse response = client.execute(httpGet);
int statusCode = response.getStatusLine().getStatusCode();
data.setStatusCode(statusCode);
if (statusCode != 200) {
Log
.warn(
"Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode());
return data.toTuple2();
}
downloadedRecordsAcc.add(1);
data
.setCompressedData(
ArgumentApplicationParser.compressArgument(IOUtils.toString(response.getEntity().getContent())));
} catch (Throwable e) {
Log.warn("Downloading " + orcidId, e.getMessage());
data.setErrorMessage(e.getMessage());
return data.toTuple2();
}
return data.toTuple2();
}
}

View File

@ -0,0 +1,50 @@
package eu.dnetlib.doiboost.orcid;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.IOException;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
public class SparkPartitionLambdaFile {
public static void main(String[] args) throws IOException, Exception {
Logger logger = LoggerFactory.getLogger(SparkOrcidGenerateAuthors.class);
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(
SparkOrcidGenerateAuthors.class
.getResourceAsStream(
"/eu/dnetlib/dhp/doiboost/gen_orcid_authors_parameters.json")));
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
final String workingPath = parser.get("workingPath");
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<String> lamdaFileRDD = sc.textFile(workingPath + "last_modified.csv");
lamdaFileRDD
.repartition(20)
.saveAsTextFile(workingPath.concat("lamdafiles"));
});
}
}

View File

@ -0,0 +1,158 @@
package eu.dnetlib.doiboost.orcid;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URI;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.mortbay.log.Log;
import eu.dnetlib.doiboost.orcid.json.JsonWriter;
import eu.dnetlib.doiboost.orcid.model.AuthorData;
import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser;
public class SummariesDecompressor {
private static final int MAX_XML_RECORDS_PARSED = -1;
public static void parseGzSummaries(Configuration conf, String inputUri, Path outputPath)
throws Exception {
String uri = inputUri;
FileSystem fs = FileSystem.get(URI.create(uri), conf);
Path inputPath = new Path(uri);
CompressionCodecFactory factory = new CompressionCodecFactory(conf);
CompressionCodec codec = factory.getCodec(inputPath);
if (codec == null) {
System.err.println("No codec found for " + uri);
System.exit(1);
}
CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension());
InputStream gzipInputStream = null;
try {
gzipInputStream = codec.createInputStream(fs.open(inputPath));
parseTarSummaries(fs, conf, gzipInputStream, outputPath);
} finally {
Log.debug("Closing gzip stream");
IOUtils.closeStream(gzipInputStream);
}
}
private static void parseTarSummaries(
FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) {
int counter = 0;
int nameFound = 0;
int surnameFound = 0;
int creditNameFound = 0;
int errorFromOrcidFound = 0;
int xmlParserErrorFound = 0;
try (TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) {
TarArchiveEntry entry = null;
try (SequenceFile.Writer writer = SequenceFile
.createWriter(
conf,
SequenceFile.Writer.file(outputPath),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(Text.class))) {
while ((entry = tais.getNextTarEntry()) != null) {
String filename = entry.getName();
try {
if (entry.isDirectory()) {
Log.debug("Directory entry name: " + entry.getName());
} else {
Log.debug("XML record entry name: " + entry.getName());
counter++;
BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from
// tarInput
String line;
StringBuffer buffer = new StringBuffer();
while ((line = br.readLine()) != null) {
buffer.append(line);
}
AuthorData authorData = XMLRecordParser.VTDParseAuthorData(buffer.toString().getBytes());
if (authorData != null) {
if (authorData.getErrorCode() != null) {
errorFromOrcidFound += 1;
Log
.debug(
"error from Orcid with code "
+ authorData.getErrorCode()
+ " for oid "
+ entry.getName());
continue;
}
String jsonData = JsonWriter.create(authorData);
Log.debug("oid: " + authorData.getOid() + " data: " + jsonData);
final Text key = new Text(authorData.getOid());
final Text value = new Text(jsonData);
try {
writer.append(key, value);
} catch (IOException e) {
Log.debug("Writing to sequence file: " + e.getMessage());
Log.debug(e);
throw new RuntimeException(e);
}
if (authorData.getName() != null) {
nameFound += 1;
}
if (authorData.getSurname() != null) {
surnameFound += 1;
}
if (authorData.getCreditName() != null) {
creditNameFound += 1;
}
} else {
Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer.toString());
xmlParserErrorFound += 1;
}
}
} catch (Exception e) {
Log
.warn(
"Parsing record from tar archive and xml record: "
+ filename
+ " "
+ e.getMessage());
Log.warn(e);
}
if ((counter % 100000) == 0) {
Log.info("Current xml records parsed: " + counter);
}
if ((MAX_XML_RECORDS_PARSED > -1) && (counter > MAX_XML_RECORDS_PARSED)) {
break;
}
}
}
} catch (IOException e) {
Log.warn("Parsing record from gzip archive: " + e.getMessage());
Log.warn(e);
throw new RuntimeException(e);
}
Log.info("Summaries parse completed");
Log.info("Total XML records parsed: " + counter);
Log.info("Name found: " + nameFound);
Log.info("Surname found: " + surnameFound);
Log.info("Credit name found: " + creditNameFound);
Log.info("Error from Orcid found: " + errorFromOrcidFound);
Log.info("Error parsing xml record found: " + xmlParserErrorFound);
}
}

View File

@ -0,0 +1,28 @@
package eu.dnetlib.doiboost.orcid.json;
import com.google.gson.JsonObject;
import eu.dnetlib.doiboost.orcid.model.AuthorData;
import eu.dnetlib.doiboost.orcid.model.WorkData;
public class JsonWriter {
public static String create(AuthorData authorData) {
JsonObject author = new JsonObject();
author.addProperty("oid", authorData.getOid());
author.addProperty("name", authorData.getName());
author.addProperty("surname", authorData.getSurname());
if (authorData.getCreditName() != null) {
author.addProperty("creditname", authorData.getCreditName());
}
return author.toString();
}
public static String create(WorkData workData) {
JsonObject work = new JsonObject();
work.addProperty("oid", workData.getOid());
work.addProperty("doi", workData.getDoi());
return work.toString();
}
}

View File

@ -0,0 +1,53 @@
package eu.dnetlib.doiboost.orcid.model;
import java.io.Serializable;
public class AuthorData implements Serializable {
private String oid;
private String name;
private String surname;
private String creditName;
private String errorCode;
public String getErrorCode() {
return errorCode;
}
public void setErrorCode(String errorCode) {
this.errorCode = errorCode;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getSurname() {
return surname;
}
public void setSurname(String surname) {
this.surname = surname;
}
public String getCreditName() {
return creditName;
}
public void setCreditName(String creditName) {
this.creditName = creditName;
}
public String getOid() {
return oid;
}
public void setOid(String oid) {
this.oid = oid;
}
}

View File

@ -0,0 +1,76 @@
package eu.dnetlib.doiboost.orcid.model;
import java.io.Serializable;
import org.apache.hadoop.io.Text;
import com.google.gson.JsonObject;
import scala.Tuple2;
public class DownloadedRecordData implements Serializable {
private String orcidId;
private String modifiedDate;
private String statusCode;
private String compressedData;
private String errorMessage;
public Tuple2<String, String> toTuple2() {
JsonObject data = new JsonObject();
data.addProperty("statusCode", getStatusCode());
data.addProperty("modifiedDate", getModifiedDate());
if (getCompressedData() != null) {
data.addProperty("compressedData", getCompressedData());
}
if (getErrorMessage() != null) {
data.addProperty("errorMessage", getErrorMessage());
}
return new Tuple2<>(orcidId, data.toString());
}
public String getErrorMessage() {
return errorMessage;
}
public void setErrorMessage(String errorMessage) {
this.errorMessage = errorMessage;
}
public String getOrcidId() {
return orcidId;
}
public void setOrcidId(String orcidId) {
this.orcidId = orcidId;
}
public int getStatusCode() {
try {
return Integer.parseInt(statusCode);
} catch (Exception e) {
return -2;
}
}
public void setStatusCode(int statusCode) {
this.statusCode = Integer.toString(statusCode);
}
public String getCompressedData() {
return compressedData;
}
public void setCompressedData(String compressedData) {
this.compressedData = compressedData;
}
public String getModifiedDate() {
return modifiedDate;
}
public void setModifiedDate(String modifiedDate) {
this.modifiedDate = modifiedDate;
}
}

View File

@ -0,0 +1,45 @@
package eu.dnetlib.doiboost.orcid.model;
import java.io.Serializable;
public class WorkData implements Serializable {
private String oid;
private String doi;
private boolean doiFound = false;
public boolean isDoiFound() {
return doiFound;
}
public void setDoiFound(boolean doiFound) {
this.doiFound = doiFound;
}
public String getOid() {
return oid;
}
public void setOid(String oid) {
this.oid = oid;
}
public String getDoi() {
return doi;
}
public void setDoi(String doi) {
this.doi = doi;
}
public String getErrorCode() {
return errorCode;
}
public void setErrorCode(String errorCode) {
this.errorCode = errorCode;
}
private String errorCode;
}

View File

@ -0,0 +1,123 @@
package eu.dnetlib.doiboost.orcid.xml;
import java.util.Arrays;
import java.util.List;
import com.ximpleware.AutoPilot;
import com.ximpleware.EOFException;
import com.ximpleware.EncodingException;
import com.ximpleware.EntityException;
import com.ximpleware.ParseException;
import com.ximpleware.VTDGen;
import com.ximpleware.VTDNav;
import eu.dnetlib.dhp.parser.utility.VtdException;
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
import eu.dnetlib.doiboost.orcid.model.AuthorData;
import eu.dnetlib.doiboost.orcid.model.WorkData;
public class XMLRecordParser {
private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common";
private static final String NS_COMMON = "common";
private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person";
private static final String NS_PERSON = "person";
private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details";
private static final String NS_DETAILS = "personal-details";
private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name";
private static final String NS_OTHER = "other-name";
private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record";
private static final String NS_RECORD = "record";
private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error";
private static final String NS_WORK = "work";
private static final String NS_WORK_URL = "http://www.orcid.org/ns/work";
private static final String NS_ERROR = "error";
public static AuthorData VTDParseAuthorData(byte[] bytes)
throws VtdException, EncodingException, EOFException, EntityException, ParseException {
final VTDGen vg = new VTDGen();
vg.setDoc(bytes);
vg.parse(true);
final VTDNav vn = vg.getNav();
final AutoPilot ap = new AutoPilot(vn);
ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
ap.declareXPathNameSpace(NS_PERSON, NS_PERSON_URL);
ap.declareXPathNameSpace(NS_DETAILS, NS_DETAILS_URL);
ap.declareXPathNameSpace(NS_OTHER, NS_OTHER_URL);
ap.declareXPathNameSpace(NS_RECORD, NS_RECORD_URL);
ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);
AuthorData authorData = new AuthorData();
final List<String> errors = VtdUtilityParser.getTextValue(ap, vn, "//error:response-code");
if (!errors.isEmpty()) {
authorData.setErrorCode(errors.get(0));
return authorData;
}
List<VtdUtilityParser.Node> recordNodes = VtdUtilityParser
.getTextValuesWithAttributes(
ap, vn, "//record:record", Arrays.asList("path"));
if (!recordNodes.isEmpty()) {
final String oid = (recordNodes.get(0).getAttributes().get("path")).substring(1);
authorData.setOid(oid);
} else {
return null;
}
final List<String> names = VtdUtilityParser.getTextValue(ap, vn, "//personal-details:given-names");
if (!names.isEmpty()) {
authorData.setName(names.get(0));
}
final List<String> surnames = VtdUtilityParser.getTextValue(ap, vn, "//personal-details:family-name");
if (!surnames.isEmpty()) {
authorData.setSurname(surnames.get(0));
}
final List<String> creditNames = VtdUtilityParser.getTextValue(ap, vn, "//personal-details:credit-name");
if (!creditNames.isEmpty()) {
authorData.setCreditName(creditNames.get(0));
}
return authorData;
}
public static WorkData VTDParseWorkData(byte[] bytes)
throws VtdException, EncodingException, EOFException, EntityException, ParseException {
final VTDGen vg = new VTDGen();
vg.setDoc(bytes);
vg.parse(true);
final VTDNav vn = vg.getNav();
final AutoPilot ap = new AutoPilot(vn);
ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL);
ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);
WorkData workData = new WorkData();
final List<String> errors = VtdUtilityParser.getTextValue(ap, vn, "//error:response-code");
if (!errors.isEmpty()) {
workData.setErrorCode(errors.get(0));
return workData;
}
List<VtdUtilityParser.Node> workNodes = VtdUtilityParser
.getTextValuesWithAttributes(ap, vn, "//work:work", Arrays.asList("path"));
if (!workNodes.isEmpty()) {
final String oid = (workNodes.get(0).getAttributes().get("path")).split("/")[1];
workData.setOid(oid);
} else {
return null;
}
final List<String> dois = VtdUtilityParser
.getTextValue(
ap, vn, "//common:external-id-type[text()=\"doi\"]/../common:external-id-value");
if (!dois.isEmpty()) {
workData.setDoi(dois.get(0));
workData.setDoiFound(true);
}
return workData;
}
}

View File

@ -0,0 +1,43 @@
package eu.dnetlib.doiboost.uw
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.oaf.Publication
import eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF
import org.apache.commons.io.IOUtils
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
import org.slf4j.{Logger, LoggerFactory}
object SparkMapUnpayWallToOAF {
def main(args: Array[String]): Unit = {
val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass)
val conf: SparkConf = new SparkConf()
val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json")))
parser.parseArgument(args)
val spark: SparkSession =
SparkSession
.builder()
.config(conf)
.appName(getClass.getSimpleName)
.master(parser.get("master")).getOrCreate()
implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication]
val sourcePath = parser.get("sourcePath")
val targetPath = parser.get("targetPath")
val inputRDD:RDD[String] = spark.sparkContext.textFile(s"$sourcePath")
logger.info("Converting UnpayWall to OAF")
val d:Dataset[Publication] = spark.createDataset(inputRDD.map(UnpayWallToOAF.convertToOAF).filter(p=>p!=null)).as[Publication]
d.write.mode(SaveMode.Overwrite).save(targetPath)
}
}

View File

@ -0,0 +1,63 @@
package eu.dnetlib.doiboost.uw
import eu.dnetlib.dhp.schema.oaf.{Instance, Publication}
import org.json4s
import org.json4s.DefaultFormats
import org.json4s.jackson.JsonMethods.parse
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
case class OALocation(evidence:Option[String], host_type:Option[String], is_best:Option[Boolean], license: Option[String], pmh_id:Option[String], updated:Option[String],
url:Option[String], url_for_landing_page:Option[String], url_for_pdf:Option[String], version:Option[String]) {}
object UnpayWallToOAF {
val logger: Logger = LoggerFactory.getLogger(getClass)
def convertToOAF(input:String):Publication = {
val pub = new Publication
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = parse(input)
val doi = (json \"doi").extract[String]
val is_oa = (json\ "is_oa").extract[Boolean]
val oaLocation:OALocation = (json \ "best_oa_location").extractOrElse[OALocation](null)
pub.setPid(List(createSP(doi, "doi", PID_TYPES)).asJava)
pub.setId(generateIdentifier(pub, doi.toLowerCase))
pub.setCollectedfrom(List(createUnpayWallCollectedFrom()).asJava)
pub.setDataInfo(generateDataInfo())
if (!is_oa)
return null
if(oaLocation== null || oaLocation.url.isEmpty)
return null
val i :Instance= new Instance()
i.setCollectedfrom(createUnpayWallCollectedFrom())
i.setAccessright(getOpenAccessQualifier())
i.setUrl(List(oaLocation.url.get).asJava)
if (oaLocation.license.isDefined)
i.setLicense(asField(oaLocation.license.get))
pub.setInstance(List(i).asJava)
pub
}
}

View File

@ -0,0 +1,6 @@
[
{"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the path of the sequencial file to read", "paramRequired": true},
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the working dir path", "paramRequired": true},
{"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true}
]

View File

@ -0,0 +1,6 @@
[
{"paramName":"n", "paramLongName":"hdfsServerUri", "paramDescription": "the server uri", "paramRequired": true},
{"paramName":"d", "paramLongName":"hdfsOrcidDefaultPath", "paramDescription": "the default work path", "paramRequired": true},
{"paramName":"f", "paramLongName":"summariesFileNameTarGz", "paramDescription": "the name of the summaries orcid file", "paramRequired": true},
{"paramName":"o", "paramLongName":"outputAuthorsPath", "paramDescription": "the relative folder of the sequencial file to write", "paramRequired": true}
]

View File

@ -0,0 +1,6 @@
[
{"paramName":"n", "paramLongName":"hdfsServerUri", "paramDescription": "the server uri", "paramRequired": true},
{"paramName":"d", "paramLongName":"hdfsOrcidDefaultPath", "paramDescription": "the default work path", "paramRequired": true},
{"paramName":"f", "paramLongName":"activitiesFileNameTarGz", "paramDescription": "the name of the activities orcid file", "paramRequired": true},
{"paramName":"o", "paramLongName":"outputAuthorsDOIsPath", "paramDescription": "the relative folder of the sequencial file to write", "paramRequired": true}
]

View File

@ -0,0 +1,42 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
<property>
<name>hive_metastore_uris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
</property>
<property>
<name>spark2EventLogDir</name>
<value>/user/spark/spark2ApplicationHistory</value>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>"com.cloudera.spark.lineage.NavigatorAppListener"</value>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
</property>
</configuration>

View File

@ -0,0 +1,80 @@
<workflow-app name="import Crossref from index into HDFS" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>workingPath</name>
<description>the working dir base path</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>timestamp</name>
<description>Timestamp for incremental Harvesting</description>
</property>
</parameters>
<start to="ExtractCrossrefToOAF"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ResetWorkingPath">
<fs>
<delete path='${workingPath}/input/crossref/index_dump'/>
<!-- <mkdir path='${workingPath}/input/crossref'/>-->
</fs>
<ok to="ImportCrossRef"/>
<error to="Kill"/>
</action>
<action name="ImportCrossRef">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.crossref.CrossrefImporter</main-class>
<arg>-t</arg><arg>${workingPath}/input/crossref/index_dump</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-ts</arg><arg>${timestamp}</arg>
</java>
<ok to="ExtractCrossrefToOAF"/>
<error to="Kill"/>
</action>
<action name="ExtractCrossrefToOAF">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>ExtractCrossrefToOAF</name>
<class>eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF</class>
<jar>dhp-doiboost-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
${sparkExtraOPT}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingPath}/input/crossref/index_dump,${workingPath}/crossref/index_dump</arg>
<arg>--targetPath</arg><arg>${workingPath}/input/crossref</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,7 @@
[
{"paramName":"n", "paramLongName":"hdfsServerUri", "paramDescription": "the server uri", "paramRequired": true},
{"paramName":"d", "paramLongName":"hdfsOrcidDefaultPath", "paramDescription": "the default work path", "paramRequired": true},
{"paramName":"f", "paramLongName":"lambdaFileName", "paramDescription": "the name of the lambda file", "paramRequired": true},
{"paramName":"o", "paramLongName":"outputPath", "paramDescription": "the relative folder of the sequencial file to write", "paramRequired": true},
{"paramName":"t", "paramLongName":"token", "paramDescription": "token to grant access", "paramRequired": true}
]

View File

@ -0,0 +1,3 @@
[{"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the working path", "paramRequired": true},
{"paramName":"o", "paramLongName":"outputDoiAuthorListPath", "paramDescription": "the relative folder of the sequencial file to write the data", "paramRequired": true}
]

View File

@ -0,0 +1,4 @@
[{"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the working path", "paramRequired": true},
{"paramName":"t", "paramLongName":"token", "paramDescription": "token to grant access", "paramRequired": true},
{"paramName":"o", "paramLongName":"outputAuthorsPath", "paramDescription": "the relative folder of the sequencial file to write the authors data", "paramRequired": true}
]

View File

@ -0,0 +1,9 @@
[
{"paramName": "m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true},
{"paramName": "dp", "paramLongName":"dbPublicationPath", "paramDescription": "the Crossref Publication Path", "paramRequired": true},
{"paramName": "dd", "paramLongName":"dbDatasetPath", "paramDescription": "the Crossref Dataset Path", "paramRequired": true},
{"paramName": "cr", "paramLongName":"crossRefRelation", "paramDescription": "the UnpayWall Publication Path", "paramRequired": true},
{"paramName": "da", "paramLongName":"dbaffiliationRelationPath", "paramDescription": "the MAG Publication Path", "paramRequired": true},
{"paramName": "do", "paramLongName":"dbOrganizationPath", "paramDescription": "the MAG Publication Path", "paramRequired": true},
{"paramName": "w", "paramLongName":"targetPath", "paramDescription": "the Working Path", "paramRequired": true}
]

View File

@ -0,0 +1,7 @@
[
{"paramName": "m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true},
{"paramName": "hb", "paramLongName":"hostedByMapPath", "paramDescription": "the hosted By Map Path", "paramRequired": true},
{"paramName": "ap", "paramLongName":"affiliationPath", "paramDescription": "the Affliation Path", "paramRequired": true},
{"paramName": "pa", "paramLongName":"paperAffiliationPath", "paramDescription": "the paperAffiliation Path", "paramRequired": true},
{"paramName": "w", "paramLongName":"workingDirPath", "paramDescription": "the Working Path", "paramRequired": true}
]

View File

@ -0,0 +1,5 @@
[
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the sequencial file to write", "paramRequired": true},
{"paramName":"n", "paramLongName":"namenode", "paramDescription": "the hive metastore uris", "paramRequired": true},
{"paramName":"ts", "paramLongName":"timestamp", "paramDescription": "timestamp", "paramRequired": false}
]

View File

@ -0,0 +1,38 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>hive_metastore_uris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
</property>
<property>
<name>spark2EventLogDir</name>
<value>/user/spark/spark2ApplicationHistory</value>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>"com.cloudera.spark.lineage.NavigatorAppListener"</value>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
</property>
</configuration>

View File

@ -0,0 +1,103 @@
<workflow-app name="Create DOIBoostActionSet" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>hostedByMapPath</name>
<description>the Hosted By Map Path</description>
</property>
<property>
<name>affiliationPath</name>
<description>the Affliation Path</description>
</property>
<property>
<name>paperAffiliationPath</name>
<description>the paperAffiliation Path</description>
</property>
<property>
<name>workingDirPath</name>
<description>the Working Path</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
</parameters>
<start to="GenerateActionSet"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ResetWorkingPath">
<fs>
<delete path='${workingDirPath}'/>
<mkdir path='${workingDirPath}'/>
</fs>
<ok to="CreateDOIBoost"/>
<error to="Kill"/>
</action>
<action name="CreateDOIBoost">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>Create DOIBoost Infospace</name>
<class>eu.dnetlib.doiboost.SparkGenerateDoiBoost</class>
<jar>dhp-doiboost-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.sql.shuffle.partitions=3840
${sparkExtraOPT}
</spark-opts>
<arg>--hostedByMapPath</arg><arg>${hostedByMapPath}</arg>
<arg>--affiliationPath</arg><arg>${affiliationPath}</arg>
<arg>--paperAffiliationPath</arg><arg>${paperAffiliationPath}</arg>
<arg>--workingDirPath</arg><arg>${workingDirPath}</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="GenerateActionSet"/>
<error to="Kill"/>
</action>
<action name="GenerateActionSet">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>Generate DOIBoost ActionSet</name>
<class>eu.dnetlib.doiboost.SparkGenerateDOIBoostActionSet</class>
<jar>dhp-doiboost-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.sql.shuffle.partitions=3840
${sparkExtraOPT}
</spark-opts>
<arg>--dbPublicationPath</arg><arg>${workingDirPath}/doiBoostPublicationFiltered</arg>
<arg>--dbDatasetPath</arg><arg>${workingDirPath}/crossrefDataset</arg>
<arg>--crossRefRelation</arg><arg>/data/doiboost/input/crossref/relations</arg>
<arg>--dbaffiliationRelationPath</arg><arg>${workingDirPath}/doiBoostPublicationAffiliation</arg>
<arg>-do</arg><arg>${workingDirPath}/doiBoostOrganization</arg>
<arg>--targetPath</arg><arg>${workingDirPath}/actionDataSet</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,6 @@
[
{"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the base path of MAG input", "paramRequired": true},
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the working dir path", "paramRequired": true},
{"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true}
]

View File

@ -0,0 +1,42 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>oozie.wf.rerun.failnodes</name>
<value>false</value>
</property>
<property>
<name>hive_metastore_uris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
</property>
<property>
<name>spark2EventLogDir</name>
<value>/user/spark/spark2ApplicationHistory</value>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>"com.cloudera.spark.lineage.NavigatorAppListener"</value>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
</property>
</configuration>

View File

@ -0,0 +1,87 @@
<workflow-app name="import MAG into HDFS" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the working dir base path</description>
</property>
<property>
<name>targetPath</name>
<description>the working dir base path</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
</parameters>
<start to="ResetWorkingPath"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ResetWorkingPath">
<fs>
<delete path='${targetPath}'/>
<mkdir path='${targetPath}'/>
</fs>
<ok to="PreprocessMag"/>
<error to="Kill"/>
</action>
<action name="ConvertMagToDataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>Convert Mag to Dataset</name>
<class>eu.dnetlib.doiboost.mag.SparkImportMagIntoDataset</class>
<jar>dhp-doiboost-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
${sparkExtraOPT}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--targetPath</arg><arg>${targetPath}</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<action name="PreprocessMag">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>Convert Mag to Dataset</name>
<class>eu.dnetlib.doiboost.mag.SparkPreProcessMAG</class>
<jar>dhp-doiboost-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.sql.shuffle.partitions=3840
${sparkExtraOPT}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--targetPath</arg><arg>${targetPath}</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,6 @@
[
{"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the base path of MAG input", "paramRequired": true},
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the working dir path", "paramRequired": true},
{"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true}
]

View File

@ -0,0 +1,22 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.action.sharelib.for.java</name>
<value>spark2</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.java.opts</name>
<value>-Xmx4g</value>
</property>
</configuration>

View File

@ -0,0 +1,41 @@
<workflow-app name="import Orcid" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>workingPath</name>
<description>the working dir base path</description>
</property>
</parameters>
<start to="ResetWorkingPath"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ResetWorkingPath">
<fs>
<delete path='${workingPath}/output'/>
<mkdir path='${workingPath}/output'/>
</fs>
<ok to="ImportOrcidSummary"/>
<error to="Kill"/>
</action>
<action name="ImportOrcidSummary">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.orcid.OrcidDSManager</main-class>
<arg>-d</arg><arg>${workingPath}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_summaries.tar.gz</arg>
<arg>-o</arg><arg>output/</arg>
</java>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,22 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.action.sharelib.for.java</name>
<value>spark2</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.java.opts</name>
<value>-Xmx4g</value>
</property>
</configuration>

View File

@ -0,0 +1,505 @@
<workflow-app name="Gen Orcid Authors DOIs" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>workingPath_activities</name>
<description>the working dir base path</description>
</property>
<property>
<name>shell_cmd_0</name>
<value>wget -O /tmp/ORCID_2019_activites_0.tar.gz https://orcid.figshare.com/ndownloader/files/18017660 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_0.tar.gz /data/orcid_activities/ORCID_2019_activites_0.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 0</description>
</property>
<property>
<name>shell_cmd_1</name>
<value>wget -O /tmp/ORCID_2019_activites_1.tar.gz https://orcid.figshare.com/ndownloader/files/18017675 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_1.tar.gz /data/orcid_activities/ORCID_2019_activites_1.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 1</description>
</property>
<property>
<name>shell_cmd_2</name>
<value>wget -O /tmp/ORCID_2019_activites_2.tar.gz https://orcid.figshare.com/ndownloader/files/18017717 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_2.tar.gz /data/orcid_activities/ORCID_2019_activites_2.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 2</description>
</property>
<property>
<name>shell_cmd_3</name>
<value>wget -O /tmp/ORCID_2019_activites_3.tar.gz https://orcid.figshare.com/ndownloader/files/18017765 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_3.tar.gz /data/orcid_activities/ORCID_2019_activites_3.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 3</description>
</property>
<property>
<name>shell_cmd_4</name>
<value>wget -O /tmp/ORCID_2019_activites_4.tar.gz https://orcid.figshare.com/ndownloader/files/18017831 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_4.tar.gz /data/orcid_activities/ORCID_2019_activites_4.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 4</description>
</property>
<property>
<name>shell_cmd_5</name>
<value>wget -O /tmp/ORCID_2019_activites_5.tar.gz https://orcid.figshare.com/ndownloader/files/18017987 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_5.tar.gz /data/orcid_activities/ORCID_2019_activites_5.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 5</description>
</property>
<property>
<name>shell_cmd_6</name>
<value>wget -O /tmp/ORCID_2019_activites_6.tar.gz https://orcid.figshare.com/ndownloader/files/18018053 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_6.tar.gz /data/orcid_activities/ORCID_2019_activites_6.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 6</description>
</property>
<property>
<name>shell_cmd_7</name>
<value>wget -O /tmp/ORCID_2019_activites_7.tar.gz https://orcid.figshare.com/ndownloader/files/18018023 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_7.tar.gz /data/orcid_activities/ORCID_2019_activites_7.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 7</description>
</property>
<property>
<name>shell_cmd_8</name>
<value>wget -O /tmp/ORCID_2019_activites_8.tar.gz https://orcid.figshare.com/ndownloader/files/18018248 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_8.tar.gz /data/orcid_activities/ORCID_2019_activites_8.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 8</description>
</property>
<property>
<name>shell_cmd_9</name>
<value>wget -O /tmp/ORCID_2019_activites_9.tar.gz https://orcid.figshare.com/ndownloader/files/18018029 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_9.tar.gz /data/orcid_activities/ORCID_2019_activites_9.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file 9</description>
</property>
<property>
<name>shell_cmd_X</name>
<value>wget -O /tmp/ORCID_2019_activites_X.tar.gz https://orcid.figshare.com/ndownloader/files/18018182 ; hdfs dfs -copyFromLocal /tmp/ORCID_2019_activites_X.tar.gz /data/orcid_activities/ORCID_2019_activites_X.tar.gz
</value>
<description>the shell command that downloads and puts to hdfs orcid activity file X</description>
</property>
</parameters>
<start to="ResetWorkingPath"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ResetWorkingPath">
<fs>
<delete path='${workingPath_activities}/output'/>
<mkdir path='${workingPath_activities}/output'/>
</fs>
<ok to="fork_gen_orcid_authors_dois"/>
<error to="Kill"/>
</action>
<fork name = "fork_gen_orcid_authors_dois">
<path start = "check_exist_on_hdfs_activities_0"/>
<path start = "check_exist_on_hdfs_activities_1"/>
<path start = "check_exist_on_hdfs_activities_2"/>
<path start = "check_exist_on_hdfs_activities_3"/>
<path start = "check_exist_on_hdfs_activities_4"/>
<path start = "check_exist_on_hdfs_activities_5"/>
<path start = "check_exist_on_hdfs_activities_6"/>
<path start = "check_exist_on_hdfs_activities_7"/>
<path start = "check_exist_on_hdfs_activities_8"/>
<path start = "check_exist_on_hdfs_activities_9"/>
<path start = "check_exist_on_hdfs_activities_X"/>
</fork>
<decision name="check_exist_on_hdfs_activities_0">
<switch>
<case to="Gen_Orcid_Authors_DOIs_0">
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_0.tar.gz'))}
</case>
<default to="Download_0" />
</switch>
</decision>
<action name="Download_0">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>bash</exec>
<argument>-c</argument>
<argument>${shell_cmd_0}</argument>
<capture-output/>
</shell>
<ok to="Gen_Orcid_Authors_DOIs_0"/>
<error to="Kill"/>
</action>
<action name="Gen_Orcid_Authors_DOIs_0">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
<arg>-d</arg><arg>${workingPath_activities}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_0.tar.gz</arg>
<arg>-o</arg><arg>output/authors_dois_0.seq</arg>
</java>
<ok to="join_node"/>
<error to="Kill"/>
</action>
<decision name="check_exist_on_hdfs_activities_1">
<switch>
<case to="Gen_Orcid_Authors_DOIs_1">
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_1.tar.gz'))}
</case>
<default to="Download_1" />
</switch>
</decision>
<action name="Download_1">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>bash</exec>
<argument>-c</argument>
<argument>${shell_cmd_1}</argument>
<capture-output/>
</shell>
<ok to="Gen_Orcid_Authors_DOIs_1"/>
<error to="Kill"/>
</action>
<action name="Gen_Orcid_Authors_DOIs_1">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
<arg>-d</arg><arg>${workingPath_activities}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_1.tar.gz</arg>
<arg>-o</arg><arg>output/authors_dois_1.seq</arg>
</java>
<ok to="join_node"/>
<error to="Kill"/>
</action>
<decision name="check_exist_on_hdfs_activities_2">
<switch>
<case to="Gen_Orcid_Authors_DOIs_2">
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_2.tar.gz'))}
</case>
<default to="Download_2" />
</switch>
</decision>
<action name="Download_2">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>bash</exec>
<argument>-c</argument>
<argument>${shell_cmd_2}</argument>
<capture-output/>
</shell>
<ok to="Gen_Orcid_Authors_DOIs_2"/>
<error to="Kill"/>
</action>
<action name="Gen_Orcid_Authors_DOIs_2">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
<arg>-d</arg><arg>${workingPath_activities}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_2.tar.gz</arg>
<arg>-o</arg><arg>output/authors_dois_2.seq</arg>
</java>
<ok to="join_node"/>
<error to="Kill"/>
</action>
<decision name="check_exist_on_hdfs_activities_3">
<switch>
<case to="Gen_Orcid_Authors_DOIs_3">
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_3.tar.gz'))}
</case>
<default to="Download_3" />
</switch>
</decision>
<action name="Download_3">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>bash</exec>
<argument>-c</argument>
<argument>${shell_cmd_3}</argument>
<capture-output/>
</shell>
<ok to="Gen_Orcid_Authors_DOIs_3"/>
<error to="Kill"/>
</action>
<action name="Gen_Orcid_Authors_DOIs_3">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
<arg>-d</arg><arg>${workingPath_activities}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_3.tar.gz</arg>
<arg>-o</arg><arg>output/authors_dois_3.seq</arg>
</java>
<ok to="join_node"/>
<error to="Kill"/>
</action>
<decision name="check_exist_on_hdfs_activities_4">
<switch>
<case to="Gen_Orcid_Authors_DOIs_4">
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_4.tar.gz'))}
</case>
<default to="Download_4" />
</switch>
</decision>
<action name="Download_4">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>bash</exec>
<argument>-c</argument>
<argument>${shell_cmd_4}</argument>
<capture-output/>
</shell>
<ok to="Gen_Orcid_Authors_DOIs_4"/>
<error to="Kill"/>
</action>
<action name="Gen_Orcid_Authors_DOIs_4">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
<arg>-d</arg><arg>${workingPath_activities}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_4.tar.gz</arg>
<arg>-o</arg><arg>output/authors_dois_4.seq</arg>
</java>
<ok to="join_node"/>
<error to="Kill"/>
</action>
<decision name="check_exist_on_hdfs_activities_5">
<switch>
<case to="Gen_Orcid_Authors_DOIs_5">
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_5.tar.gz'))}
</case>
<default to="Download_5" />
</switch>
</decision>
<action name="Download_5">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>bash</exec>
<argument>-c</argument>
<argument>${shell_cmd_5}</argument>
<capture-output/>
</shell>
<ok to="Gen_Orcid_Authors_DOIs_5"/>
<error to="Kill"/>
</action>
<action name="Gen_Orcid_Authors_DOIs_5">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
<arg>-d</arg><arg>${workingPath_activities}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_5.tar.gz</arg>
<arg>-o</arg><arg>output/authors_dois_5.seq</arg>
</java>
<ok to="join_node"/>
<error to="Kill"/>
</action>
<decision name="check_exist_on_hdfs_activities_6">
<switch>
<case to="Gen_Orcid_Authors_DOIs_6">
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_6.tar.gz'))}
</case>
<default to="Download_6" />
</switch>
</decision>
<action name="Download_6">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>bash</exec>
<argument>-c</argument>
<argument>${shell_cmd_6}</argument>
<capture-output/>
</shell>
<ok to="Gen_Orcid_Authors_DOIs_6"/>
<error to="Kill"/>
</action>
<action name="Gen_Orcid_Authors_DOIs_6">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
<arg>-d</arg><arg>${workingPath_activities}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_6.tar.gz</arg>
<arg>-o</arg><arg>output/authors_dois_6.seq</arg>
</java>
<ok to="join_node"/>
<error to="Kill"/>
</action>
<decision name="check_exist_on_hdfs_activities_7">
<switch>
<case to="Gen_Orcid_Authors_DOIs_7">
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_7.tar.gz'))}
</case>
<default to="Download_7" />
</switch>
</decision>
<action name="Download_7">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>bash</exec>
<argument>-c</argument>
<argument>${shell_cmd_7}</argument>
<capture-output/>
</shell>
<ok to="Gen_Orcid_Authors_DOIs_7"/>
<error to="Kill"/>
</action>
<action name="Gen_Orcid_Authors_DOIs_7">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
<arg>-d</arg><arg>${workingPath_activities}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_7.tar.gz</arg>
<arg>-o</arg><arg>output/authors_dois_7.seq</arg>
</java>
<ok to="join_node"/>
<error to="Kill"/>
</action>
<decision name="check_exist_on_hdfs_activities_8">
<switch>
<case to="Gen_Orcid_Authors_DOIs_8">
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_8.tar.gz'))}
</case>
<default to="Download_8" />
</switch>
</decision>
<action name="Download_8">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>bash</exec>
<argument>-c</argument>
<argument>${shell_cmd_8}</argument>
<capture-output/>
</shell>
<ok to="Gen_Orcid_Authors_DOIs_8"/>
<error to="Kill"/>
</action>
<action name="Gen_Orcid_Authors_DOIs_8">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
<arg>-d</arg><arg>${workingPath_activities}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_8.tar.gz</arg>
<arg>-o</arg><arg>output/authors_dois_8.seq</arg>
</java>
<ok to="join_node"/>
<error to="Kill"/>
</action>
<decision name="check_exist_on_hdfs_activities_9">
<switch>
<case to="Gen_Orcid_Authors_DOIs_9">
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_9.tar.gz'))}
</case>
<default to="Download_9" />
</switch>
</decision>
<action name="Download_9">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>bash</exec>
<argument>-c</argument>
<argument>${shell_cmd_9}</argument>
<capture-output/>
</shell>
<ok to="Gen_Orcid_Authors_DOIs_9"/>
<error to="Kill"/>
</action>
<action name="Gen_Orcid_Authors_DOIs_9">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
<arg>-d</arg><arg>${workingPath_activities}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_9.tar.gz</arg>
<arg>-o</arg><arg>output/authors_dois_9.seq</arg>
</java>
<ok to="join_node"/>
<error to="Kill"/>
</action>
<decision name="check_exist_on_hdfs_activities_X">
<switch>
<case to="Gen_Orcid_Authors_DOIs_X">
${fs:exists(concat(workingPath_activities,'/ORCID_2019_activites_X.tar.gz'))}
</case>
<default to="Download_X" />
</switch>
</decision>
<action name="Download_X">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>bash</exec>
<argument>-c</argument>
<argument>${shell_cmd_X}</argument>
<capture-output/>
</shell>
<ok to="Gen_Orcid_Authors_DOIs_X"/>
<error to="Kill"/>
</action>
<action name="Gen_Orcid_Authors_DOIs_X">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.orcid.OrcidAuthorsDOIsDataGen</main-class>
<arg>-d</arg><arg>${workingPath_activities}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>ORCID_2019_activites_X.tar.gz</arg>
<arg>-o</arg><arg>output/authors_dois_X.seq</arg>
</java>
<ok to="join_node"/>
<error to="Kill"/>
</action>
<join name = "join_node" to = "End"/>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,18 @@
<configuration>
<property>
<name>jobTracker</name>
<value>hadoop-rm3.garr-pa1.d4science.org:8032</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://hadoop-rm1.garr-pa1.d4science.org:8020</value>
</property>
<property>
<name>queueName</name>
<value>default</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
</configuration>

View File

@ -0,0 +1,55 @@
<workflow-app name="Gen_Doi_Author_List_WF" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>workingPath</name>
<description>the working dir base path</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
</parameters>
<start to="ResetWorkingPath"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ResetWorkingPath">
<fs>
<delete path='${workingPath_activities}/doi_author_list'/>
</fs>
<ok to="Gen_Doi_Author_List"/>
<error to="Kill"/>
</action>
<action name="Gen_Doi_Author_List">
<spark xmlns="uri:oozie:spark-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<master>yarn</master>
<mode>cluster</mode>
<name>Gen_Doi_Author_List</name>
<class>eu.dnetlib.doiboost.orcid.SparkGenerateDoiAuthorList</class>
<jar>dhp-doiboost-1.2.1-SNAPSHOT.jar</jar>
<spark-opts>--num-executors 10 --conf spark.yarn.jars=&quot;hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/oozie/share/lib/lib_20180405103059/spark2&quot; --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory}
</spark-opts>
<arg>-w</arg><arg>${workingPath}/</arg>
<arg>-o</arg><arg>doi_author_list/</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,22 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.action.sharelib.for.java</name>
<value>spark2</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
<property>
<name>oozie.launcher.mapreduce.map.java.opts</name>
<value>-Xmx4g</value>
</property>
</configuration>

View File

@ -0,0 +1,45 @@
<workflow-app name="Orcid Download" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>workingPathOrcid</name>
<description>the working dir base path</description>
</property>
<property>
<name>token</name>
<description>access token</description>
</property>
</parameters>
<start to="ResetWorkingPath"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ResetWorkingPath">
<fs>
<delete path='${workingPathOrcid}/download'/>
<mkdir path='${workingPathOrcid}/download'/>
</fs>
<ok to="DownloadOrcidData"/>
<error to="Kill"/>
</action>
<action name="DownloadOrcidData">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<main-class>eu.dnetlib.doiboost.orcid.OrcidDownloader</main-class>
<arg>-d</arg><arg>${workingPathOrcid}/</arg>
<arg>-n</arg><arg>${nameNode}</arg>
<arg>-f</arg><arg>last_modified.csv</arg>
<arg>-o</arg><arg>download/</arg>
<arg>-t</arg><arg>${token}</arg>
</java>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,22 @@
<configuration>
<property>
<name>jobTracker</name>
<value>hadoop-rm3.garr-pa1.d4science.org:8032</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://hadoop-rm1.garr-pa1.d4science.org:8020</value>
</property>
<property>
<name>queueName</name>
<value>default</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
</configuration>

View File

@ -0,0 +1,83 @@
<workflow-app name="Gen Orcid Authors" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>workingPath</name>
<description>the working dir base path</description>
</property>
<property>
<name>token</name>
<description>access token</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>outputPath</name>
<description>the working dir base path</description>
</property>
</parameters>
<start to="ResetWorkingPath"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="ResetWorkingPath">
<fs>
<delete path='${workingPath_activities}/authors'/>
</fs>
<ok to="Gen_Orcid_Authors"/>
<error to="Kill"/>
</action>
<action name="Split_Lambda_File">
<spark xmlns="uri:oozie:spark-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<master>yarn</master>
<mode>cluster</mode>
<name>Split_Lambda_File</name>
<class>eu.dnetlib.doiboost.orcid.SparkPartitionLambdaFile</class>
<jar>dhp-doiboost-1.2.1-SNAPSHOT.jar</jar>
<spark-opts>--num-executors 24 --conf spark.yarn.jars=&quot;hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/oozie/share/lib/lib_20180405103059/spark2&quot; --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory}
</spark-opts>
<arg>-w</arg><arg>${workingPath}/</arg>
<arg>-o</arg><arg>authors/</arg>
<arg>-t</arg><arg>${token}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<action name="Gen_Orcid_Authors">
<spark xmlns="uri:oozie:spark-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<master>yarn</master>
<mode>cluster</mode>
<name>Gen_Orcid_Authors</name>
<class>eu.dnetlib.doiboost.orcid.SparkOrcidGenerateAuthors</class>
<jar>dhp-doiboost-1.2.1-SNAPSHOT.jar</jar>
<spark-opts>--num-executors 20 --conf spark.yarn.jars=&quot;hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/oozie/share/lib/lib_20180405103059/spark2&quot; --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory}
</spark-opts>
<arg>-w</arg><arg>${workingPath}/</arg>
<arg>-o</arg><arg>authors/</arg>
<arg>-t</arg><arg>${token}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,38 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>hive_metastore_uris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
</property>
<property>
<name>spark2EventLogDir</name>
<value>/user/spark/spark2ApplicationHistory</value>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>"com.cloudera.spark.lineage.NavigatorAppListener"</value>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
</property>
</configuration>

View File

@ -0,0 +1,55 @@
<workflow-app name="import ORCID into HDFS" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the working dir base path</description>
</property>
<property>
<name>targetPath</name>
<description>the working dir base path</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
</parameters>
<start to="PreprocessORCID"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="PreprocessORCID">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>Convert ORCID to Dataset</name>
<class>eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF</class>
<jar>dhp-doiboost-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.sql.shuffle.partitions=3840
${sparkExtraOPT}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--targetPath</arg><arg>${targetPath}</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,38 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>hive_metastore_uris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
</property>
<property>
<name>spark2EventLogDir</name>
<value>/user/spark/spark2ApplicationHistory</value>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>"com.cloudera.spark.lineage.NavigatorAppListener"</value>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
</property>
</configuration>

View File

@ -0,0 +1,55 @@
<workflow-app name="import UnpayWall into HDFS" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the working dir base path</description>
</property>
<property>
<name>targetPath</name>
<description>the working dir base path</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
</parameters>
<start to="PreprocessUW"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="PreprocessUW">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>Convert UnpayWall to Dataset</name>
<class>eu.dnetlib.doiboost.uw.SparkMapUnpayWallToOAF</class>
<jar>dhp-doiboost-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.sql.shuffle.partitions=3840
${sparkExtraOPT}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/uw_extracted</arg>
<arg>--targetPath</arg><arg>${targetPath}</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,70 @@
package eu.dnetlib.dhp.doiboost
import eu.dnetlib.dhp.schema.oaf.{Publication, Dataset => OafDataset}
import eu.dnetlib.doiboost.{DoiBoostMappingUtil, HostedByItemType}
import eu.dnetlib.doiboost.SparkGenerateDoiBoost.getClass
import eu.dnetlib.doiboost.mag.ConversionUtil
import eu.dnetlib.doiboost.orcid.ORCIDElement
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
import org.junit.jupiter.api.Test
import scala.io.Source
class DoiBoostHostedByMapTest {
// @Test
// def testMerge():Unit = {
// val conf: SparkConf = new SparkConf()
// val spark: SparkSession =
// SparkSession
// .builder()
// .config(conf)
// .appName(getClass.getSimpleName)
// .master("local[*]").getOrCreate()
//
//
//
// implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication]
// implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset]
// implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPub)
//
//
// import spark.implicits._
// val dataset:RDD[String]= spark.sparkContext.textFile("/home/sandro/Downloads/hbMap.gz")
//
//
// val hbMap:Dataset[(String, HostedByItemType)] =spark.createDataset(dataset.map(DoiBoostMappingUtil.toHostedByItem))
//
//
// hbMap.show()
//
//
//
//
//
//
//
//
//
//
// }
@Test
def idDSGeneration():Unit = {
val s ="doajarticles::0066-782X"
println(DoiBoostMappingUtil.generateDSId(s))
}
}

View File

@ -0,0 +1,365 @@
package eu.dnetlib.doiboost.crossref
import eu.dnetlib.dhp.schema.oaf._
import eu.dnetlib.dhp.utils.DHPUtils
import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
import org.junit.jupiter.api.Assertions._
import org.junit.jupiter.api.Test
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._
import scala.io.Source
import scala.util.matching.Regex
class CrossrefMappingTest {
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
val mapper = new ObjectMapper()
@Test
def testFunderRelationshipsMapping(): Unit = {
val template = Source.fromInputStream(getClass.getResourceAsStream("article_funder_template.json")).mkString
val funder_doi = Source.fromInputStream(getClass.getResourceAsStream("funder_doi")).mkString
val funder_name = Source.fromInputStream(getClass.getResourceAsStream("funder_doi")).mkString
for (line <- funder_doi.lines) {
val json = template.replace("%s", line)
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
assertTrue(resultList.nonEmpty)
checkRelation(resultList)
}
for (line <- funder_name.lines) {
val json = template.replace("%s", line)
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
assertTrue(resultList.nonEmpty)
checkRelation(resultList)
}
}
def checkRelation(generatedOAF: List[Oaf]): Unit = {
val rels: List[Relation] = generatedOAF.filter(p => p.isInstanceOf[Relation]).asInstanceOf[List[Relation]]
assertFalse(rels.isEmpty)
rels.foreach(relation => {
val relJson = mapper.writeValueAsString(relation)
assertNotNull(relation.getSource, s"Source of relation null $relJson")
assertNotNull(relation.getTarget, s"Target of relation null $relJson")
assertFalse(relation.getTarget.isEmpty, s"Target is empty: $relJson")
assertFalse(relation.getRelClass.isEmpty, s"RelClass is empty: $relJson")
assertFalse(relation.getRelType.isEmpty, s"RelType is empty: $relJson")
assertFalse(relation.getSubRelType.isEmpty, s"SubRelType is empty: $relJson")
})
}
@Test
def testEmptyTitle() :Unit = {
val json = Source.fromInputStream(getClass.getResourceAsStream("empty_title.json")).mkString
assertNotNull(json)
assertFalse(json.isEmpty);
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
assertTrue(resultList.nonEmpty)
val items = resultList.filter(p => p.isInstanceOf[Result])
mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
items.foreach(p => println(mapper.writeValueAsString(p)))
}
@Test
def testPeerReviewed(): Unit = {
val json = Source.fromInputStream(getClass.getResourceAsStream("prwTest.json")).mkString
mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
assertNotNull(json)
assertFalse(json.isEmpty);
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
assertTrue(resultList.nonEmpty)
val items = resultList.filter(p => p.isInstanceOf[Result])
items.foreach(p => logger.info(mapper.writeValueAsString(p)))
}
def extractECAward(award: String): String = {
val awardECRegex: Regex = "[0-9]{4,9}".r
if (awardECRegex.findAllIn(award).hasNext)
return awardECRegex.findAllIn(award).max
null
}
@Test
def extractECTest(): Unit = {
val s = "FP7/2007-2013"
val awardExtracted = extractECAward(s)
println(awardExtracted)
println(DHPUtils.md5(awardExtracted))
}
@Test
def testJournalRelation(): Unit = {
val json = Source.fromInputStream(getClass.getResourceAsStream("awardTest.json")).mkString
assertNotNull(json)
assertFalse(json.isEmpty)
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
assertTrue(resultList.nonEmpty)
val rels:List[Relation] = resultList.filter(p => p.isInstanceOf[Relation]).map(r=> r.asInstanceOf[Relation])
rels.foreach(s => logger.info(s.getTarget))
assertEquals(rels.size, 3 )
}
@Test
def testConvertBookFromCrossRef2Oaf(): Unit = {
val json = Source.fromInputStream(getClass.getResourceAsStream("book.json")).mkString
assertNotNull(json)
assertFalse(json.isEmpty);
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
assertTrue(resultList.nonEmpty)
val items = resultList.filter(p => p.isInstanceOf[Result])
assert(items.nonEmpty)
assert(items.size == 1)
val result: Result = items.head.asInstanceOf[Result]
assertNotNull(result)
logger.info(mapper.writeValueAsString(result));
assertNotNull(result.getDataInfo, "Datainfo test not null Failed");
assertNotNull(
result.getDataInfo.getProvenanceaction,
"DataInfo/Provenance test not null Failed");
assertFalse(
result.getDataInfo.getProvenanceaction.getClassid.isEmpty,
"DataInfo/Provenance/classId test not null Failed");
assertFalse(
result.getDataInfo.getProvenanceaction.getClassname.isEmpty,
"DataInfo/Provenance/className test not null Failed");
assertFalse(
result.getDataInfo.getProvenanceaction.getSchemeid.isEmpty,
"DataInfo/Provenance/SchemeId test not null Failed");
assertFalse(
result.getDataInfo.getProvenanceaction.getSchemename.isEmpty,
"DataInfo/Provenance/SchemeName test not null Failed");
assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed");
assertFalse(result.getCollectedfrom.isEmpty);
val collectedFromList = result.getCollectedfrom.asScala
assert(collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), "Wrong collected from assertion")
assert(collectedFromList.exists(c => c.getValue.equalsIgnoreCase("crossref")), "Wrong collected from assertion")
val relevantDates = result.getRelevantdate.asScala
assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("created")), "Missing relevant date of type created")
assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-online")), "Missing relevant date of type published-online")
assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-print")), "Missing relevant date of type published-print")
val rels = resultList.filter(p => p.isInstanceOf[Relation])
assert(rels.isEmpty)
}
@Test
def testConvertPreprintFromCrossRef2Oaf(): Unit = {
val json = Source.fromInputStream(getClass.getResourceAsStream("preprint.json")).mkString
assertNotNull(json)
assertFalse(json.isEmpty);
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
assertTrue(resultList.nonEmpty)
val items = resultList.filter(p => p.isInstanceOf[Publication])
assert(items.nonEmpty)
assert(items.size == 1)
val result: Result = items.head.asInstanceOf[Publication]
assertNotNull(result)
logger.info(mapper.writeValueAsString(result));
assertNotNull(result.getDataInfo, "Datainfo test not null Failed");
assertNotNull(
result.getDataInfo.getProvenanceaction,
"DataInfo/Provenance test not null Failed");
assertFalse(
result.getDataInfo.getProvenanceaction.getClassid.isEmpty,
"DataInfo/Provenance/classId test not null Failed");
assertFalse(
result.getDataInfo.getProvenanceaction.getClassname.isEmpty,
"DataInfo/Provenance/className test not null Failed");
assertFalse(
result.getDataInfo.getProvenanceaction.getSchemeid.isEmpty,
"DataInfo/Provenance/SchemeId test not null Failed");
assertFalse(
result.getDataInfo.getProvenanceaction.getSchemename.isEmpty,
"DataInfo/Provenance/SchemeName test not null Failed");
assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed");
assertFalse(result.getCollectedfrom.isEmpty);
val collectedFromList = result.getCollectedfrom.asScala
assert(collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), "Wrong collected from assertion")
assert(collectedFromList.exists(c => c.getValue.equalsIgnoreCase("crossref")), "Wrong collected from assertion")
val relevantDates = result.getRelevantdate.asScala
assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("created")), "Missing relevant date of type created")
assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("available")), "Missing relevant date of type available")
assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("accepted")), "Missing relevant date of type accepted")
assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-online")), "Missing relevant date of type published-online")
assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("published-print")), "Missing relevant date of type published-print")
val rels = resultList.filter(p => p.isInstanceOf[Relation])
assert(rels.isEmpty)
}
@Test
def testConvertDatasetFromCrossRef2Oaf(): Unit = {
val json = Source.fromInputStream(getClass.getResourceAsStream("dataset.json")).mkString
assertNotNull(json)
assertFalse(json.isEmpty);
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
assertTrue(resultList.nonEmpty)
val items = resultList.filter(p => p.isInstanceOf[Dataset])
assert(items.nonEmpty)
assert(items.size == 1)
val result: Result = items.head.asInstanceOf[Dataset]
assertNotNull(result)
logger.info(mapper.writeValueAsString(result));
assertNotNull(result.getDataInfo, "Datainfo test not null Failed");
assertNotNull(
result.getDataInfo.getProvenanceaction,
"DataInfo/Provenance test not null Failed");
assertFalse(
result.getDataInfo.getProvenanceaction.getClassid.isEmpty,
"DataInfo/Provenance/classId test not null Failed");
assertFalse(
result.getDataInfo.getProvenanceaction.getClassname.isEmpty,
"DataInfo/Provenance/className test not null Failed");
assertFalse(
result.getDataInfo.getProvenanceaction.getSchemeid.isEmpty,
"DataInfo/Provenance/SchemeId test not null Failed");
assertFalse(
result.getDataInfo.getProvenanceaction.getSchemename.isEmpty,
"DataInfo/Provenance/SchemeName test not null Failed");
assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed");
assertFalse(result.getCollectedfrom.isEmpty);
}
@Test
def testConvertArticleFromCrossRef2Oaf(): Unit = {
val json = Source.fromInputStream(getClass.getResourceAsStream("article.json")).mkString
assertNotNull(json)
assertFalse(json.isEmpty);
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
assertTrue(resultList.nonEmpty)
val items = resultList.filter(p => p.isInstanceOf[Publication])
assert(items.nonEmpty)
assert(items.size == 1)
val result: Result = items.head.asInstanceOf[Publication]
assertNotNull(result)
logger.info(mapper.writeValueAsString(result));
assertNotNull(result.getDataInfo, "Datainfo test not null Failed");
assertNotNull(
result.getDataInfo.getProvenanceaction,
"DataInfo/Provenance test not null Failed");
assertFalse(
result.getDataInfo.getProvenanceaction.getClassid.isEmpty,
"DataInfo/Provenance/classId test not null Failed");
assertFalse(
result.getDataInfo.getProvenanceaction.getClassname.isEmpty,
"DataInfo/Provenance/className test not null Failed");
assertFalse(
result.getDataInfo.getProvenanceaction.getSchemeid.isEmpty,
"DataInfo/Provenance/SchemeId test not null Failed");
assertFalse(
result.getDataInfo.getProvenanceaction.getSchemename.isEmpty,
"DataInfo/Provenance/SchemeName test not null Failed");
assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed");
assertFalse(result.getCollectedfrom.isEmpty);
val collectedFromList = result.getCollectedfrom.asScala
assert(collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")), "Wrong collected from assertion")
assert(collectedFromList.exists(c => c.getValue.equalsIgnoreCase("crossref")), "Wrong collected from assertion")
val relevantDates = result.getRelevantdate.asScala
assert(relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("created")), "Missing relevant date of type created")
val rels = resultList.filter(p => p.isInstanceOf[Relation]).asInstanceOf[List[Relation]]
assertFalse(rels.isEmpty)
rels.foreach(relation => {
assertNotNull(relation)
assertFalse(relation.getSource.isEmpty)
assertFalse(relation.getTarget.isEmpty)
assertFalse(relation.getRelClass.isEmpty)
assertFalse(relation.getRelType.isEmpty)
assertFalse(relation.getSubRelType.isEmpty)
})
}
}

View File

@ -0,0 +1,69 @@
package eu.dnetlib.doiboost.mag
import java.sql.Timestamp
import eu.dnetlib.dhp.schema.oaf.Publication
import org.apache.htrace.fasterxml.jackson.databind.SerializationFeature
import org.apache.spark.SparkConf
import org.apache.spark.api.java.function.MapFunction
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
import org.junit.jupiter.api.Test
import org.slf4j.{Logger, LoggerFactory}
import org.junit.jupiter.api.Assertions._
import org.apache.spark.sql.functions._
import scala.collection.JavaConverters._
import scala.io.Source
import scala.reflect.ClassTag
import scala.util.matching.Regex
class MAGMappingTest {
val logger: Logger = LoggerFactory.getLogger(getClass)
val mapper = new ObjectMapper()
@Test
def testSplitter():Unit = {
val s = "sports.team"
if (s.contains(".")) {
println(s.split("\\.")head)
}
}
@Test
def testDate() :Unit = {
val p:Timestamp = Timestamp.valueOf("2011-10-02 00:00:00")
println(p.toString.substring(0,10))
}
@Test
def buildInvertedIndexTest(): Unit = {
val json_input = Source.fromInputStream(getClass.getResourceAsStream("invertedIndex.json")).mkString
val description = ConversionUtil.convertInvertedIndexString(json_input)
assertNotNull(description)
assertTrue(description.nonEmpty)
logger.debug(description)
}
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,30 @@
package eu.dnetlib.doiboost.orcid
import eu.dnetlib.dhp.schema.oaf.Publication
import eu.dnetlib.doiboost.crossref.Crossref2Oaf
import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
import org.junit.jupiter.api.Test
import org.slf4j.{Logger, LoggerFactory}
import org.junit.jupiter.api.Assertions._
import scala.io.Source
class MappingORCIDToOAFTest {
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
val mapper = new ObjectMapper()
@Test
def testExtractData():Unit ={
val json = Source.fromInputStream(getClass.getResourceAsStream("dataOutput")).mkString
assertNotNull(json)
assertFalse(json.isEmpty)
json.lines.foreach(s => {
assertNotNull(ORCIDToOAF.extractValueFromInputString(s))
})
}
}

View File

@ -0,0 +1,136 @@
package eu.dnetlib.doiboost.orcid;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.junit.jupiter.api.Test;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
public class OrcidClientTest {
final String orcidId = "0000-0001-7291-3210";
final int REQ_LIMIT = 24;
final int REQ_MAX_TEST = 100;
final int RECORD_DOWNLOADED_COUNTER_LOG_INTERVAL = 10;
final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
final String toRetrieveDate = "2020-05-06 23:59:46.031145";
String toNotRetrieveDate = "2019-09-29 23:59:59.000000";
String lastUpdate = "2019-09-30 00:00:00";
String shortDate = "2020-05-06 16:06:11";
// curl -i -H "Accept: application/vnd.orcid+xml"
// -H 'Authorization: Bearer 78fdb232-7105-4086-8570-e153f4198e3d'
// 'https://api.orcid.org/v3.0/0000-0001-7291-3210/record'
public String testDownloadRecord(String orcidId) throws Exception {
try (CloseableHttpClient client = HttpClients.createDefault()) {
HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record");
httpGet.addHeader("Accept", "application/vnd.orcid+xml");
httpGet.addHeader("Authorization", "Bearer 78fdb232-7105-4086-8570-e153f4198e3d");
CloseableHttpResponse response = client.execute(httpGet);
if (response.getStatusLine().getStatusCode() != 200) {
System.out
.println("Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode());
}
return IOUtils.toString(response.getEntity().getContent());
} catch (Throwable e) {
e.printStackTrace();
}
return new String("");
}
// @Test
public void testLambdaFileParser() throws Exception {
try (BufferedReader br = new BufferedReader(
new InputStreamReader(this.getClass().getResourceAsStream("last_modified.csv")))) {
String line;
int counter = 0;
int nReqTmp = 0;
long startDownload = System.currentTimeMillis();
long startReqTmp = System.currentTimeMillis();
while ((line = br.readLine()) != null) {
counter++;
// skip headers line
if (counter == 1) {
continue;
}
String[] values = line.split(",");
List<String> recordInfo = Arrays.asList(values);
testDownloadRecord(recordInfo.get(0));
long endReq = System.currentTimeMillis();
nReqTmp++;
if (nReqTmp == REQ_LIMIT) {
long reqSessionDuration = endReq - startReqTmp;
if (reqSessionDuration <= 1000) {
System.out
.println(
"\nreqSessionDuration: " + reqSessionDuration + " nReqTmp: " + nReqTmp + " wait ....");
Thread.sleep(1000 - reqSessionDuration);
} else {
nReqTmp = 0;
startReqTmp = System.currentTimeMillis();
}
}
if (counter > REQ_MAX_TEST) {
break;
}
if ((counter % RECORD_DOWNLOADED_COUNTER_LOG_INTERVAL) == 0) {
System.out.println("Current record downloaded: " + counter);
}
}
long endDownload = System.currentTimeMillis();
long downloadTime = endDownload - startDownload;
System.out.println("Download time: " + ((downloadTime / 1000) / 60) + " minutes");
}
}
// @Test
public void getRecordDatestamp() throws ParseException {
Date toRetrieveDateDt = new SimpleDateFormat(DATE_FORMAT).parse(toRetrieveDate);
Date toNotRetrieveDateDt = new SimpleDateFormat(DATE_FORMAT).parse(toNotRetrieveDate);
Date lastUpdateDt = new SimpleDateFormat(DATE_FORMAT).parse(lastUpdate);
assertTrue(toRetrieveDateDt.after(lastUpdateDt));
assertTrue(!toNotRetrieveDateDt.after(lastUpdateDt));
}
public void testDate(String value) throws ParseException {
System.out.println(value.toString());
if (value.length() != 19) {
value = value.substring(0, 19);
}
Date valueDt = new SimpleDateFormat(DATE_FORMAT).parse(value);
System.out.println(valueDt.toString());
}
// @Test
public void testModifiedDate() throws ParseException {
testDate(toRetrieveDate);
testDate(toNotRetrieveDate);
testDate(shortDate);
}
// @Test
public void testReadBase64CompressedRecord() throws Exception {
final String base64CompressedRecord = IOUtils
.toString(getClass().getResourceAsStream("0000-0001-6645-509X.compressed.base64"));
final String recordFromSeqFile = ArgumentApplicationParser.decompressValue(base64CompressedRecord);
System.out.println(recordFromSeqFile);
final String downloadedRecord = testDownloadRecord("0000-0001-6645-509X");
assertTrue(recordFromSeqFile.equals(downloadedRecord));
}
}

View File

@ -0,0 +1,58 @@
package eu.dnetlib.doiboost.orcid.xml;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import eu.dnetlib.doiboost.orcid.model.AuthorData;
import eu.dnetlib.doiboost.orcid.model.WorkData;
public class XMLRecordParserTest {
@Test
public void testOrcidAuthorDataXMLParser() throws Exception {
String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_0000-0001-6828-479X.xml"));
XMLRecordParser p = new XMLRecordParser();
AuthorData authorData = p.VTDParseAuthorData(xml.getBytes());
assertNotNull(authorData);
assertNotNull(authorData.getName());
System.out.println("name: " + authorData.getName());
assertNotNull(authorData.getSurname());
System.out.println("surname: " + authorData.getSurname());
}
@Test
public void testOrcidXMLErrorRecordParser() throws Exception {
String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_error.xml"));
XMLRecordParser p = new XMLRecordParser();
AuthorData authorData = p.VTDParseAuthorData(xml.getBytes());
assertNotNull(authorData);
assertNotNull(authorData.getErrorCode());
System.out.println("error: " + authorData.getErrorCode());
}
@Test
public void testOrcidWorkDataXMLParser() throws Exception {
String xml = IOUtils
.toString(
this.getClass().getResourceAsStream("activity_work_0000-0002-5982-8983.xml"));
XMLRecordParser p = new XMLRecordParser();
WorkData workData = p.VTDParseWorkData(xml.getBytes());
assertNotNull(workData);
assertNotNull(workData.getOid());
System.out.println("oid: " + workData.getOid());
assertNotNull(workData.getDoi());
System.out.println("doi: " + workData.getDoi());
}
}

View File

@ -0,0 +1,49 @@
package eu.dnetlib.doiboost.uw
import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
import org.junit.jupiter.api.Test
import scala.io.Source
import org.junit.jupiter.api.Assertions._
import org.slf4j.{Logger, LoggerFactory}
class UnpayWallMappingTest {
val logger: Logger = LoggerFactory.getLogger(getClass)
val mapper = new ObjectMapper()
@Test
def testMappingToOAF():Unit ={
val Ilist = Source.fromInputStream(getClass.getResourceAsStream("input.json")).mkString
for (line <-Ilist.lines) {
val p = UnpayWallToOAF.convertToOAF(line)
if(p!= null) {
assertTrue(p.getPid.size()==1)
logger.info(p.getId)
}
assertNotNull(line)
assertTrue(line.nonEmpty)
}
mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
val l = Ilist.lines.next()
logger.info(mapper.writeValueAsString(UnpayWallToOAF.convertToOAF(l)))
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,175 @@
{
"DOI": "10.26850/1678-4618eqj.v35.1.2010.p41-46",
"issued": {
"date-parts": [
[
2018,
1,
15
]
]
},
"abstract": "<jats:p>A qualitative spot-test and tandem quantitative analysis of dipyrone in the bulk drugand in pharmaceutical preparations is proposed. The formation of a reddish-violet\u00a0 color indicates a positive result. In sequence a quantitative procedure can be performed in the same flask. The quantitative results obtained were statistically compared with those obtained with the method indicated by the Brazilian\u00a0 Pharmacopoeia, using the Student\u2019s t and the F tests. Considering the concentration in a 100 \u03bcL aliquot, the qualitative visual limit of detection is about 5\u00d710-6 g; instrumental LOD \u2245 1.4\u00d710-4 mol L-1 ; LOQ \u2245 4.5\u00d710-4 mol L-1.</jats:p>",
"prefix": "10.26850",
"author": [
{
"authenticated-orcid": false,
"given": "Matthieu",
"family": "Tubino",
"sequence": "first",
"affiliation": [],
"ORCID": "http://orcid.org/0000-0002-1987-3907"
},
{
"affiliation": [],
"given": "A. C.",
"family": "Biondo",
"sequence": "additional"
},
{
"authenticated-orcid": false,
"given": "Marta Maria Duarte Carvalho",
"family": "Vila",
"sequence": "additional",
"affiliation": [],
"ORCID": "http://orcid.org/0000-0002-0198-7076"
},
{
"authenticated-orcid": false,
"given": "Leonardo",
"family": "Pezza",
"sequence": "additional",
"affiliation": [],
"ORCID": "http://orcid.org/0000-0003-0197-7369"
},
{
"authenticated-orcid": false,
"given": "Helena Redigolo",
"family": "Pezza",
"sequence": "additional",
"affiliation": [],
"ORCID": "http://orcid.org/0000-0001-5564-1639"
}
],
"reference-count": 0,
"ISSN": [
"1678-4618"
],
"member": "11395",
"source": "Crossref",
"score": 1.0,
"deposited": {
"timestamp": 1540823529000,
"date-time": "2018-10-29T14:32:09Z",
"date-parts": [
[
2018,
10,
29
]
]
},
"indexed": {
"timestamp": 1540825815212,
"date-time": "2018-10-29T15:10:15Z",
"date-parts": [
[
2018,
10,
29
]
]
},
"type": "journal-article",
"published-online": {
"date-parts": [
[
2018,
1,
15
]
]
},
"URL": "http://dx.doi.org/10.26850/1678-4618eqj.v35.1.2010.p41-46",
"is-referenced-by-count": 0,
"volume": "35",
"issn-type": [
{
"type": "electronic",
"value": "1678-4618"
}
],
"link": [
{
"URL": "http://revista.iq.unesp.br/ojs/index.php/ecletica/article/viewFile/191/149",
"intended-application": "text-mining",
"content-version": "vor",
"content-type": "application/pdf"
},
{
"URL": "http://revista.iq.unesp.br/ojs/index.php/ecletica/article/viewFile/191/149",
"intended-application": "similarity-checking",
"content-version": "vor",
"content-type": "unspecified"
}
],
"journal-issue": {
"issue": "1",
"published-online": {
"date-parts": [
[
2018,
1,
15
]
]
}
},
"references-count": 0,
"short-container-title": [
"Eclet. Quim. J."
],
"publisher": "Ecletica Quimica Journal",
"content-domain": {
"domain": [],
"crossmark-restriction": false
},
"license": [
{
"URL": "http://creativecommons.org/licenses/by/4.0",
"start": {
"timestamp": 1515974400000,
"date-time": "2018-01-15T00:00:00Z",
"date-parts": [
[
2018,
1,
15
]
]
},
"content-version": "unspecified",
"delay-in-days": 0
}
],
"created": {
"timestamp": 1517590842000,
"date-time": "2018-02-02T17:00:42Z",
"date-parts": [
[
2018,
2,
2
]
]
},
"issue": "1",
"title": [
"Spot-test identification and rapid quantitative sequential analys is of dipyrone"
],
"container-title": [
"Ecl\u00e9tica Qu\u00edmica Journal"
],
"page": "41-50",
"funder": [{"DOI": "10.13039/100010663","name": "H2020 European Research Council","doi-asserted-by": "publisher","award": ["677749"]}]
}

View File

@ -0,0 +1,175 @@
{
"DOI": "10.26850/1678-4618eqj.v35.1.2010.p41-46",
"issued": {
"date-parts": [
[
2018,
1,
15
]
]
},
"abstract": "<jats:p>A qualitative spot-test and tandem quantitative analysis of dipyrone in the bulk drugand in pharmaceutical preparations is proposed. The formation of a reddish-violet\u00a0 color indicates a positive result. In sequence a quantitative procedure can be performed in the same flask. The quantitative results obtained were statistically compared with those obtained with the method indicated by the Brazilian\u00a0 Pharmacopoeia, using the Student\u2019s t and the F tests. Considering the concentration in a 100 \u03bcL aliquot, the qualitative visual limit of detection is about 5\u00d710-6 g; instrumental LOD \u2245 1.4\u00d710-4 mol L-1 ; LOQ \u2245 4.5\u00d710-4 mol L-1.</jats:p>",
"prefix": "10.26850",
"author": [
{
"authenticated-orcid": false,
"given": "Matthieu",
"family": "Tubino",
"sequence": "first",
"affiliation": [],
"ORCID": "http://orcid.org/0000-0002-1987-3907"
},
{
"affiliation": [],
"given": "A. C.",
"family": "Biondo",
"sequence": "additional"
},
{
"authenticated-orcid": false,
"given": "Marta Maria Duarte Carvalho",
"family": "Vila",
"sequence": "additional",
"affiliation": [],
"ORCID": "http://orcid.org/0000-0002-0198-7076"
},
{
"authenticated-orcid": false,
"given": "Leonardo",
"family": "Pezza",
"sequence": "additional",
"affiliation": [],
"ORCID": "http://orcid.org/0000-0003-0197-7369"
},
{
"authenticated-orcid": false,
"given": "Helena Redigolo",
"family": "Pezza",
"sequence": "additional",
"affiliation": [],
"ORCID": "http://orcid.org/0000-0001-5564-1639"
}
],
"reference-count": 0,
"ISSN": [
"1678-4618"
],
"member": "11395",
"source": "Crossref",
"score": 1.0,
"deposited": {
"timestamp": 1540823529000,
"date-time": "2018-10-29T14:32:09Z",
"date-parts": [
[
2018,
10,
29
]
]
},
"indexed": {
"timestamp": 1540825815212,
"date-time": "2018-10-29T15:10:15Z",
"date-parts": [
[
2018,
10,
29
]
]
},
"type": "journal-article",
"published-online": {
"date-parts": [
[
2018,
1,
15
]
]
},
"URL": "http://dx.doi.org/10.26850/1678-4618eqj.v35.1.2010.p41-46",
"is-referenced-by-count": 0,
"volume": "35",
"issn-type": [
{
"type": "electronic",
"value": "1678-4618"
}
],
"link": [
{
"URL": "http://revista.iq.unesp.br/ojs/index.php/ecletica/article/viewFile/191/149",
"intended-application": "text-mining",
"content-version": "vor",
"content-type": "application/pdf"
},
{
"URL": "http://revista.iq.unesp.br/ojs/index.php/ecletica/article/viewFile/191/149",
"intended-application": "similarity-checking",
"content-version": "vor",
"content-type": "unspecified"
}
],
"journal-issue": {
"issue": "1",
"published-online": {
"date-parts": [
[
2018,
1,
15
]
]
}
},
"references-count": 0,
"short-container-title": [
"Eclet. Quim. J."
],
"publisher": "Ecletica Quimica Journal",
"content-domain": {
"domain": [],
"crossmark-restriction": false
},
"license": [
{
"URL": "http://creativecommons.org/licenses/by/4.0",
"start": {
"timestamp": 1515974400000,
"date-time": "2018-01-15T00:00:00Z",
"date-parts": [
[
2018,
1,
15
]
]
},
"content-version": "unspecified",
"delay-in-days": 0
}
],
"created": {
"timestamp": 1517590842000,
"date-time": "2018-02-02T17:00:42Z",
"date-parts": [
[
2018,
2,
2
]
]
},
"issue": "1",
"title": [
"Spot-test identification and rapid quantitative sequential analys is of dipyrone"
],
"container-title": [
"Ecl\u00e9tica Qu\u00edmica Journal"
],
%s
"page": "41-50"
}

View File

@ -0,0 +1,193 @@
{
"DOI": "10.1016/j.infbeh.2016.11.001",
"issued": {
"date-parts": [
[
2017,
8
]
]
},
"update-policy": "http://dx.doi.org/10.1016/elsevier_cm_policy",
"prefix": "10.1016",
"subject": [
"Developmental and Educational Psychology"
],
"author": [
{
"affiliation": [],
"given": "Dora",
"family": "Kampis",
"sequence": "first"
},
{
"affiliation": [],
"given": "D\u00f3ra",
"family": "Fogd",
"sequence": "additional"
},
{
"affiliation": [],
"given": "\u00c1gnes Melinda",
"family": "Kov\u00e1cs",
"sequence": "additional"
}
],
"reference-count": 109,
"ISSN": [
"0163-6383"
],
"assertion": [
{
"name": "publisher",
"value": "Elsevier",
"label": "This article is maintained by"
},
{
"name": "articletitle",
"value": "Nonverbal components of Theory of Mind in typical and atypical development",
"label": "Article Title"
},
{
"name": "journaltitle",
"value": "Infant Behavior and Development",
"label": "Journal Title"
},
{
"name": "articlelink",
"value": "https://doi.org/10.1016/j.infbeh.2016.11.001",
"label": "CrossRef DOI link to publisher maintained version"
},
{
"name": "content_type",
"value": "article",
"label": "Content Type"
},
{
"name": "copyright",
"value": "\u00a9 2016 Elsevier Inc. All rights reserved.",
"label": "Copyright"
}
],
"member": "78",
"source": "Crossref",
"score": 1.0,
"deposited": {
"timestamp": 1565383284000,
"date-parts": [
[
2019,
8,
9
]
],
"date-time": "2019-08-09T20:41:24Z"
},
"indexed": {
"timestamp": 1565385055278,
"date-parts": [
[
2019,
8,
9
]
],
"date-time": "2019-08-09T21:10:55Z"
},
"type": "journal-article",
"URL": "http://dx.doi.org/10.1016/j.infbeh.2016.11.001",
"is-referenced-by-count": 1,
"volume": "48",
"issn-type": [
{
"type": "print",
"value": "0163-6383"
}
],
"link": [
{
"URL": "https://api.elsevier.com/content/article/PII:S0163638315300059?httpAccept=text/xml",
"intended-application": "text-mining",
"content-version": "vor",
"content-type": "text/xml"
},
{
"URL": "https://api.elsevier.com/content/article/PII:S0163638315300059?httpAccept=text/plain",
"intended-application": "text-mining",
"content-version": "vor",
"content-type": "text/plain"
}
],
"published-print": {
"date-parts": [
[
2017,
8
]
]
},
"references-count": 109,
"short-container-title": [
"Infant Behavior and Development"
],
"publisher": "Elsevier BV",
"content-domain": {
"domain": [
"elsevier.com",
"sciencedirect.com"
],
"crossmark-restriction": true
},
"license": [
{
"URL": "https://www.elsevier.com/tdm/userlicense/1.0/",
"start": {
"timestamp": 1501545600000,
"date-parts": [
[
2017,
8,
1
]
],
"date-time": "2017-08-01T00:00:00Z"
},
"content-version": "tdm",
"delay-in-days": 0
}
],
"language": "en",
"created": {
"timestamp": 1479142046000,
"date-parts": [
[
2016,
11,
14
]
],
"date-time": "2016-11-14T16:47:26Z"
},
"title": [
"Nonverbal components of Theory of Mind in typical and atypical development"
],
"alternative-id": [
"S0163638315300059"
],
"container-title": [
"Infant Behavior and Development"
],
"funder": [
{
"DOI": "10.13039/501100001711",
"name": "Swiss National Science Foundation (Schweizerische Nationalfonds)",
"doi-asserted-by": "publisher",
"award": [
"CR32I3_156724",
"31003A_173281/1",
"200021_165850"
]
}
],
"page": "54-62"
}

View File

@ -0,0 +1,104 @@
{
"DOI": "10.17848/9780880992299.vol1ch4",
"ISBN": [
"9780880992299"
],
"issued": {
"date-parts": [
[
2001,
12,
1
]
]
},
"prefix": "10.17848",
"author": [
{
"affiliation": [
],
"given": "William E.",
"family": "Even",
"authenticated-orcid": false
},
{
"affiliation": [
],
"given": "David A.",
"family": "Macpherson"
}
],
"reference-count": 0,
"member": "7312",
"source": "Crossref",
"score": 1.0,
"deposited": {
"timestamp": 1461687244000,
"date-parts": [
[
2016,
4,
26
]
],
"date-time": "2016-04-26T16:14:04Z"
},
"indexed": {
"timestamp": 1502548826285,
"date-parts": [
[
2017,
8,
12
]
],
"date-time": "2017-08-12T14:40:26Z"
},
"type": "book-chapter",
"published-online": {
"date-parts": [
[
2010,
5,
27
]
]
},
"URL": "http://dx.doi.org/10.17848/9780880992299.vol1ch4",
"is-referenced-by-count": 0,
"download_ts": 1508079092.874343,
"published-print": {
"date-parts": [
[
2001,
12,
1
]
]
},
"references-count": 0,
"publisher": "W.E. Upjohn Institute",
"content-domain": {
"domain": [
],
"crossmark-restriction": false
},
"created": {
"timestamp": 1434034139000,
"date-parts": [
[
2015,
6,
11
]
],
"date-time": "2015-06-11T14:48:59Z"
},
"title": [
"Children\\'s Effects on Women\\'s Labor Market Attachment and Earnings"
],
"container-title": [
"Working Time in Comparative Perspective - Volume II: Life-Cycle Working Time and Nonstandard Hours"
],
"page": "99-128"
}

View File

@ -0,0 +1,105 @@
{
"DOI": "10.1037/e522512014-096",
"subtitle": [
"(522512014-096)"
],
"issued": {
"date-parts": [
[
2012
]
]
},
"prefix": "10.1037",
"author": [
{
"affiliation": [],
"given": "Jessica",
"family": "Trudeau",
"sequence": "first"
},
{
"affiliation": [],
"given": "Amy",
"family": "McShane",
"sequence": "additional"
},
{
"affiliation": [],
"given": "Renee",
"family": "McDonald",
"sequence": "additional"
}
],
"reference-count": 0,
"member": "15",
"source": "Crossref",
"score": 1.0,
"deposited": {
"timestamp": 1413827035000,
"date-parts": [
[
2014,
10,
20
]
],
"date-time": "2014-10-20T17:43:55Z"
},
"indexed": {
"timestamp": 1550142454710,
"date-parts": [
[
2019,
2,
14
]
],
"date-time": "2019-02-14T11:07:34Z"
},
"type": "dataset",
"URL": "http://dx.doi.org/10.1037/e522512014-096",
"is-referenced-by-count": 0,
"published-print": {
"date-parts": [
[
2012
]
]
},
"references-count": 0,
"institution": {
"acronym": [
"APA"
],
"place": [
"-"
],
"name": "American Psychological Association"
},
"publisher": "American Psychological Association (APA)",
"content-domain": {
"domain": [],
"crossmark-restriction": false
},
"created": {
"timestamp": 1413826121000,
"date-parts": [
[
2014,
10,
20
]
],
"date-time": "2014-10-20T17:28:41Z"
},
"title": [
"Project Support: A Randomized Control Study to Evaluate the Translation of an Evidence- Based Program"
],
"alternative-id": [
"522512014-096"
],
"container-title": [
"PsycEXTRA Dataset"
]
}

View File

@ -0,0 +1,121 @@
{
"indexed": {
"date-parts": [
[
2020,
4,
7
]
],
"date-time": "2020-04-07T15:54:28Z",
"timestamp": 1586274868901
},
"reference-count": 0,
"publisher": "Japan Society of Mechanical Engineers",
"issue": "432",
"content-domain": {
"domain": [],
"crossmark-restriction": false
},
"short-container-title": [
"JSMET"
],
"published-print": {
"date-parts": [
[
1982
]
]
},
"DOI": "10.1299\/kikaib.48.1474",
"type": "journal-article",
"created": {
"date-parts": [
[
2011,
9,
13
]
],
"date-time": "2011-09-13T05:59:01Z",
"timestamp": 1315893541000
},
"page": "1474-1482",
"source": "Crossref",
"is-referenced-by-count": 0,
"title": [
""
],
"prefix": "10.1299",
"volume": "48",
"author": [
{
"given": "Hiroshi",
"family": "KATO",
"sequence": "first",
"affiliation": []
},
{
"given": "Yoshichika",
"family": "MIZUNO",
"sequence": "additional",
"affiliation": []
}
],
"member": "124",
"container-title": [
"Transactions of the Japan Society of Mechanical Engineers Series B"
],
"original-title": [
"\u5e0c\u8584\u9ad8\u5206\u5b50\u6eb6\u6db2\u4e2d\u306e\u6709\u9650\u9577\u5186\u67f1\u306e\u62b5\u6297"
],
"language": "ja",
"deposited": {
"date-parts": [
[
2011,
9,
13
]
],
"date-time": "2011-09-13T06:01:33Z",
"timestamp": 1315893693000
},
"score": 1.0,
"subtitle": [],
"short-title": [],
"issued": {
"date-parts": [
[
1982
]
]
},
"references-count": 0,
"journal-issue": {
"published-print": {
"date-parts": [
[
1982
]
]
},
"issue": "432"
},
"URL": "http:\/\/dx.doi.org\/10.1299\/kikaib.48.1474",
"relation": {},
"ISSN": [
"0387-5016",
"1884-8346"
],
"issn-type": [
{
"value": "0387-5016",
"type": "print"
},
{
"value": "1884-8346",
"type": "electronic"
}
]
}

View File

@ -0,0 +1,34 @@
"funder": [{"DOI": "10.13039/100010663","name": "H2020 European Research Council","doi-asserted-by": "publisher","award": ["677749"]}],
"funder": [{"name": "European Unions Horizon 2020 research and innovation program","award": ["296801","304995","675395"]}],
"funder": [{"DOI": "10.13039/100010661","name": "Horizon 2020 Framework Programme","doi-asserted-by": "publisher","award": ["722467", "H2020-FETOPEN-2015-CSA 712689","773830 (20182022)"]}],
"funder": [{"DOI": "10.13039/501100007601","name": "Horizon 2020","doi-asserted-by": "publisher","award": ["645119"]}],
"funder": [{"DOI": "10.13039/100010665","name": "H2020 Marie Skłodowska-Curie Actions","doi-asserted-by": "publisher","award": ["840267"]}],
"funder": [{"DOI": "10.13039/100011199","name": "FP7 Ideas: European Research Council","doi-asserted-by": "publisher","award": ["226438"]}],
"funder": [{"DOI": "10.13039/100004431","name": "Directorate-General for Research and Innovation","doi-asserted-by": "publisher","award": ["321427"]}],
"funder": [{"DOI": "10.13039/501100004963","name": "Seventh Framework Programme","doi-asserted-by": "publisher","award": ["287818","612538"]}],
"funder": [{"DOI": "10.13039/501100000781","name": "European Research Council","doi-asserted-by": "publisher","award": ["340185"]}],
"funder": [{"name": "European Union's","award": ["763909"]}],
"funder": [{"DOI": "10.13039/501100000780","name": "European Commission","doi-asserted-by": "publisher","award": ["645119", "H2020-FETOPEN-2015-CSA_712689"]}],
"funder": [{"DOI": "10.13039/100000001","name": "National Science Foundation","doi-asserted-by": "publisher","award": ["1639552,1634422","ID0EEMBI7182"]}],
"funder": [{"name": "The French National Research Agency (ANR)","award": ["ID0E4QBI7183","ANR-11-JS56-01501","ID0E3VBI7184","ANR-13-BS06-0008"]}],
"funder": [{"DOI": "10.13039/501100001665","name": "Agence Nationale de la Recherche","doi-asserted-by": "publisher","award": ["ANR-14-ASTR-0004-01"]}],
"funder": [{"DOI": "10.13039/501100002341","name": "Academy of Finland","doi-asserted-by": "publisher","award": ["294337","292335","31444","250114","292482"]}],
"funder": [{"DOI": "10.13039/501100001602","name": "Science Foundation Ireland","doi-asserted-by": "publisher","award": ["16/SP/3829","12/RC/2302_P2","SFI/09/IN.I/12974"]}],
"funder": [{"DOI": "10.13039/501100000923","name": "Australian Research Council","doi-asserted-by": "publisher","award": ["LP110200134"]}],
"funder": [{"DOI": "10.13039/501100000038","name": "NSERC","doi-asserted-by": "crossref","award": []}],
"funder": [{"DOI": "10.13039/501100000155","name": "Social Sciences and Humanities Research Council of Canada","doi-asserted-by": "publisher","award": []}],
"funder": [{"DOI": "10.13039/501100000024","name": "Canadian Institutes for Health Research","doi-asserted-by": "crossref","award": ["HIB-126784","HHP-111405"]}],
"funder": [{"DOI": "10.13039/501100002848","name": "Comisión Nacional de Investigación Científica y Tecnológica","doi-asserted-by": "publisher","award": ["15130011"]}],
"funder": [{"DOI": "10.13039/501100003448","name": "General Secretariat for Research and Technology","doi-asserted-by": "publisher","award": ["MIS: 380170"]}],
"funder": [{"DOI": "10.13039/501100010198","name": "Ministerio de Economía, Industria y Competitividad, Gobierno de España","doi-asserted-by": "publisher","award": ["ECO2017-89715-P"]}],
"funder": [{"DOI": "10.13039/501100004564","name": "Ministarstvo Prosvete, Nauke i Tehnološkog Razvoja","doi-asserted-by": "publisher","award": ["TR34008"]}],
"funder": [{"DOI": "10.13039/501100003407","name": "MIUR","doi-asserted-by": "publisher","award": ["20158A9CBM"]}],
"funder": [{"DOI": "10.13039/501100003407","name": "MIUR","doi-asserted-by": "publisher","award": []}],
"funder": [{"DOI": "10.13039/501100006588","name": "Ministarstvo Znanosti, Obrazovanja i Sporta","doi-asserted-by": "publisher","award": ["037-0372790-2799", "Project No. 125-1253008-1350"]}],
"funder": [{"DOI": "10.13039/501100006588","name": "Ministry of Science, Education and Sports","doi-asserted-by": "publisher","award": ["181-1811096-1093"]}],
"funder": [{"DOI": "10.13039/501100004488","name": "Hrvatska Zaklada za Znanost","doi-asserted-by": "publisher","award": ["HRZZ-IP-2013-11-3013", "UIP-2014-09-4744"]}],
"funder": [{"DOI": "10.13039/501100006769","name": "Russian Science Foundation","doi-asserted-by": "publisher","award": ["17-11-01027"]}],
"funder": [{"DOI": "10.13039/501100001711","name": "Swiss National Science Foundation (Schweizerische Nationalfonds)","doi-asserted-by": "publisher","award": ["CR32I3_156724", "31003A_173281/1"]}],
"funder": [{"DOI": "10.13039/501100004410","name": "Türkiye Bilimsel ve Teknolojik Araştirma Kurumu","doi-asserted-by": "publisher","award": ["113M552"]}],
"funder": [{"DOI": "10.13039/100004440","name": "Wellcome Trust","doi-asserted-by": "crossref","award": ["095127","079080"]}],
"funder": [{"DOI": "10.13039/100004440","name": "Wellcome Trust","doi-asserted-by": "crossref","award": []}],

View File

@ -0,0 +1,5 @@
"funder": [{"name": "Wellcome Trust Masters Fellowship","award": ["090633"]}],
"funder": [{"name": "CONICYT, Programa de Formación de Capital Humano Avanzado","award": ["#72140079"]}],
"funder": [{"name": "European Union's","award": ["763909"]}],
"funder": [{"name": "European Unions Horizon 2020 research and innovation program","award": ["296801","304995","675395"]}],
"funder": [{"name": "The French National Research Agency (ANR)","award": ["ID0E4QBI7183","ANR-11-JS56-01501","ID0E3VBI7184","ANR-13-BS06-0008"]}],

View File

@ -0,0 +1,146 @@
{
"DOI": "10.1101/030080",
"issued": {
"date-parts": [
[
2015,
10,
28
]
]
},
"abstract": "<jats:p>Abstract Key Message<jats:italic>Agrobacterium tumefaciens</jats:italic>was used to transform radiata pine shoots and to efficiently produce stable genetically modified pine plants. Abstract Micropropagated shoot explants from<jats:italic>Pinus radiata</jats:italic>D. Don were used to produce stable transgenic plants by<jats:italic>Agrobacterium tumefaciens</jats:italic>mediated transformation. Using this method any genotype that can be micropropagated could produce stable transgenic lines. As over 80% of<jats:italic>P. radiata</jats:italic>genotypes tested can be micropropagated, this effectively means that any line chosen for superior characteristics could be transformed. There are well established protocols for progressing such germplasm to field deployment. Here we used open and control pollinated seed lines and embryogenic clones. The method developed was faster than other methods previously developed using mature cotyledons. PCR positive shoots could be obtain within 6 months of<jats:italic>Agrobacterium</jats:italic>cocultivation compared with 12 months for cotyledon methods. Transformed shoots were obtained using either kanamycin or geneticin as the selectable marker gene. Shoots were recovered from selection, were tested and were not chimeric, indicating that the selection pressure was optimal for this explant type. GFP was used as a vital marker, and the bar gene, (for resistance to the herbicide Buster\\u00ae/Basta\\u00ae) was used to produce lines that could potentially be used in commercial application. As expected, a range of expression phenotypes were identified for both these reporter genes and the analyses for expression were relatively easy.</jats:p>",
"prefix": "10.1101",
"author": [
{
"affiliation": [],
"given": "Jan E",
"family": "Grant",
"sequence": "first"
},
{
"affiliation": [],
"given": "Pauline A",
"family": "Cooper",
"sequence": "additional"
},
{
"affiliation": [],
"given": "Tracy M",
"family": "Dale",
"sequence": "additional"
}
],
"reference-count": 0,
"member": "246",
"source": "Crossref",
"score": 1.0,
"deposited": {
"timestamp": 1483495053000,
"date-time": "2017-01-04T01:57:33Z",
"date-parts": [
[
2017,
1,
4
]
]
},
"indexed": {
"timestamp": 1550234353119,
"date-time": "2019-02-15T12:39:13Z",
"date-parts": [
[
2019,
2,
15
]
]
},
"type": "posted-content",
"URL": "http://dx.doi.org/10.1101/030080",
"is-referenced-by-count": 2,
"link": [
{
"URL": "https://syndication.highwire.org/content/doi/10.1101/030080",
"intended-application": "similarity-checking",
"content-version": "vor",
"content-type": "unspecified"
}
],
"accepted": {
"date-parts": [
[
2015,
10,
28
]
]
},
"references-count": 0,
"institution": {
"acronym": [
"-"
],
"place": [
"-"
],
"name": "bioRxiv"
},
"posted": {
"date-parts": [
[
2015,
10,
28
]
]
},
"publisher": "Cold Spring Harbor Laboratory",
"content-domain": {
"domain": [],
"crossmark-restriction": false
},
"created": {
"timestamp": 1446095513000,
"date-time": "2015-10-29T05:11:53Z",
"date-parts": [
[
2015,
10,
29
]
]
},
"published-print": {
"timestamp": 1446095513000,
"date-time": "2015-10-29T05:11:53Z",
"date-parts": [
[
2015,
2,
29
]
]
},
"published-online": {
"date-parts": [
[
2015,
2,
2
]
]
},
"title": [
"Genetic transformation of micropropagated shoots ofPinus radiataD.Don"
],
"original-title": [
"OR TITLE"
],
"short-title": [
"SHORT TITLE"
],
"group-title": "Plant Biology",
"subtype": "preprint"
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,334 @@
{
"IndexLength": 139,
"InvertedIndex": {
"The": [
0,
23,
47
],
"invention": [
1,
53
],
"discloses": [
2
],
"a": [
3,
10,
71,
81,
121
],
"treatment": [
4,
69,
85,
96
],
"method": [
5,
24,
49
],
"of": [
6,
9,
19,
57,
84,
117,
120
],
"waste": [
7,
118
],
"mash": [
8,
119
],
"cane": [
11,
122
],
"sugar": [
12,
123
],
"factory,": [
13
],
"belonging": [
14
],
"to": [
15
],
"the": [
16,
26,
52,
55,
66,
93,
115,
135
],
"technical": [
17,
48
],
"field": [
18
],
"industrial": [
20
],
"wastewater": [
21
],
"treatment.": [
22
],
"comprises": [
25
],
"following": [
27
],
"steps": [
28
],
"of:": [
29
],
"(1)": [
30
],
"pretreatment;": [
31
],
"(2)": [
32
],
"primary": [
33
],
"concentration;": [
34
],
"(3)": [
35
],
"cooling": [
36
],
"sedimentation": [
37
],
"and": [
38,
45,
62,
80,
86,
114,
134
],
"dense": [
39
],
"slurry": [
40
],
"drying;": [
41
],
"(4)": [
42
],
"secondary": [
43
],
"concentration": [
44
],
"drying.": [
46
],
"disclosed": [
50
],
"by": [
51
],
"has": [
54
],
"advantages": [
56
],
"small": [
58
],
"investment,": [
59
],
"simple": [
60
],
"equipment": [
61
],
"easiness": [
63
],
"in": [
64,
132
],
"popularization;": [
65
],
"product": [
67
],
"after": [
68
],
"is": [
70,
91,
98,
102,
112,
130,
137
],
"high-quality": [
72
],
"high": [
73
],
"value-added": [
74
],
"(fully": [
75
],
"water-soluble)": [
76
],
"potassium": [
77
],
"humate": [
78
],
"product,": [
79
],
"new": [
82
],
"mode": [
83
],
"profit": [
87
],
"enabling": [
88
],
"sustainable": [
89
],
"development": [
90
],
"realized;": [
92
],
"environmental": [
94
],
"protection": [
95
],
"effect": [
97
],
"good,": [
99
],
"water": [
100,
106
],
"balance": [
101
],
"realized": [
103
],
"through": [
104
],
"final": [
105
],
"quality": [
107
],
"treatment,": [
108
],
"real": [
109
],
"zero": [
110
],
"emission": [
111
],
"realized,": [
113
],
"problem": [
116
],
"factory": [
124
],
"can": [
125
],
"be": [
126
],
"solved": [
127
],
"fundamentally;": [
128
],
"energy": [
129
],
"saved": [
131
],
"operation,": [
133
],
"feasibility": [
136
],
"high.": [
138
]
}
}

View File

@ -0,0 +1 @@
H4sIAAAAAAAAAO1a227bOBB9z1cIepd18SW24aho0wTbAgEWjRdY9I2RaJtbSdSSkhP165eURIm6kHa2SbCLNkBiWDxzhhxyZg7tbN49xZFxhIQinFyZ7sQxDZgEOETJ/sr8Y3trLU2DZiAJQYQTeGUWkJrv/IsNgQEm4bp6MVKQHa5M22E/Fvt1rcViNrfmzupP02AOErpGSQZJAqIr85Bl6dq2Hx8fJ5gEKGR/93ZCbYEQFjDMA5CV01KZNBBhEyKaoSTQW0mgxg6mbCUgg6HGrMEIK5wdILESEEO1VYsRVjGMH1i8DyhVW7WYJhqEYKKJBB8W2ADHsS4A1bhAV1uoRlfjAp2yaWG2S1YIM4AiqrbrIwXDN1g8ah3WgGblMbPWrJwPN9in6gxZKIRJhnYI6mI2BAueXZ5UGaCyrQFNVAjcQcISB+oC0oKEHQhDAqnGpga0WXRE7ABaKaZIf8j7SMHAIvtNbcVHBfLA0gSTQg2uAe0+pREuYhZK3WYJjLD6OwcRC/2pTO/AhC2F5IgCTfLVgO7ZPXVim71hFYLFEOm2tMW02UQhIAFP+pxojm0X186QvSfwiOCjbpoNSNg95JFmV/lof36MgOKc6KI3gJr+hcF+NlX9WJdgKXmqURmRE+RzdsroW+qRLrGxJYsBDe8uvs6qBAzMDphmfuO2AZePq4XY2pVspISVM1zyJCMiHIAI+jDZ2COPa4dayk2dUSL1JEdiJCCwTAErhtkBh/5d2SiskonAcGOrgEMqmj/EiPK+b4Wsq/me464sZ2l53tadrmeLtXc58ZbLry1n32IQ8QjQzIqZeGBBDAWrx7Ztbrnu1puu59P11JksPfdrE/sRm5FlRwDFMPQzkkNpjfXTIZ4Jmoqv7A49s96gxjolKAak0LN0QfU+j+7kpiowdR3SiCZRieSTVplyIWEcEUUPKEIZK85p/hChwKzJxgRYSyJvVXk+2k0abv187rWb1EGP8o1u/QlW3dZLi24lxHqPjjAp1RT1twgkRb4Z6IwO6ATfDsQoKkqs/xmBETIZ0e6GLW2H9LgVe5I2pLqNlmCmLTF120Ovq2gZe9AOa3lEK0Gl5ag0lWxZ6xAhWPSLEqJFJqhFnVB/WnuB6c59qNbG5J5+XSN44aTZ0+qlftg2eEkPWDSPecprY9Aqg2fUyZnlTLfObD2brZ3pZHm5OLNOStOUbjfaWMi47la3XM39Sh/VBqXkaWTfiWPXwFRMte7W0giMiqMvjbVkA7CKtb2yafkkmIpJ0ndaKhmn4uroZi1bF6niG2jCs2pRi1bx1kpdyyYwKg5+edESlABFP3zplOxPbk9wnnaHX9u9zC9VPjpEKZDjQAXYyooU+iFGzfwGg8+iO4Ioh77rTFzXWdnvr69v7u8nPCYTb7X0PNcZ9VNZPctRgknMjv53GBoZAQlF5Q2Wiz2zcQ8Cdu7oafct1/PmwDp1c1FiISyvSc9dOud4llMCoyrZWTHyKYx2o7Qd1PjJGTEbOYkjqJGjuOFJWqZy22XzzApwyG6qly67kCxWjnkqy+0WOSaWWe9LI1BYKAnhE1PNpj4lelqZp+XUmjpbz1szYTt3JjP38hyt3Od9raSXfVR19/TBqHBWEPHjr8192Wr8gl+RSJuzWi5nlrtyp+P3fJ2H3t1/yNS9++uoTn4eMGpsPztAvZCWd4Rrgillt/Q+XfcCoXGsAJXZkqEsOmOLK9g9K1CR9ZFdnBN+kzdu2WnNCTTuQEbQk3HNMp3VvlIXGnflZwfGDhPjI6y+FDC+wBQyJnbHMm7Ze0iMO3yElba7JTg2biIYZATzzzXSA4jwnoDYuEd7lvK0WZRmyhv71KLOb2oK9Hnn5YWam4ryVRqcytlbNznVPF690akcv1SzK/nPangq5An99W8jpIxKXSP4Gf2LlRI+CUAyFERQZJry+DZFuOyb1eeJ6pYjWxRM95fNrJlf+UQfpPPcVOsRS6nKxKebmxvjfXl+60V1x0fUyEBn9LS7rRfvP6rt64/GVlt3vnYXa8ebLJz5T6jt53ObB8OeLl2m2WZvJurP8fviav4cpz+BjF+4znzqzd3TMr5FvryMP5GBPyjjXyC/ZR+/ZPwvGd+Rzh8IQIl1jWOWVkyDf+L/PLMDATSuDyBJYGTdQ67DuYq/ZxUwg/vC+AAoq4fsyXuWtwVF1MA74+bIA/GFlwc2+BHSIgkOBCfoe1kvjC1OuYRPD4WBSi78DRq/szGu+H/p+ddqaiovb9bYVBN4veam8vj/l+6q0PwnNbu7OkOzy3bslxf3ZWNWPThpF4LC91or/va17gefq3e83v0GQZQdAkCgcZPsUQIhQcn+DW4NnbHyqwjxxaP2S0b/YmN3/tnSv/gH9+klwrUpAAA=

View File

@ -0,0 +1,309 @@
(10.1109/JPHOT.2018.2880319,[{"oid":"0000-0002-1473-2224","name":"zhong","surname":"chen","creditName":null,"errorCode":null},{"oid":"0000-0002-5739-2127","name":"Yue","surname":"Lin","creditName":null,"errorCode":null}])
(10.1073/pnas.1816459115,[{"oid":"0000-0001-6260-4326","name":"Igor","surname":"Sokolov","creditName":null,"errorCode":null},{"oid":"0000-0001-6260-4326","name":"Igor","surname":"Sokolov","creditName":null,"errorCode":null}])
(10.1016/j.mjafi.2016.05.007,[{"oid":"0000-0002-4342-6656","name":"Anoop","surname":"Sharma","creditName":null,"errorCode":null}])
(10.1152/japplphysiol.00476.2016,[{"oid":"0000-0001-9039-0302","name":"Graeme","surname":"Zosky","creditName":"Graeme R. Zosky","errorCode":null},{"oid":"0000-0001-8355-7362","name":"Stephen","surname":"Dubsky","creditName":"Stephen Eric Dubsky","errorCode":null}])
(10.1111/j.1365-2621.1995.tb09798.x,[{"oid":"0000-0001-6457-4044","name":"jose martin","surname":"yañez limon","creditName":null,"errorCode":null},{"oid":"0000-0002-7108-8447","name":"Orlando","surname":"Zelaya-Angel","creditName":null,"errorCode":null},{"oid":"0000-0002-9267-4621","name":"Jean","surname":"Martinez","creditName":null,"errorCode":null},{"oid":"0000-0003-3060-2638","name":"Juan Jose","surname":"Alvarado-Gil","creditName":null,"errorCode":null}])
(10.1007/s10461-019-02391-1,[{"oid":"0000-0003-4375-7452","name":"Cathy","surname":"Reback","creditName":null,"errorCode":null}])
(10.1039/C4TA03030C,[{"oid":"0000-0003-1615-5034","name":"Jihong","surname":"Yu","creditName":null,"errorCode":null}])
(10.1016/0306-4522(89)90381-3,[{"oid":"0000-0002-7272-8370","name":"PAOLA","surname":"D'ASCANIO","creditName":null,"errorCode":null}])
(10.1016/S0921-4526(01)00945-0,[{"oid":"0000-0002-5360-1008","name":"Tomasz","surname":"Nowak","creditName":null,"errorCode":null},{"oid":"0000-0001-5554-8178","name":"Pawel","surname":"Olko","creditName":null,"errorCode":null},{"oid":"0000-0002-1993-9320","name":"Michael","surname":"Waligorski","creditName":null,"errorCode":null},{"oid":"0000-0002-1993-9320","name":"Michael","surname":"Waligorski","creditName":null,"errorCode":null}])
(10.1074/jbc.m312875200,[{"oid":"0000-0003-3631-2252","name":"Isabelle","surname":"Dugail","creditName":null,"errorCode":null},{"oid":"0000-0001-6577-9009","name":"Bruno","surname":"Feve","creditName":null,"errorCode":null},{"oid":"0000-0002-4502-3543","name":"Martine","surname":"GLORIAN","creditName":"GLORIAN","errorCode":null},{"oid":"0000-0002-1275-9861","name":"KHADIJA","surname":"EL HADRI","creditName":null,"errorCode":null}])
(10.1038/nrc3802,[{"oid":"0000-0001-7427-4651","name":"William","surname":"Foulkes","creditName":null,"errorCode":null},{"oid":"0000-0001-7427-4651","name":"William","surname":"Foulkes","creditName":null,"errorCode":null}])
(10.1186/s12974-018-1125-5,[{"oid":"0000-0001-5270-9888","name":"I-Chen","surname":"Yu","creditName":null,"errorCode":null}])
(10.4997/JRCPE.2016.310,[{"oid":"0000-0001-7658-1209","name":"David","surname":"Burn","creditName":null,"errorCode":null}])
(10.13140/RG.2.2.20772.48006,[{"oid":"0000-0003-2825-5884","name":"Abdelkader","surname":"Mezghani","creditName":null,"errorCode":null}])
(10.1039/c8ay00816g,[{"oid":"0000-0002-5444-7276","name":"Torbjörn","surname":"Pettersson","creditName":null,"errorCode":null}])
(10.2147/dhps.s6226,[{"oid":"0000-0001-8558-3396","name":"Mario Francisco","surname":"Juruena","creditName":"JURUENA OR JURUENA M OR JURUENA M F OR OR JURUENA MARIO OR JURUENA MARIO F OR JURUENA MARIO FRANCISCO OR JURUENA MF","errorCode":null}])
(10.1007/s00415-004-0351-1,[{"oid":"0000-0002-2737-3662","name":"Gian Domenico","surname":"Borasio","creditName":null,"errorCode":null}])
(10.1088/0967-3334/23/1/318,[{"oid":"0000-0002-3549-4309","name":"Gavin","surname":"Screaton","creditName":null,"errorCode":null},{"oid":"0000-0002-0124-5072","name":"Carlos-Augusto","surname":"Gonzalez-Correa","creditName":null,"errorCode":null}])
(10.1111/aogs.13316,[{"oid":"0000-0002-1241-9875","name":"Lena","surname":"Liljestrom","creditName":null,"errorCode":null}])
(10.1038/sj.thj.6200111,[{"oid":"0000-0001-6115-8790","name":"Felipe","surname":"Prosper","creditName":null,"errorCode":null},{"oid":"0000-0001-6115-8790","name":"Felipe","surname":"Prosper","creditName":null,"errorCode":null},{"oid":"0000-0002-9467-932X","name":"Maria J","surname":"Terol","creditName":null,"errorCode":null},{"oid":"0000-0001-9622-1649","name":"María del Mar","surname":"Tormo Díaz","creditName":"Mar Tormo","errorCode":null},{"oid":"0000-0002-7938-3950","name":"Jose A","surname":"Martinez-Climent","creditName":null,"errorCode":null}])
(10.1021/jo052299f,[{"oid":"0000-0002-2419-0705","name":"Per-Ola","surname":"Norrby","creditName":null,"errorCode":null},{"oid":"0000-0002-2548-7025","name":"Mogens","surname":"Johannsen","creditName":null,"errorCode":null},{"oid":"0000-0002-2548-7025","name":"Mogens","surname":"Johannsen","creditName":null,"errorCode":null}])
(10.1182/blood-2014-04-569392,[{"oid":"0000-0003-1680-5295","name":"X. Long","surname":"Zheng","creditName":null,"errorCode":null},{"oid":"0000-0003-1680-5295","name":"X. Long","surname":"Zheng","creditName":null,"errorCode":null}])
(10.1194/jlr.M030049,[{"oid":"0000-0003-4488-7734","name":"Sander","surname":"Kersten","creditName":null,"errorCode":null}])
(10.1006/jcis.1998.6070,[{"oid":"0000-0003-1407-6498","name":"Anna Luisa","surname":"Costa","creditName":null,"errorCode":null},{"oid":"0000-0002-7892-2836","name":"Carmen","surname":"Galassi","creditName":null,"errorCode":null}])
(10.1002/ange.201707070,[{"oid":"0000-0002-2516-084X","name":"tao","surname":"xiong","creditName":null,"errorCode":null}])
(10.1056/NEJMc1502191,[{"oid":"0000-0003-0893-9015","name":"David","surname":"Stoltz","creditName":null,"errorCode":null},{"oid":"0000-0003-1552-3253","name":"David","surname":"Meyerholz","creditName":"David K. Meyerholz","errorCode":null}])
(10.1300/J017v21n03_01,[{"oid":"0000-0002-6132-5883","name":"Stephen","surname":"Butler","creditName":null,"errorCode":null}])
(10.4028/www.scientific.net/SSP.227.47,[{"oid":"0000-0001-5648-222X","name":"Pawel","surname":"Rokicki","creditName":null,"errorCode":null}])
(10.1002/1521-3935(20000801)201:12<1329::AID-MACP1329>3.0.CO;2-8,[{"oid":"0000-0003-4175-4741","name":"Zhiyuan","surname":"Zhong","creditName":null,"errorCode":null}])
(10.1111/j.1365-2966.2006.11355.x,[{"oid":"0000-0002-1967-2849","name":"Sunil","surname":"Maharaj","creditName":null,"errorCode":null}])
(10.1021/acs.jpca.6b01563,[{"oid":"0000-0002-5301-6730","name":"Stefano","surname":"Falcinelli","creditName":null,"errorCode":null},{"oid":"0000-0002-5301-6730","name":"Stefano","surname":"Falcinelli","creditName":null,"errorCode":null},{"oid":"0000-0003-1934-7891","name":"Piergiorgio","surname":"Casavecchia","creditName":null,"errorCode":null},{"oid":"0000-0002-3961-5710","name":"Astrid","surname":"Bergeat","creditName":null,"errorCode":null},{"oid":"0000-0001-5121-5683","name":"Nadia","surname":"Balucani","creditName":null,"errorCode":null}])
(10.1017/S1431927608089332,[{"oid":"0000-0003-4670-4516","name":"Sónia","surname":"Simões","creditName":"Sónia Simões","errorCode":null},{"oid":"0000-0003-4670-4516","name":"Sónia","surname":"Simões","creditName":"Sónia Simões","errorCode":null},{"oid":"0000-0002-2894-771X","name":"Filomena","surname":"Viana","creditName":"F. Viana","errorCode":null},{"oid":"0000-0002-8486-5436","name":"Ana Sofia","surname":"Ramos","creditName":"A.S. Ramos","errorCode":null},{"oid":"0000-0002-8486-5436","name":"Ana Sofia","surname":"Ramos","creditName":"A.S. Ramos","errorCode":null}])
(10.1063/1.1452737,[{"oid":"0000-0002-4041-7631","name":"Thierry","surname":"Visart de Bocarmé","creditName":null,"errorCode":null}])
(10.1056/nejmp1014255,[{"oid":"0000-0002-9354-5389","name":"Salal","surname":"Humair","creditName":null,"errorCode":null}])
(10.1007/978-3-531-19249-9,[{"oid":"0000-0002-3497-6947","name":"Jan W.","surname":"van Deth","creditName":null,"errorCode":null}])
(10.1080/00397910903291129,[{"oid":"0000-0003-3602-7400","name":"Hayreddin","surname":"Gezegen","creditName":null,"errorCode":null}])
(10.1002/lite.201400037,[{"oid":"0000-0001-8347-8496","name":"James","surname":"Kenar","creditName":null,"errorCode":null},{"oid":"0000-0001-8347-8496","name":"James","surname":"Kenar","creditName":null,"errorCode":null}])
(10.1109/ICACCS.2017.8014639,[{"oid":"0000-0001-9570-1077","name":"Gadadhar","surname":"Sahoo","creditName":null,"errorCode":null}])
(10.18632/oncotarget.4840,[{"oid":"0000-0002-5664-7781","name":"Brian","surname":"McStay","creditName":null,"errorCode":null}])
(10.1016/j.palaeo.2010.12.015,[{"oid":"0000-0001-6088-3261","name":"Michael","surname":"Joachimski","creditName":null,"errorCode":null}])
(10.1016/S0927-0256(97)00020-7,[{"oid":"0000-0002-0604-6590","name":"Ilja","surname":"Turek","creditName":null,"errorCode":null}])
(10.1074/jbc.M111.222364,[{"oid":"0000-0002-5441-5709","name":"Khortnal","surname":"Delvecchio","creditName":null,"errorCode":null}])
(10.1007/s00468-014-1099-6,[{"oid":"0000-0002-7441-2070","name":"Tingfa","surname":"Dong","creditName":"Tingfa Dong","errorCode":null}])
(10.1055/s-0029-1241171,[{"oid":"0000-0003-2795-6013","name":"Alessandro","surname":"Mussa","creditName":null,"errorCode":null}])
(10.1016/j.conb.2013.01.010,[{"oid":"0000-0003-3044-9565","name":"Jonathan","surname":"Britt","creditName":null,"errorCode":null},{"oid":"0000-0003-3044-9565","name":"Jonathan","surname":"Britt","creditName":null,"errorCode":null},{"oid":"0000-0003-3044-9565","name":"Jonathan","surname":"Britt","creditName":null,"errorCode":null}])
(10.1061/(ASCE)CC.1943-5614.0000977,[{"oid":"0000-0003-1663-7535","name":"CHRISTIAN","surname":"CARLONI","creditName":null,"errorCode":null}])
(10.1177/0146167211399101,[{"oid":"0000-0001-6060-8083","name":"Kristof","surname":"Dhont","creditName":null,"errorCode":null},{"oid":"0000-0001-5814-1189","name":"Arne","surname":"Roets","creditName":null,"errorCode":null}])
(10.1007/s00441-005-1096-6,[{"oid":"0000-0002-0240-7416","name":"Wee-Ming","surname":"Boon","creditName":null,"errorCode":null},{"oid":"0000-0002-8085-0034","name":"Karen","surname":"Moritz","creditName":null,"errorCode":null}])
(10.1080/01468039308204227,[{"oid":"0000-0002-5148-6624","name":"Miguel A.","surname":"Muriel","creditName":"Miguel A. Muriel","errorCode":null},{"oid":"0000-0002-5148-6624","name":"Miguel A.","surname":"Muriel","creditName":"Miguel A. Muriel","errorCode":null}])
(10.1111/ijfs.14302,[{"oid":"0000-0002-5053-6378","name":"Mariana","surname":"Blanco Massani","creditName":null,"errorCode":null},{"oid":"0000-0002-5053-6378","name":"Mariana","surname":"Blanco Massani","creditName":null,"errorCode":null}])
(10.1016/j.nanoen.2016.08.017,[{"oid":"0000-0002-4115-3287","name":"Xiaofeng","surname":"Li","creditName":null,"errorCode":null}])
(10.1073/pnas.1402739111,[{"oid":"0000-0001-5933-6463","name":"Anselm","surname":"Enders","creditName":null,"errorCode":null},{"oid":"0000-0003-3922-6376","name":"Dan","surname":"Andrews","creditName":null,"errorCode":null}])
(10.1128/JVI.01432-15,[{"oid":"0000-0002-9141-8001","name":"Markus","surname":"Cornberg","creditName":null,"errorCode":null},{"oid":"0000-0002-6993-4333","name":"Liisa","surname":"Selin","creditName":null,"errorCode":null}])
(10.1097/PCC.0000000000001178,[{"oid":"0000-0003-3089-0318","name":"Sapna","surname":"Kudchadkar","creditName":"Sapna R. Kudchadkar","errorCode":null}])
(10.3866/pku.whxb20001113,[{"oid":"0000-0002-6469-0376","name":"jingfang","surname":"zhou","creditName":null,"errorCode":null}])
(10.1109/JSEN.2019.2912827,[{"oid":"0000-0003-4870-8473","name":"Fengde","surname":"Jia","creditName":null,"errorCode":null}])
(10.1007/978-3-642-23430-9_65,[{"oid":"0000-0002-0663-3437","name":"Heppenstall","surname":"Alison","creditName":null,"errorCode":null},{"oid":"0000-0002-0650-6606","name":"Dianna","surname":"Smith","creditName":"Dianna M Smith","errorCode":null}])
(10.3389/fenrg.2019.00056,[{"oid":"0000-0003-4222-6975","name":"andrea","surname":"saltelli","creditName":null,"errorCode":null},{"oid":"0000-0002-2625-483X","name":"Samuele","surname":"Lo Piano","creditName":null,"errorCode":null}])
(10.1007/s00701-015-2639-6,[{"oid":"0000-0001-8914-5086","name":"Samuel","surname":"Lenell","creditName":null,"errorCode":null}])
(10.4111/kju.2013.54.9.638,[{"oid":"0000-0002-7467-5954","name":"Jae Min","surname":"Chung","creditName":null,"errorCode":null}])
(10.1136/eb-2015-102104,[{"oid":"0000-0001-6330-3314","name":"Paulo","surname":"Menezes","creditName":null,"errorCode":null},{"oid":"0000-0002-3403-5792","name":"Andrea","surname":"Silva","creditName":"Andréa Tenório C da Silva","errorCode":null}])
(10.1016/j.compositesb.2016.03.046,[{"oid":"0000-0003-4867-1404","name":"Sergey","surname":"Vakhrushev","creditName":null,"errorCode":null},{"oid":"0000-0003-1929-032X","name":"Alexander","surname":"Naberezhnov","creditName":null,"errorCode":null},{"oid":"0000-0002-0830-272X","name":"Ewa","surname":"Rysiakiewicz-Pasek","creditName":null,"errorCode":null}])
(10.1038/s41598-018-33547-z,[{"oid":"0000-0002-2080-1695","name":"Nadeem Ahmad","surname":"Sheikh","creditName":null,"errorCode":null}])
(10.1016/j.ecl.2012.04.016,[{"oid":"0000-0001-8107-7947","name":"Ignacio","surname":"Torres Aleman","creditName":null,"errorCode":null},{"oid":"0000-0001-8107-7947","name":"Ignacio","surname":"Torres Aleman","creditName":null,"errorCode":null}])
(10.4102/hts.v64i1.33,[{"oid":"0000-0003-3810-4190","name":"Ernest","surname":"van Eck","creditName":null,"errorCode":null}])
(10.3233/JAD-2010-100675,[{"oid":"0000-0003-2298-615X","name":"Monica","surname":"Di Luca","creditName":null,"errorCode":null},{"oid":"0000-0003-4598-5563","name":"Fabrizio","surname":"Gardoni","creditName":null,"errorCode":null}])
(10.1039/c3dt51431e,[{"oid":"0000-0002-0675-2057","name":"Balasubramanian","surname":"Murugesapandian","creditName":null,"errorCode":null},{"oid":"0000-0001-5221-9459","name":"Mrituanjay D","surname":"Pandey","creditName":null,"errorCode":null}])
(10.1159/000356772,[{"oid":"0000-0002-8803-4496","name":"Takashi","surname":"Matsukura","creditName":null,"errorCode":null}])
(10.1111/j.1469-8137.2011.03973.x,[{"oid":"0000-0003-4801-4412","name":"Julianne","surname":"O'Reilly-Wapstra","creditName":null,"errorCode":null},{"oid":"0000-0003-4801-4412","name":"Julianne","surname":"O'Reilly-Wapstra","creditName":null,"errorCode":null},{"oid":"0000-0002-9383-667X","name":"Mark","surname":"Genung","creditName":null,"errorCode":null},{"oid":"0000-0001-8249-8057","name":"Jennifer","surname":"Rowntree","creditName":null,"errorCode":null},{"oid":"0000-0001-6244-289X","name":"Brad","surname":"Potts","creditName":"Brad M Potts","errorCode":null}])
(10.1103/physreva.78.021401,[{"oid":"0000-0002-1228-5029","name":"Michael","surname":"Martins","creditName":null,"errorCode":null}])
(10.1109/tencon.2011.6129079,[{"oid":"0000-0003-2519-0130","name":"Mustarum","surname":"Musaruddin","creditName":null,"errorCode":null}])
(10.1016/s0956-053x(01)00047-2,[{"oid":"0000-0002-5719-8076","name":"Michael","surname":"Frisch","creditName":null,"errorCode":null}])
(10.1200/JCO.2005.04.3216,[{"oid":"0000-0001-8530-780X","name":"Claus","surname":"Garbe","creditName":null,"errorCode":null}])
(10.1016/j.carbon.2018.04.084,[{"oid":"0000-0003-2149-4479","name":"Vincent","surname":"Chan","creditName":null,"errorCode":null}])
(10.1037/0278-7393.28.3.497,[{"oid":"0000-0002-8868-7067","name":"Norbert","surname":"Schwarz","creditName":null,"errorCode":null},{"oid":"0000-0002-8868-7067","name":"Norbert","surname":"Schwarz","creditName":null,"errorCode":null}])
(10.1039/c3ra47116k,[{"oid":"0000-0002-6626-0599","name":"Ying-Shi","surname":"Guan","creditName":null,"errorCode":null}])
(10.1039/C8DT01469H,[{"oid":"0000-0002-2225-7072","name":"Hong-Bin","surname":"Luo","creditName":null,"errorCode":null}])
(10.1088/1475-7516/2019/02/007,[{"oid":"0000-0002-4487-8742","name":"Miguel","surname":"Escudero","creditName":null,"errorCode":null}])
(10.1109/DICTA.2009.27,[{"oid":"0000-0001-7782-1956","name":"Wojciech","surname":"Chojnacki","creditName":null,"errorCode":null},{"oid":"0000-0001-9612-5884","name":"Michael","surname":"Brooks","creditName":"M.J. Brooks","errorCode":null}])
(10.1007/s11103-017-0629-1,[{"oid":"0000-0001-8226-0700","name":"Rose Adele","surname":"Monteiro","creditName":null,"errorCode":null},{"oid":"0000-0002-8553-0718","name":"Caroline","surname":"Kukolj","creditName":null,"errorCode":null},{"oid":"0000-0003-1882-1512","name":"Glaucio","surname":"Valdameri","creditName":null,"errorCode":null}])
(10.1136/jnnp-2012-302993,[{"oid":"0000-0003-1258-5678","name":"Maeike","surname":"Zijlmans","creditName":null,"errorCode":null}])
(10.1002/1521-3773(20000804)39:15<2707::aid-anie2707>3.0.co;2-m,[{"oid":"0000-0002-1443-8818","name":"Michael","surname":"Anderson","creditName":"Michael W Anderson","errorCode":null}])
(10.1080/19401493.2017.1354070,[{"oid":"0000-0002-4248-6788","name":"Jérôme","surname":"Kämpf","creditName":null,"errorCode":null},{"oid":"0000-0002-1186-4299","name":"Clayton","surname":"Miller","creditName":null,"errorCode":null},{"oid":"0000-0002-1186-4299","name":"Clayton","surname":"Miller","creditName":null,"errorCode":null}])
(10.1080/1062936x.2015.1032347,[{"oid":"0000-0003-4145-9590","name":"Ayako","surname":"Furuhama","creditName":null,"errorCode":null}])
(10.1155/2016/9578308,[{"oid":"0000-0002-8874-1473","name":"Murat","surname":"Gunay","creditName":"Gunay M","errorCode":null}])
(10.1016/j.gexplo.2015.09.011,[{"oid":"0000-0001-8503-4266","name":"Sérgio Benjamin","surname":"Baggio","creditName":null,"errorCode":null},{"oid":"0000-0001-7863-5071","name":"Léo","surname":"Hartmann","creditName":"Hartmann, L.A.","errorCode":null}])
(10.1051/0004-6361:20053947,[{"oid":"0000-0003-3603-394X","name":"Christophe","surname":"Letellier","creditName":null,"errorCode":null},{"oid":"0000-0002-2746-5102","name":"Luis","surname":"Aguirre","creditName":null,"errorCode":null}])
(10.1186/s12959-018-0180-6,[{"oid":"0000-0002-0936-1609","name":"Andrew","surname":"Kotaska","creditName":null,"errorCode":null}])
(https://doi.org/10.1016/j.jct.2018.08.026,[{"oid":"0000-0001-8381-2466","name":"Jason","surname":"Calvin","creditName":null,"errorCode":null}])
(10.1016/j.ceb.2010.04.009,[{"oid":"0000-0002-3329-9032","name":"Steven","surname":"Kosak","creditName":null,"errorCode":null}])
(10.1002/asi.20042,[{"oid":"0000-0002-9989-6681","name":"Aboul Ella","surname":"Hassanien","creditName":"Aboul Ella Hassanien","errorCode":null}])
(10.2105/AJPH.92.6.897,[{"oid":"0000-0002-8214-1776","name":"Kenneth","surname":"Warner","creditName":null,"errorCode":null}])
(10.1016/j.foodhyd.2019.02.010,[{"oid":"0000-0002-4557-4580","name":"Diana","surname":"Gawkowska","creditName":null,"errorCode":null},{"oid":"0000-0002-8038-3050","name":"Jolanta","surname":"Cieśla","creditName":null,"errorCode":null},{"oid":"0000-0003-3323-4535","name":"Justyna","surname":"Cybulska","creditName":null,"errorCode":null},{"oid":"0000-0001-9395-1486","name":"Artur","surname":"Zdunek","creditName":null,"errorCode":null}])
(10.1016/j.jbiosc.2019.03.005,[{"oid":"0000-0002-8601-6657","name":"Sachiyo","surname":"Aburatani","creditName":null,"errorCode":null}])
(10.1016/j.idm.2017.06.004,[{"oid":"0000-0002-0946-2741","name":"Orou G.","surname":"Gaoue","creditName":null,"errorCode":null}])
(10.1017/cbo9780511599194.007,[{"oid":"0000-0002-1683-4486","name":"Barry","surname":"Eichengreen","creditName":null,"errorCode":null}])
(10.1134/S0006297915090060,[{"oid":"0000-0002-0530-4244","name":"Richard","surname":"Beckett","creditName":null,"errorCode":null},{"oid":"0000-0002-0530-4244","name":"Richard","surname":"Beckett","creditName":null,"errorCode":null},{"oid":"0000-0002-0313-7921","name":"Andrei","surname":"Chasov","creditName":null,"errorCode":null},{"oid":"0000-0002-0313-7921","name":"Andrei","surname":"Chasov","creditName":null,"errorCode":null}])
(10.31229/osf.io/jdw7f,[{"oid":"0000-0001-7849-1282","name":"Adib Rifqi","surname":"Setiawan","creditName":"Alobatnic","errorCode":null}])
(10.1016/j.proci.2018.06.150,[{"oid":"0000-0001-7058-6498","name":"Wolfgang","surname":"Polifke","creditName":null,"errorCode":null},{"oid":"0000-0001-5286-0756","name":"Matthias","surname":"Haeringer","creditName":null,"errorCode":null}])
(10.3891/acta.chem.scand.39a-0411,[{"oid":"0000-0002-1061-7536","name":"Ingmar","surname":"Persson","creditName":null,"errorCode":null}])
(10.1038/sj.ejcn.1601726,[{"oid":"0000-0001-8424-2864","name":"Agneta","surname":"Sjöberg","creditName":null,"errorCode":null}])
(10.1093/brain/aww278,[{"oid":"0000-0001-8683-8741","name":"Arturo","surname":"Cardenas-Blanco","creditName":"Arturo Cardenas-Blanco","errorCode":null},{"oid":"0000-0003-1174-5983","name":"Julio","surname":"Acosta-Cabronero","creditName":"Julio Acosta-Cabronero","errorCode":null},{"oid":"0000-0003-1174-5983","name":"Julio","surname":"Acosta-Cabronero","creditName":"Julio Acosta-Cabronero","errorCode":null},{"oid":"0000-0002-2840-4678","name":"Matthew","surname":"Betts","creditName":null,"errorCode":null},{"oid":"0000-0002-5860-5921","name":"Peter","surname":"Nestor","creditName":null,"errorCode":null},{"oid":"0000-0002-5860-5921","name":"Peter","surname":"Nestor","creditName":null,"errorCode":null}])
(10.1016/j.drugpo.2015.05.017,[{"oid":"0000-0002-9402-8682","name":"Judith","surname":"Aldridge","creditName":null,"errorCode":null}])
(10.1088/1742-6596/340/1/011001,[{"oid":"0000-0003-1298-2120","name":"Vladimir","surname":"Sechovsky","creditName":null,"errorCode":null}])
(10.1016/j.scitotenv.2019.06.168,[{"oid":"0000-0002-0543-2641","name":"Jordi","surname":"Garcia-Orellana","creditName":null,"errorCode":null}])
(10.1007/978-3-319-05215-1_5,[{"oid":"0000-0002-6231-8415","name":"Siddhartha","surname":"Jana","creditName":null,"errorCode":null}])
(10.1007/978-981-287-736-9_85,[{"oid":"0000-0002-7913-9712","name":"Oleg","surname":"Lupan","creditName":"Oleg LUPAN","errorCode":null},{"oid":"0000-0002-7913-9712","name":"Oleg","surname":"Lupan","creditName":"Oleg LUPAN","errorCode":null}])
(10.1175/2009JCLI3061.1,[{"oid":"0000-0001-6935-4112","name":"David","surname":"Randall","creditName":null,"errorCode":null}])
(10.3987/COM-05-10376,[{"oid":"0000-0003-3638-2517","name":"Tecla","surname":"Gasperi","creditName":null,"errorCode":null}])
(10.1016/j.bbrc.2009.07.098,[{"oid":"0000-0003-1537-0437","name":"A. M.","surname":"Stalcup","creditName":null,"errorCode":null}])
(10.1179/1753555713Y.0000000065,[{"oid":"0000-0002-0233-1407","name":"Pimpa","surname":"Hormnirun","creditName":null,"errorCode":null}])
(10.1109/OCEANS.2018.8604762,[{"oid":"0000-0002-5734-2699","name":"Gregory","surname":"Murad Reis","creditName":null,"errorCode":null}])
(10.1063/1.4963346,[{"oid":"0000-0002-7504-031X","name":"Lin","surname":"Gu","creditName":null,"errorCode":null}])
(10.1016/j.jviromet.2014.06.023,[{"oid":"0000-0001-8276-1804","name":"MARGHERITA","surname":"PROFITI","creditName":null,"errorCode":null},{"oid":"0000-0002-8315-0316","name":"Sergio","surname":"Rosati","creditName":null,"errorCode":null},{"oid":"0000-0002-0344-3101","name":"Stefano","surname":"Nardelli","creditName":null,"errorCode":null},{"oid":"0000-0001-7883-1283","name":"Esterina","surname":"De Carlo","creditName":null,"errorCode":null}])
(10.1590/S1678-58782011000500002,[{"oid":"0000-0003-1537-5430","name":"Mario Henrique","surname":"Macagnan","creditName":null,"errorCode":null}])
(10.1080/17425255.2017.1290080,[{"oid":"0000-0003-4652-7089","name":"Michael","surname":"Durkin","creditName":null,"errorCode":null},{"oid":"0000-0003-4652-7089","name":"Michael","surname":"Durkin","creditName":null,"errorCode":null},{"oid":"0000-0003-4652-7089","name":"Michael","surname":"Durkin","creditName":null,"errorCode":null}])
(10.1111/j.2047-6310.2013.00183.x,[{"oid":"0000-0002-0252-9923","name":"Thomas","surname":"Willis","creditName":"Thomas Andrew Willis","errorCode":null},{"oid":"0000-0002-0252-9923","name":"Thomas","surname":"Willis","creditName":"Thomas Andrew Willis","errorCode":null},{"oid":"0000-0002-4065-4397","name":"Charlotte","surname":"Evans","creditName":null,"errorCode":null}])
(10.1098/rstb.2018.0138,[{"oid":"0000-0003-2308-2603","name":"Peter","surname":"Bossaerts","creditName":null,"errorCode":null}])
(10.12660/GVCASOSV4N1N4,[{"oid":"0000-0003-2183-8112","name":"Pelayo Munhoz","surname":"Olea","creditName":"Pelayo Munhoz Olea","errorCode":null}])
(10.1016/0306-4522(96)00157-1,[{"oid":"0000-0002-3525-4671","name":"Elek","surname":"Molnár","creditName":null,"errorCode":null}])
(10.1016/j.jclinane.2018.12.004,[{"oid":"0000-0002-4776-3211","name":"Hiroshi","surname":"Otake","creditName":null,"errorCode":null}])
(10.1098/rsta.1996.0131,[{"oid":"0000-0002-8781-6154","name":"Ekhard","surname":"Salje","creditName":"Salje","errorCode":null}])
(10.1074/jbc.M113.526178,[{"oid":"0000-0002-7945-4050","name":"Jaime","surname":"Nagy","creditName":null,"errorCode":null},{"oid":"0000-0002-8302-6905","name":"Xiaolan","surname":"Zhao","creditName":null,"errorCode":null}])
(10.1016/j.biomaterials.2009.08.014,[{"oid":"0000-0002-9774-7412","name":"Liam","surname":"Grover","creditName":null,"errorCode":null},{"oid":"0000-0001-6412-2371","name":"nicola","surname":"hunt","creditName":null,"errorCode":null}])
(10.1016/j.physb.2011.09.063,[{"oid":"0000-0001-5768-0244","name":"Anne","surname":"Henry","creditName":null,"errorCode":null},{"oid":"0000-0001-7721-5091","name":"Erik","surname":"Janzen","creditName":null,"errorCode":null},{"oid":"0000-0003-4579-3529","name":"Stefano","surname":"Leone","creditName":null,"errorCode":null},{"oid":"0000-0003-4579-3529","name":"Stefano","surname":"Leone","creditName":null,"errorCode":null},{"oid":"0000-0002-7171-5383","name":"Henrik","surname":"Pedersen","creditName":"Henrik Pedersen","errorCode":null}])
(http://dx.doi.org/10.1080/00958970412331295219,[{"oid":"0000-0002-3226-2959","name":"Magdi","surname":"Iskander","creditName":null,"errorCode":null}])
(10.1166/sam.2012.1300,[{"oid":"0000-0002-0260-1059","name":"Haibo","surname":"Zeng","creditName":null,"errorCode":null},{"oid":"0000-0002-0281-3617","name":"Haibo","surname":"Zeng","creditName":null,"errorCode":null},{"oid":"0000-0002-0281-3617","name":"Haibo","surname":"Zeng","creditName":null,"errorCode":null},{"oid":"0000-0002-0281-3617","name":"Haibo","surname":"Zeng","creditName":null,"errorCode":null}])
(10.2217/fmb.11.70,[{"oid":"0000-0003-4042-7466","name":"Grzegorz","surname":"Wegrzyn","creditName":null,"errorCode":null}])
(10.1109/SIU.2008.4632665,[{"oid":"0000-0002-6335-459X","name":"AHMET","surname":"SAYAR","creditName":null,"errorCode":null}])
(10.1002/sim.910,[{"oid":"0000-0002-1829-8664","name":"Noah","surname":"Rosenberg","creditName":null,"errorCode":null}])
(10.1007/S12603-011-0071-Z,[{"oid":"0000-0001-7593-3081","name":"Jean","surname":"Woo","creditName":null,"errorCode":null}])
(10.1364/PHOTONICS.2016.Th3A.11,[{"oid":"0000-0003-2418-8418","name":"Narayan","surname":"Krishnaswamy","creditName":null,"errorCode":null}])
(10.1134/S0020168510050109,[{"oid":"0000-0002-2562-6427","name":"Nikolay","surname":"Kuznetsov","creditName":null,"errorCode":null}])
(10.1128/AEM.71.8.4539-4547.2005,[{"oid":"0000-0003-0501-2556","name":"Martin W.","surname":"Hahn","creditName":null,"errorCode":null},{"oid":"0000-0003-0501-2556","name":"Martin W.","surname":"Hahn","creditName":null,"errorCode":null}])
(10.1162/artl_a_00211,[{"oid":"0000-0003-0910-743X","name":"Tim","surname":"Taylor","creditName":"Timothy John Taylor","errorCode":null}])
(10.1080/10934529.2015.1047675,[{"oid":"0000-0003-2020-9824","name":"Bogdan","surname":"Skwarzec","creditName":null,"errorCode":null},{"oid":"0000-0001-7900-6517","name":"Dagmara","surname":"Struminska-Parulska","creditName":"Dagmara Struminska-Parulska","errorCode":null}])
(10.3109/16066359.2011.588352,[{"oid":"0000-0002-6792-916X","name":"David","surname":"Best","creditName":null,"errorCode":null}])
(10.1007/s001090050204,[{"oid":"0000-0003-3051-1285","name":"Thierry","surname":"Calandra","creditName":null,"errorCode":null}])
(10.1016/j.ajog.2007.10.519,[{"oid":"0000-0003-3159-6321","name":"Martin","surname":"Frasch","creditName":null,"errorCode":null}])
(10.1111/phpr.12304,[{"oid":"0000-0001-6180-457X","name":"Douglas","surname":"Portmore","creditName":"Douglas W. Portmore","errorCode":null}])
(10.2305/iucn.uk.2017-2.rlts.t22486174a22486850.en,[{"oid":"0000-0002-9847-2035","name":"Emma","surname":"Williams","creditName":null,"errorCode":null}])
(10.3791/50878,[{"oid":"0000-0002-8699-6253","name":"Tessa","surname":"Durham Brooks","creditName":null,"errorCode":null},{"oid":"0000-0002-8699-6253","name":"Tessa","surname":"Durham Brooks","creditName":null,"errorCode":null}])
(10.1007/s00340-009-3580-2,[{"oid":"0000-0002-2652-5134","name":"Ci-Ling","surname":"Pan","creditName":null,"errorCode":null},{"oid":"0000-0002-2652-5134","name":"Ci-Ling","surname":"Pan","creditName":null,"errorCode":null}])
(10.1007/978-3-319-19216-1_38,[{"oid":"0000-0001-9922-0350","name":"Dariusz","surname":"Laskowski","creditName":null,"errorCode":null},{"oid":"0000-0002-1748-5070","name":"ŁUBKOWSKI","surname":"PIOTR","creditName":null,"errorCode":null}])
(10.1007/s00192-017-3446-9,[{"oid":"0000-0002-9064-8454","name":"Katja","surname":"Stenström Bohlin","creditName":null,"errorCode":null}])
(10.1088/0022-3700/13/6/001,[{"oid":"0000-0002-4357-6048","name":"Jacek","surname":"Migdalek","creditName":null,"errorCode":null}])
(10.12776/qip.v16i2.67,[{"oid":"0000-0002-9155-189X","name":"Jostein","surname":"Langstrand","creditName":null,"errorCode":null}])
(10.1016/j.ecoenv.2018.07.109,[{"oid":"0000-0003-0926-4849","name":"Galia","surname":null,"creditName":null,"errorCode":null},{"oid":"0000-0002-8857-4353","name":"Vladimír","surname":"Žlábek","creditName":null,"errorCode":null},{"oid":"0000-0003-3091-7824","name":"Sidika","surname":"Sakalli","creditName":null,"errorCode":null}])
(10.1080/09585192.2013.870290,[{"oid":"0000-0001-8766-3314","name":"Andreas","surname":"Hirschi","creditName":null,"errorCode":null}])
(10.1016/j.optcom.2010.07.033,[{"oid":"0000-0001-8387-8131","name":"Esteban","surname":"Vera","creditName":null,"errorCode":null},{"oid":"0000-0002-4834-7543","name":"Alberto","surname":"Lencina","creditName":null,"errorCode":null}])
(10.1002/adma.201807383,[{"oid":"0000-0002-3042-0335","name":"Romain","surname":"Gautier","creditName":null,"errorCode":null},{"oid":"0000-0002-8671-0630","name":"Michael","surname":"PARIS","creditName":null,"errorCode":null}])
(http://dx.doi.org/10.18561/2179-5746/biotaamazonia.v4n2p55-63,[{"oid":"0000-0003-2173-6968","name":"Marcelo","surname":"Moreira de Carvalho","creditName":"Carvalho M. M.","errorCode":null}])
(10.1016/j.ehbc.2004.02.015,[{"oid":"0000-0001-7724-0668","name":"Eddy","surname":"Perez-Then","creditName":null,"errorCode":null},{"oid":"0000-0001-7724-0668","name":"Eddy","surname":"Perez-Then","creditName":null,"errorCode":null}])
(10.1016/S0001-6519(08)75117-3,[{"oid":"0000-0001-5560-8097","name":"Francesc Xavier","surname":"Avilés-Jurado","creditName":null,"errorCode":null},{"oid":"0000-0001-5560-8097","name":"Francesc Xavier","surname":"Avilés-Jurado","creditName":null,"errorCode":null}])
(10.1074/jbc.M409535200,[{"oid":"0000-0001-7811-7784","name":"Chihiro","surname":"Hisatsune","creditName":null,"errorCode":null},{"oid":"0000-0001-7713-9471","name":"Yukiko","surname":"Kuroda","creditName":"Yukiko Kuroda","errorCode":null}])
(10.1063/1.98280,[{"oid":"0000-0002-1856-474X","name":"Brian","surname":"Skromme","creditName":null,"errorCode":null}])
(10.1002/ajmg.a.37755,[{"oid":"0000-0003-3602-5704","name":null,"surname":null,"creditName":null,"errorCode":null},{"oid":"0000-0003-3602-5704","name":null,"surname":null,"creditName":null,"errorCode":null},{"oid":"0000-0003-4990-8784","name":"Anne","surname":"Trinh","creditName":null,"errorCode":null},{"oid":"0000-0003-4472-6893","name":"Stefano","surname":"Lise","creditName":null,"errorCode":null},{"oid":"0000-0002-1361-9174","name":"Caroline","surname":"Gorvin","creditName":null,"errorCode":null},{"oid":"0000-0002-1361-9174","name":"Caroline","surname":"Gorvin","creditName":null,"errorCode":null},{"oid":"0000-0002-1361-9174","name":"Caroline","surname":"Gorvin","creditName":null,"errorCode":null},{"oid":"0000-0002-3586-7654","name":"Sian","surname":"Piret","creditName":"Sian E Piret","errorCode":null}])
(10.1002/jmor.10934,[{"oid":"0000-0002-5006-6679","name":"Miriam","surname":"Zelditch","creditName":null,"errorCode":null},{"oid":"0000-0002-5006-6679","name":"Miriam","surname":"Zelditch","creditName":null,"errorCode":null}])
(http://dx.doi.org/10.1016/j.nucengdes.2010.12.007,[{"oid":"0000-0003-2653-3430","name":"Luca","surname":"Ammirabile","creditName":null,"errorCode":null}])
(10.1016/S0009-2614(99)01368-8,[{"oid":"0000-0001-6672-2186","name":"Cleber","surname":"Mendonca","creditName":null,"errorCode":null},{"oid":"0000-0001-6624-8453","name":"Lino","surname":"Misoguti","creditName":null,"errorCode":null},{"oid":"0000-0003-2372-5509","name":"José Alberto","surname":"Giacometti","creditName":null,"errorCode":null}])
(10.1038/ncomms2249,[{"oid":"0000-0002-1088-5565","name":"Santiago","surname":"Munne","creditName":null,"errorCode":null},{"oid":"0000-0002-8285-0222","name":"Shawn","surname":"Chavez","creditName":null,"errorCode":null},{"oid":"0000-0002-6487-1329","name":"Renee","surname":"Reijo Pera","creditName":null,"errorCode":null},{"oid":"0000-0002-6487-1329","name":"Renee","surname":"Reijo Pera","creditName":null,"errorCode":null}])
(https://doi.org/10.1016/j.jclepro.2017.12.156,[{"oid":"0000-0003-4311-5185","name":"Hao","surname":"Zheng","creditName":null,"errorCode":null}])
(10.1177/0022022112466590,[{"oid":"0000-0003-3764-3571","name":"Kenneth","surname":"Locke","creditName":null,"errorCode":null},{"oid":"0000-0002-4174-6955","name":"Guy","surname":"Curtis","creditName":null,"errorCode":null},{"oid":"0000-0002-4174-6955","name":"Guy","surname":"Curtis","creditName":null,"errorCode":null}])
(10.3390/app8091453,[{"oid":"0000-0001-5102-0077","name":"Liu","surname":"Yuting","creditName":null,"errorCode":null},{"oid":"0000-0001-5102-0077","name":"Liu","surname":"Yuting","creditName":null,"errorCode":null},{"oid":"0000-0002-5578-408X","name":"Huanan","surname":"Liu","creditName":null,"errorCode":null}])
(10.1063/1.3225247,[{"oid":"0000-0003-0574-1513","name":"Antonio","surname":"Garcia-Loureiro","creditName":null,"errorCode":null},{"oid":"0000-0002-6794-3893","name":"Manuel","surname":"Aldegunde","creditName":null,"errorCode":null},{"oid":"0000-0003-0973-461X","name":"Natalia","surname":"Seoane","creditName":null,"errorCode":null}])
(10.1200/jco.2015.33.7_suppl.41,[{"oid":"0000-0002-2500-5030","name":"Julien","surname":"Boudon","creditName":null,"errorCode":null}])
(10.1037/e592712011-023,[{"oid":"0000-0003-1574-585X","name":"Samuel","surname":"Posner","creditName":null,"errorCode":null}])
(10.1007/s00605-013-0526-x,[{"oid":"0000-0003-2371-4818","name":"Ákos","surname":"G.Horváth","creditName":null,"errorCode":null}])
(10.1371/journal.pone.0139754,[{"oid":"0000-0002-9109-0958","name":"Julien","surname":"Louis","creditName":null,"errorCode":null}])
(10.1021/jp054948x,[{"oid":"0000-0002-2033-5284","name":"Victor","surname":"Climent","creditName":null,"errorCode":null},{"oid":"0000-0003-4751-3279","name":"Juan","surname":"Feliu","creditName":null,"errorCode":null},{"oid":"0000-0002-5231-8032","name":"Roberto","surname":"Gómez","creditName":"Roberto Gómez","errorCode":null}])
(10.1155/2016/9828517,[{"oid":"0000-0002-2030-0356","name":"Ewa","surname":"Jasek-Gajda","creditName":null,"errorCode":null},{"oid":"0000-0002-4541-2864","name":"Ewa","surname":"Siucinska","creditName":null,"errorCode":null},{"oid":"0000-0002-8102-6039","name":"Malgorzata","surname":"Jasinska","creditName":null,"errorCode":null},{"oid":"0000-0002-8102-6039","name":"Malgorzata","surname":"Jasinska","creditName":null,"errorCode":null},{"oid":"0000-0001-7808-1101","name":"Malgorzata","surname":"Kossut","creditName":null,"errorCode":null}])
(10.1002/pi.2097,[{"oid":"0000-0002-4375-5580","name":"Graeme","surname":"Moad","creditName":"Graeme Moad","errorCode":null},{"oid":"0000-0003-2849-229X","name":"John","surname":"Forsythe","creditName":null,"errorCode":null}])
(10.1117/12.432956,[{"oid":"0000-0002-4209-1372","name":"Michael","surname":"Bakunov","creditName":null,"errorCode":null}])
(10.2144/000114087,[{"oid":"0000-0001-8479-6269","name":"HERNAN GUILLERMO","surname":"HERNANDEZ","creditName":null,"errorCode":null},{"oid":"0000-0001-9175-3363","name":"Diego","surname":"Forero","creditName":"Forero DA","errorCode":null}])
(10.1016/j.bcp.2015.08.003,[{"oid":"0000-0002-8247-9494","name":"Istvan","surname":"Czikora","creditName":null,"errorCode":null},{"oid":"0000-0003-3805-8868","name":"Rudolf","surname":"Lucas","creditName":null,"errorCode":null}])
(10.1016/j.foreco.2016.08.036,[{"oid":"0000-0001-7999-1692","name":"Dugald","surname":"Close","creditName":null,"errorCode":null}])
(10.2903/j.efsa.2017.4782,[{"oid":"0000-0002-4720-2940","name":"Margaret","surname":"Good","creditName":null,"errorCode":null},{"oid":"0000-0002-2634-3138","name":"Antonio","surname":"Velarde","creditName":null,"errorCode":null},{"oid":"0000-0003-2417-0787","name":"Søren","surname":"Nielsen","creditName":"Søren Saxmose Nielsen","errorCode":null},{"oid":"0000-0002-5904-8397","name":"Andrew","surname":"Butterworth","creditName":"Andrew Butterworth","errorCode":null}])
(10.1002/ejoc.201300745,[{"oid":"0000-0003-0168-9295","name":"Emanuela","surname":"Licandro","creditName":null,"errorCode":null},{"oid":"0000-0001-9252-6218","name":"Alberto","surname":"Bossi","creditName":null,"errorCode":null},{"oid":"0000-0001-9252-6218","name":"Alberto","surname":"Bossi","creditName":null,"errorCode":null},{"oid":"0000-0001-9252-6218","name":"Alberto","surname":"Bossi","creditName":null,"errorCode":null},{"oid":"0000-0003-4063-1563","name":"Patrizia Romana","surname":"Mussini","creditName":null,"errorCode":null},{"oid":"0000-0003-4063-1563","name":"Patrizia Romana","surname":"Mussini","creditName":null,"errorCode":null},{"oid":"0000-0002-9540-9073","name":"SILVIA","surname":"CAUTERUCCIO","creditName":null,"errorCode":null},{"oid":"0000-0002-9540-9073","name":"SILVIA","surname":"CAUTERUCCIO","creditName":null,"errorCode":null}])
(10.1038/s41431-018-0183-6,[{"oid":"0000-0002-4475-4117","name":"Simone","surname":"Baldovino","creditName":null,"errorCode":null},{"oid":"0000-0001-6133-9831","name":"Marcella","surname":"Neri","creditName":null,"errorCode":null},{"oid":"0000-0003-4187-8816","name":"GIOVANNA","surname":"FLORIDIA","creditName":null,"errorCode":null},{"oid":"0000-0001-9406-5734","name":"Paraskevas","surname":"Iatropoulos","creditName":null,"errorCode":null}])
(10.1038/sj.ijo.0800401,[{"oid":"0000-0002-7916-4619","name":"Frederic","surname":"Fumeron","creditName":null,"errorCode":null}])
(10.1080/10934529.2012.668029,[{"oid":"0000-0002-6255-1153","name":"David","surname":"Stuckey","creditName":null,"errorCode":null},{"oid":"0000-0002-6255-1153","name":"David","surname":"Stuckey","creditName":null,"errorCode":null},{"oid":"0000-0002-2880-708X","name":"Antoine","surname":"Trzcinski","creditName":null,"errorCode":null}])
(10.1371/journal.pmed.1001427,[{"oid":"0000-0001-6857-8931","name":"Etheldreda","surname":"Nakimuli-Mpungu","creditName":null,"errorCode":null}])
(10.1186/S13660-015-0787-0,[{"oid":"0000-0003-1769-2800","name":"Ke","surname":"Wang","creditName":"Ke Wang","errorCode":null}])
(10.2174/157018011794578204,[{"oid":"0000-0002-8902-2876","name":"Gajendra","surname":"Raghava","creditName":"G. P. S. Raghava","errorCode":null}])
(10.1016/j.jtho.2016.11.002,[{"oid":"0000-0002-3832-8744","name":"Chandana","surname":"Reddy","creditName":null,"errorCode":null}])
(10.26502/jesph.96120058,[{"oid":"0000-0002-1942-7384","name":"Anahi","surname":"Aguilera","creditName":null,"errorCode":null}])
(10.1007/BF00749305,[{"oid":"0000-0003-4546-002X","name":"Oleg","surname":"Ponomarev","creditName":null,"errorCode":null},{"oid":"0000-0002-0362-4121","name":"Farid","surname":"Enikeev","creditName":null,"errorCode":null}])
(10.1016/j.engfracmech.2015.01.008,[{"oid":"0000-0003-1117-9619","name":"Xian-Fang","surname":"Li","creditName":"Xian-Fang Li","errorCode":null}])
(10.1007/s10898-015-0341-0,[{"oid":"0000-0001-6340-385X","name":"Semu Mitiku","surname":"Kassa","creditName":null,"errorCode":null},{"oid":"0000-0001-5494-040X","name":"Semu Mitiku","surname":"Kassa","creditName":null,"errorCode":null}])
(10.1177/0091270010395591,[{"oid":"0000-0002-4227-5817","name":"David","surname":"Le Couteur","creditName":null,"errorCode":null},{"oid":"0000-0002-5970-1501","name":"Sarah","surname":"Hilmer","creditName":null,"errorCode":null}])
(10.1016/j.cellimm.2016.09.003,[{"oid":"0000-0002-7412-9507","name":"Joseph","surname":"Mattapallil","creditName":null,"errorCode":null}])
(10.1158/0008-5472.CAN-09-1091,[{"oid":"0000-0002-6093-0395","name":"Rajiv","surname":"Kumar","creditName":null,"errorCode":null},{"oid":"0000-0003-0674-7757","name":"Giuseppe","surname":"MATULLO","creditName":null,"errorCode":null},{"oid":"0000-0001-9599-309X","name":"Alessandra","surname":"Allione","creditName":null,"errorCode":null},{"oid":"0000-0003-1901-9513","name":"Heather","surname":"Nelson","creditName":null,"errorCode":null},{"oid":"0000-0002-8752-8785","name":"Tim","surname":"Bishop","creditName":"D T Bishop","errorCode":null},{"oid":"0000-0002-0787-3969","name":"Gunnar","surname":"Steineck","creditName":null,"errorCode":null},{"oid":"0000-0001-7208-2912","name":"Anne","surname":"Kiltie","creditName":null,"errorCode":null},{"oid":"0000-0002-4669-3995","name":"Zuo-Feng","surname":"Zhang","creditName":null,"errorCode":null},{"oid":"0000-0002-4620-3108","name":"Jian-Min","surname":"Yuan","creditName":null,"errorCode":null},{"oid":"0000-0002-3561-6580","name":"Ananya","surname":"Choudhury","creditName":null,"errorCode":null},{"oid":"0000-0002-3561-6580","name":"Ananya","surname":"Choudhury","creditName":null,"errorCode":null},{"oid":"0000-0002-1720-7724","name":"Jenny","surname":"Barrett","creditName":null,"errorCode":null},{"oid":"0000-0002-5277-8477","name":"Marcello","surname":"Campagna","creditName":null,"errorCode":null},{"oid":"0000-0003-2538-3784","name":"Nuria","surname":"Malats","creditName":null,"errorCode":null}])
(10.1016/j.intell.2007.04.003,[{"oid":"0000-0003-3159-9370","name":"Alasdair","surname":"MacLullich","creditName":null,"errorCode":null}])
(10.1021/acs.orglett.7b00261,[{"oid":"0000-0002-7178-2981","name":"Jun Yong","surname":"Kang","creditName":null,"errorCode":null},{"oid":"0000-0002-9732-1454","name":"Hai","surname":"Huang","creditName":null,"errorCode":null}])
(10.1088/0004-637X/778/2/175,[{"oid":"0000-0003-0489-0920","name":"Alexei","surname":"Pevtsov","creditName":null,"errorCode":null},{"oid":"0000-0003-0489-0920","name":"Alexei","surname":"Pevtsov","creditName":null,"errorCode":null}])
(10.22201/iis.01882503p.2018.4.57796,[{"oid":"0000-0002-7334-1936","name":"Yanina","surname":"Welp","creditName":null,"errorCode":null},{"oid":"0000-0001-9905-0777","name":"Flavia","surname":"Freidenberg","creditName":null,"errorCode":null},{"oid":"0000-0001-9905-0777","name":"Flavia","surname":"Freidenberg","creditName":null,"errorCode":null}])
(10.1109/IWAGPR.2017.7996109,[{"oid":"0000-0003-1698-0800","name":"Miro","surname":"Govedarica","creditName":null,"errorCode":null}])
(10.1207/s15328015tlm1603_6,[{"oid":"0000-0002-0577-1440","name":"Theodor","surname":"Adla","creditName":null,"errorCode":null}])
(10.1109/CBMI.2009.10,[{"oid":"0000-0002-1364-218X","name":"Marco","surname":"Bertini","creditName":"Marco Bertini","errorCode":null},{"oid":"0000-0002-4269-4501","name":"Giuseppe","surname":"Serra","creditName":null,"errorCode":null},{"oid":"0000-0002-1052-8322","name":"ALBERTO","surname":"DEL BIMBO","creditName":null,"errorCode":null},{"oid":"0000-0003-0819-851X","name":"Lamberto","surname":"Ballan","creditName":null,"errorCode":null}])
(10.1017/S003329171200147X,[{"oid":"0000-0001-8198-9022","name":"Amanda","surname":"Baxter","creditName":null,"errorCode":null},{"oid":"0000-0003-4667-6623","name":"Harvey","surname":"Whiteford","creditName":null,"errorCode":null},{"oid":"0000-0003-4667-6623","name":"Harvey","surname":"Whiteford","creditName":null,"errorCode":null}])
(10.1088/0305-4470/30/2/021,[{"oid":"0000-0001-6097-1202","name":"Andrés","surname":"Riaguas","creditName":null,"errorCode":null}])
(10.4103/1658-354X.169476,[{"oid":"0000-0002-1009-0869","name":"KUSAI","surname":"BAROUDI","creditName":null,"errorCode":null},{"oid":"0000-0002-1009-0869","name":"KUSAI","surname":"BAROUDI","creditName":null,"errorCode":null}])
(10.1007/s12414-015-0042-0,[{"oid":"0000-0003-3752-7837","name":"Matthijs","surname":"Janssen","creditName":null,"errorCode":null}])
(10.1007/978-3-319-08491-6_5,[{"oid":"0000-0002-4728-689X","name":"Agnieszka","surname":"Landowska","creditName":null,"errorCode":null},{"oid":"0000-0002-1117-903X","name":"Michał","surname":"Wróbel","creditName":null,"errorCode":null}])
(10.3390/inorganics7010008,[{"oid":"0000-0003-4502-5212","name":"Ian","surname":"Dance","creditName":null,"errorCode":null}])
(10.4028/www.scientific.net/DDF.297-301.814,[{"oid":"0000-0002-6970-0406","name":"Jaroslav","surname":"Kováčik","creditName":null,"errorCode":null}])
(10.1117/12.2257557,[{"oid":"0000-0002-7409-3305","name":"Jan","surname":"Kulawik","creditName":null,"errorCode":null}])
(10.1109/SIBGRAPI.2011.20,[{"oid":"0000-0001-8612-5805","name":"Fabio","surname":"Miranda","creditName":null,"errorCode":null}])
(10.1089/fpd.2012.1448,[{"oid":"0000-0001-7995-7427","name":"Beilei","surname":"Ge","creditName":null,"errorCode":null}])
(10.1016/j.lungcan.2013.04.006,[{"oid":"0000-0001-8100-7914","name":"Ruey-Long","surname":"Hong","creditName":null,"errorCode":null}])
(10.1109/IROS.2005.1545338,[{"oid":"0000-0002-4612-3554","name":"Rui P.","surname":"Rocha","creditName":"Rui P. Rocha","errorCode":null},{"oid":"0000-0003-3729-5263","name":"Adriano","surname":"Carvalho","creditName":null,"errorCode":null},{"oid":"0000-0002-2725-8867","name":"Jorge","surname":"Dias","creditName":"Jorge Dias","errorCode":null}])
(10.3182/20100901-3-IT-2016.00036,[{"oid":"0000-0001-8247-7790","name":"Hélène","surname":"Piet-Lahanier","creditName":null,"errorCode":null}])
(10.1016/S0925-9635(99)00257-5,[{"oid":"0000-0002-2400-1759","name":"BRAMBILLA","surname":"Andrea","creditName":null,"errorCode":null},{"oid":"0000-0002-0647-9134","name":"Philippe","surname":"Bergonzo","creditName":null,"errorCode":null}])
(10.1016/j.jsg.2011.08.008,[{"oid":"0000-0002-4647-8224","name":"Bernhard","surname":"Grasemann","creditName":null,"errorCode":null},{"oid":"0000-0002-4647-8224","name":"Bernhard","surname":"Grasemann","creditName":null,"errorCode":null}])
(10.1016/S1387-6473(03)00072-1,[{"oid":"0000-0001-5273-9817","name":"James","surname":"Lovell","creditName":null,"errorCode":null}])
(10.1039/C9TA02405K,[{"oid":"0000-0003-3847-2620","name":"Ch Venkata","surname":"Surya Kumar","creditName":null,"errorCode":null},{"oid":"0000-0001-6353-8877","name":"Ki Tae","surname":"Nam","creditName":null,"errorCode":null}])
(10.1007/s00216-008-2520-z,[{"oid":"0000-0002-5134-7130","name":"Helmut","surname":"Ehrenberg","creditName":null,"errorCode":null},{"oid":"0000-0002-5134-7130","name":"Helmut","surname":"Ehrenberg","creditName":null,"errorCode":null},{"oid":"0000-0002-5106-9214","name":"Kristian","surname":"Nikolowski","creditName":null,"errorCode":null}])
(10.1007/s13760-018-0889-9,[{"oid":"0000-0002-6644-9148","name":"Ahmet","surname":"yildirim","creditName":null,"errorCode":null}])
(10.1016/j.enpol.2012.10.037,[{"oid":"0000-0002-0476-2315","name":"Anu","surname":"Ramaswami","creditName":null,"errorCode":null}])
(10.1007/978-981-13-2517-5_65,[{"oid":"0000-0002-1182-0552","name":"Tiago","surname":"Benetti","creditName":null,"errorCode":null}])
(10.1179/1973947814Y.0000000172,[{"oid":"0000-0001-5610-7745","name":"Fernanda","surname":"Ruiz Larrea","creditName":null,"errorCode":null},{"oid":"0000-0003-0023-3069","name":"Vanesa","surname":"Estepa","creditName":null,"errorCode":null},{"oid":"0000-0003-3709-1690","name":"Carmen","surname":"Torres","creditName":null,"errorCode":null}])
(10.1016/s0378-1119(00)00530-8,[{"oid":"0000-0001-7584-3721","name":"Tzung-Fu","surname":"Hsieh","creditName":null,"errorCode":null}])
(10.1149/1.3567401,[{"oid":"0000-0002-9655-6155","name":"Mohammad Reza","surname":"Hantehzadeh","creditName":null,"errorCode":null}])
(10.1029/2012jd018226,[{"oid":"0000-0002-1520-4386","name":"永红","surname":"胡","creditName":"胡永红","errorCode":null},{"oid":"0000-0001-5950-9555","name":"Gensuo","surname":"Jia","creditName":null,"errorCode":null}])
(10.1186/1477-7517-11-31,[{"oid":"0000-0002-2191-7833","name":"Tim","surname":"Mackey","creditName":null,"errorCode":null},{"oid":"0000-0003-0726-0676","name":"Leo","surname":"Beletsky","creditName":null,"errorCode":null},{"oid":"0000-0003-3376-152X","name":"Maria Gudelia","surname":"Rangel","creditName":null,"errorCode":null},{"oid":"0000-0002-7724-691X","name":"Steffanie","surname":"Strathdee","creditName":null,"errorCode":null}])
(10.1023/A:1022686622752,[{"oid":"0000-0002-0534-7797","name":"Stéphane","surname":"Le Crom","creditName":null,"errorCode":null},{"oid":"0000-0001-9327-5166","name":"Marika","surname":"Kapsimali","creditName":null,"errorCode":null}])
(10.1524/zpch.2012.0224,[{"oid":"0000-0003-1563-9176","name":"Paul","surname":"Heitjans","creditName":null,"errorCode":null},{"oid":"0000-0001-9706-4892","name":"H. Martin R.","surname":"Wilkening","creditName":"Prof. Dr. Martin Wilkening","errorCode":null},{"oid":"0000-0001-9706-4892","name":"H. Martin R.","surname":"Wilkening","creditName":"Prof. Dr. Martin Wilkening","errorCode":null},{"oid":"0000-0001-9706-4892","name":"H. Martin R.","surname":"Wilkening","creditName":"Prof. Dr. Martin Wilkening","errorCode":null}])
(10.1080/03155986.2017.1393730,[{"oid":"0000-0002-8203-0689","name":"Linyan","surname":"Zhang","creditName":null,"errorCode":null}])
(10.1016/j.jbiotec.2011.06.040,[{"oid":"0000-0001-7635-1075","name":"Cecilia","surname":"Faraloni","creditName":null,"errorCode":null},{"oid":"0000-0001-7981-0847","name":"Alberto","surname":"Scoma","creditName":"Alberto Scoma","errorCode":null}])
(10.1055/s-0033-1361119,[{"oid":"0000-0001-9369-5136","name":"Farnaz","surname":"Monajjemzadeh","creditName":null,"errorCode":null}])
(10.1016/j.ijporl.2013.03.025,[{"oid":"0000-0003-4084-2301","name":"Chul Ho","surname":"Jang","creditName":null,"errorCode":null}])
(10.3758/s13423-011-0203-9,[{"oid":"0000-0002-7584-2275","name":"Patrik","surname":"Sörqvist","creditName":null,"errorCode":null}])
(10.1016/s0306-4522(02)00200-2,[{"oid":"0000-0002-8323-8211","name":"BS","surname":"Shankaranarayana Rao","creditName":null,"errorCode":null}])
(10.1021/tx030036f,[{"oid":"0000-0003-4194-6401","name":"Nico P.E.","surname":"Vermeulen","creditName":"N.P.E. Vermeulen","errorCode":null},{"oid":"0000-0002-8358-9425","name":"Angel","surname":"Messeguer","creditName":null,"errorCode":null}])
(10.1063/1.4819076,[{"oid":"0000-0003-2367-3825","name":"Timothy","surname":"Hele","creditName":null,"errorCode":null}])
(10.1049/el.2014.2651,[{"oid":"0000-0002-2826-6367","name":"Ivan","surname":"Lee","creditName":null,"errorCode":null}])
(10.1300/J137v15n02_02,[{"oid":"0000-0001-8374-2277","name":"Donna","surname":"Ryan","creditName":null,"errorCode":null}])
(10.1590/S1413-78522013000100003,[{"oid":"0000-0003-3584-3342","name":"Aldo José","surname":"Fontes Pereira","creditName":"Pereira, A.J.F","errorCode":null}])
(10.1039/c5lc01356a,[{"oid":"0000-0002-3084-0823","name":"Anna","surname":"Marsano","creditName":null,"errorCode":null},{"oid":"0000-0001-7115-0151","name":"Emiliano","surname":"Votta","creditName":null,"errorCode":null}])
(10.1007/s10903-018-0840-4,[{"oid":"0000-0003-1710-3578","name":"Ahsan","surname":"Saleem","creditName":null,"errorCode":null}])
(10.1007/s00216-015-8936-3,[{"oid":"0000-0003-4741-8650","name":"Nina","surname":"Kroepfl","creditName":null,"errorCode":null}])
(10.1109/ISCAS.2018.8351770,[{"oid":"0000-0002-7158-1426","name":"Panu","surname":"Sjövall","creditName":null,"errorCode":null}])
(10.1021/jm100732t,[{"oid":"0000-0002-1767-7072","name":"Matthew","surname":"Fuchter","creditName":null,"errorCode":null},{"oid":"0000-0002-1767-7072","name":"Matthew","surname":"Fuchter","creditName":null,"errorCode":null},{"oid":"0000-0002-5658-8486","name":"Paul","surname":"Freemont","creditName":null,"errorCode":null},{"oid":"0000-0002-5658-8486","name":"Paul","surname":"Freemont","creditName":null,"errorCode":null}])
(10.11588/BJB.2004.0.29842,[{"oid":"0000-0002-0555-3451","name":"Frank","surname":"Siegmund","creditName":null,"errorCode":null}])
(10.1006/jctb.1999.1911,[{"oid":"0000-0002-2692-9198","name":"John","surname":"Caughman","creditName":null,"errorCode":null},{"oid":"0000-0002-2692-9198","name":"John","surname":"Caughman","creditName":null,"errorCode":null}])
(10.1007/BF02125350,[{"oid":"0000-0002-4281-1843","name":"Gábor","surname":"Tardos","creditName":null,"errorCode":null}])
(10.1039/C9SC01502G,[{"oid":"0000-0003-3527-7379","name":"Andrzej","surname":"Sienkiewicz","creditName":null,"errorCode":null},{"oid":"0000-0002-7515-7593","name":"Farzaneh","surname":"Fadaei Tirani","creditName":"Farzaneh Fadaei Tirani","errorCode":null}])
(10.1063/1.119082,[{"oid":"0000-0001-8187-7469","name":"Hans","surname":"Christen","creditName":null,"errorCode":null}])
(10.1109/OCEANSAP.2016.7485389,[{"oid":"0000-0001-7619-8015","name":"Horst","surname":"Hellbrück","creditName":null,"errorCode":null}])
(10.1071/AN11006,[{"oid":"0000-0001-7750-0570","name":"Michael","surname":"Friend","creditName":null,"errorCode":null},{"oid":"0000-0001-5129-2216","name":"Susan","surname":"Robertson","creditName":null,"errorCode":null},{"oid":"0000-0001-5639-9581","name":"John","surname":"Broster","creditName":null,"errorCode":null}])
(10.1186/1746-4811-4-1,[{"oid":"0000-0002-1872-2998","name":"Wilhelm","surname":"Gruissem","creditName":null,"errorCode":null},{"oid":"0000-0002-6645-1862","name":"Lars","surname":"Hennig","creditName":null,"errorCode":null},{"oid":"0000-0002-6645-1862","name":"Lars","surname":"Hennig","creditName":null,"errorCode":null},{"oid":"0000-0002-4802-754X","name":"Matthias","surname":"Hirsch-Hoffmann","creditName":null,"errorCode":null}])
(10.1109/WCNC.2014.6951929,[{"oid":"0000-0003-1802-5022","name":"Michael","surname":"Heimlich","creditName":null,"errorCode":null},{"oid":"0000-0003-1802-5022","name":"Michael","surname":"Heimlich","creditName":null,"errorCode":null}])
(10.1210/jc.2006-0075,[{"oid":"0000-0003-0637-1577","name":"Raj","surname":"Vuppalanchi","creditName":null,"errorCode":null}])
(10.1590/S1678-91992005000300012,[{"oid":"0000-0001-9319-7516","name":"Rodrigo","surname":"Silva","creditName":"Da Silva, Rodrigo Costa","errorCode":null}])
(10.1088/0953-2048/24/7/075025,[{"oid":"0000-0001-7488-066X","name":"Qing-Ping","surname":"Ding","creditName":null,"errorCode":null},{"oid":"0000-0003-1796-6840","name":"Toshihiro","surname":"Taen","creditName":null,"errorCode":null}])
(10.1016/j.apenergy.2015.11.058,[{"oid":"0000-0003-2692-8299","name":"Zia","surname":"Wadud","creditName":null,"errorCode":null}])
(10.1002/ange.201106310,[{"oid":"0000-0002-7898-317X","name":"Belén","surname":"Martín-Matute","creditName":null,"errorCode":null},{"oid":"0000-0002-1534-2690","name":"Agnieszka","surname":"Bartoszewicz","creditName":null,"errorCode":null}])
(10.1016/s0002-9440(10)64486-0,[{"oid":"0000-0001-6151-4257","name":"Markus","surname":"Britschgi","creditName":"Markus Britschgi","errorCode":null}])
(10.1021/ac0715672,[{"oid":"0000-0002-3664-5711","name":"Maria Jose","surname":"Gomez Ramos","creditName":null,"errorCode":null},{"oid":"0000-0002-3664-5711","name":"Maria Jose","surname":"Gomez Ramos","creditName":null,"errorCode":null},{"oid":"0000-0003-4321-6795","name":"Maria Jesus","surname":"Martinez Bueno","creditName":null,"errorCode":null},{"oid":"0000-0002-1963-8106","name":"Dolores","surname":"Hernando","creditName":null,"errorCode":null},{"oid":"0000-0003-2649-6772","name":"Ana","surname":"Agüera","creditName":null,"errorCode":null},{"oid":"0000-0001-9158-0271","name":"JUAN F","surname":"GARCÍA-REYES","creditName":null,"errorCode":null}])
(10.5020/18061230.2014.319,[{"oid":"0000-0002-9554-1736","name":"Thiago","surname":"Vasconcelos","creditName":null,"errorCode":null}])
(10.1109/ICoAC.2017.7951738,[{"oid":"0000-0001-8438-0175","name":"Nedunchezhian","surname":"Raju","creditName":null,"errorCode":null},{"oid":"0000-0002-3769-657X","name":"Vijayakumar","surname":"Velusamy","creditName":null,"errorCode":null},{"oid":"0000-0002-3769-657X","name":"Vijayakumar","surname":"Velusamy","creditName":null,"errorCode":null}])
(10.1126/scitranslmed.3002785,[{"oid":"0000-0003-3619-1657","name":"Christian","surname":"Ottensmeier","creditName":null,"errorCode":null}])
(10.1007/s00245-010-9130-9,[{"oid":"0000-0002-8510-7477","name":"Catherine","surname":"Donnelly","creditName":null,"errorCode":null}])
(10.1530/endoabs.37.ep899,[{"oid":"0000-0002-9205-9530","name":"Volodymyr","surname":"Pankiv","creditName":null,"errorCode":null}])
(10.1029/2010GL045928,[{"oid":"0000-0003-1378-8434","name":"Shari","surname":"Yvon-Lewis","creditName":"Shari A Yvon-Lewis","errorCode":null},{"oid":"0000-0003-1097-6800","name":"John","surname":"Kessler","creditName":null,"errorCode":null},{"oid":"0000-0003-1097-6800","name":"John","surname":"Kessler","creditName":null,"errorCode":null}])
(10.1016/S0952-7915(00)00127-8,[{"oid":"0000-0001-6479-5330","name":"Gunther","surname":"Eysenbach","creditName":null,"errorCode":null}])
(10.1103/PhysRevB.83.064508,[{"oid":"0000-0001-7991-3918","name":"Jianxin","surname":"Zhu","creditName":null,"errorCode":null}])
(10.1002/ETT.2664,[{"oid":"0000-0001-6868-6860","name":"Qianbin","surname":null,"creditName":null,"errorCode":null}])
(10.1252/jcej.12we249,[{"oid":"0000-0002-7423-5561","name":"Takayoshi","surname":"Ishimoto","creditName":null,"errorCode":null},{"oid":"0000-0003-4347-9923","name":"Michihisa","surname":"Koyama","creditName":null,"errorCode":null}])
(10.1088/0305-4470/37/13/006,[{"oid":"0000-0002-2295-8055","name":"Epifanio Guido","surname":"Virga","creditName":null,"errorCode":null}])
(10.14419/IJET.V7I4.44.26872,[{"oid":"0000-0002-3164-5157","name":"Edy","surname":"Budiman","creditName":null,"errorCode":null}])
(10.1117/12.667839,[{"oid":"0000-0002-1551-6646","name":"Mikhail","surname":"Bryushinin","creditName":null,"errorCode":null}])
(10.1021/acs.jafc.6b03279,[{"oid":"0000-0002-4145-5031","name":"Manuel","surname":"Mota","creditName":"Mota, MM","errorCode":null},{"oid":"0000-0002-4145-5031","name":"Manuel","surname":"Mota","creditName":"Mota, MM","errorCode":null},{"oid":"0000-0003-2817-7943","name":"Jorge","surname":"Faria","creditName":"Jorge M. S. Faria","errorCode":null},{"oid":"0000-0003-2817-7943","name":"Jorge","surname":"Faria","creditName":"Jorge M. S. Faria","errorCode":null},{"oid":"0000-0003-3257-9777","name":"Ana M.","surname":"Rodrigues","creditName":null,"errorCode":null}])
(10.1117/12.2228432,[{"oid":"0000-0003-2590-034X","name":"Peppino","surname":"Fazio","creditName":null,"errorCode":null},{"oid":"0000-0003-0593-5254","name":"Mauro","surname":"Tropea","creditName":null,"errorCode":null}])
(10.1186/s40349-016-0049-8,[{"oid":"0000-0002-4271-7670","name":"Marc Nicola","surname":"Gallay","creditName":null,"errorCode":null}])
(10.1055/s-0029-1214397,[{"oid":"0000-0002-5286-7322","name":"Eva","surname":"Münster","creditName":null,"errorCode":null},{"oid":"0000-0002-5286-7322","name":"Eva","surname":"Münster","creditName":null,"errorCode":null}])
(10.1108/00197850610671991,[{"oid":"0000-0003-1239-8733","name":"viki","surname":"holton","creditName":null,"errorCode":null},{"oid":"0000-0003-1239-8733","name":"viki","surname":"holton","creditName":null,"errorCode":null}])
(10.1186/s12863-017-0480-z,[{"oid":"0000-0001-7747-0930","name":"Puthen Veettil","surname":"Jithesh","creditName":null,"errorCode":null}])
(10.1109/jsen.2011.2106156,[{"oid":"0000-0002-4213-9575","name":"Yael","surname":"Hanein","creditName":null,"errorCode":null}])
(10.1115/FEDSM2012-72228,[{"oid":"0000-0001-6692-858X","name":"Yogesh","surname":"Joshi","creditName":null,"errorCode":null}])
(10.1039/c5cp03696h,[{"oid":"0000-0002-0664-2536","name":"François","surname":"LIQUE","creditName":null,"errorCode":null},{"oid":"0000-0002-7407-303X","name":"Jacek","surname":"Klos","creditName":null,"errorCode":null}])
(10.1016/0308-8146(95)00112-3,[{"oid":"0000-0002-3438-0852","name":"José María","surname":"Fresno-Baro","creditName":null,"errorCode":null},{"oid":"0000-0002-3438-0852","name":"José María","surname":"Fresno-Baro","creditName":null,"errorCode":null},{"oid":"0000-0002-3737-9830","name":"Javier","surname":"Carballo","creditName":null,"errorCode":null},{"oid":"0000-0002-1697-178X","name":"María Josefa","surname":"González Prieto","creditName":null,"errorCode":null},{"oid":"0000-0002-5658-6720","name":"Ana","surname":"Bernardo Álvarez","creditName":null,"errorCode":null}])
(10.1016/j.jmaa.2005.05.047,[{"oid":"0000-0002-8320-3596","name":"Kazimierz","surname":"Wlodarczyk","creditName":null,"errorCode":null},{"oid":"0000-0001-6635-0781","name":"Dariusz","surname":"Wardowski","creditName":null,"errorCode":null}])
(10.1056/NEJM199312233292602,[{"oid":"0000-0002-9631-1254","name":"Martin","surname":"Larson","creditName":null,"errorCode":null}])
(10.1016/j.apergo.2008.01.001,[{"oid":"0000-0001-7844-4881","name":"Vilhelm F","surname":"Koefoed","creditName":null,"errorCode":null}])
(10.1007/s00421-011-2121-y,[{"oid":"0000-0001-6864-7706","name":"Taku","surname":"Wakahara","creditName":null,"errorCode":null}])
(10.1016/j.agrformet.2007.05.012,[{"oid":"0000-0001-5779-2764","name":"Rachid","surname":"Hadria","creditName":null,"errorCode":null},{"oid":"0000-0002-3905-7560","name":"Gilles","surname":"Boulet","creditName":null,"errorCode":null}])
(10.1393/ncb/i2007-10061-0,[{"oid":"0000-0002-7349-1109","name":"dafne","surname":"guetta","creditName":null,"errorCode":null}])
(10.1016/j.ajog.2015.11.023,[{"oid":"0000-0001-7122-6187","name":"Katie","surname":"Propst","creditName":null,"errorCode":null},{"oid":"0000-0001-7122-6187","name":"Katie","surname":"Propst","creditName":null,"errorCode":null},{"oid":"0000-0002-2044-1693","name":"Tyler","surname":"Muffly","creditName":null,"errorCode":null},{"oid":"0000-0002-2044-1693","name":"Tyler","surname":"Muffly","creditName":null,"errorCode":null}])
(doi:10.1166/jnn.2018.15274,[{"oid":"0000-0003-0582-575X","name":"David","surname":"Rojas","creditName":"D. Rojas","errorCode":null}])
(10.21061/cc.v2i1.a.10,[{"oid":"0000-0001-7242-6762","name":"Jacob","surname":"Bruggeman","creditName":null,"errorCode":null}])
(10.3390/nu10050622,[{"oid":"0000-0001-6890-5250","name":"Marjory","surname":"Moodie","creditName":null,"errorCode":null},{"oid":"0000-0002-5957-6931","name":"Jaithri","surname":"Ananthapavan","creditName":null,"errorCode":null},{"oid":"0000-0001-9736-1539","name":"Gary","surname":"Sacks","creditName":null,"errorCode":null},{"oid":"0000-0001-9736-1539","name":"Gary","surname":"Sacks","creditName":null,"errorCode":null},{"oid":"0000-0001-9736-1539","name":"Gary","surname":"Sacks","creditName":null,"errorCode":null},{"oid":"0000-0002-3206-8232","name":"Lennert","surname":"Veerman","creditName":null,"errorCode":null},{"oid":"0000-0002-3323-575X","name":"kathryn","surname":"backholer","creditName":null,"errorCode":null},{"oid":"0000-0003-2891-9476","name":"Vicki","surname":"Brown","creditName":null,"errorCode":null}])
(10.5194/bg-2018-344,[{"oid":"0000-0001-6469-7167","name":"Neus","surname":"Garcias-Bonet","creditName":null,"errorCode":null}])
(10.1038/srep01933,[{"oid":"0000-0001-8948-466X","name":"Yichi","surname":"Zhang","creditName":null,"errorCode":null}])
(10.1306/05131615170,[{"oid":"0000-0003-0397-8744","name":"Sarada","surname":"Mohanty","creditName":"Sarada P Mohanty","errorCode":null}])
(10.1080/14658011.2017.1399531,[{"oid":"0000-0003-2296-5623","name":"Janak","surname":"Sapkota","creditName":null,"errorCode":null},{"oid":"0000-0003-2296-5623","name":"Janak","surname":"Sapkota","creditName":null,"errorCode":null},{"oid":"0000-0003-4737-9823","name":"Joamin","surname":"Gonzalez-Gutierrez","creditName":null,"errorCode":null},{"oid":"0000-0001-5149-7895","name":"Clemens","surname":"Holzer","creditName":null,"errorCode":null},{"oid":"0000-0001-5149-7895","name":"Clemens","surname":"Holzer","creditName":null,"errorCode":null}])
(10.4322/acr.2015.007,[{"oid":"0000-0002-8414-4161","name":"Jussara Bianchi","surname":"Castelli","creditName":null,"errorCode":null},{"oid":"0000-0002-5092-0505","name":"Benoit","surname":"Bibas","creditName":null,"errorCode":null}])
(10.1186/s12888-017-1560-3,[{"oid":"0000-0002-9742-1359","name":"Alexandra","surname":"Pitman","creditName":null,"errorCode":null},{"oid":"0000-0003-2519-1539","name":"David","surname":"Osborn","creditName":null,"errorCode":null}])
(10.1007/s10741-019-09829-7,[{"oid":"0000-0001-7175-0464","name":"Pierpaolo","surname":"Pellicori","creditName":null,"errorCode":null}])
(10.1007/s00542-012-1613-y,[{"oid":"0000-0001-5535-9473","name":"Shota","surname":"Yabui","creditName":null,"errorCode":null},{"oid":"0000-0003-0623-5938","name":"Takenori","surname":"Atsumi","creditName":null,"errorCode":null}])
(10.1039/C9SC02399B,[{"oid":"0000-0002-1034-0856","name":"Marvin","surname":"Vega","creditName":"Marvin M Vega","errorCode":null},{"oid":"0000-0003-0994-7789","name":"Li","surname":"Fu","creditName":null,"errorCode":null}])
(10.1039/C8DT04698K,[{"oid":"0000-0002-3213-1350","name":"Likun","surname":"Tan","creditName":null,"errorCode":null}])
(10.1080/09595230802093802,[{"oid":"0000-0001-9580-1545","name":"Simon","surname":"Moyes","creditName":null,"errorCode":null}])
(10.1371/journal.pone.0136586,[{"oid":"0000-0002-6296-1427","name":"Scot","surname":"Dowd","creditName":null,"errorCode":null},{"oid":"0000-0002-2176-6932","name":"Jan","surname":"Suchodolski","creditName":null,"errorCode":null},{"oid":"0000-0002-9557-3068","name":"Cole","surname":"Mcqueen","creditName":null,"errorCode":null}])
(10.3233/RNN-2009-0473,[{"oid":"0000-0002-6746-1034","name":"Iris-Katharina","surname":"Penner","creditName":null,"errorCode":null}])
(10.1016/j.jcpa.2015.04.004,[{"oid":"0000-0001-5281-0521","name":"Antonio","surname":"Fernandez","creditName":null,"errorCode":null},{"oid":"0000-0003-3749-8845","name":"Eva","surname":"Sierra","creditName":null,"errorCode":null},{"oid":"0000-0002-1623-5010","name":"Manuel","surname":"Arbelo","creditName":"marbelo","errorCode":null}])
(10.1007/s11270-011-1042-z,[{"oid":"0000-0003-2811-1979","name":"Irvan","surname":"Dahlan","creditName":null,"errorCode":null}])
(10.1054/bjoc.2001.2096,[{"oid":"0000-0001-9484-9977","name":"Sasiwarang","surname":"Wannamethee","creditName":null,"errorCode":null}])
(10.1137/140988541,[{"oid":"0000-0001-5645-5854","name":"Ilse","surname":"Ipsen","creditName":null,"errorCode":null}])

Some files were not shown because too many files have changed in this diff Show More