forked from D-Net/dnet-hadoop

Commit b0969461f8: merging with branch beta
File: MakeTarArchive.java

@@ -5,13 +5,71 @@ import java.io.BufferedInputStream;

import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.Optional;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;

public class MakeTarArchive implements Serializable {

    private static final Logger log = LoggerFactory.getLogger(MakeTarArchive.class);

    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils
            .toString(
                MakeTarArchive.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/common/input_maketar_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        final String outputPath = parser.get("hdfsPath");
        log.info("hdfsPath: {}", outputPath);

        final String hdfsNameNode = parser.get("nameNode");
        log.info("nameNode: {}", hdfsNameNode);

        final String inputPath = parser.get("sourcePath");
        log.info("input path : {}", inputPath);

        final int gBperSplit = Optional
            .ofNullable(parser.get("splitSize"))
            .map(Integer::valueOf)
            .orElse(10);

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", hdfsNameNode);

        FileSystem fileSystem = FileSystem.get(conf);

        makeTArArchive(fileSystem, inputPath, outputPath, gBperSplit);

    }

    public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit)
        throws IOException {

        RemoteIterator<LocatedFileStatus> dirIterator = fileSystem.listLocatedStatus(new Path(inputPath));

        while (dirIterator.hasNext()) {
            LocatedFileStatus fileStatus = dirIterator.next();

            Path p = fileStatus.getPath();
            String pathString = p.toString();
            String entity = pathString.substring(pathString.lastIndexOf("/") + 1);

            MakeTarArchive.tarMaxSize(fileSystem, pathString, outputPath + "/" + entity, entity, gBperSplit);
        }
    }

    private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException {
        Path hdfsWritePath = new Path(outputPath);
        if (fileSystem.exists(hdfsWritePath)) {
@@ -21,7 +79,7 @@ public class MakeTarArchive implements Serializable {

        return new TarArchiveOutputStream(fileSystem.create(hdfsWritePath).getWrappedStream());
    }

-   private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name)
+   private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dirName)
        throws IOException {

        Path hdfsWritePath = new Path(outputPath);

@@ -37,7 +95,7 @@ public class MakeTarArchive implements Serializable {

            new Path(inputPath), true);

        while (iterator.hasNext()) {
-           writeCurrentFile(fileSystem, dir_name, iterator, ar, 0);
+           writeCurrentFile(fileSystem, dirName, iterator, ar, 0);
        }

    }
@@ -59,32 +117,30 @@ public class MakeTarArchive implements Serializable {

            new Path(inputPath), true);
        boolean next = fileStatusListIterator.hasNext();
        while (next) {
-           TarArchiveOutputStream ar = getTar(fileSystem, outputPath + "_" + (partNum + 1) + ".tar");
+           try (TarArchiveOutputStream ar = getTar(fileSystem, outputPath + "_" + (partNum + 1) + ".tar")) {

-               long current_size = 0;
-               while (next && current_size < bytesPerSplit) {
-                   current_size = writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, current_size);
-                   next = fileStatusListIterator.hasNext();
+               long currentSize = 0;
+               while (next && currentSize < bytesPerSplit) {
+                   currentSize = writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, currentSize);
+                   next = fileStatusListIterator.hasNext();

                }

+               partNum += 1;
+           }
-           partNum += 1;
-           ar.close();
        }

    }

-   private static long writeCurrentFile(FileSystem fileSystem, String dir_name,
+   private static long writeCurrentFile(FileSystem fileSystem, String dirName,
        RemoteIterator<LocatedFileStatus> fileStatusListIterator,
-       TarArchiveOutputStream ar, long current_size) throws IOException {
+       TarArchiveOutputStream ar, long currentSize) throws IOException {
        LocatedFileStatus fileStatus = fileStatusListIterator.next();

        Path p = fileStatus.getPath();
-       String p_string = p.toString();
-       if (!p_string.endsWith("_SUCCESS")) {
-           String name = p_string.substring(p_string.lastIndexOf("/") + 1);
+       String pString = p.toString();
+       if (!pString.endsWith("_SUCCESS")) {
+           String name = pString.substring(pString.lastIndexOf("/") + 1);
            if (name.startsWith("part-") & name.length() > 10) {
                String tmp = name.substring(0, 10);
                if (name.contains(".")) {

@@ -92,9 +148,9 @@ public class MakeTarArchive implements Serializable {

                }
                name = tmp;
            }
-           TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name);
+           TarArchiveEntry entry = new TarArchiveEntry(dirName + "/" + name);
            entry.setSize(fileStatus.getLen());
-           current_size += fileStatus.getLen();
+           currentSize += fileStatus.getLen();
            ar.putArchiveEntry(entry);

            InputStream is = fileSystem.open(fileStatus.getPath());

@@ -110,7 +166,7 @@ public class MakeTarArchive implements Serializable {

            ar.closeArchiveEntry();

        }
-       return current_size;
+       return currentSize;
    }

}
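
Note: the substantive fix in this file, beyond the snake_case renames, is opening each tar split in try-with-resources instead of calling ar.close() manually, so the stream is flushed and closed even when writing an entry throws. A minimal standalone sketch of the pattern (hypothetical paths and a stand-in loop, not the project's code):

import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;

class TarSplitSketch {
    // One tar stream per split; try-with-resources closes it on every path,
    // which the old trailing ar.close() did not guarantee under exceptions.
    static void writeSplits(long bytesPerSplit) throws IOException {
        int partNum = 0;
        boolean next = true; // stand-in for fileStatusListIterator.hasNext()
        while (next) {
            try (OutputStream os = Files.newOutputStream(Paths.get("archive_" + (partNum + 1) + ".tar"));
                TarArchiveOutputStream ar = new TarArchiveOutputStream(os)) {
                long currentSize = 0;
                while (next && currentSize < bytesPerSplit) {
                    // write one entry here and add its length to currentSize
                    next = false; // stand-in for iterator exhaustion
                }
                partNum += 1;
            } // ar.close() runs here, even on exceptions
        }
    }
}
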
File: PacePerson.java

@@ -1,18 +1,18 @@

package eu.dnetlib.dhp.common;

+import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import java.util.*;
+import java.util.stream.Collectors;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.text.WordUtils;

import com.ctc.wstx.dtd.LargePrefixedNameSet;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.hash.Hashing;

@@ -29,7 +29,19 @@ public class PacePerson {

    private List<String> fullname = Lists.newArrayList();
    private final String original;

-   private static Set<String> particles = null;
+   private static Set<String> particles;

+   static {
+       try {
+           particles = new HashSet<>(IOUtils
+               .readLines(
+                   PacePerson.class
+                       .getResourceAsStream(
+                           "/eu/dnetlib/dhp/common/name_particles.txt")));
+       } catch (IOException e) {
+           throw new RuntimeException(e);
+       }
+   }

    /**
     * Capitalizes a string
@@ -37,29 +49,20 @@ public class PacePerson {

     * @param s the string to capitalize
     * @return the input string with capital letter
     */
-   public static final String capitalize(final String s) {
+   public static String capitalize(final String s) {
+       if (particles.contains(s)) {
+           return s;
+       }
        return WordUtils.capitalize(s.toLowerCase(), ' ', '-');
    }

    /**
     * Adds a dot to a string with length equals to 1
     */
-   public static final String dotAbbreviations(final String s) {
+   public static String dotAbbreviations(final String s) {
        return s.length() == 1 ? s + "." : s;
    }

-   public static Set<String> loadFromClasspath(final String classpath) {
-       final Set<String> h = new HashSet<>();
-       try {
-           for (final String s : IOUtils.readLines(PacePerson.class.getResourceAsStream(classpath))) {
-               h.add(s);
-           }
-       } catch (final Throwable e) {
-           return new HashSet<>();
-       }
-       return h;
-   }
-
    /**
     * The constructor of the class. It fills the fields of the class basing on the input fullname.
     *
@@ -128,10 +131,6 @@ public class PacePerson {

    }

    private List<String> splitTerms(final String s) {
-       if (particles == null) {
-           particles = loadFromClasspath("/eu/dnetlib/dhp/oa/graph/pace/name_particles.txt");
-       }
-
        final List<String> list = Lists.newArrayList();
        for (final String part : Splitter.on(" ").omitEmptyStrings().split(s)) {
            if (!particles.contains(part.toLowerCase())) {
@@ -187,17 +186,36 @@ public class PacePerson {

    }

    public List<String> getCapitalFirstnames() {
-       return Lists
-           .newArrayList(
-               Iterables.transform(getNameWithAbbreviations(), PacePerson::capitalize));
+       return Optional
+           .ofNullable(getNameWithAbbreviations())
+           .map(
+               name -> name
+                   .stream()
+                   .map(PacePerson::capitalize)
+                   .collect(Collectors.toList()))
+           .orElse(new ArrayList<>());
    }

    public List<String> getCapitalSurname() {
-       return Lists.newArrayList(Iterables.transform(surname, PacePerson::capitalize));
+       return Optional
+           .ofNullable(getSurname())
+           .map(
+               surname -> surname
+                   .stream()
+                   .map(PacePerson::capitalize)
+                   .collect(Collectors.toList()))
+           .orElse(new ArrayList<>());
    }

    public List<String> getNameWithAbbreviations() {
-       return Lists.newArrayList(Iterables.transform(name, PacePerson::dotAbbreviations));
+       return Optional
+           .ofNullable(getName())
+           .map(
+               name -> name
+                   .stream()
+                   .map(PacePerson::dotAbbreviations)
+                   .collect(Collectors.toList()))
+           .orElse(new ArrayList<>());
    }

    public boolean isAccurate() {
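
Note: all three getters now follow the same null-safe shape: wrap the possibly-null list in Optional, map each element, and fall back to an empty list, where Iterables.transform would have thrown an NPE on null input. A standalone sketch of the pattern (hypothetical names):

import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.function.UnaryOperator;
import java.util.stream.Collectors;

class NullSafeMap {
    // Returns a transformed copy of the list, or an empty list when input is null.
    static List<String> mapOrEmpty(List<String> values, UnaryOperator<String> fn) {
        return Optional
            .ofNullable(values)
            .map(v -> v.stream().map(fn).collect(Collectors.toList()))
            .orElse(new ArrayList<>());
    }
}
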
File: ZenodoAPIClient.java

@@ -191,7 +191,7 @@ public class ZenodoAPIClient implements Serializable {

     * @throws MissingConceptDoiException
     */
    public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException {
-       setDepositionId(concept_rec_id);
+       setDepositionId(concept_rec_id, 1);
        String json = "{}";

        OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
@@ -253,9 +253,10 @@ public class ZenodoAPIClient implements Serializable {

    }

-   private void setDepositionId(String concept_rec_id) throws IOException, MissingConceptDoiException {
+   private void setDepositionId(String concept_rec_id, Integer page) throws IOException, MissingConceptDoiException {

-       ZenodoModelList zenodoModelList = new Gson().fromJson(getPrevDepositions(), ZenodoModelList.class);
+       ZenodoModelList zenodoModelList = new Gson()
+           .fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);

        for (ZenodoModel zm : zenodoModelList) {
            if (zm.getConceptrecid().equals(concept_rec_id)) {
@@ -263,16 +264,23 @@ public class ZenodoAPIClient implements Serializable {

                return;
            }
        }

-       throw new MissingConceptDoiException("The concept record id specified was missing in the list of depositions");
+       if (zenodoModelList.size() == 0)
+           throw new MissingConceptDoiException(
+               "The concept record id specified was missing in the list of depositions");
+       setDepositionId(concept_rec_id, page + 1);

    }

-   private String getPrevDepositions() throws IOException {
+   private String getPrevDepositions(String page) throws IOException {

        OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();

+       HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder();
+       urlBuilder.addQueryParameter("page", page);
+       String url = urlBuilder.build().toString();
+
        Request request = new Request.Builder()
-           .url(urlString)
+           .url(url)
            .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
            .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
            .get()
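
Note: the deposition lookup is now paginated. getPrevDepositions takes a page query parameter, and when the concept record id is not on the current page, setDepositionId recurses with page + 1, throwing MissingConceptDoiException only once an empty page comes back. A sketch of the control flow, with an in-memory list standing in for the Zenodo endpoint (hypothetical, not the client's code):

import java.util.Collections;
import java.util.List;

class PagedLookupSketch {
    // Recurse with page + 1 until the id is found; fail only on an empty page.
    // 'pages' stands in for the depositions endpoint; page numbers are 1-based.
    static int findOnPage(List<List<String>> pages, String id, int page) {
        List<String> current = page <= pages.size() ? pages.get(page - 1) : Collections.emptyList();
        if (current.contains(id)) {
            return page;
        }
        if (current.isEmpty()) {
            throw new IllegalStateException("concept record id missing in the list of depositions");
        }
        return findOnPage(pages, id, page + 1);
    }
}
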
File: Vocabulary.java

@@ -83,4 +83,10 @@ public class Vocabulary implements Serializable {

            .orElse(null);
    }

+   public Qualifier lookup(String id) {
+       return Optional
+           .ofNullable(getSynonymAsQualifier(id))
+           .orElse(getTermAsQualifier(id));
+   }
+
}
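
Note: lookup resolves an id as a synonym first and falls back to a direct term match, sparing callers the two-step null check. One nuance of this shape: Optional.orElse evaluates getTermAsQualifier(id) even when the synonym already matched; orElseGet(() -> getTermAsQualifier(id)) would defer it. A generic sketch of the fallback pattern, with maps standing in for the vocabulary (hypothetical):

import java.util.Map;
import java.util.Optional;

class FallbackLookupSketch {
    // Try the primary resolver, fall back to the secondary one;
    // Optional.ofNullable keeps the null-returning lookups composable.
    static String lookup(Map<String, String> synonyms, Map<String, String> terms, String id) {
        return Optional
            .ofNullable(synonyms.get(id))
            .orElse(terms.get(id));
    }
}
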
File: VocabularyGroup.java

@@ -81,6 +81,13 @@ public class VocabularyGroup implements Serializable {

        vocs.put(id.toLowerCase(), new Vocabulary(id, name));
    }

+   public Optional<Vocabulary> find(final String vocId) {
+       return Optional
+           .ofNullable(vocId)
+           .map(String::toLowerCase)
+           .map(vocs::get);
+   }
+
    public void addTerm(final String vocId, final String id, final String name) {
        if (vocabularyExists(vocId)) {
            vocs.get(vocId.toLowerCase()).addTerm(id, name);
File: GraphCleaningFunctions.java

@@ -1,6 +1,8 @@

package eu.dnetlib.dhp.schema.oaf.utils;

+import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
+
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;

@@ -16,7 +18,6 @@ import com.github.sisyphsu.dateparser.DateParserUtils;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

import eu.dnetlib.dhp.common.vocabulary.Vocabulary;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
@@ -191,8 +192,8 @@ public class GraphCleaningFunctions extends CleaningFunctions {

                    qualifier("und", "Undetermined", ModelConstants.DNET_LANGUAGES));
            }
            if (Objects.nonNull(r.getSubject())) {
-               r
-                   .setSubject(
+               List<Subject> subjects = Lists
+                   .newArrayList(
                        r
                            .getSubject()
                            .stream()
@@ -201,7 +202,18 @@ public class GraphCleaningFunctions extends CleaningFunctions {

                            .filter(sp -> Objects.nonNull(sp.getQualifier()))
                            .filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
                            .map(GraphCleaningFunctions::cleanValue)
-                           .collect(Collectors.toList()));
+                           .collect(
+                               Collectors
+                                   .toMap(
+                                       s -> Optional
+                                           .ofNullable(s.getQualifier())
+                                           .map(q -> q.getClassid() + s.getValue())
+                                           .orElse(s.getValue()),
+                                       Function.identity(),
+                                       (s1, s2) -> Collections
+                                           .min(Lists.newArrayList(s1, s2), new SubjectProvenanceComparator())))
+                           .values());
+               r.setSubject(subjects);
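
Note: instead of collecting cleaned subjects straight to a list, the block above deduplicates them with Collectors.toMap, keyed by classid + value; on a key collision the SubjectProvenanceComparator (new file further down) decides which of the two subjects survives via Collections.min. A standalone sketch of the dedup-by-key-with-merge pattern (hypothetical strings in place of Subject):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;

class DedupByKeySketch {
    // Keep one value per key; on collision keep the "smaller" per comparator.
    static List<String> dedup(List<String> values, Comparator<String> preferred) {
        return new ArrayList<>(values
            .stream()
            .collect(
                Collectors
                    .toMap(
                        v -> v.toLowerCase(), // dedup key, like classid + value above
                        Function.identity(),
                        (a, b) -> preferred.compare(a, b) <= 0 ? a : b))
            .values());
    }

    public static void main(String[] args) {
        // "FOS" and "fos" collide on the key; the comparator picks the survivor.
        System.out.println(dedup(Arrays.asList("FOS", "fos", "sdg"), Comparator.naturalOrder()));
    }
}
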
            }
            if (Objects.nonNull(r.getTitle())) {
                r
@@ -382,14 +394,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {

                        .filter(p -> StringUtils.isNotBlank(p.getValue()))
                        .map(p -> {
                            // hack to distinguish orcid from orcid_pending
-                           String pidProvenance = Optional
-                               .ofNullable(p.getDataInfo())
-                               .map(
-                                   d -> Optional
-                                       .ofNullable(d.getProvenanceaction())
-                                       .map(Qualifier::getClassid)
-                                       .orElse(""))
-                               .orElse("");
+                           String pidProvenance = getProvenance(p.getDataInfo());
                            if (p
                                .getQualifier()
                                .getClassid()
@@ -520,6 +525,11 @@ public class GraphCleaningFunctions extends CleaningFunctions {

        return s;
    }

+   protected static Subject cleanValue(Subject s) {
+       s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " "));
+       return s;
+   }
+
    protected static Field<String> cleanValue(Field<String> s) {
        s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " "));
        return s;
File: OafMapperUtils.java

@@ -3,6 +3,8 @@ package eu.dnetlib.dhp.schema.oaf.utils;

import static eu.dnetlib.dhp.schema.common.ModelConstants.*;

+import java.sql.Array;
+import java.sql.SQLException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;

@@ -12,6 +14,7 @@ import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;

import eu.dnetlib.dhp.schema.common.AccessRightComparator;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
@@ -118,6 +121,17 @@ public class OafMapperUtils {

            .collect(Collectors.toList());
    }

+   public static <T> List<T> listValues(Array values) throws SQLException {
+       if (Objects.isNull(values)) {
+           return null;
+       }
+       return Arrays
+           .stream((T[]) values.getArray())
+           .filter(Objects::nonNull)
+           .distinct()
+           .collect(Collectors.toList());
+   }
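
Note: listValues is the SQL-side counterpart of the surrounding list helpers: it unpacks a java.sql.Array into a distinct, null-free List, returning null for a null column. The unchecked (T[]) cast assumes the JDBC driver materializes an object array of the expected element type. A hedged usage sketch (hypothetical column name and call site):

import java.sql.Array;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.List;

class ListValuesUsage {
    // Hypothetical: unpack a text[] column of a row into a deduplicated list.
    // Assumes OafMapperUtils (eu.dnetlib.dhp.schema.oaf.utils) is imported.
    static List<String> pids(ResultSet rs) throws SQLException {
        Array column = rs.getArray("pid"); // "pid" is an assumed column name
        return OafMapperUtils.listValues(column);
    }
}
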

    public static List<Field<String>> listFields(final DataInfo info, final List<String> values) {
        return values
            .stream()
@@ -128,7 +142,7 @@ public class OafMapperUtils {

    }

    public static Qualifier unknown(final String schemeid, final String schemename) {
-       return qualifier("UNKNOWN", "Unknown", schemeid, schemename);
+       return qualifier(UNKNOWN, "Unknown", schemeid, schemename);
    }

    public static AccessRight accessRight(
@@ -176,6 +190,17 @@ public class OafMapperUtils {

        return q;
    }

+   public static Subject subject(
+       final String value,
+       final String classid,
+       final String classname,
+       final String schemeid,
+       final String schemename,
+       final DataInfo dataInfo) {
+
+       return subject(value, qualifier(classid, classname, schemeid, schemename), dataInfo);
+   }
+
    public static StructuredProperty structuredProperty(
        final String value,
        final String classid,
@@ -187,6 +212,20 @@ public class OafMapperUtils {

        return structuredProperty(value, qualifier(classid, classname, schemeid, schemename), dataInfo);
    }

+   public static Subject subject(
+       final String value,
+       final Qualifier qualifier,
+       final DataInfo dataInfo) {
+       if (value == null) {
+           return null;
+       }
+       final Subject s = new Subject();
+       s.setValue(value);
+       s.setQualifier(qualifier);
+       s.setDataInfo(dataInfo);
+       return s;
+   }
+
    public static StructuredProperty structuredProperty(
        final String value,
        final Qualifier qualifier,
@@ -391,4 +430,88 @@ public class OafMapperUtils {

        }
        return null;
    }

+   public static KeyValue newKeyValueInstance(String key, String value, DataInfo dataInfo) {
+       KeyValue kv = new KeyValue();
+       kv.setDataInfo(dataInfo);
+       kv.setKey(key);
+       kv.setValue(value);
+       return kv;
+   }
+
+   public static Measure newMeasureInstance(String id, String value, String key, DataInfo dataInfo) {
+       Measure m = new Measure();
+       m.setId(id);
+       m.setUnit(Arrays.asList(newKeyValueInstance(key, value, dataInfo)));
+       return m;
+   }
+
+   public static Relation getRelation(final String source,
+       final String target,
+       final String relType,
+       final String subRelType,
+       final String relClass,
+       final OafEntity entity) {
+       return getRelation(source, target, relType, subRelType, relClass, entity, null);
+   }
+
+   public static Relation getRelation(final String source,
+       final String target,
+       final String relType,
+       final String subRelType,
+       final String relClass,
+       final OafEntity entity,
+       final String validationDate) {
+       return getRelation(
+           source, target, relType, subRelType, relClass, entity.getCollectedfrom(), entity.getDataInfo(),
+           entity.getLastupdatetimestamp(), validationDate, null);
+   }
+
+   public static Relation getRelation(final String source,
+       final String target,
+       final String relType,
+       final String subRelType,
+       final String relClass,
+       final List<KeyValue> collectedfrom,
+       final DataInfo dataInfo,
+       final Long lastupdatetimestamp) {
+       return getRelation(
+           source, target, relType, subRelType, relClass, collectedfrom, dataInfo, lastupdatetimestamp, null, null);
+   }
+
+   public static Relation getRelation(final String source,
+       final String target,
+       final String relType,
+       final String subRelType,
+       final String relClass,
+       final List<KeyValue> collectedfrom,
+       final DataInfo dataInfo,
+       final Long lastupdatetimestamp,
+       final String validationDate,
+       final List<KeyValue> properties) {
+       final Relation rel = new Relation();
+       rel.setRelType(relType);
+       rel.setSubRelType(subRelType);
+       rel.setRelClass(relClass);
+       rel.setSource(source);
+       rel.setTarget(target);
+       rel.setCollectedfrom(collectedfrom);
+       rel.setDataInfo(dataInfo);
+       rel.setLastupdatetimestamp(lastupdatetimestamp);
+       rel.setValidated(StringUtils.isNotBlank(validationDate));
+       rel.setValidationDate(StringUtils.isNotBlank(validationDate) ? validationDate : null);
+       rel.setProperties(properties);
+       return rel;
+   }
+
+   public static String getProvenance(DataInfo dataInfo) {
+       return Optional
+           .ofNullable(dataInfo)
+           .map(
+               d -> Optional
+                   .ofNullable(d.getProvenanceaction())
+                   .map(Qualifier::getClassid)
+                   .orElse(""))
+           .orElse("");
+   }
}
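
Note: getProvenance centralizes the nested Optional chain that GraphCleaningFunctions previously inlined (see the pid-cleaning hunk above); any DataInfo, with or without a provenance action, collapses to a classid string, empty when absent. A hedged usage sketch (hypothetical helper, assuming the usual dnet imports):

import eu.dnetlib.dhp.schema.oaf.DataInfo;

class ProvenanceUsage {
    // Null-safe on every level: dataInfo, provenanceaction and classid may be null.
    // Assumes OafMapperUtils (eu.dnetlib.dhp.schema.oaf.utils) is imported.
    static boolean isUserClaim(DataInfo info) {
        return OafMapperUtils.getProvenance(info).toLowerCase().contains("user");
    }
}
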
File: SubjectProvenanceComparator.java (new file)

@@ -0,0 +1,46 @@

+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
+import static org.apache.commons.lang3.StringUtils.isBlank;
+
+import java.util.Comparator;
+
+import eu.dnetlib.dhp.schema.oaf.Subject;
+
+public class SubjectProvenanceComparator implements Comparator<Subject> {
+
+   @Override
+   public int compare(Subject left, Subject right) {
+
+       String lProv = getProvenance(left.getDataInfo());
+       String rProv = getProvenance(right.getDataInfo());
+
+       if (isBlank(lProv) && isBlank(rProv))
+           return 0;
+       if (isBlank(lProv))
+           return 1;
+       if (isBlank(rProv))
+           return -1;
+       if (lProv.equals(rProv))
+           return 0;
+       if (lProv.toLowerCase().contains("crosswalk"))
+           return -1;
+       if (rProv.toLowerCase().contains("crosswalk"))
+           return 1;
+       if (lProv.toLowerCase().contains("user"))
+           return -1;
+       if (rProv.toLowerCase().contains("user"))
+           return 1;
+       if (lProv.toLowerCase().contains("propagation"))
+           return -1;
+       if (rProv.toLowerCase().contains("propagation"))
+           return 1;
+       if (lProv.toLowerCase().contains("iis"))
+           return -1;
+       if (rProv.toLowerCase().contains("iis"))
+           return 1;
+
+       return 0;
+   }
+}
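
Note: the comparator encodes a trust order over provenance strings: blanks sort last, then crosswalk < user < propagation < iis, and any two unrelated provenances compare as equal (0), so ordering among them is arbitrary. Combined with Collections.min in the subject dedup above, a crosswalk- or user-sourced subject wins over an inferred one. Hedged usage sketch (assumes placement alongside the comparator's package):

import java.util.Arrays;
import java.util.Collections;

import eu.dnetlib.dhp.schema.oaf.Subject;

class ComparatorUsage {
    // Given two Subject instances with the same classid + value, keep the one
    // with the more trusted provenance (e.g. a user claim over an iis inference).
    static Subject preferred(Subject a, Subject b) {
        return Collections.min(Arrays.asList(a, b), new SubjectProvenanceComparator());
    }
}
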
File: DHPUtils.java

@@ -75,9 +75,14 @@ public class DHPUtils {

        final HttpGet req = new HttpGet(url);

+       log.info("MDStoreManager request: {}", req);
+
        try (final CloseableHttpClient client = HttpClients.createDefault()) {
            try (final CloseableHttpResponse response = client.execute(req)) {
                final String json = IOUtils.toString(response.getEntity().getContent());

+               log.info("MDStoreManager response: {}", json);
+
                final MDStoreWithInfo[] mdstores = objectMapper.readValue(json, MDStoreWithInfo[].class);
                return Arrays
                    .stream(mdstores)
File: name_particles.txt

@@ -1,7 +1,8 @@

van
+von
der
de
dell
sig
mr
mrs
File: OafMapperUtilsTest.java

@@ -44,105 +44,104 @@ class OafMapperUtilsTest {

    @Test
    void testDateValidation() {

-       assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent());
-       assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent());
-       assertTrue(GraphCleaningFunctions.doCleanDate(" 2016-04-05").isPresent());
+       assertNotNull(GraphCleaningFunctions.cleanDate("2016-05-07T12:41:19.202Z "));
+       assertNotNull(GraphCleaningFunctions.cleanDate("2020-09-10 11:08:52 "));
+       assertNotNull(GraphCleaningFunctions.cleanDate(" 2016-04-05"));

-       assertEquals("2016-04-05", GraphCleaningFunctions.doCleanDate("2016 Apr 05").get());
+       assertEquals("2016-04-05", GraphCleaningFunctions.cleanDate("2016 Apr 05"));

-       assertEquals("2009-05-08", GraphCleaningFunctions.doCleanDate("May 8, 2009 5:57:51 PM").get());
-       assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, 1970").get());
-       assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, '70").get());
-       assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 1970").get());
-       assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 70").get());
-       assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 2006").get());
-       assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 MST 2006").get());
-       assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 02 15:04:05 -0700 2006").get());
-       assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Monday, 02-Jan-06 15:04:05 MST").get());
-       assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 MST").get());
-       assertEquals("2017-07-11", GraphCleaningFunctions.doCleanDate("Tue, 11 Jul 2017 16:28:13 +0200 (CEST)").get());
-       assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 -0700").get());
-       assertEquals("2018-01-04", GraphCleaningFunctions.doCleanDate("Thu, 4 Jan 2018 17:53:36 +0000").get());
-       assertEquals("2015-08-10", GraphCleaningFunctions.doCleanDate("Mon Aug 10 15:44:11 UTC+0100 2015").get());
+       assertEquals("2009-05-08", GraphCleaningFunctions.cleanDate("May 8, 2009 5:57:51 PM"));
+       assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct 7, 1970"));
+       assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct 7, '70"));
+       assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct. 7, 1970"));
+       assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct. 7, 70"));
+       assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 2 15:04:05 2006"));
+       assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 2 15:04:05 MST 2006"));
+       assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 02 15:04:05 -0700 2006"));
+       assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Monday, 02-Jan-06 15:04:05 MST"));
+       assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon, 02 Jan 2006 15:04:05 MST"));
+       assertEquals("2017-07-11", GraphCleaningFunctions.cleanDate("Tue, 11 Jul 2017 16:28:13 +0200 (CEST)"));
+       assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon, 02 Jan 2006 15:04:05 -0700"));
+       assertEquals("2018-01-04", GraphCleaningFunctions.cleanDate("Thu, 4 Jan 2018 17:53:36 +0000"));
+       assertEquals("2015-08-10", GraphCleaningFunctions.cleanDate("Mon Aug 10 15:44:11 UTC+0100 2015"));
        assertEquals(
            "2015-07-03",
-           GraphCleaningFunctions.doCleanDate("Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)").get());
-       assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 10:09am").get());
-       assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 at 10:09am PST-08").get());
-       assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012, 10:10:09").get());
-       assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7, 1970").get());
-       assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7th, 1970").get());
-       assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006, 19:17").get());
-       assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006 19:17").get());
-       assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 70").get());
-       assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 1970").get());
-       assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("03 February 2013").get());
-       assertEquals("2013-07-01", GraphCleaningFunctions.doCleanDate("1 July 2013").get());
-       assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("2013-Feb-03").get());
-       assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3/31/2014").get());
-       assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03/31/2014").get());
-       assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08/21/71").get());
-       assertEquals("1971-01-08", GraphCleaningFunctions.doCleanDate("8/1/71").get());
-       assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/2014 22:05").get());
-       assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("04/08/2014 22:05").get());
-       assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/14 22:05").get());
-       assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("04/2/2014 03:00:51").get());
-       assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00:00 AM").get());
-       assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00:01 PM").get());
-       assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00 PM").get());
-       assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 1:00 PM").get());
-       assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00 AM").get());
-       assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("4/02/2014 03:00:51").get());
-       assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59").get());
-       assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59.3186369").get());
-       assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/3/31").get());
-       assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/03/31").get());
-       assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/4/8 22:05").get());
-       assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/04/08 22:05").get());
-       assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/04/2 03:00:51").get());
-       assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/4/02 03:00:51").get());
-       assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59").get());
-       assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59.3186369").get());
-       assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014年04月08日").get());
-       assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("2006-01-02T15:04:05+0000").get());
-       assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09-07:00").get());
-       assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09").get());
-       assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09Z").get());
-       assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.3186369").get());
-       assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000").get());
-       assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.123").get());
-       assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43").get());
-       assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43:22").get());
-       assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 UTC").get());
-       assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 GMT").get());
-       assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 05:24:37 PM").get());
-       assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800").get());
-       assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800 +08").get());
-       assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:44 +09:00").get());
-       assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000 +0000 UTC").get());
-       assertEquals("2015-09-30", GraphCleaningFunctions.doCleanDate("2015-09-30 18:48:56.35272715 +0000 UTC").get());
-       assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 GMT").get());
-       assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 UTC").get());
+           GraphCleaningFunctions.cleanDate("Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)"));
+       assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012 10:09am"));
+       assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012 at 10:09am PST-08"));
+       assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012, 10:10:09"));
+       assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("October 7, 1970"));
+       assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("October 7th, 1970"));
+       assertEquals("2006-02-12", GraphCleaningFunctions.cleanDate("12 Feb 2006, 19:17"));
+       assertEquals("2006-02-12", GraphCleaningFunctions.cleanDate("12 Feb 2006 19:17"));
+       assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("7 oct 70"));
+       assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("7 oct 1970"));
+       assertEquals("2013-02-03", GraphCleaningFunctions.cleanDate("03 February 2013"));
+       assertEquals("2013-07-01", GraphCleaningFunctions.cleanDate("1 July 2013"));
+       assertEquals("2013-02-03", GraphCleaningFunctions.cleanDate("2013-Feb-03"));
+       assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("3/31/2014"));
+       assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("03/31/2014"));
+       assertEquals("1971-08-21", GraphCleaningFunctions.cleanDate("08/21/71"));
+       assertEquals("1971-01-08", GraphCleaningFunctions.cleanDate("8/1/71"));
+       assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("4/8/2014 22:05"));
+       assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("04/08/2014 22:05"));
+       assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("4/8/14 22:05"));
+       assertEquals("2014-02-04", GraphCleaningFunctions.cleanDate("04/2/2014 03:00:51"));
+       assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 12:00:00 AM"));
+       assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 01:00:01 PM"));
+       assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 01:00 PM"));
+       assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 1:00 PM"));
+       assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 12:00 AM"));
+       assertEquals("2014-02-04", GraphCleaningFunctions.cleanDate("4/02/2014 03:00:51"));
+       assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("03/19/2012 10:11:59"));
+       assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("03/19/2012 10:11:59.3186369"));
+       assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("2014/3/31"));
+       assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("2014/03/31"));
+       assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014/4/8 22:05"));
+       assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014/04/08 22:05"));
+       assertEquals("2014-04-02", GraphCleaningFunctions.cleanDate("2014/04/2 03:00:51"));
+       assertEquals("2014-04-02", GraphCleaningFunctions.cleanDate("2014/4/02 03:00:51"));
+       assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("2012/03/19 10:11:59"));
+       assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("2012/03/19 10:11:59.3186369"));
+       assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014年04月08日"));
+       assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("2006-01-02T15:04:05+0000"));
+       assertEquals("2009-08-13", GraphCleaningFunctions.cleanDate("2009-08-12T22:15:09-07:00"));
+       assertEquals("2009-08-12", GraphCleaningFunctions.cleanDate("2009-08-12T22:15:09"));
+       assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 17:24:37.3186369"));
+       assertEquals("2012-08-03", GraphCleaningFunctions.cleanDate("2012-08-03 18:31:59.257000000"));
+       assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 17:24:37.123"));
+       assertEquals("2013-04-01", GraphCleaningFunctions.cleanDate("2013-04-01 22:43"));
+       assertEquals("2013-04-01", GraphCleaningFunctions.cleanDate("2013-04-01 22:43:22"));
+       assertEquals("2014-12-16", GraphCleaningFunctions.cleanDate("2014-12-16 06:20:00 UTC"));
+       assertEquals("2014-12-16", GraphCleaningFunctions.cleanDate("2014-12-16 06:20:00 GMT"));
+       assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 05:24:37 PM"));
+       assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:43 +0800"));
+       assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:43 +0800 +08"));
+       assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:44 +09:00"));
+       assertEquals("2012-08-03", GraphCleaningFunctions.cleanDate("2012-08-03 18:31:59.257000000 +0000 UTC"));
+       assertEquals("2015-09-30", GraphCleaningFunctions.cleanDate("2015-09-30 18:48:56.35272715 +0000 UTC"));
+       assertEquals("2015-02-18", GraphCleaningFunctions.cleanDate("2015-02-18 00:12:00 +0000 GMT"));
+       assertEquals("2015-02-18", GraphCleaningFunctions.cleanDate("2015-02-18 00:12:00 +0000 UTC"));
        assertEquals(
-           "2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00 +0300 MSK m=+0.000000001").get());
+           "2015-02-08", GraphCleaningFunctions.cleanDate("2015-02-08 03:02:00 +0300 MSK m=+0.000000001"));
        assertEquals(
-           "2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001").get());
-       assertEquals("2017-07-19", GraphCleaningFunctions.doCleanDate("2017-07-19 03:21:51+00:00").get());
-       assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26").get());
-       assertEquals("2014-04-01", GraphCleaningFunctions.doCleanDate("2014-04").get());
-       assertEquals("2014-01-01", GraphCleaningFunctions.doCleanDate("2014").get());
-       assertEquals("2014-05-11", GraphCleaningFunctions.doCleanDate("2014-05-11 08:20:13,787").get());
-       assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3.31.2014").get());
-       assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03.31.2014").get());
-       assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08.21.71").get());
-       assertEquals("2014-03-01", GraphCleaningFunctions.doCleanDate("2014.03").get());
-       assertEquals("2014-03-30", GraphCleaningFunctions.doCleanDate("2014.03.30").get());
-       assertEquals("2014-06-01", GraphCleaningFunctions.doCleanDate("20140601").get());
-       assertEquals("2014-07-22", GraphCleaningFunctions.doCleanDate("20140722105203").get());
-       assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("1332151919").get());
-       assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367189").get());
-       assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222").get());
-       assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222333").get());
+           "2015-02-08", GraphCleaningFunctions.cleanDate("2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001"));
+       assertEquals("2017-07-19", GraphCleaningFunctions.cleanDate("2017-07-19 03:21:51+00:00"));
+       assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26"));
+       assertEquals("2014-04-01", GraphCleaningFunctions.cleanDate("2014-04"));
+       assertEquals("2014-01-01", GraphCleaningFunctions.cleanDate("2014"));
+       assertEquals("2014-05-11", GraphCleaningFunctions.cleanDate("2014-05-11 08:20:13,787"));
+       assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("3.31.2014"));
+       assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("03.31.2014"));
+       assertEquals("1971-08-21", GraphCleaningFunctions.cleanDate("08.21.71"));
+       assertEquals("2014-03-01", GraphCleaningFunctions.cleanDate("2014.03"));
+       assertEquals("2014-03-30", GraphCleaningFunctions.cleanDate("2014.03.30"));
+       assertEquals("2014-06-01", GraphCleaningFunctions.cleanDate("20140601"));
+       assertEquals("2014-07-22", GraphCleaningFunctions.cleanDate("20140722105203"));
+       assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("1332151919"));
+       assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367189"));
+       assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222"));
+       assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222333"));

    }
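
Note: the test migrates from doCleanDate, which returns Optional<String>, to cleanDate, which returns the normalized date directly (and, judging by the assertNotNull checks, presumably null on unparsable input). A plausible shape for such a wrapper, assuming doCleanDate is kept internally (a sketch, not the actual implementation):

import java.util.Optional;

class CleanDateSketch {
    // Hypothetical: cleanDate as a null-returning convenience over doCleanDate.
    static String cleanDate(String value) {
        return doCleanDate(value).orElse(null);
    }

    static Optional<String> doCleanDate(String value) {
        return Optional.ofNullable(value).map(String::trim); // stand-in for the real parser
    }
}
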
File: PromoteActionPayloadForGraphTableJobTest.java

@@ -172,6 +172,61 @@ public class PromoteActionPayloadForGraphTableJobTest {

        }
    }

+   @Test
+   void shouldPromoteActionPayload_custom() throws Exception {
+
+       Class<? extends Oaf> rowClazz = Publication.class;
+       Class<? extends Oaf> actionPayloadClazz = Result.class;
+       MergeAndGet.Strategy strategy = MergeAndGet.Strategy.MERGE_FROM_AND_GET;
+
+       // given
+       Path inputGraphTableDir = createGraphTable(inputGraphRootDir, rowClazz);
+       Path inputActionPayloadDir = createActionPayload(inputActionPayloadRootDir, rowClazz, actionPayloadClazz);
+       Path outputGraphTableDir = outputDir.resolve("graph").resolve(rowClazz.getSimpleName().toLowerCase());
+
+       // when
+       PromoteActionPayloadForGraphTableJob
+           .main(
+               new String[] {
+                   "-isSparkSessionManaged",
+                   Boolean.FALSE.toString(),
+                   "-inputGraphTablePath",
+                   inputGraphTableDir.toString(),
+                   "-graphTableClassName",
+                   rowClazz.getCanonicalName(),
+                   "-inputActionPayloadPath",
+                   inputActionPayloadDir.toString(),
+                   "-actionPayloadClassName",
+                   actionPayloadClazz.getCanonicalName(),
+                   "-outputGraphTablePath",
+                   outputGraphTableDir.toString(),
+                   "-mergeAndGetStrategy",
+                   strategy.name(),
+                   "--shouldGroupById",
+                   "true"
+               });
+
+       // then
+       assertTrue(Files.exists(outputGraphTableDir));
+
+       List<? extends Oaf> actualOutputRows = readGraphTableFromJobOutput(outputGraphTableDir.toString(), rowClazz)
+           .collectAsList()
+           .stream()
+           .sorted(Comparator.comparingInt(Object::hashCode))
+           .collect(Collectors.toList());
+
+       Publication p = actualOutputRows
+           .stream()
+           .map(o -> (Publication) o)
+           .filter(o -> "50|4ScienceCRIS::6a67ed3daba1c380bf9de3c13ed9c879".equals(o.getId()))
+           .findFirst()
+           .get();
+
+       assertNotNull(p.getMeasures());
+       assertTrue(p.getMeasures().size() > 0);
+
+   }
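
Note: this test drives the whole promote job with --shouldGroupById set to true and the MERGE_FROM_AND_GET strategy, then checks that the bip measures carried by the action payload (the 4ScienceCRIS record added to the JSON test resource below, whose id matches the filter above) survive the merge onto the existing publication.
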

    public static Stream<Arguments> promoteJobTestParams() {
        return Stream
            .of(
@@ -17,4 +17,5 @@ (JSON test resource; the last record below is the added line)

{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}},"lastupdatetimestamp":1572018243405,"id":"50|CSC_________::00019460865d6cc381b36076131a5bc1","originalId":[],"collectedfrom":[],"pid":[],"dateofcollection":"","dateoftransformation":"","extraInfo":[],"oaiprovenance":null,"author":[],"resulttype":{"classid":"","classname":"","schemeid":"","schemename":""},"language":{"classid":"","classname":"","schemeid":"","schemename":""},"country":[],"subject":[{"value":"Computer Science::Networking and Internet Architecture","qualifier":{"classid":"arxiv","classname":"arxiv","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7416","inferenceprovenance":"iis::document_classes","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[],"relevantdate":[],"description":[],"dateofacceptance":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"publisher":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"embargoenddate":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"","classname":"","schemeid":"","schemename":""},"coverage":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":null,"instance":[]}
{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}},"lastupdatetimestamp":1572018243405,"id":"50|CSC_________::00019460865d6cc381b36076131a5bc1","originalId":[],"collectedfrom":[],"pid":[],"dateofcollection":"","dateoftransformation":"","extraInfo":[],"oaiprovenance":null,"author":[],"resulttype":{"classid":"","classname":"","schemeid":"","schemename":""},"language":{"classid":"","classname":"","schemeid":"","schemename":""},"country":[],"subject":[{"value":"Computer Science::Networking and Internet Architecture","qualifier":{"classid":"arxiv","classname":"arxiv","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7416","inferenceprovenance":"iis::document_classes","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[],"relevantdate":[],"description":[],"dateofacceptance":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"publisher":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"embargoenddate":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"","classname":"","schemeid":"","schemename":""},"coverage":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":null,"instance":[]}
{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}},"lastupdatetimestamp":1572018240982,"id":"50|CSC_________::0001d663c95c4132355e1765375a5275","originalId":[],"collectedfrom":[],"pid":[],"dateofcollection":"","dateoftransformation":"","extraInfo":[],"oaiprovenance":null,"author":[],"resulttype":{"classid":"","classname":"","schemeid":"","schemename":""},"language":{"classid":"","classname":"","schemeid":"","schemename":""},"country":[],"subject":[{"value":"animal diseases","qualifier":{"classid":"mesheuropmc","classname":"mesheuropmc","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7461","inferenceprovenance":"iis::document_classes","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[],"relevantdate":[],"description":[],"dateofacceptance":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"publisher":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"embargoenddate":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"","classname":"","schemeid":"","schemename":""},"coverage":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":null,"instance":[]}
{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}},"lastupdatetimestamp":1572018240982,"id":"50|CSC_________::0001d663c95c4132355e1765375a5275","originalId":[],"collectedfrom":[],"pid":[],"dateofcollection":"","dateoftransformation":"","extraInfo":[],"oaiprovenance":null,"author":[],"resulttype":{"classid":"","classname":"","schemeid":"","schemename":""},"language":{"classid":"","classname":"","schemeid":"","schemename":""},"country":[],"subject":[{"value":"animal diseases","qualifier":{"classid":"mesheuropmc","classname":"mesheuropmc","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7461","inferenceprovenance":"iis::document_classes","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[],"relevantdate":[],"description":[],"dateofacceptance":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"publisher":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"embargoenddate":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"","classname":"","schemeid":"","schemename":""},"coverage":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":null,"instance":[]}
{"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"id":"50|4ScienceCRIS::6a67ed3daba1c380bf9de3c13ed9c879","originalId":null,"pid":null,"dateofcollection":null,"dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"processingchargeamount":null,"processingchargecurrency":null,"measures":[{"id":"influence","unit":[{"key":"score","value":"1.64385446761e-08","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"id":"popularity_alt","unit":[{"key":"score","value":"18.9590813696","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"id":"popularity","unit":[{"key":"score","value":"6.00577981643e-08","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]}],"author":null,"resulttype":null,"language":null,"country":null,"subject":null,"title":null,"relevantdate":null,"description":null,"dateofacceptance":null,"publisher":null,"embargoenddate":null,"source":null,"fulltext":null,"format":null,"contributor":null,"resourcetype":null,"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":null}
@ -13,6 +13,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.Subject;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;

public class Constants {

@ -27,6 +28,8 @@ public class Constants {
public static final String UPDATE_CLASS_NAME = "Inferred by OpenAIRE";
public static final String UPDATE_MEASURE_BIP_CLASS_ID = "measure:bip";
public static final String UPDATE_SUBJECT_SDG_CLASS_ID = "subject:sdg";
public static final String UPDATE_MEASURE_USAGE_COUNTS_CLASS_ID = "measure:usage_counts";
public static final String UPDATE_KEY_USAGE_COUNTS = "count";

public static final String FOS_CLASS_ID = "FOS";
public static final String FOS_CLASS_NAME = "Fields of Science and Technology classification";

@ -56,13 +59,13 @@ public class Constants {
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
}

public static StructuredProperty getSubject(String sbj, String classid, String classname,
public static Subject getSubject(String sbj, String classid, String classname,
String diqualifierclassid) {
if (sbj.equals(NULL))
return null;
StructuredProperty sp = new StructuredProperty();
sp.setValue(sbj);
sp
Subject s = new Subject();
s.setValue(sbj);
s
.setQualifier(
OafMapperUtils
.qualifier(

@ -70,7 +73,7 @@ public class Constants {
classname,
ModelConstants.DNET_SUBJECT_TYPOLOGIES,
ModelConstants.DNET_SUBJECT_TYPOLOGIES));
sp
s
.setDataInfo(
OafMapperUtils
.dataInfo(

@ -86,7 +89,7 @@ public class Constants {
ModelConstants.DNET_PROVENANCE_ACTIONS),
""));

return sp;
return s;

}
}

@ -24,6 +24,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.Subject;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.DHPUtils;

@ -79,7 +80,7 @@ public class PrepareFOSSparkJob implements Serializable {
HashSet<String> level3 = new HashSet<>();
addLevels(level1, level2, level3, first);
it.forEachRemaining(v -> addLevels(level1, level2, level3, v));
List<StructuredProperty> sbjs = new ArrayList<>();
List<Subject> sbjs = new ArrayList<>();
level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));

@ -24,6 +24,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.Subject;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.DHPUtils;

@ -73,7 +74,7 @@ public class PrepareSDGSparkJob implements Serializable {
Result r = new Result();
r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI));
SDGDataModel first = it.next();
List<StructuredProperty> sbjs = new ArrayList<>();
List<Subject> sbjs = new ArrayList<>();
sbjs.add(getSubject(first.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID));
it
.forEachRemaining(

@ -3,7 +3,6 @@ package eu.dnetlib.dhp.actionmanager.ror;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORG_ORG_RELTYPE;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;

@ -39,7 +38,6 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.actionmanager.ror.model.ExternalIdType;
import eu.dnetlib.dhp.actionmanager.ror.model.Relationship;
import eu.dnetlib.dhp.actionmanager.ror.model.RorOrganization;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;

@ -51,7 +49,6 @@ import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2;

@ -168,38 +165,10 @@ public class GenerateRorActionSetJob {
final List<AtomicAction<? extends Oaf>> res = new ArrayList<>();
res.add(new AtomicAction<>(Organization.class, o));

for (final Relationship rorRel : r.getRelationships()) {
if (rorRel.getType().equalsIgnoreCase("parent")) {
final String orgId1 = calculateOpenaireId(r.getId());
final String orgId2 = calculateOpenaireId(rorRel.getId());
res
.add(
new AtomicAction<>(Relation.class,
calculateHierarchyRel(orgId1, orgId2, ModelConstants.IS_PARENT_OF)));
res
.add(
new AtomicAction<>(Relation.class,
calculateHierarchyRel(orgId2, orgId1, ModelConstants.IS_CHILD_OF)));
}
}

return res;

}

private static Relation calculateHierarchyRel(final String source, final String target, final String relClass) {
final Relation rel = new Relation();
rel.setSource(source);
rel.setTarget(target);
rel.setRelType(ORG_ORG_RELTYPE);
rel.setSubRelType(ModelConstants.RELATIONSHIP);
rel.setRelClass(relClass);
rel.setCollectedfrom(ROR_COLLECTED_FROM);
rel.setDataInfo(ROR_DATA_INFO);
rel.setLastupdatetimestamp(System.currentTimeMillis());
return rel;
}

private static String calculateOpenaireId(final String rorId) {
return String.format("20|%s::%s", ROR_NS_PREFIX, DHPUtils.md5(rorId));
}

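Editor's note: a minimal, self-contained sketch of the identifier derivation kept by calculateOpenaireId above. The "20|" organization prefix and the md5-of-ROR-id scheme come straight from the method; the concrete value of ROR_NS_PREFIX is not shown in this diff, so the one used below is an assumption.

import eu.dnetlib.dhp.utils.DHPUtils;

public class RorOpenaireIdSketch {

	// mirrors calculateOpenaireId: "20|" + namespace prefix + "::" + md5 of the ROR id
	static String openaireOrgId(String rorNsPrefix, String rorId) {
		return String.format("20|%s::%s", rorNsPrefix, DHPUtils.md5(rorId));
	}

	public static void main(String[] args) {
		// "ror_________" is an assumed 12-character namespace prefix, not taken from this diff
		System.out.println(openaireOrgId("ror_________", "https://ror.org/03yrm5c26"));
	}
}
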
@ -0,0 +1,156 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.usagestats;
|
||||
|
||||
import static eu.dnetlib.dhp.actionmanager.Constants.*;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Measure;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
* Creates the atomic action for each type of result
|
||||
*/
|
||||
public class SparkAtomicActionUsageJob implements Serializable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionUsageJob.class);
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
public static <I extends Result> void main(String[] args) throws Exception {
|
||||
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
SparkAtomicActionUsageJob.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/actionmanager/usagestats/input_actionset_parameter.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath {}: ", outputPath);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
|
||||
|
||||
final String dbname = parser.get("usagestatsdb");
|
||||
|
||||
final String workingPath = parser.get("workingPath");
|
||||
|
||||
runWithSparkHiveSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
removeOutputDir(spark, outputPath);
|
||||
prepareResults(dbname, spark, workingPath);
|
||||
writeActionSet(spark, workingPath, outputPath);
|
||||
});
|
||||
}
|
||||
|
||||
public static void prepareResults(String db, SparkSession spark, String workingPath) {
|
||||
spark
|
||||
.sql(
|
||||
"Select result_id, downloads, views " +
|
||||
"from " + db + ".usage_stats")
|
||||
.as(Encoders.bean(UsageStatsModel.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(workingPath);
|
||||
}
|
||||
|
||||
public static void writeActionSet(SparkSession spark, String inputPath, String outputPath) {
|
||||
readPath(spark, inputPath, UsageStatsModel.class)
|
||||
.groupByKey((MapFunction<UsageStatsModel, String>) us -> us.getResult_id(), Encoders.STRING())
|
||||
.mapGroups((MapGroupsFunction<String, UsageStatsModel, Result>) (k, it) -> {
|
||||
UsageStatsModel first = it.next();
|
||||
it.forEachRemaining(us -> {
|
||||
first.setDownloads(first.getDownloads() + us.getDownloads());
|
||||
first.setViews(first.getViews() + us.getViews());
|
||||
});
|
||||
|
||||
Result res = new Result();
|
||||
res.setId("50|" + k);
|
||||
|
||||
res.setMeasures(getMeasure(first.getDownloads(), first.getViews()));
|
||||
return res;
|
||||
}, Encoders.bean(Result.class))
|
||||
.toJavaRDD()
|
||||
.map(p -> new AtomicAction(p.getClass(), p))
|
||||
.mapToPair(
|
||||
aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
|
||||
new Text(OBJECT_MAPPER.writeValueAsString(aa))))
|
||||
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
|
||||
|
||||
}
|
||||
|
||||
private static List<Measure> getMeasure(Long downloads, Long views) {
|
||||
DataInfo dataInfo = OafMapperUtils
|
||||
.dataInfo(
|
||||
false,
|
||||
UPDATE_DATA_INFO_TYPE,
|
||||
true,
|
||||
false,
|
||||
OafMapperUtils
|
||||
.qualifier(
|
||||
UPDATE_MEASURE_USAGE_COUNTS_CLASS_ID,
|
||||
UPDATE_CLASS_NAME,
|
||||
ModelConstants.DNET_PROVENANCE_ACTIONS,
|
||||
ModelConstants.DNET_PROVENANCE_ACTIONS),
|
||||
"");
|
||||
|
||||
return Arrays
|
||||
.asList(
|
||||
OafMapperUtils
|
||||
.newMeasureInstance("downloads", String.valueOf(downloads), UPDATE_KEY_USAGE_COUNTS, dataInfo),
|
||||
OafMapperUtils.newMeasureInstance("views", String.valueOf(views), UPDATE_KEY_USAGE_COUNTS, dataInfo));
|
||||
|
||||
}
|
||||
|
||||
private static void removeOutputDir(SparkSession spark, String path) {
|
||||
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
||||
}
|
||||
|
||||
public static <R> Dataset<R> readPath(
|
||||
SparkSession spark, String inputPath, Class<R> clazz) {
|
||||
return spark
|
||||
.read()
|
||||
.textFile(inputPath)
|
||||
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
|
||||
}
|
||||
|
||||
}
|
|
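Editor's note: a hedged driver sketch for the job above. writeActionSet consumes the UsageStatsModel records previously materialized by prepareResults and serializes the atomic actions as a sequence file; both paths below are placeholders.

import org.apache.spark.sql.SparkSession;

public class WriteActionSetSketch {

	public static void main(String[] args) {
		SparkSession spark = SparkSession
			.builder()
			.master("local[*]")
			.appName("usage-counts-sketch")
			.getOrCreate();
		// assumes prepareResults already wrote the usage rows to /tmp/usageDb
		SparkAtomicActionUsageJob.writeActionSet(spark, "/tmp/usageDb", "/tmp/actionSet");
		spark.stop();
	}
}
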
@ -0,0 +1,34 @@

package eu.dnetlib.dhp.actionmanager.usagestats;

import java.io.Serializable;

public class UsageStatsModel implements Serializable {

    private String result_id;
    private Long downloads;
    private Long views;

    public String getResult_id() {
        return result_id;
    }

    public void setResult_id(String result_id) {
        this.result_id = result_id;
    }

    public Long getDownloads() {
        return downloads;
    }

    public void setDownloads(Long downloads) {
        this.downloads = downloads;
    }

    public Long getViews() {
        return views;
    }

    public void setViews(Long views) {
        this.views = views;
    }
}

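Editor's note: for clarity, a plain-Java sketch of the aggregation that writeActionSet performs on this model via Spark's groupByKey/mapGroups: rows sharing a result_id are folded together by summing downloads and views. Illustrative only, not the Spark code itself.

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class UsageAggregationSketch {

    // same folding as the mapGroups lambda: accumulate counts onto the first row
    static UsageStatsModel merge(UsageStatsModel a, UsageStatsModel b) {
        a.setDownloads(a.getDownloads() + b.getDownloads());
        a.setViews(a.getViews() + b.getViews());
        return a;
    }

    static Map<String, UsageStatsModel> aggregate(List<UsageStatsModel> rows) {
        return rows
            .stream()
            .collect(Collectors.toMap(UsageStatsModel::getResult_id, m -> m, UsageAggregationSketch::merge));
    }
}
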
@ -19,6 +19,8 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.aggregation.common.ReporterCallback;
import eu.dnetlib.dhp.aggregation.common.ReportingJob;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.file.FileCollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.file.FileGZipCollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.mongodb.MDStoreCollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;

@ -114,6 +116,10 @@ public class CollectorWorker extends ReportingJob {
return new OaiCollectorPlugin(clientParams);
case rest_json2xml:
return new RestCollectorPlugin(clientParams);
case file:
return new FileCollectorPlugin(fileSystem);
case fileGzip:
return new FileGZipCollectorPlugin(fileSystem);
case other:
final CollectorPlugin.NAME.OTHER_NAME plugin = Optional
.ofNullable(api.getParams().get("other_plugin_type"))

@ -10,7 +10,7 @@ import eu.dnetlib.dhp.common.collection.CollectorException;
public interface CollectorPlugin {

enum NAME {
oai, other, rest_json2xml;
oai, other, rest_json2xml, file, fileGzip;

public enum OTHER_NAME {
mdstore_mongodb_dump, mdstore_mongodb

@ -0,0 +1,80 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.plugin.file;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.Optional;
|
||||
import java.util.Spliterator;
|
||||
import java.util.Spliterators;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
||||
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
||||
import eu.dnetlib.dhp.collection.plugin.utils.XMLIterator;
|
||||
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||
|
||||
public abstract class AbstractSplittedRecordPlugin implements CollectorPlugin {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(AbstractSplittedRecordPlugin.class);
|
||||
|
||||
public static final String SPLIT_ON_ELEMENT = "splitOnElement";
|
||||
|
||||
private final FileSystem fileSystem;
|
||||
|
||||
public AbstractSplittedRecordPlugin(FileSystem fileSystem) {
|
||||
this.fileSystem = fileSystem;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<String> collect(ApiDescriptor api, AggregatorReport report) throws CollectorException {
|
||||
|
||||
// get path to file
|
||||
final Path filePath = Optional
|
||||
.ofNullable(api.getBaseUrl())
|
||||
.map(Path::new)
|
||||
.orElseThrow(() -> new CollectorException("missing baseUrl"));
|
||||
|
||||
log.info("baseUrl: {}", filePath);
|
||||
|
||||
// check that path to file exists
|
||||
try {
|
||||
if (!fileSystem.exists(filePath)) {
|
||||
throw new CollectorException("path does not exist: " + filePath);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new CollectorException(e);
|
||||
}
|
||||
|
||||
// get split element
|
||||
final String splitOnElement = Optional
|
||||
.ofNullable(api.getParams().get(SPLIT_ON_ELEMENT))
|
||||
.orElseThrow(
|
||||
() -> new CollectorException(String
|
||||
.format("missing parameter '%s', required by the AbstractSplittedRecordPlugin", SPLIT_ON_ELEMENT)));
|
||||
|
||||
log.info("splitOnElement: {}", splitOnElement);
|
||||
|
||||
final BufferedInputStream bis = getBufferedInputStream(filePath);
|
||||
|
||||
Iterator<String> xmlIterator = new XMLIterator(splitOnElement, bis);
|
||||
|
||||
return StreamSupport
|
||||
.stream(
|
||||
Spliterators.spliteratorUnknownSize(xmlIterator, Spliterator.ORDERED),
|
||||
false);
|
||||
}
|
||||
|
||||
abstract protected BufferedInputStream getBufferedInputStream(final Path filePath) throws CollectorException;
|
||||
|
||||
public FileSystem getFileSystem() {
|
||||
return fileSystem;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,33 @@

package eu.dnetlib.dhp.collection.plugin.file;

import java.io.BufferedInputStream;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.common.collection.CollectorException;

public class FileCollectorPlugin extends AbstractSplittedRecordPlugin {

    private static final Logger log = LoggerFactory.getLogger(FileCollectorPlugin.class);

    public FileCollectorPlugin(FileSystem fileSystem) {
        super(fileSystem);
    }

    @Override
    protected BufferedInputStream getBufferedInputStream(final Path filePath) throws CollectorException {

        log.info("filePath: {}", filePath);

        try {
            FileSystem fs = super.getFileSystem();
            return new BufferedInputStream(fs.open(filePath));
        } catch (Exception e) {
            throw new CollectorException("Error reading file " + filePath, e);
        }
    }
}

@ -0,0 +1,35 @@

package eu.dnetlib.dhp.collection.plugin.file;

import java.io.BufferedInputStream;
import java.util.zip.GZIPInputStream;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.common.collection.CollectorException;

public class FileGZipCollectorPlugin extends AbstractSplittedRecordPlugin {

    private static final Logger log = LoggerFactory.getLogger(FileGZipCollectorPlugin.class);

    public FileGZipCollectorPlugin(FileSystem fileSystem) {
        super(fileSystem);
    }

    @Override
    protected BufferedInputStream getBufferedInputStream(final Path filePath) throws CollectorException {

        log.info("filePath: {}", filePath);

        try {
            FileSystem fs = super.getFileSystem();
            GZIPInputStream stream = new GZIPInputStream(fs.open(filePath));
            return new BufferedInputStream(stream);
        } catch (Exception e) {
            throw new CollectorException("Error reading file " + filePath, e);
        }
    }
}

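Editor's note: a usage sketch for the two plugins above, mirroring the unit tests added later in this diff: the ApiDescriptor carries the file location as baseUrl and the element to split on as a parameter. The path and element name below are illustrative.

import java.util.HashMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;

public class FilePluginUsageSketch {

    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        FileGZipCollectorPlugin plugin = new FileGZipCollectorPlugin(fs);

        ApiDescriptor api = new ApiDescriptor();
        api.setBaseUrl("/tmp/opendoar.xml.gz"); // illustrative local path
        HashMap<String, String> params = new HashMap<>();
        params.put("splitOnElement", "repository");
        api.setParams(params);

        // each element of the stream is one <repository> record as standalone XML
        plugin.collect(api, new AggregatorReport()).limit(5).forEach(System.out::println);
    }
}
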
@ -19,7 +19,7 @@ import org.dom4j.io.XMLWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.collection.XmlCleaner;
import eu.dnetlib.dhp.collection.plugin.utils.XmlCleaner;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpConnector2;

@ -30,7 +30,7 @@ import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

import eu.dnetlib.dhp.collection.JsonUtils;
import eu.dnetlib.dhp.collection.plugin.utils.JsonUtils;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpClientParams;

@ -1,5 +1,5 @@

package eu.dnetlib.dhp.collection;
package eu.dnetlib.dhp.collection.plugin.utils;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

@ -0,0 +1,177 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.plugin.utils;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.io.StringWriter;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
import java.util.Iterator;
|
||||
|
||||
import javax.xml.stream.XMLEventFactory;
|
||||
import javax.xml.stream.XMLEventReader;
|
||||
import javax.xml.stream.XMLEventWriter;
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
import javax.xml.stream.XMLOutputFactory;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.events.StartElement;
|
||||
import javax.xml.stream.events.XMLEvent;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
public class XMLIterator implements Iterator<String> {
|
||||
|
||||
private static final Log log = LogFactory.getLog(XMLIterator.class);
|
||||
|
||||
private ThreadLocal<XMLInputFactory> inputFactory = new ThreadLocal<XMLInputFactory>() {
|
||||
|
||||
@Override
|
||||
protected XMLInputFactory initialValue() {
|
||||
return XMLInputFactory.newInstance();
|
||||
}
|
||||
};
|
||||
|
||||
private ThreadLocal<XMLOutputFactory> outputFactory = new ThreadLocal<XMLOutputFactory>() {
|
||||
|
||||
@Override
|
||||
protected XMLOutputFactory initialValue() {
|
||||
return XMLOutputFactory.newInstance();
|
||||
}
|
||||
};
|
||||
|
||||
private ThreadLocal<XMLEventFactory> eventFactory = new ThreadLocal<XMLEventFactory>() {
|
||||
|
||||
@Override
|
||||
protected XMLEventFactory initialValue() {
|
||||
return XMLEventFactory.newInstance();
|
||||
}
|
||||
};
|
||||
|
||||
public static final String UTF_8 = "UTF-8";
|
||||
|
||||
final XMLEventReader parser;
|
||||
|
||||
private XMLEvent current = null;
|
||||
|
||||
private String element;
|
||||
|
||||
private InputStream inputStream;
|
||||
|
||||
public XMLIterator(final String element, final InputStream inputStream) {
|
||||
super();
|
||||
this.element = element;
|
||||
this.inputStream = inputStream;
|
||||
this.parser = getParser();
|
||||
try {
|
||||
this.current = findElement(parser);
|
||||
} catch (XMLStreamException e) {
|
||||
log.warn("cannot init parser position. No element found: " + element);
|
||||
current = null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return current != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
String result = null;
|
||||
try {
|
||||
result = copy(parser);
|
||||
current = findElement(parser);
|
||||
return result;
|
||||
} catch (XMLStreamException e) {
|
||||
throw new RuntimeException(String.format("error copying xml, built so far: '%s'", result), e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@SuppressWarnings("finally")
|
||||
private String copy(final XMLEventReader parser) throws XMLStreamException {
|
||||
final StringWriter result = new StringWriter();
|
||||
try {
|
||||
final XMLEventWriter writer = outputFactory.get().createXMLEventWriter(result);
|
||||
final StartElement start = current.asStartElement();
|
||||
final StartElement newRecord = eventFactory
|
||||
.get()
|
||||
.createStartElement(start.getName(), start.getAttributes(), start.getNamespaces());
|
||||
|
||||
// new root record
|
||||
writer.add(newRecord);
|
||||
|
||||
// copy the rest as it is
|
||||
while (parser.hasNext()) {
|
||||
final XMLEvent event = parser.nextEvent();
|
||||
|
||||
// TODO: replace with depth tracking instead of close tag tracking.
|
||||
if (event.isEndElement() && event.asEndElement().getName().getLocalPart().equals(element)) {
|
||||
writer.add(event);
|
||||
break;
|
||||
}
|
||||
|
||||
writer.add(event);
|
||||
}
|
||||
writer.close();
|
||||
} finally {
|
||||
return result.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks for the next occurrence of the splitter element.
|
||||
*
|
||||
* @param parser
|
||||
* @return
|
||||
* @throws XMLStreamException
|
||||
*/
|
||||
private XMLEvent findElement(final XMLEventReader parser) throws XMLStreamException {
|
||||
|
||||
/*
|
||||
* if (current != null && element.equals(current.asStartElement().getName().getLocalPart())) { return current; }
|
||||
*/
|
||||
|
||||
XMLEvent peek = parser.peek();
|
||||
if (peek != null && peek.isStartElement()) {
|
||||
String name = peek.asStartElement().getName().getLocalPart();
|
||||
if (element.equals(name)) {
|
||||
return peek;
|
||||
}
|
||||
}
|
||||
|
||||
while (parser.hasNext()) {
|
||||
final XMLEvent event = parser.nextEvent();
|
||||
if (event != null && event.isStartElement()) {
|
||||
String name = event.asStartElement().getName().getLocalPart();
|
||||
if (element.equals(name)) {
|
||||
return event;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private XMLEventReader getParser() {
|
||||
try {
|
||||
return inputFactory.get().createXMLEventReader(sanitize(inputStream));
|
||||
} catch (XMLStreamException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private Reader sanitize(final InputStream in) {
|
||||
final CharsetDecoder charsetDecoder = Charset.forName(UTF_8).newDecoder();
|
||||
charsetDecoder.onMalformedInput(CodingErrorAction.REPLACE);
|
||||
charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
|
||||
return new InputStreamReader(in, charsetDecoder);
|
||||
}
|
||||
|
||||
}
|
|
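Editor's note: a small standalone sketch of XMLIterator above, assuming records are delimited by <record> elements: each next() re-serializes one record subtree, with the start tag recreated so attributes and namespaces survive.

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;

public class XMLIteratorSketch {

    public static void main(String[] args) {
        String xml = "<feed><record><id>1</id></record><record><id>2</id></record></feed>";
        XMLIterator it = new XMLIterator("record", new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)));
        while (it.hasNext()) {
            // prints <record><id>1</id></record>, then <record><id>2</id></record>
            System.out.println(it.next());
        }
    }
}
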
@ -1,5 +1,5 @@

package eu.dnetlib.dhp.collection;
package eu.dnetlib.dhp.collection.plugin.utils;

import java.util.HashMap;
import java.util.HashSet;

@ -17,6 +17,9 @@ public class PMArticle implements Serializable {
|
|||
* the Pubmed Identifier
|
||||
*/
|
||||
private String pmid;
|
||||
|
||||
private String pmcId;
|
||||
|
||||
/**
|
||||
* the DOI
|
||||
*/
|
||||
|
@ -122,7 +125,7 @@ public class PMArticle implements Serializable {
|
|||
|
||||
/**
|
||||
* The full journal title (taken from NLM cataloging data following NLM rules for how to compile a serial name) is exported in this element.
|
||||
* Some characters that are not part of the NLM MEDLINE/PubMed Character Set reside in a relatively small number of full journal titles.
|
||||
* Some characters that are not part of the NLM MEDLINE/PubMed Character Set reside in a relatively small number of full journal titles.
|
||||
* The NLM journal title abbreviation is exported in the <MedlineTA> element.
|
||||
*
|
||||
* @return the pubmed Journal Extracted
|
||||
|
@ -140,10 +143,11 @@ public class PMArticle implements Serializable {
|
|||
}
|
||||
|
||||
/**
|
||||
* English-language abstracts are taken directly from the published article.
|
||||
* If the article does not have a published abstract, the National Library of Medicine does not create one,
|
||||
* thus the record lacks the <Abstract> and <AbstractText> elements. However, in the absence of a formally
|
||||
* labeled abstract in the published article, text from a substantive "summary", "summary and conclusions" or "conclusions and summary" may be used.
|
||||
* <ArticleTitle> contains the entire title of the journal article. <ArticleTitle> is always in English;
|
||||
* those titles originally published in a non-English language and translated for <ArticleTitle> are enclosed in square brackets.
|
||||
* All titles end with a period unless another punctuation mark such as a question mark or bracket is present.
|
||||
* Explanatory information about the title itself is enclosed in parentheses, e.g.: (author's transl).
|
||||
* Corporate/collective authors may appear at the end of <ArticleTitle> for citations up to about the year 2000.
|
||||
*
|
||||
* @return the extracted pubmed Title
|
||||
*/
|
||||
|
@ -250,4 +254,13 @@ public class PMArticle implements Serializable {
|
|||
public List<PMGrant> getGrants() {
|
||||
return grants;
|
||||
}
|
||||
|
||||
public String getPmcId() {
|
||||
return pmcId;
|
||||
}
|
||||
|
||||
public PMArticle setPmcId(String pmcId) {
|
||||
this.pmcId = pmcId;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,32 @@
[
  {
    "paramName": "issm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "when true will stop SparkSession after job execution",
    "paramRequired": false
  },
  {
    "paramName": "hmu",
    "paramLongName": "hive_metastore_uris",
    "paramDescription": "the URI for the hive metastore",
    "paramRequired": true
  },
  {
    "paramName": "o",
    "paramLongName": "outputPath",
    "paramDescription": "the path of the new ActionSet",
    "paramRequired": true
  },
  {
    "paramName": "sdb",
    "paramLongName": "usagestatsdb",
    "paramDescription": "the name of the db to be used",
    "paramRequired": true
  },
  {
    "paramName": "wp",
    "paramLongName": "workingPath",
    "paramDescription": "the workingPath where to save the content of the usage_stats table",
    "paramRequired": true
  }
]

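Editor's note: a hypothetical invocation showing how the five parameters declared above reach the job's main method; every value below is a placeholder, and the class is assumed to be on the classpath (same package as SparkAtomicActionUsageJob).

public class UsageStatsJobArgsSketch {

    public static void main(String[] args) throws Exception {
        SparkAtomicActionUsageJob
            .main(new String[] {
                "--isSparkSessionManaged", "false",
                "--hive_metastore_uris", "thrift://hive-metastore.example:9083", // placeholder URI
                "--outputPath", "/tmp/actionSet",
                "--usagestatsdb", "usagestats",
                "--workingPath", "/tmp/usageDb"
            });
    }
}
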
@ -1,25 +1,12 @@
|
|||
<workflow-app name="sub_dump_community_products" xmlns="uri:oozie:workflow:0.5">
|
||||
|
||||
<workflow-app name="UsageStatsCounts" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<description>the source path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<description>the output path</description>
|
||||
<description>the path where to store the actionset</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveDbName</name>
|
||||
<description>the target hive database name</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveJdbcUrl</name>
|
||||
<description>hive server jdbc url</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveMetastoreUris</name>
|
||||
<description>hive server metastore URIs</description>
|
||||
<name>usagestatsdb</name>
|
||||
<description>the name of the db to be used</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
|
@ -76,50 +63,19 @@
|
|||
|
||||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="common_action_community_funder"/>
|
||||
|
||||
<start to="atomicactions"/>
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="common_action_community_funder">
|
||||
<sub-workflow>
|
||||
<app-path>${wf:appPath()}/dump_common
|
||||
</app-path>
|
||||
<propagate-configuration/>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<value>${sourcePath}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>selectedResults</name>
|
||||
<value>${sourcePath}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>communityMapPath</name>
|
||||
<value>${workingDir}/communityMap</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<value>${workingDir}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</sub-workflow>
|
||||
<ok to="splitForCommunities" />
|
||||
<error to="Kill" />
|
||||
</action>
|
||||
|
||||
|
||||
|
||||
<action name="splitForCommunities">
|
||||
<action name="atomicactions">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Split dumped result for community</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkSplitForCommunity</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<name>Produces the atomic action with the usage stats count for results</name>
|
||||
<class>eu.dnetlib.dhp.actionmanager.usagestats.SparkAtomicActionUsageJob</class>
|
||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
|
@ -130,16 +86,14 @@
|
|||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/ext</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hiveMetastoreUris}</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}</arg>
|
||||
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
|
||||
<arg>--usagestatsdb</arg><arg>${usagestatsdb}</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/usageDb</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
|
@ -15,7 +15,7 @@
|
|||
"official_name": "Aperta TÜBİTAK Open Archive"
|
||||
},
|
||||
"BL.CAM": {
|
||||
"openaire_id": "re3data_____::r3d100010620",
|
||||
"openaire_id": "opendoar____::109",
|
||||
"datacite_name": "Apollo",
|
||||
"official_name": "Apollo"
|
||||
},
|
||||
|
@ -196,7 +196,7 @@
|
|||
},
|
||||
"CSIC.DIGITAL": {
|
||||
"openaire_id": "re3data_____::r3d100011076",
|
||||
"datacite_name": "DIGITAL.CSIC",
|
||||
"datacite_name": "Digital CSIC",
|
||||
"official_name": "DIGITAL.CSIC"
|
||||
},
|
||||
"BL.DRI": {
|
||||
|
@ -644,6 +644,11 @@
|
|||
"datacite_name": "PANGAEA",
|
||||
"official_name": "PANGAEA"
|
||||
},
|
||||
"TIB.PANGAEA": {
|
||||
"openaire_id": "re3data_____::r3d100010134",
|
||||
"datacite_name": "PANGAEA",
|
||||
"official_name": "PANGAEA"
|
||||
},
|
||||
"NASAPDS.NASAPDS": {
|
||||
"openaire_id": "re3data_____::r3d100010121",
|
||||
"datacite_name": "PDS",
|
||||
|
@ -896,7 +901,7 @@
|
|||
},
|
||||
"FIGSHARE.UCT": {
|
||||
"openaire_id": "re3data_____::r3d100012633",
|
||||
"datacite_name": "ZivaHub",
|
||||
"datacite_name": "University of Cape Town (UCT)",
|
||||
"official_name": "ZivaHub"
|
||||
},
|
||||
"BL.UCLAN": {
|
||||
|
@ -1030,9 +1035,9 @@
|
|||
"official_name": "ZBW Journal Data Archive"
|
||||
},
|
||||
"CERN.ZENODO": {
|
||||
"openaire_id": "re3data_____::r3d100010468",
|
||||
"openaire_id": "opendoar____::2659",
|
||||
"datacite_name": "Zenodo",
|
||||
"official_name": "Zenodo"
|
||||
"official_name": "ZENODO"
|
||||
},
|
||||
"ZBW.ZEW": {
|
||||
"openaire_id": "re3data_____::r3d100010399",
|
||||
|
|
|
@ -19,7 +19,7 @@ import java.time.chrono.ThaiBuddhistDate
|
|||
import java.time.format.DateTimeFormatter
|
||||
import java.util.{Date, Locale}
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.io.{Codec, Source}
|
||||
import scala.io.Source
|
||||
|
||||
object DataciteToOAFTransformation {
|
||||
|
||||
|
@ -47,13 +47,18 @@ object DataciteToOAFTransformation {
|
|||
}
|
||||
|
||||
/** This method should skip record if json contains invalid text
|
||||
* defined in gile datacite_filter
|
||||
* defined in file datacite_filter
|
||||
*
|
||||
* @param json
|
||||
* @param record : not parsed Datacite record
|
||||
* @param json : parsed record
|
||||
* @return True if the record should be skipped
|
||||
*/
|
||||
def skip_record(json: String): Boolean = {
|
||||
datacite_filter.exists(f => json.contains(f))
|
||||
def skip_record(record: String, json: org.json4s.JValue): Boolean = {
|
||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
datacite_filter.exists(f => record.contains(f)) || (json \\ "publisher")
|
||||
.extractOrElse[String]("")
|
||||
.equalsIgnoreCase("FAIRsharing")
|
||||
|
||||
}
|
||||
|
||||
@deprecated("this method will be removed", "dhp")
|
||||
|
@ -93,6 +98,10 @@ object DataciteToOAFTransformation {
|
|||
|
||||
}
|
||||
|
||||
/** This utility method indicates whether the embargo date has been reached
|
||||
* @param embargo_end_date
|
||||
* @return True if the embargo date has been reached, false otherwise
|
||||
*/
|
||||
def embargo_end(embargo_end_date: String): Boolean = {
|
||||
val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]"))
|
||||
val td = LocalDate.now()
|
||||
|
@ -137,6 +146,21 @@ object DataciteToOAFTransformation {
|
|||
}
|
||||
}
|
||||
|
||||
/** *
|
||||
* Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type.
|
||||
* Using the dnet:result_typologies vocabulary, we look up the instance.type synonym
|
||||
* to generate one of the following main entities:
|
||||
* - publication
|
||||
* - dataset
|
||||
* - software
|
||||
* - otherresearchproduct
|
||||
*
|
||||
* @param resourceType
|
||||
* @param resourceTypeGeneral
|
||||
* @param schemaOrg
|
||||
* @param vocabularies
|
||||
* @return
|
||||
*/
|
||||
def getTypeQualifier(
|
||||
resourceType: String,
|
||||
resourceTypeGeneral: String,
|
||||
|
@ -247,7 +271,7 @@ object DataciteToOAFTransformation {
|
|||
.exists(i => i.getHostedby != null && "figshare".equalsIgnoreCase(i.getHostedby.getValue))
|
||||
if (hosted_by_figshare) {
|
||||
r.getInstance().asScala.foreach(i => i.setAccessright(ModelConstants.OPEN_ACCESS_RIGHT()))
|
||||
val l: List[StructuredProperty] = List()
|
||||
val l: List[Subject] = List()
|
||||
r.setSubject(l.asJava)
|
||||
}
|
||||
}
|
||||
|
@ -304,12 +328,13 @@ object DataciteToOAFTransformation {
|
|||
vocabularies: VocabularyGroup,
|
||||
exportLinks: Boolean
|
||||
): List[Oaf] = {
|
||||
if (skip_record(input))
|
||||
return List()
|
||||
|
||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
lazy val json = parse(input)
|
||||
|
||||
if (skip_record(input, json))
|
||||
return List()
|
||||
|
||||
val resourceType = (json \ "attributes" \ "types" \ "resourceType").extractOrElse[String](null)
|
||||
val resourceTypeGeneral =
|
||||
(json \ "attributes" \ "types" \ "resourceTypeGeneral").extractOrElse[String](null)
|
||||
|
@ -324,6 +349,7 @@ object DataciteToOAFTransformation {
|
|||
if (result == null)
|
||||
return List()
|
||||
|
||||
// DOI is mapped on a PID inside a Instance object
|
||||
val doi_q = OafMapperUtils.qualifier(
|
||||
"doi",
|
||||
"doi",
|
||||
|
@ -332,6 +358,8 @@ object DataciteToOAFTransformation {
|
|||
)
|
||||
val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo)
|
||||
result.setPid(List(pid).asJava)
|
||||
|
||||
// This identifier will be replaced later by the PID generation logic
|
||||
result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true))
|
||||
result.setOriginalId(List(doi).asJava)
|
||||
|
||||
|
@ -380,6 +408,10 @@ object DataciteToOAFTransformation {
|
|||
a
|
||||
}
|
||||
|
||||
if (authors == null || authors.isEmpty || !authors.exists(a => a != null))
|
||||
return List()
|
||||
result.setAuthor(authors.asJava)
|
||||
|
||||
val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List())
|
||||
|
||||
result.setTitle(
|
||||
|
@ -403,10 +435,6 @@ object DataciteToOAFTransformation {
|
|||
.asJava
|
||||
)
|
||||
|
||||
if (authors == null || authors.isEmpty || !authors.exists(a => a != null))
|
||||
return List()
|
||||
result.setAuthor(authors.asJava)
|
||||
|
||||
val dates = (json \\ "dates").extract[List[DateType]]
|
||||
val publication_year = (json \\ "publicationYear").extractOrElse[String](null)
|
||||
|
||||
|
@ -486,7 +514,7 @@ object DataciteToOAFTransformation {
|
|||
subjects
|
||||
.filter(s => s.subject.nonEmpty)
|
||||
.map(s =>
|
||||
OafMapperUtils.structuredProperty(
|
||||
OafMapperUtils.subject(
|
||||
s.subject.get,
|
||||
SUBJ_CLASS,
|
||||
SUBJ_CLASS,
|
||||
|
@ -578,7 +606,12 @@ object DataciteToOAFTransformation {
|
|||
JField("awardUri", JString(awardUri)) <- fundingReferences
|
||||
} yield awardUri
|
||||
|
||||
val oid = result.getId
|
||||
result.setId(IdentifierFactory.createIdentifier(result))
|
||||
if (!result.getId.equalsIgnoreCase(oid)) {
|
||||
result.setOriginalId((oid :: List(doi)).asJava)
|
||||
}
|
||||
|
||||
var relations: List[Relation] =
|
||||
awardUris.flatMap(a => get_projectRelation(a, result.getId)).filter(r => r != null)
|
||||
|
||||
|
|
|
@ -281,7 +281,7 @@ object BioDBToOAF {
|
|||
d.setSubject(
|
||||
subjects
|
||||
.map(s =>
|
||||
OafMapperUtils.structuredProperty(
|
||||
OafMapperUtils.subject(
|
||||
s,
|
||||
SUBJ_CLASS,
|
||||
SUBJ_CLASS,
|
||||
|
|
|
@ -98,6 +98,7 @@ class PMParser(xml: XMLEventReader) extends Iterator[PMArticle] {
case "PMID" => currentArticle.setPmid(text.trim)
case "ArticleId" =>
if ("doi".equalsIgnoreCase(currentArticleType)) currentArticle.setDoi(text.trim)
if ("pmc".equalsIgnoreCase(currentArticleType)) currentArticle.setPmcId(text.trim)
case "Language" => currentArticle.setLanguage(text.trim)
case "ISSN" => currentJournal.setIssn(text.trim)
case "GrantID" => currentGrant.setGrantID(text.trim)

@ -4,9 +4,12 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
|||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType}
|
||||
import eu.dnetlib.dhp.schema.oaf._
|
||||
import collection.JavaConverters._
|
||||
import eu.dnetlib.dhp.utils.DHPUtils
|
||||
import org.apache.commons.lang3.StringUtils
|
||||
|
||||
import collection.JavaConverters._
|
||||
import java.util.regex.Pattern
|
||||
import scala.collection.mutable.ListBuffer
|
||||
|
||||
/**
|
||||
*/
|
||||
|
@ -14,6 +17,9 @@ object PubMedToOaf {
|
|||
|
||||
val SUBJ_CLASS = "keywords"
|
||||
|
||||
val OAI_HEADER = "oai:pubmedcentral.nih.gov:"
|
||||
val OLD_PMC_PREFIX = "od_______267::"
|
||||
|
||||
val urlMap = Map(
|
||||
"pmid" -> "https://pubmed.ncbi.nlm.nih.gov/",
|
||||
"doi" -> "https://dx.doi.org/"
|
||||
|
@ -50,6 +56,15 @@ object PubMedToOaf {
|
|||
null
|
||||
}
|
||||
|
||||
def createOriginalOpenaireId(article: PMArticle): String = {
|
||||
if (StringUtils.isNotEmpty(article.getPmcId)) {
|
||||
val md5 = DHPUtils.md5(s"$OAI_HEADER${article.getPmcId.replace("PMC", "")}")
|
||||
s"$OLD_PMC_PREFIX$md5"
|
||||
} else
|
||||
null
|
||||
|
||||
}
|
||||
|
||||
/** Create an instance of class extends Result
|
||||
* starting from OAF instanceType value
|
||||
*
|
||||
|
@ -122,16 +137,27 @@ object PubMedToOaf {
|
|||
return null
|
||||
|
||||
// MAP PMID into pid with classid = classname = pmid
|
||||
val pidList: List[StructuredProperty] = List(
|
||||
OafMapperUtils.structuredProperty(
|
||||
article.getPmid,
|
||||
PidType.pmid.toString,
|
||||
PidType.pmid.toString,
|
||||
val pidList = ListBuffer[StructuredProperty]()
|
||||
|
||||
pidList += OafMapperUtils.structuredProperty(
|
||||
article.getPmid,
|
||||
PidType.pmid.toString,
|
||||
PidType.pmid.toString,
|
||||
ModelConstants.DNET_PID_TYPES,
|
||||
ModelConstants.DNET_PID_TYPES,
|
||||
dataInfo
|
||||
)
|
||||
|
||||
if (StringUtils.isNotBlank(article.getPmcId)) {
|
||||
pidList += OafMapperUtils.structuredProperty(
|
||||
article.getPmcId,
|
||||
PidType.pmc.toString,
|
||||
PidType.pmc.toString,
|
||||
ModelConstants.DNET_PID_TYPES,
|
||||
ModelConstants.DNET_PID_TYPES,
|
||||
dataInfo
|
||||
)
|
||||
)
|
||||
}
|
||||
if (pidList == null)
|
||||
return null
|
||||
|
||||
|
@ -186,6 +212,7 @@ object PubMedToOaf {
|
|||
val urlLists: List[String] = pidList
|
||||
.map(s => (urlMap.getOrElse(s.getQualifier.getClassid, ""), s.getValue))
|
||||
.filter(t => t._1.nonEmpty)
|
||||
.toList
|
||||
.map(t => t._1 + t._2)
|
||||
if (urlLists != null)
|
||||
pubmedInstance.setUrl(urlLists.asJava)
|
||||
|
@ -238,8 +265,8 @@ object PubMedToOaf {
|
|||
result.setLanguage(term)
|
||||
}
|
||||
|
||||
val subjects: List[StructuredProperty] = article.getSubjects.asScala.map(s =>
|
||||
OafMapperUtils.structuredProperty(
|
||||
val subjects: List[Subject] = article.getSubjects.asScala.map(s =>
|
||||
OafMapperUtils.subject(
|
||||
s.getValue,
|
||||
SUBJ_CLASS,
|
||||
SUBJ_CLASS,
|
||||
|
@ -262,7 +289,14 @@ object PubMedToOaf {
|
|||
|
||||
if (authors != null && authors.nonEmpty)
|
||||
result.setAuthor(authors.asJava)
|
||||
result.setOriginalId(pidList.map(s => s.getValue).asJava)
|
||||
|
||||
if (StringUtils.isNotEmpty(article.getPmcId)) {
|
||||
val originalIDS = ListBuffer[String]()
|
||||
originalIDS += createOriginalOpenaireId(article)
|
||||
pidList.map(s => s.getValue).foreach(p => originalIDS += p)
|
||||
result.setOriginalId(originalIDS.asJava)
|
||||
} else
|
||||
result.setOriginalId(pidList.map(s => s.getValue).asJava)
|
||||
|
||||
result.setId(article.getPmid)
|
||||
|
||||
|
|
|
@ -72,7 +72,7 @@ public class ProduceTest {
|
|||
|
||||
JavaRDD<Result> tmp = getResultJavaRDD();
|
||||
|
||||
List<StructuredProperty> sbjs = tmp
|
||||
List<Subject> sbjs = tmp
|
||||
.filter(row -> row.getSubject() != null && row.getSubject().size() > 0)
|
||||
.flatMap(row -> row.getSubject().iterator())
|
||||
.collect();
|
||||
|
@ -169,7 +169,7 @@ public class ProduceTest {
|
|||
.getSubject()
|
||||
.size());
|
||||
|
||||
List<StructuredProperty> sbjs = tmp
|
||||
List<Subject> sbjs = tmp
|
||||
.filter(row -> row.getId().equals(doi))
|
||||
.flatMap(row -> row.getSubject().iterator())
|
||||
.collect();
|
||||
|
@ -396,7 +396,7 @@ public class ProduceTest {
|
|||
.getSubject()
|
||||
.size());
|
||||
|
||||
List<StructuredProperty> sbjs = tmp
|
||||
List<Subject> sbjs = tmp
|
||||
.filter(row -> row.getId().equals(doi))
|
||||
.flatMap(row -> row.getSubject().iterator())
|
||||
.collect();
|
||||
|
@ -508,7 +508,7 @@ public class ProduceTest {
|
|||
.getSubject()
|
||||
.size());
|
||||
|
||||
List<StructuredProperty> sbjs = tmp
|
||||
List<Subject> sbjs = tmp
|
||||
.filter(row -> row.getId().equals(doi))
|
||||
.flatMap(row -> row.getSubject().iterator())
|
||||
.collect();
|
||||
|
@ -537,7 +537,7 @@ public class ProduceTest {
|
|||
|
||||
JavaRDD<Result> tmp = getResultJavaRDDPlusSDG();
|
||||
|
||||
List<StructuredProperty> sbjs_sdg = tmp
|
||||
List<Subject> sbjs_sdg = tmp
|
||||
.filter(row -> row.getSubject() != null && row.getSubject().size() > 0)
|
||||
.flatMap(row -> row.getSubject().iterator())
|
||||
.filter(sbj -> sbj.getQualifier().getClassid().equals(Constants.SDG_CLASS_ID))
|
||||
|
|
|
@ -2,6 +2,7 @@
package eu.dnetlib.dhp.actionmanager.ror;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;

import java.io.FileInputStream;
import java.util.List;

@ -38,25 +39,20 @@ class GenerateRorActionSetJobTest {
.readValue(IOUtils.toString(getClass().getResourceAsStream("ror_org.json")), RorOrganization.class);
final List<AtomicAction<? extends Oaf>> aas = GenerateRorActionSetJob.convertRorOrg(r);

Assertions.assertEquals(3, aas.size());
Assertions.assertEquals(1, aas.size());
assertEquals(Organization.class, aas.get(0).getClazz());
assertEquals(Relation.class, aas.get(1).getClazz());
assertEquals(Relation.class, aas.get(2).getClazz());

final Organization o = (Organization) aas.get(0).getPayload();
final Relation r1 = (Relation) aas.get(1).getPayload();
final Relation r2 = (Relation) aas.get(2).getPayload();

assertEquals(o.getId(), r1.getSource());
assertEquals(r1.getSource(), r2.getTarget());
assertEquals(r2.getSource(), r1.getTarget());
assertEquals(ModelConstants.IS_PARENT_OF, r1.getRelClass());
assertEquals(ModelConstants.IS_CHILD_OF, r2.getRelClass());
assertNotNull(o);

assertNotNull(o.getCountry());
assertEquals("AU", o.getCountry().getClassid());

assertNotNull(o.getLegalname());
assertEquals("Mount Stromlo Observatory", o.getLegalname().getValue());

System.out.println(mapper.writeValueAsString(o));
System.out.println(mapper.writeValueAsString(r1));
System.out.println(mapper.writeValueAsString(r2));

}

@Test

@ -0,0 +1,256 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.usagestats;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class SparkAtomicActionCountJobTest {
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
||||
private static final Logger log = LoggerFactory
|
||||
.getLogger(SparkAtomicActionCountJobTest.class);
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files
|
||||
.createTempDirectory(SparkAtomicActionCountJobTest.class.getSimpleName());
|
||||
log.info("using work dir {}", workingDir);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(SparkAtomicActionCountJobTest.class.getSimpleName());
|
||||
|
||||
conf.setMaster("local[*]");
|
||||
conf.set("spark.driver.host", "localhost");
|
||||
conf.set("hive.metastore.local", "true");
|
||||
conf.set("spark.ui.enabled", "false");
|
||||
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
||||
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(SparkAtomicActionCountJobTest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void afterAll() throws IOException {
|
||||
FileUtils.deleteDirectory(workingDir.toFile());
|
||||
spark.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
void testMatch() {
|
||||
String usageScoresPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/actionmanager/usagestats/usagestatsdb")
|
||||
.getPath();
|
||||
|
||||
SparkAtomicActionUsageJob.writeActionSet(spark, usageScoresPath, workingDir.toString() + "/actionSet");
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Result> tmp = sc
|
||||
.sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
|
||||
.map(usm -> OBJECT_MAPPER.readValue(usm._2.getBytes(), AtomicAction.class))
|
||||
.map(aa -> (Result) aa.getPayload());
|
||||
|
||||
Assertions.assertEquals(9, tmp.count());
|
||||
|
||||
tmp.foreach(r -> Assertions.assertEquals(2, r.getMeasures().size()));
|
||||
tmp
|
||||
.foreach(
|
||||
r -> r
|
||||
.getMeasures()
|
||||
.stream()
|
||||
.forEach(
|
||||
m -> m
|
||||
.getUnit()
|
||||
.stream()
|
||||
.forEach(u -> Assertions.assertFalse(u.getDataInfo().getDeletedbyinference()))));
|
||||
tmp
|
||||
.foreach(
|
||||
r -> r
|
||||
.getMeasures()
|
||||
.stream()
|
||||
.forEach(
|
||||
m -> m.getUnit().stream().forEach(u -> Assertions.assertTrue(u.getDataInfo().getInferred()))));
|
||||
tmp
|
||||
.foreach(
|
||||
r -> r
|
||||
.getMeasures()
|
||||
.stream()
|
||||
.forEach(
|
||||
m -> m
|
||||
.getUnit()
|
||||
.stream()
|
||||
.forEach(u -> Assertions.assertFalse(u.getDataInfo().getInvisible()))));
|
||||
|
||||
tmp
|
||||
.foreach(
|
||||
r -> r
|
||||
.getMeasures()
|
||||
.stream()
|
||||
.forEach(
|
||||
m -> m
|
||||
.getUnit()
|
||||
.stream()
|
||||
.forEach(
|
||||
u -> Assertions
|
||||
.assertEquals(
|
||||
"measure:usage_counts",
|
||||
u.getDataInfo().getProvenanceaction().getClassid()))));
|
||||
tmp
|
||||
.foreach(
|
||||
r -> r
|
||||
.getMeasures()
|
||||
.stream()
|
||||
.forEach(
|
||||
m -> m
|
||||
.getUnit()
|
||||
.stream()
|
||||
.forEach(
|
||||
u -> Assertions
|
||||
.assertEquals(
|
||||
"Inferred by OpenAIRE",
|
||||
u.getDataInfo().getProvenanceaction().getClassname()))));
|
||||
|
||||
tmp
|
||||
.foreach(
|
||||
r -> r
|
||||
.getMeasures()
|
||||
.stream()
|
||||
.forEach(
|
||||
m -> m
|
||||
.getUnit()
|
||||
.stream()
|
||||
.forEach(
|
||||
u -> Assertions
|
||||
.assertEquals(
|
||||
"count",
|
||||
u.getKey()))));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")).count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"0",
|
||||
tmp
|
||||
.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getMeasures()
|
||||
.stream()
|
||||
.filter(m -> m.getId().equals("downloads"))
|
||||
.collect(Collectors.toList())
|
||||
.get(0)
|
||||
.getUnit()
|
||||
.get(0)
|
||||
.getValue());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"5",
|
||||
tmp
|
||||
.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getMeasures()
|
||||
.stream()
|
||||
.filter(m -> m.getId().equals("views"))
|
||||
.collect(Collectors.toList())
|
||||
.get(0)
|
||||
.getUnit()
|
||||
.get(0)
|
||||
.getValue());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"0",
|
||||
tmp
|
||||
.filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getMeasures()
|
||||
.stream()
|
||||
.filter(m -> m.getId().equals("downloads"))
|
||||
.collect(Collectors.toList())
|
||||
.get(0)
|
||||
.getUnit()
|
||||
.get(0)
|
||||
.getValue());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"1",
|
||||
tmp
|
||||
.filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getMeasures()
|
||||
.stream()
|
||||
.filter(m -> m.getId().equals("views"))
|
||||
.collect(Collectors.toList())
|
||||
.get(0)
|
||||
.getUnit()
|
||||
.get(0)
|
||||
.getValue());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"2",
|
||||
tmp
|
||||
.filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getMeasures()
|
||||
.stream()
|
||||
.filter(m -> m.getId().equals("downloads"))
|
||||
.collect(Collectors.toList())
|
||||
.get(0)
|
||||
.getUnit()
|
||||
.get(0)
|
||||
.getValue());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"6",
|
||||
tmp
|
||||
.filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getMeasures()
|
||||
.stream()
|
||||
.filter(m -> m.getId().equals("views"))
|
||||
.collect(Collectors.toList())
|
||||
.get(0)
|
||||
.getUnit()
|
||||
.get(0)
|
||||
.getValue());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,61 @@

package eu.dnetlib.dhp.collection.plugin.file;

import java.io.IOException;
import java.util.HashMap;
import java.util.stream.Stream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;

public class FileCollectorPluginTest {

	private static final Logger log = LoggerFactory.getLogger(FileCollectorPluginTest.class);

	private final ApiDescriptor api = new ApiDescriptor();

	private FileCollectorPlugin plugin;

	private static final String SPLIT_ON_ELEMENT = "repository";

	@BeforeEach
	public void setUp() throws IOException {

		// the input here is a plain (uncompressed) XML file
		final String inputFile = this
			.getClass()
			.getResource("/eu/dnetlib/dhp/collection/plugin/file/opendoar.xml")
			.getFile();

		api.setBaseUrl(inputFile);

		HashMap<String, String> params = new HashMap<>();
		params.put("splitOnElement", SPLIT_ON_ELEMENT);

		api.setParams(params);

		FileSystem fs = FileSystem.get(new Configuration());
		plugin = new FileCollectorPlugin(fs);
	}

	@Test
	void test() throws CollectorException {

		final Stream<String> stream = plugin.collect(api, new AggregatorReport());

		stream.limit(10).forEach(s -> {
			Assertions.assertTrue(s.length() > 0);
			log.info(s);
		});
	}
}
@ -0,0 +1,68 @@

package eu.dnetlib.dhp.collection.plugin.file;

import java.io.IOException;
import java.util.HashMap;
import java.util.Objects;
import java.util.stream.Stream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.junit.jupiter.api.*;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;

@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
@ExtendWith(MockitoExtension.class)
public class FileGZipCollectorPluginTest {

	private static final Logger log = LoggerFactory.getLogger(FileGZipCollectorPluginTest.class);

	private final ApiDescriptor api = new ApiDescriptor();

	private FileGZipCollectorPlugin plugin;

	private static final String SPLIT_ON_ELEMENT = "repository";

	@BeforeEach
	public void setUp() throws IOException {

		final String gzipFile = Objects
			.requireNonNull(
				this
					.getClass()
					.getResource("/eu/dnetlib/dhp/collection/plugin/file/opendoar.xml.gz"))
			.getFile();

		api.setBaseUrl(gzipFile);

		HashMap<String, String> params = new HashMap<>();
		params.put("splitOnElement", SPLIT_ON_ELEMENT);

		api.setParams(params);

		FileSystem fs = FileSystem.get(new Configuration());
		plugin = new FileGZipCollectorPlugin(fs);
	}

	@Test
	void test() throws CollectorException {

		final Stream<String> stream = plugin.collect(api, new AggregatorReport());

		stream.limit(10).forEach(s -> {
			Assertions.assertTrue(s.length() > 0);
			log.info(s);
		});
	}
}
@ -19,7 +19,9 @@ import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.util.LongAccumulator;
import org.junit.jupiter.api.*;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.io.TempDir;
import org.mockito.junit.jupiter.MockitoExtension;

@ -50,7 +52,7 @@ class TransformationJobTest extends AbstractVocabularyTest {
	@Test
	@DisplayName("Test Date cleaner")
	void testDateCleaner() throws Exception {
		DateCleaner dc = new DateCleaner();
		final DateCleaner dc = new DateCleaner();
		assertEquals("1982-09-20", dc.clean("20/09/1982"));
		assertEquals("2002-09-20", dc.clean("20-09-2002"));
		assertEquals("2002-09-20", dc.clean("2002-09-20"));

@ -68,9 +70,9 @@ class TransformationJobTest extends AbstractVocabularyTest {
		mr.setProvenance(new Provenance("DSID", "DSNAME", "PREFIX"));
		mr.setBody(IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/input_zenodo.xml")));
		// We Load the XSLT transformation Rule from the classpath
		XSLTTransformationFunction tr = loadTransformationRule("/eu/dnetlib/dhp/transform/zenodo_tr.xslt");
		final XSLTTransformationFunction tr = loadTransformationRule("/eu/dnetlib/dhp/transform/zenodo_tr.xslt");

		MetadataRecord result = tr.call(mr);
		final MetadataRecord result = tr.call(mr);

		// Print the record
		System.out.println(result.getBody());

@ -86,9 +88,9 @@ class TransformationJobTest extends AbstractVocabularyTest {
		mr.setProvenance(new Provenance("DSID", "DSNAME", "PREFIX"));
		mr.setBody(IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/input_itgv4.xml")));
		// We Load the XSLT transformation Rule from the classpath
		XSLTTransformationFunction tr = loadTransformationRule("/eu/dnetlib/dhp/transform/zenodo_tr.xslt");
		final XSLTTransformationFunction tr = loadTransformationRule("/eu/dnetlib/dhp/transform/zenodo_tr.xslt");

		MetadataRecord result = tr.call(mr);
		final MetadataRecord result = tr.call(mr);

		// Print the record
		System.out.println(result.getBody());

@ -108,9 +110,9 @@ class TransformationJobTest extends AbstractVocabularyTest {
		mr.setProvenance(new Provenance("DSID", "DSNAME", "PREFIX"));
		mr.setBody(IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/input_itgv4.xml")));
		// We Load the XSLT transformation Rule from the classpath
		XSLTTransformationFunction tr = loadTransformationRule(xslTransformationScript);
		final XSLTTransformationFunction tr = loadTransformationRule(xslTransformationScript);

		MetadataRecord result = tr.call(mr);
		final MetadataRecord result = tr.call(mr);

		// Print the record
		System.out.println(result.getBody());

@ -129,9 +131,9 @@ class TransformationJobTest extends AbstractVocabularyTest {
		mr.setProvenance(new Provenance("DSID", "DSNAME", "PREFIX"));
		mr.setBody(IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/input_omicsdi.xml")));
		// We Load the XSLT transformation Rule from the classpath
		XSLTTransformationFunction tr = loadTransformationRule(xslTransformationScript);
		final XSLTTransformationFunction tr = loadTransformationRule(xslTransformationScript);

		MetadataRecord result = tr.call(mr);
		final MetadataRecord result = tr.call(mr);

		// Print the record
		System.out.println(result.getBody());

@ -140,7 +142,8 @@ class TransformationJobTest extends AbstractVocabularyTest {

	@Test
	@DisplayName("Test TransformSparkJobNode.main with oaiOpenaire_datacite (v4)")
	void transformTestITGv4OAIdatacite(@TempDir Path testDir) throws Exception {
	void transformTestITGv4OAIdatacite(@TempDir
	final Path testDir) throws Exception {

		try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) {

@ -203,7 +206,8 @@ class TransformationJobTest extends AbstractVocabularyTest {

	@Test
	@DisplayName("Test TransformSparkJobNode.main")
	void transformTest(@TempDir Path testDir) throws Exception {
	void transformTest(@TempDir
	final Path testDir) throws Exception {

		try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) {

@ -256,6 +260,25 @@ class TransformationJobTest extends AbstractVocabularyTest {
		}
	}

	@Test
	@DisplayName("Test Transform Single XML using cnr_explora_tr XSLTTransformator")
	void testCnrExploraTransformSaxonHE() throws Exception {

		// We Set the input Record getting the XML from the classpath
		final MetadataRecord mr = new MetadataRecord();

		mr.setProvenance(new Provenance("openaire____::cnr_explora", "CNR ExploRA", "cnr_________"));
		mr.setBody(IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/input_cnr_explora.xml")));
		// We Load the XSLT transformation Rule from the classpath
		final XSLTTransformationFunction tr = loadTransformationRule("/eu/dnetlib/dhp/transform/cnr_explora_tr.xslt");

		final MetadataRecord result = tr.call(mr);

		// Print the record
		System.out.println(result.getBody());
		// TODO Create significant Assert
	}

	private XSLTTransformationFunction loadTransformationRule(final String path) throws Exception {
		final String trValue = IOUtils.toString(this.getClass().getResourceAsStream(path));
		final LongAccumulator la = new LongAccumulator();
File diff suppressed because one or more lines are too long

@ -0,0 +1,12 @@
{"result_id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":4}
{"result_id":"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6","downloads":0,"views":1}
{"result_id":"doi_________::17eda2ff77407538fbe5d3d719b9d1c0","downloads":0,"views":1}
{"result_id":"doi_________::1d4dc08605fd0a2be1105d30c63bfea1","downloads":1,"views":3}
{"result_id":"doi_________::2e3527822854ca9816f6dfea5bff61a8","downloads":1,"views":1}
{"result_id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":2,"views":3}
{"result_id":"doi_________::3085e4c6e051378ca6157fe7f0430c1f","downloads":0,"views":3}
{"result_id":"doi_________::33f710e6dd30cc5e67e35b371ddc33cf","downloads":0,"views":1}
{"result_id":"doi_________::39738ebf10654732dd3a7af9f24655f8","downloads":1,"views":3}
{"result_id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":1,"views":8}
{"result_id":"doi_________::3c3b65f07c1a06c7894397eda1d11bbf","downloads":0,"views":2}
{"result_id":"doi_________::4938a71a884dd481d329657aa543b850","downloads":0,"views":3}
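
Each row above is one usage record for a result; note the duplicate result_id rows, whose counts are summed, matching the test further up expecting "5" views for 50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6 (4 + 1). A minimal sketch of reading one row with Jackson (the UsageRow POJO is hypothetical; its fields simply mirror the JSON keys):

	import com.fasterxml.jackson.databind.ObjectMapper;

	// Hypothetical POJO mirroring the JSON keys of the rows above.
	public class UsageRow {
		public String result_id;
		public int downloads;
		public int views;

		public static void main(String[] args) throws Exception {
			final String line = "{\"result_id\":\"dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6\",\"downloads\":0,\"views\":4}";
			final UsageRow row = new ObjectMapper().readValue(line, UsageRow.class);
			// Prints: dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6 0/4
			System.out.println(row.result_id + " " + row.downloads + "/" + row.views);
		}
	}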
File diff suppressed because it is too large
Binary file not shown.
@ -195,7 +195,9 @@
<Title>Biochemical and biophysical research communications</Title>
<ISOAbbreviation>Biochem Biophys Res Commun</ISOAbbreviation>
</Journal>
<ArticleTitle>Delineation of the intimate details of the backbone conformation of pyridine nucleotide coenzymes in aqueous solution.</ArticleTitle>
<ArticleTitle>Delineation of the intimate details of the backbone conformation of pyridine nucleotide
coenzymes in aqueous solution.
</ArticleTitle>
<Pagination>
<MedlinePgn>1173-9</MedlinePgn>
</Pagination>

@ -473,7 +475,9 @@
<Title>Biochemical and biophysical research communications</Title>
<ISOAbbreviation>Biochem Biophys Res Commun</ISOAbbreviation>
</Journal>
<ArticleTitle>Effect of chloroquine on cultured fibroblasts: release of lysosomal hydrolases and inhibition of their uptake.</ArticleTitle>
<ArticleTitle>Effect of chloroquine on cultured fibroblasts: release of lysosomal hydrolases and
inhibition of their uptake.
</ArticleTitle>
<Pagination>
<MedlinePgn>1338-43</MedlinePgn>
</Pagination>

@ -657,7 +661,8 @@
<Title>Biochemical and biophysical research communications</Title>
<ISOAbbreviation>Biochem Biophys Res Commun</ISOAbbreviation>
</Journal>
<ArticleTitle>Atomic models for the polypeptide backbones of myohemerythrin and hemerythrin.</ArticleTitle>
<ArticleTitle>Atomic models for the polypeptide backbones of myohemerythrin and hemerythrin.
</ArticleTitle>
<Pagination>
<MedlinePgn>1349-56</MedlinePgn>
</Pagination>

@ -1627,7 +1632,9 @@
<Title>Biochemical pharmacology</Title>
<ISOAbbreviation>Biochem Pharmacol</ISOAbbreviation>
</Journal>
<ArticleTitle>Comparison between procaine and isocarboxazid metabolism in vitro by a liver microsomal amidase-esterase.</ArticleTitle>
<ArticleTitle>Comparison between procaine and isocarboxazid metabolism in vitro by a liver microsomal
amidase-esterase.
</ArticleTitle>
<Pagination>
<MedlinePgn>1517-21</MedlinePgn>
</Pagination>

@ -2030,7 +2037,9 @@
<Title>Biochemical pharmacology</Title>
<ISOAbbreviation>Biochem Pharmacol</ISOAbbreviation>
</Journal>
<ArticleTitle>Radiochemical assay of glutathione S-epoxide transferase and its enhancement by phenobarbital in rat liver in vivo.</ArticleTitle>
<ArticleTitle>Radiochemical assay of glutathione S-epoxide transferase and its enhancement by
phenobarbital in rat liver in vivo.
</ArticleTitle>
<Pagination>
<MedlinePgn>1569-72</MedlinePgn>
</Pagination>

@ -2350,7 +2359,9 @@
<Title>Biochemical pharmacology</Title>
<ISOAbbreviation>Biochem Pharmacol</ISOAbbreviation>
</Journal>
<ArticleTitle>Identification of adenylate cyclase-coupled beta-adrenergic receptors with radiolabeled beta-adrenergic antagonists.</ArticleTitle>
<ArticleTitle>Identification of adenylate cyclase-coupled beta-adrenergic receptors with radiolabeled
beta-adrenergic antagonists.
</ArticleTitle>
<Pagination>
<MedlinePgn>1651-8</MedlinePgn>
</Pagination>

@ -2598,7 +2609,9 @@
<Title>Biochemical pharmacology</Title>
<ISOAbbreviation>Biochem Pharmacol</ISOAbbreviation>
</Journal>
<ArticleTitle>The effect of adrenaline and of alpha- and beta-adrenergic blocking agents on ATP concentration and on incorporation of 32Pi into ATP in rat fat cells.</ArticleTitle>
<ArticleTitle>The effect of adrenaline and of alpha- and beta-adrenergic blocking agents on ATP
concentration and on incorporation of 32Pi into ATP in rat fat cells.
</ArticleTitle>
<Pagination>
<MedlinePgn>1659-62</MedlinePgn>
</Pagination>

@ -2851,7 +2864,9 @@
<Title>Biochemical pharmacology</Title>
<ISOAbbreviation>Biochem Pharmacol</ISOAbbreviation>
</Journal>
<ArticleTitle>Action of propranolol on mitochondrial functions--effects on energized ion fluxes in the presence of valinomycin.</ArticleTitle>
<ArticleTitle>Action of propranolol on mitochondrial functions--effects on energized ion fluxes in the
presence of valinomycin.
</ArticleTitle>
<Pagination>
<MedlinePgn>1701-5</MedlinePgn>
</Pagination>

@ -3265,7 +3280,8 @@
</Chemical>
<Chemical>
<RegistryNumber>EC 2.6.1.16</RegistryNumber>
<NameOfSubstance UI="D005945">Glutamine-Fructose-6-Phosphate Transaminase (Isomerizing)</NameOfSubstance>
<NameOfSubstance UI="D005945">Glutamine-Fructose-6-Phosphate Transaminase (Isomerizing)
</NameOfSubstance>
</Chemical>
<Chemical>
<RegistryNumber>EC 2.7.-</RegistryNumber>

@ -3324,7 +3340,9 @@
<DescriptorName UI="D005944" MajorTopicYN="N">Glucosamine</DescriptorName>
</MeshHeading>
<MeshHeading>
<DescriptorName UI="D005945" MajorTopicYN="N">Glutamine-Fructose-6-Phosphate Transaminase (Isomerizing)</DescriptorName>
<DescriptorName UI="D005945" MajorTopicYN="N">Glutamine-Fructose-6-Phosphate Transaminase
(Isomerizing)
</DescriptorName>
<QualifierName UI="Q000378" MajorTopicYN="N">metabolism</QualifierName>
</MeshHeading>
<MeshHeading>

@ -3463,7 +3481,8 @@
<Title>Biochemical pharmacology</Title>
<ISOAbbreviation>Biochem Pharmacol</ISOAbbreviation>
</Journal>
<ArticleTitle>Inhibition of aldehyde reductase by acidic metabolites of the biogenic amines.</ArticleTitle>
<ArticleTitle>Inhibition of aldehyde reductase by acidic metabolites of the biogenic amines.
</ArticleTitle>
<Pagination>
<MedlinePgn>1731-3</MedlinePgn>
</Pagination>

@ -3696,7 +3715,9 @@
<Title>Biochemical pharmacology</Title>
<ISOAbbreviation>Biochem Pharmacol</ISOAbbreviation>
</Journal>
<ArticleTitle>Effects of 5,6-dihydroxytryptamine on tyrosine-hydroxylase activity in central catecholaminergic neurons of the rat.</ArticleTitle>
<ArticleTitle>Effects of 5,6-dihydroxytryptamine on tyrosine-hydroxylase activity in central
catecholaminergic neurons of the rat.
</ArticleTitle>
<Pagination>
<MedlinePgn>1739-42</MedlinePgn>
</Pagination>

@ -4602,12 +4623,19 @@
<Title>Arzneimittel-Forschung</Title>
<ISOAbbreviation>Arzneimittelforschung</ISOAbbreviation>
</Journal>
<ArticleTitle>[Biochemical studies on camomile components/III. In vitro studies about the antipeptic activity of (--)-alpha-bisabolol (author's transl)].</ArticleTitle>
<ArticleTitle>[Biochemical studies on camomile components/III. In vitro studies about the antipeptic
activity of (--)-alpha-bisabolol (author's transl)].
</ArticleTitle>
<Pagination>
<MedlinePgn>1352-4</MedlinePgn>
</Pagination>
<Abstract>
<AbstractText>(--)-alpha-Bisabolol has a primary antipeptic action depending on dosage, which is not caused by an alteration of the pH-value. The proteolytic activity of pepsin is reduced by 50 percent through addition of bisabolol in the ratio of 1/0.5. The antipeptic action of bisabolol only occurs in case of direct contact. In case of a previous contact with the substrate, the inhibiting effect is lost.</AbstractText>
<AbstractText>(--)-alpha-Bisabolol has a primary antipeptic action depending on dosage, which is not
caused by an alteration of the pH-value. The proteolytic activity of pepsin is reduced by 50
percent through addition of bisabolol in the ratio of 1/0.5. The antipeptic action of bisabolol
only occurs in case of direct contact. In case of a previous contact with the substrate, the
inhibiting effect is lost.
</AbstractText>
</Abstract>
<AuthorList CompleteYN="Y">
<Author ValidYN="Y">

@ -4626,7 +4654,9 @@
<PublicationType UI="D004740">English Abstract</PublicationType>
<PublicationType UI="D016428">Journal Article</PublicationType>
</PublicationTypeList>
<VernacularTitle>Biochemische Untersuchungen von Kamilleninhaltsstoffen. III. In-vitro-Versuche über die antipeptische Wirkung des (-)-alpha-Bisabolols</VernacularTitle>
<VernacularTitle>Biochemische Untersuchungen von Kamilleninhaltsstoffen. III. In-vitro-Versuche über die
antipeptische Wirkung des (-)-alpha-Bisabolols
</VernacularTitle>
</Article>
<MedlineJournalInfo>
<Country>Germany</Country>

@ -4753,12 +4783,37 @@
<Title>Arzneimittel-Forschung</Title>
<ISOAbbreviation>Arzneimittelforschung</ISOAbbreviation>
</Journal>
<ArticleTitle>[Demonstration of tumor inhibiting properties of a strongly immunostimulating low-molecular weight substance. Comparative studies with ifosfamide on the immuno-labile DS carcinosarcoma. Stimulation of the autoimmune activity for approx. 20 days by BA 1, a N-(2-cyanoethylene)-urea. Novel prophylactic possibilities].</ArticleTitle>
<ArticleTitle>[Demonstration of tumor inhibiting properties of a strongly immunostimulating
low-molecular weight substance. Comparative studies with ifosfamide on the immuno-labile DS
carcinosarcoma. Stimulation of the autoimmune activity for approx. 20 days by BA 1, a
N-(2-cyanoethylene)-urea. Novel prophylactic possibilities].
</ArticleTitle>
<Pagination>
<MedlinePgn>1369-79</MedlinePgn>
</Pagination>
<Abstract>
<AbstractText>A report is given on the recent discovery of outstanding immunological properties in BA 1 [N-(2-cyanoethylene)-urea] having a (low) molecular mass M = 111.104. Experiments in 214 DS carcinosarcoma bearing Wistar rats have shown that BA 1, at a dosage of only about 12 percent LD50 (150 mg kg) and negligible lethality (1.7 percent), results in a recovery rate of 40 percent without hyperglycemia and, in one test, of 80 percent with hyperglycemia. Under otherwise unchanged conditions the reference substance ifosfamide (IF) -- a further development of cyclophosphamide -- applied without hyperglycemia in its most efficient dosage of 47 percent LD50 (150 mg kg) brought about a recovery rate of 25 percent at a lethality of 18 percent. (Contrary to BA 1, 250-min hyperglycemia caused no further improvement of the recovery rate.) However this comparison is characterized by the fact that both substances exhibit two quite different (complementary) mechanisms of action. Leucocyte counts made after application of the said cancerostatics and dosages have shown a pronounced stimulation with BA 1 and with ifosfamide, the known suppression in the post-therapeutic interval usually found with standard cancerostatics. In combination with the cited plaque test for BA 1, blood pictures then allow conclusions on the immunity status. Since IF can be taken as one of the most efficient cancerostatics--there is no other chemotherapeutic known up to now that has a more significant effect on the DS carcinosarcoma in rats -- these findings are of special importance. Finally, the total amount of leucocytes and lymphocytes as well as their time behaviour was determined from the blood picture of tumour-free rats after i.v. application of BA 1. The thus obtained numerical values clearly show that further research work on the prophylactic use of this substance seems to be necessary and very promising.</AbstractText>
<AbstractText>A report is given on the recent discovery of outstanding immunological properties in
BA 1 [N-(2-cyanoethylene)-urea] having a (low) molecular mass M = 111.104. Experiments in 214 DS
carcinosarcoma bearing Wistar rats have shown that BA 1, at a dosage of only about 12 percent
LD50 (150 mg kg) and negligible lethality (1.7 percent), results in a recovery rate of 40
percent without hyperglycemia and, in one test, of 80 percent with hyperglycemia. Under
otherwise unchanged conditions the reference substance ifosfamide (IF) -- a further development
of cyclophosphamide -- applied without hyperglycemia in its most efficient dosage of 47 percent
LD50 (150 mg kg) brought about a recovery rate of 25 percent at a lethality of 18 percent.
(Contrary to BA 1, 250-min hyperglycemia caused no further improvement of the recovery rate.)
However this comparison is characterized by the fact that both substances exhibit two quite
different (complementary) mechanisms of action. Leucocyte counts made after application of the
said cancerostatics and dosages have shown a pronounced stimulation with BA 1 and with
ifosfamide, the known suppression in the post-therapeutic interval usually found with standard
cancerostatics. In combination with the cited plaque test for BA 1, blood pictures then allow
conclusions on the immunity status. Since IF can be taken as one of the most efficient
cancerostatics--there is no other chemotherapeutic known up to now that has a more significant
effect on the DS carcinosarcoma in rats -- these findings are of special importance. Finally,
the total amount of leucocytes and lymphocytes as well as their time behaviour was determined
from the blood picture of tumour-free rats after i.v. application of BA 1. The thus obtained
numerical values clearly show that further research work on the prophylactic use of this
substance seems to be necessary and very promising.
</AbstractText>
</Abstract>
<AuthorList CompleteYN="Y">
<Author ValidYN="Y">

@ -4778,7 +4833,11 @@
<PublicationType UI="D004740">English Abstract</PublicationType>
<PublicationType UI="D016428">Journal Article</PublicationType>
</PublicationTypeList>
<VernacularTitle>Nachweis krebshemmender Eigenschaften einer stark immunstimulierenden Verbindung kleiner Molekülmasse. Versuche am immunlabilen DS-Karzinosarkom im Vergleich mit Ifosfamid. Stimulierung der körpereigenen Abwehr über etwa 20 Tage durch BA 1, einen N-(2-Cyanthylen)-harnstoff. Neue prophylaktische Möglichkeiten</VernacularTitle>
<VernacularTitle>Nachweis krebshemmender Eigenschaften einer stark immunstimulierenden Verbindung
kleiner Molekülmasse. Versuche am immunlabilen DS-Karzinosarkom im Vergleich mit Ifosfamid.
Stimulierung der körpereigenen Abwehr über etwa 20 Tage durch BA 1, einen
N-(2-Cyanthylen)-harnstoff. Neue prophylaktische Möglichkeiten
</VernacularTitle>
</Article>
<MedlineJournalInfo>
<Country>Germany</Country>

@ -5016,7 +5075,20 @@
<MedlinePgn>1400-3</MedlinePgn>
</Pagination>
<Abstract>
<AbstractText>The distribution of blood flow to the subendocardial, medium and subepicardial layers of the left ventricular free wall was studied in anaesthetized dogs under normoxic (A), hypoxic (B) conditions and under pharmacologically induced (etafenone) coronary vasodilation (C). Regional myocardial blood flow was determined by means of the particle distribution method. In normoxia a transmural gradient of flow was observed, with the subendocardial layers receiving a significantly higher flow rate compared with the subepicardial layers. In hypoxia induced vasodilation this transmural gradient of flow was persistent. In contrast a marked redistribution of regional flow was observed under pharmacologically induced vasodilation. The transmural gradient decreased. In contrast to some findings these experiments demonstrate that a considerable vasodilatory capacity exists in all layers of the myocardium and can be utilized by drugs. The differences observed for the intramural distribution pattern of flow under hypoxia and drug induced vasodilation support the hypothesis that this pattern reflects corresponding gradients of regional myocardial metabolism.</AbstractText>
<AbstractText>The distribution of blood flow to the subendocardial, medium and subepicardial layers
of the left ventricular free wall was studied in anaesthetized dogs under normoxic (A), hypoxic
(B) conditions and under pharmacologically induced (etafenone) coronary vasodilation (C).
Regional myocardial blood flow was determined by means of the particle distribution method. In
normoxia a transmural gradient of flow was observed, with the subendocardial layers receiving a
significantly higher flow rate compared with the subepicardial layers. In hypoxia induced
vasodilation this transmural gradient of flow was persistent. In contrast a marked
redistribution of regional flow was observed under pharmacologically induced vasodilation. The
transmural gradient decreased. In contrast to some findings these experiments demonstrate that a
considerable vasodilatory capacity exists in all layers of the myocardium and can be utilized by
drugs. The differences observed for the intramural distribution pattern of flow under hypoxia
and drug induced vasodilation support the hypothesis that this pattern reflects corresponding
gradients of regional myocardial metabolism.
</AbstractText>
</Abstract>
<AuthorList CompleteYN="Y">
<Author ValidYN="Y">

@ -5185,4 +5257,151 @@
</ReferenceList>
</PubmedData>
</PubmedArticle>
<PubmedArticle>
<MedlineCitation Status="MEDLINE" Owner="NLM">
<PMID Version="1">4917185</PMID>
<DateCompleted>
<Year>1970</Year>
<Month>10</Month>
<Day>27</Day>
</DateCompleted>
<DateRevised>
<Year>2018</Year>
<Month>11</Month>
<Day>13</Day>
</DateRevised>
<Article PubModel="Print">
<Journal>
<ISSN IssnType="Print">0003-6919</ISSN>
<JournalIssue CitedMedium="Print">
<Volume>19</Volume>
<Issue>6</Issue>
<PubDate>
<Year>1970</Year>
<Month>Jun</Month>
</PubDate>
</JournalIssue>
<Title>Applied microbiology</Title>
<ISOAbbreviation>Appl Microbiol</ISOAbbreviation>
</Journal>
<ArticleTitle>Bactericidal activity of a broad-spectrum illumination source.</ArticleTitle>
<Pagination>
<MedlinePgn>1013-4</MedlinePgn>
</Pagination>
<Abstract>
<AbstractText>Several hours of exposure to Vita-Lite lamps, which have a unique spectral
distribution, give significant killing of cells of Staphylococcus aureus.
</AbstractText>
</Abstract>
<AuthorList CompleteYN="Y">
<Author ValidYN="Y">
<LastName>Himmelfarb</LastName>
<ForeName>P</ForeName>
<Initials>P</Initials>
</Author>
<Author ValidYN="Y">
<LastName>Scott</LastName>
<ForeName>A</ForeName>
<Initials>A</Initials>
</Author>
<Author ValidYN="Y">
<LastName>Thayer</LastName>
<ForeName>P S</ForeName>
<Initials>PS</Initials>
</Author>
</AuthorList>
<Language>eng</Language>
<PublicationTypeList>
<PublicationType UI="D016428">Journal Article</PublicationType>
</PublicationTypeList>
</Article>
<MedlineJournalInfo>
<Country>United States</Country>
<MedlineTA>Appl Microbiol</MedlineTA>
<NlmUniqueID>7605802</NlmUniqueID>
<ISSNLinking>0003-6919</ISSNLinking>
</MedlineJournalInfo>
<CitationSubset>IM</CitationSubset>
<MeshHeadingList>
<MeshHeading>
<DescriptorName UI="D001431" MajorTopicYN="N">Bacteriological Techniques</DescriptorName>
<QualifierName UI="Q000295" MajorTopicYN="Y">instrumentation</QualifierName>
</MeshHeading>
<MeshHeading>
<DescriptorName UI="D008027" MajorTopicYN="Y">Light</DescriptorName>
</MeshHeading>
<MeshHeading>
<DescriptorName UI="D011830" MajorTopicYN="N">Radiation Effects</DescriptorName>
</MeshHeading>
<MeshHeading>
<DescriptorName UI="D012706" MajorTopicYN="N">Serratia marcescens</DescriptorName>
<QualifierName UI="Q000254" MajorTopicYN="N">growth &amp; development</QualifierName>
<QualifierName UI="Q000528" MajorTopicYN="Y">radiation effects</QualifierName>
</MeshHeading>
<MeshHeading>
<DescriptorName UI="D013210" MajorTopicYN="N">Staphylococcus</DescriptorName>
<QualifierName UI="Q000254" MajorTopicYN="N">growth &amp; development</QualifierName>
<QualifierName UI="Q000528" MajorTopicYN="Y">radiation effects</QualifierName>
</MeshHeading>
<MeshHeading>
<DescriptorName UI="D013242" MajorTopicYN="N">Sterilization</DescriptorName>
</MeshHeading>
</MeshHeadingList>
</MedlineCitation>
<PubmedData>
<History>
<PubMedPubDate PubStatus="pubmed">
<Year>1970</Year>
<Month>6</Month>
<Day>1</Day>
</PubMedPubDate>
<PubMedPubDate PubStatus="medline">
<Year>1970</Year>
<Month>6</Month>
<Day>1</Day>
<Hour>0</Hour>
<Minute>1</Minute>
</PubMedPubDate>
<PubMedPubDate PubStatus="entrez">
<Year>1970</Year>
<Month>6</Month>
<Day>1</Day>
<Hour>0</Hour>
<Minute>0</Minute>
</PubMedPubDate>
</History>
<PublicationStatus>ppublish</PublicationStatus>
<ArticleIdList>
<ArticleId IdType="pubmed">4917185</ArticleId>
<ArticleId IdType="pmc">PMC376844</ArticleId>
</ArticleIdList>
<ReferenceList>
<Reference>
<Citation>Photochem Photobiol. 1969 Jan;9(1):99-102</Citation>
<ArticleIdList>
<ArticleId IdType="pubmed">4889809</ArticleId>
</ArticleIdList>
</Reference>
<Reference>
<Citation>Endocrinology. 1969 Dec;85(6):1218-21</Citation>
<ArticleIdList>
<ArticleId IdType="pubmed">5347623</ArticleId>
</ArticleIdList>
</Reference>
<Reference>
<Citation>Arch Mikrobiol. 1956;24(1):60-79</Citation>
<ArticleIdList>
<ArticleId IdType="pubmed">13327987</ArticleId>
</ArticleIdList>
</Reference>
<Reference>
<Citation>J Bacteriol. 1941 Sep;42(3):353-66</Citation>
<ArticleIdList>
<ArticleId IdType="pubmed">16560457</ArticleId>
</ArticleIdList>
</Reference>
</ReferenceList>
</PubmedData>
</PubmedArticle>
</PubmedArticleSet>
@ -0,0 +1,214 @@
<xsl:stylesheet
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
xmlns:vocabulary="http://eu/dnetlib/transform/clean"
xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:dc="http://purl.org/dc/elements/1.1/"
exclude-result-prefixes="xsl vocabulary dateCleaner" version="2.0">
<xsl:param name="varOfficialName" />
<xsl:param name="varDataSourceId" />
<xsl:param name="varFP7" select="'corda_______::'" />
<xsl:param name="varH2020" select="'corda__h2020::'" />
<xsl:param name="repoCode"
select="substring-before(//*[local-name() = 'header']/*[local-name()='recordIdentifier'], ':')" />
<xsl:param name="index" select="0" />
<xsl:param name="transDate" select="current-dateTime()" />

<xsl:template match="/">
<record>
<xsl:apply-templates select="//*[local-name() = 'header']" />
<metadata>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:title" />
<xsl:with-param name="targetElement" select="'dc:title'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:creator/replace(., '^(.*)\|.*$', '$1')" />
<xsl:with-param name="targetElement" select="'dc:creator'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:contributor" />
<xsl:with-param name="targetElement" select="'dc:contributor'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:description" />
<xsl:with-param name="targetElement" select="'dc:description'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:subject" />
<xsl:with-param name="targetElement" select="'dc:subject'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:publisher" />
<xsl:with-param name="targetElement" select="'dc:publisher'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:format" />
<xsl:with-param name="targetElement" select="'dc:format'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:type" />
<xsl:with-param name="targetElement" select="'dc:type'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:source" />
<xsl:with-param name="targetElement" select="'dc:source'" />
</xsl:call-template>
<dc:language>
<xsl:value-of select="vocabulary:clean( //dc:language, 'dnet:languages')" />
</dc:language>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:rights" />
<xsl:with-param name="targetElement" select="'dc:rights'" />
</xsl:call-template>
<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:relation[not(starts-with(.,'info:cnr-pdr'))]" />
<xsl:with-param name="targetElement" select="'dc:relation'" />
</xsl:call-template>

<xsl:call-template name="allElements">
<xsl:with-param name="sourceElement" select="//dc:identifier[starts-with(., 'http')]" />
<xsl:with-param name="targetElement" select="'dc:identifier'" />
</xsl:call-template>
<xsl:for-each select="//dc:relation">
<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', 'i')">
<oaf:projectid>
<xsl:value-of select="concat($varFP7, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/fp7/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
</oaf:projectid>
</xsl:if>
<xsl:if test="matches(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', 'i')">
<oaf:projectid>
<xsl:value-of select="concat($varH2020, replace(normalize-space(.), '(info:eu-repo/grantagreement/ec/h2020/)(\d\d\d\d\d\d)(.*)', '$2', 'i'))" />
</oaf:projectid>
</xsl:if>
</xsl:for-each>

<oaf:accessrights>
<xsl:value-of select="vocabulary:clean( //dc:rights, 'dnet:access_modes')" />
</oaf:accessrights>

<xsl:variable name="varCobjCategory" select="vocabulary:clean( //dc:type, 'dnet:publication_resource')" />
<xsl:variable name="varSuperType" select="vocabulary:clean( $varCobjCategory, 'dnet:result_typologies')" />
<dr:CobjCategory type="{$varSuperType}"><xsl:value-of select="$varCobjCategory" /></dr:CobjCategory>

<xsl:variable name="varRefereedConvt" select="for $i in (//dc:type, //dc:description, //oai:setSpec) return vocabulary:clean( normalize-space($i), 'dnet:review_levels')" />
<xsl:variable name="varRefereedIdntf" select="(//*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '(^|.*[\.\-_/\s\(\)%\d#])pre[\.\-_/\s\(\)%\d#]?prints?([\.\-_/\s\(\)%\d#].*)?$')][count(//dc:identifier) = 1]/'0002', //*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '(^|.*[\.\-_/\s\(\)%\d#])refereed([\.\-_/\s\(\)\d%\d#].*)?$')]/'0001', //*[string(node-name(.)) = 'dc:identifier' and matches(lower-case(.), '.*-peer-reviewed-(fulltext-)?article-.*')]/'0001')" />
<xsl:variable name="varRefereedSourc" select="//*[string(node-name(.)) = ('dc:source', 'dc:publisher') and matches(lower-case(.), '^(.*\s)?pre[\s\-_]*prints?([\s\.,].*)?$')]/'0002'" />
<xsl:variable name="varRefereedDescr" select="(//dc:description[matches(lower-case(.), '.*(this\s*book|this\s*volume|it)\s*constitutes\s*the\s*(thoroughly\s*)?refereed') or matches(lower-case(.), '.*peer[\.\-_/\s\(\)]?review\s*under\s*responsibility\s*of.*') or matches(lower-case(.), '(this|a)\s*(article|preprint)\s*(has\s*been\s*)?(peer[\-\s]*)?reviewed\s*and\s*recommended\s*by\s*peer[\-\s]*community')]/'0001', //dc:description[matches(., '^version\s*(préliminaire.*|preliminary.*|0$)')]/'0002')" />
<xsl:variable name="varRefereedTitle" select="(//dc:title[matches(lower-case(.), '.*\[.*peer[\s\-\._]*review\s*:.*\]\s*$')]/'0001', //dc:title[matches(lower-case(.), '.*\(\s*pre[\s\-\._]*prints?\s*\)\s*$')]/'0002')" />
<xsl:variable name="varRefereedSubjt" select="(//dc:subject[matches(lower-case(.), '^\s*refereed\s*$')][//oaf:datasourceprefix = 'narcis______']/'0001', //dc:subject[matches(lower-case(.), '^\s*no[nt].{0,3}refereed\s*$')][//oaf:datasourceprefix = 'narcis______']/'0002')" />
<xsl:variable name="varRefereed" select="($varRefereedConvt, $varRefereedIdntf, $varRefereedSourc, $varRefereedDescr, $varRefereedTitle, $varRefereedSubjt)" />
<xsl:choose>
<xsl:when test="count($varRefereed[. = '0001']) > 0">
<oaf:refereed>
<xsl:value-of select="'0001'" />
</oaf:refereed>
</xsl:when>
<xsl:when test="count($varRefereed[. = '0002']) > 0">
<oaf:refereed>
<xsl:value-of select="'0002'" />
</oaf:refereed>
</xsl:when>
</xsl:choose>

<oaf:dateAccepted>
<xsl:value-of select="dateCleaner:dateISO( //dc:date[1] )" />
</oaf:dateAccepted>

<xsl:if test="//dc:relation[starts-with(., 'http')] and //dc:rights[.='info:eu-repo/semantics/openAccess']">
<oaf:fulltext>
<xsl:value-of select="//dc:relation[starts-with(., 'http')]" />
</oaf:fulltext>
</xsl:if>

<oaf:hostedBy name="{$varOfficialName}" id="{$varDataSourceId}" />
<oaf:collectedFrom name="{$varOfficialName}" id="{$varDataSourceId}" />

<xsl:variable name="varKnownFileEndings" select="('.bmp', '.doc', '.docx', '.epub', '.flv', '.jpeg', '.jpg', '.m4v', '.mp4', '.mpg', '.odp', '.pdf', '.png', '.ppt', '.tiv', '.txt', '.xls', '.xlsx', '.zip')" />
<xsl:variable name="varIdDoi" select="distinct-values((//dc:identifier[starts-with(., '10.')][matches(., '(10[.][0-9]{4,}[^\s/>]*/[^\s>]+)')], //dc:identifier[starts-with(., 'http') and (contains(., '://dx.doi.org/10.') or contains(., '://doi.org/10.'))]/substring-after(., 'doi.org/'), //dc:identifier[starts-with(lower-case(.), 'doi:10.')]/substring-after(lower-case(.), 'doi:')))" />
<xsl:for-each select="$varIdDoi">
<oaf:identifier identifierType="doi">
<xsl:value-of select="." />
</oaf:identifier>
</xsl:for-each>

<xsl:variable name="varIdHdl" select="distinct-values(//dc:identifier[starts-with(., 'http') and contains(., '://hdl.handle.net/')]/substring-after(., 'hdl.handle.net/'))" />
<xsl:for-each select="$varIdHdl" >
<oaf:identifier identifierType="handle">
<xsl:value-of select="." />
</oaf:identifier>
</xsl:for-each>

<xsl:variable name="varIdUrn" select="distinct-values(//dc:identifier[starts-with(., 'urn:nbn:nl:') or starts-with(., 'URN:NBN:NL:')])" />
<xsl:for-each select="$varIdUrn">
<oaf:identifier identifierType="urn">
<xsl:value-of select="." />
</oaf:identifier>
</xsl:for-each>

<xsl:variable name="varOrigBaseUrl" select="//*[local-name() = 'about']/*[local-name() = 'provenance']//*[local-name() = 'originDescription' and not(./*[local-name() = 'originDescription'])]/*[local-name() = 'baseURL']" />
<xsl:variable name="varIdLdpg" select="distinct-values(//dc:identifier[(contains(substring-after(., '://'), '/') and contains($varOrigBaseUrl, substring-before(substring-after(., '://'), '/'))) or (contains(substring-after(., '://'), ':') and contains($varOrigBaseUrl, substring-before(substring-after(., '://'), ':')))][not(replace(lower-case(.), '.*(\.[a-z]*)$', '$1') = $varKnownFileEndings)])" />
<xsl:for-each select="$varIdLdpg">
<oaf:identifier identifierType="landingPage">
<xsl:value-of select="." />
</oaf:identifier>
</xsl:for-each>

<xsl:variable name="varIdUrl" select="distinct-values(//dc:identifier[starts-with(., 'http')][not(contains(., '://dx.doi.org/') or contains(., '://doi.org/') or contains(., '://hdl.handle.net/'))][count(index-of($varIdLdpg, .)) = 0])" />
<xsl:for-each select="$varIdUrl">
<oaf:identifier identifierType="url">
<xsl:value-of select="." />
</oaf:identifier>
</xsl:for-each>

<xsl:for-each select="//oai:setSpec">
<xsl:variable name="rorDsId" select="vocabulary:clean(., 'cnr:institutes')" />
<xsl:if test="contains($rorDsId, '/ror.org/')">
<oaf:relation relType="resultOrganization" subRelType="affiliation" relClass="hasAuthorInstitution">
<xsl:value-of select="concat('ror_________::', $rorDsId)" />
</oaf:relation>
</xsl:if>
</xsl:for-each>

</metadata>

<xsl:copy-of select="//*[local-name() = 'about']" />
</record>
</xsl:template>

<xsl:template name="allElements">
<xsl:param name="sourceElement" />
<xsl:param name="targetElement" />

<xsl:for-each select="$sourceElement">
<xsl:element name="{$targetElement}">
<xsl:value-of select="normalize-space(.)" />
</xsl:element>
</xsl:for-each>
</xsl:template>

<xsl:template match="//*[local-name() = 'header']">
<xsl:copy>
<xsl:apply-templates select="node()|@*" />
<xsl:element name="dr:dateOfTransformation">
<xsl:value-of select="$transDate" />
</xsl:element>
</xsl:copy>
</xsl:template>

<xsl:template match="node()|@*">
<xsl:copy>
<xsl:apply-templates select="node()|@*" />
</xsl:copy>
</xsl:template>

</xsl:stylesheet>
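
The $varIdDoi variable above extracts DOIs with the pattern 10[.][0-9]{4,}[^\s/>]*/[^\s>]+. A self-contained Java sketch of the same match (class name and sample input are illustrative, not part of the commit):

	import java.util.regex.Matcher;
	import java.util.regex.Pattern;

	// Illustrative only: exercises the DOI pattern used by $varIdDoi above.
	public class DoiPatternDemo {
		public static void main(String[] args) {
			final Pattern doi = Pattern.compile("(10[.][0-9]{4,}[^\\s/>]*/[^\\s>]+)");
			final Matcher m = doi.matcher("https://doi.org/10.1234/example-5678");
			if (m.find()) {
				System.out.println(m.group(1)); // prints 10.1234/example-5678
			}
		}
	}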
@ -0,0 +1,57 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns="http://www.openarchives.org/OAI/2.0/">
<header>
<identifier><![CDATA[oai:it.cnr:prodotti:433382]]></identifier>
<datestamp><![CDATA[2020-11-30T15:32:03Z]]></datestamp>
<setSpec><![CDATA[openaire]]></setSpec>
<setSpec><![CDATA[CDS027]]></setSpec>
<setSpec><![CDATA[CDS080]]></setSpec>
</header>
<metadata>
<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<dc:type><![CDATA[info:eu-repo/semantics/conferenceObject]]></dc:type>
<dc:type><![CDATA[Presentazione]]></dc:type>
<dc:title><![CDATA[A multiscale observing approach for understanding acidification process in a marginal sea (northern Adriatic)]]></dc:title>
<dc:creator><![CDATA[Cantoni C.]]></dc:creator>
<dc:creator><![CDATA[Barba L.]]></dc:creator>
<dc:creator><![CDATA[Bastianini M.]]></dc:creator>
<dc:creator><![CDATA[Bortoluzzi G.]]></dc:creator>
<dc:creator><![CDATA[Celio M.]]></dc:creator>
<dc:creator><![CDATA[Chiggiato J.]]></dc:creator>
<dc:creator><![CDATA[Cozzi S.]]></dc:creator>
<dc:creator><![CDATA[Luchetta A.]]></dc:creator>
<dc:creator><![CDATA[Ravaioli M.]]></dc:creator>
<dc:creator><![CDATA[Sparnocchia S.]]></dc:creator>
<dc:language><![CDATA[eng]]></dc:language>
<dc:description><![CDATA[The Northern Adriatic is a shallow, semi-enclosed industrialized sub-basin of the Mediterranean affected by significant ecosystem
changes, which are studied through several research activities including the long-term monitoring of ILTER international network.
Changes of pHT (-0.06) and TA (+74 µmol/kg) in dense winter waters over the last 25 years already showed that this area is prone to
acidification process under a complex inorganic carbon chemistry variability. To understand these changes, monthly sampling of the
main biogeochemical and biological parameters has been carrying out since 2008 by a fixed station (PALOMA, Gulf of Trieste). In 2013
the site has been implemented with continuous pCO2 measurements at 3 m depth and has been regularly visiting during basin wide
surveys for the last two years. The combination of automated in situ measurements, monthly samplings and basin scale
oceanographic cruises is used to better understand the processes controlling air-sea CO2 fluxes and inorganic carbon chemistry under
three different scenarios: an extreme event of dense water formation, the phytoplankton blooms associated with riverine inputs and
the late-summer marked oxygen under saturation in the deeper waters.]]></dc:description>
<dc:source><![CDATA[ASLO - Aquatic Sciences Meeting, Granada, Spagna, 22-27/02/2015]]></dc:source>
<dc:source><![CDATA[info:cnr-pdr/source/autori:Cantoni C., Barba L., Bastianini M., Bortoluzzi G., Celio M., Chiggiato J., Cozzi S., Luchetta A., Ravaioli M., Sparnocchia S./congresso_nome:ASLO - Aquatic Sciences Meeting/congresso_luogo:Granada, Spagna/congresso_data:22-27%2F02%2F2015/anno:2015/pagina_da:/pagina_a:/intervallo_pagine:]]></dc:source>
<dc:date><![CDATA[2015]]></dc:date>
<dc:identifier><![CDATA[http://www.cnr.it/prodotto/i/433382]]></dc:identifier>
<dc:identifier><![CDATA[https://publications.cnr.it/doc/433382]]></dc:identifier>
<dc:identifier><![CDATA[http://sgmeet.com/aslo/granada2015/]]></dc:identifier>
<dc:relation><![CDATA[info:eu-repo/grantAgreement/EC/FP7/211574//Integrated Carbon Observation System/ICOS]]></dc:relation>
<dc:relation><![CDATA[info:cnr-pdr/author/matricola:502/SPARNOCCHIA/STEFANIA]]></dc:relation>
<dc:relation><![CDATA[info:cnr-pdr/author/matricola:990/BASTIANINI/MAURO]]></dc:relation>
<dc:relation><![CDATA[info:cnr-pdr/author/matricola:5185/BARBA/LUISA]]></dc:relation>
<dc:relation><![CDATA[info:cnr-pdr/author/matricola:5453/COZZI/STEFANO]]></dc:relation>
<dc:relation><![CDATA[info:cnr-pdr/author/matricola:12491/LUCHETTA/ANNA]]></dc:relation>
<dc:relation><![CDATA[info:cnr-pdr/author/matricola:12837/CANTONI/CAROLINA]]></dc:relation>
<dc:relation><![CDATA[info:cnr-pdr/author/matricola:18161/RAVAIOLI/MARIANGELA]]></dc:relation>
<dc:rights><![CDATA[info:eu-repo/semantics/openAccess]]></dc:rights>
<dc:subject><![CDATA[ocean acidification]]></dc:subject>
<dc:subject><![CDATA[Northern Adriatic Sea]]></dc:subject>
<dc:subject><![CDATA[inorganic carbon system]]></dc:subject>
<dc:subject><![CDATA[PALOMA]]></dc:subject>
</oai_dc:dc>
</metadata>
</record>
@ -1232,4 +1232,268 @@ dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-学術雑誌論文(査
dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-紀要論文(査読有り)
dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-雑誌記事(査読有り)
dnet:review_levels @=@ 0001 @=@ 原著論文(査読有り)
dnet:review_levels @=@ 0001 @=@ 査読論文
cnr:institutes @=@ https://ror.org/00brf2d87 @=@ CDS001
cnr:institutes @=@ https://ror.org/00brf2d87 @=@ IDASC - Istituto di Acustica e Sensoristica \"Orso Mario Corbino\"
cnr:institutes @=@ https://ror.org/006qkqr45 @=@ CDS002
cnr:institutes @=@ https://ror.org/006qkqr45 @=@ IAMC - Istituto per l'ambiente marino costiero
cnr:institutes @=@ https://ror.org/054ye0e45 @=@ CDS003
cnr:institutes @=@ https://ror.org/054ye0e45 @=@ IASI - Istituto di analisi dei sistemi ed informatica \"Antonio Ruberti\"
cnr:institutes @=@ https://ror.org/00ygy3d85 @=@ CDS004
cnr:institutes @=@ https://ror.org/00ygy3d85 @=@ IAC - Istituto per le applicazioni del calcolo \"Mauro Picone\"
cnr:institutes @=@ https://ror.org/000sy1f36 @=@ CDS005
cnr:institutes @=@ https://ror.org/000sy1f36 @=@ IASF - Istituto di astrofisica spaziale e fisica cosmica
cnr:institutes @=@ https://ror.org/00x5wpm25 @=@ CDS006
cnr:institutes @=@ https://ror.org/00x5wpm25 @=@ IBAM - Istituto per i beni archeologici e monumentali
cnr:institutes @=@ https://ror.org/03eqeqg74 @=@ CDS007
cnr:institutes @=@ https://ror.org/03eqeqg74 @=@ IBP - Istituto di biochimica delle proteine
cnr:institutes @=@ https://ror.org/041xzk838 @=@ CDS008
cnr:institutes @=@ https://ror.org/041xzk838 @=@ IBF - Istituto di biofisica
cnr:institutes @=@ https://ror.org/00s2j5046 @=@ CDS009
cnr:institutes @=@ https://ror.org/00s2j5046 @=@ IBFM - Istituto di bioimmagini e fisiologia molecolare
cnr:institutes @=@ __CDS010__ @=@ CDS010
cnr:institutes @=@ __CDS010__ @=@ IBAF - Istituto di biologia agro-ambientale e forestale
cnr:institutes @=@ __CDS011__ @=@ CDS011
cnr:institutes @=@ __CDS011__ @=@ IBC - Istituto di biologia cellulare
cnr:institutes @=@ https://ror.org/02e5sbe24 @=@ CDS012
cnr:institutes @=@ https://ror.org/02e5sbe24 @=@ IBBA - Istituto di biologia e biotecnologia agraria
cnr:institutes @=@ https://ror.org/01nyatq71 @=@ CDS013
cnr:institutes @=@ https://ror.org/01nyatq71 @=@ IBPM - Istituto di biologia e patologia molecolari
cnr:institutes @=@ https://ror.org/01dy2q607 @=@ CDS014
cnr:institutes @=@ https://ror.org/01dy2q607 @=@ IBIM - Istituto di biomedicina e di immunologia molecolare \"Alberto Monroy\"
cnr:institutes @=@ https://ror.org/05nzf7q96 @=@ CDS015
cnr:institutes @=@ https://ror.org/05nzf7q96 @=@ IBIOM - Istituto di Biomembrane, Bioenergetica e Biotecnologie Molecolari
cnr:institutes @=@ https://ror.org/05m1yqp60 @=@ CDS016
cnr:institutes @=@ https://ror.org/05m1yqp60 @=@ IBIMET - Istituto di biometeorologia
cnr:institutes @=@ https://ror.org/03rqtqb02 @=@ CDS017
cnr:institutes @=@ https://ror.org/03rqtqb02 @=@ IBB - Istituto di biostrutture e bioimmagini
cnr:institutes @=@ https://ror.org/04r5fge26 @=@ CDS018
cnr:institutes @=@ https://ror.org/04r5fge26 @=@ ICAR - Istituto di calcolo e reti ad alte prestazioni
cnr:institutes @=@ https://ror.org/03wyf0g15 @=@ CDS019
cnr:institutes @=@ https://ror.org/03wyf0g15 @=@ ICB - Istituto di chimica biomolecolare
cnr:institutes @=@ https://ror.org/02fkw1114 @=@ CDS020
cnr:institutes @=@ https://ror.org/02fkw1114 @=@ ICCOM - Istituto di chimica dei composti organo metallici
cnr:institutes @=@ https://ror.org/0141vn777 @=@ CDS021
cnr:institutes @=@ https://ror.org/0141vn777 @=@ ICRM - Istituto di chimica del riconoscimento molecolare
cnr:institutes @=@ __CDS022__ @=@ CDS022
cnr:institutes @=@ __CDS022__ @=@ ICTP - Istituto di chimica e tecnologia dei polimeri
cnr:institutes @=@ __CDS023__ @=@ CDS023
cnr:institutes @=@ __CDS023__ @=@ ICIS - Istituto di chimica inorganica e delle superfici
cnr:institutes @=@ https://ror.org/00be3zh53 @=@ CDS024
cnr:institutes @=@ https://ror.org/00be3zh53 @=@ ISASI - Istituto di Scienze Applicate e Sistemi Intelligenti \"Eduardo Caianiello\"
cnr:institutes @=@ __CDS025__ @=@ CDS025
cnr:institutes @=@ __CDS025__ @=@ ICEVO - Istituto di Studi sulle Civiltà dell'Egeo e del Vicino Oriente
cnr:institutes @=@ https://ror.org/02ynrme92 @=@ CDS026
cnr:institutes @=@ https://ror.org/02ynrme92 @=@ ICVBC - Istituto per la conservazione e valorizzazione dei beni culturali
cnr:institutes @=@ https://ror.org/05wba8r86 @=@ CDS027
cnr:institutes @=@ https://ror.org/05wba8r86 @=@ IC - Istituto di cristallografia
cnr:institutes @=@ https://ror.org/03z58xd74 @=@ CDS028
cnr:institutes @=@ https://ror.org/03z58xd74 @=@ IDPA - Istituto per la dinamica dei processi ambientali
cnr:institutes @=@ https://ror.org/00n4jbh84 @=@ CDS029
cnr:institutes @=@ https://ror.org/00n4jbh84 @=@ IEIIT - Istituto di elettronica e di ingegneria dell'informazione e delle telecomunicazioni
cnr:institutes @=@ https://ror.org/04sn06036 @=@ CDS030
cnr:institutes @=@ https://ror.org/04sn06036 @=@ IEOS - Istituto per l'endocrinologia e l'oncologia \"Gaetano Salvatore\"
cnr:institutes @=@ https://ror.org/01rg40y89 @=@ CDS031
cnr:institutes @=@ https://ror.org/01rg40y89 @=@ ICMATE - Istituto di Chimica della Materia Condensata e di Tecnologie per l'Energia
cnr:institutes @=@ https://ror.org/00dqega85 @=@ CDS032
cnr:institutes @=@ https://ror.org/00dqega85 @=@ IFAC - Istituto di fisica applicata \"Nello Carrara\"
cnr:institutes @=@ https://ror.org/02n2bgz18 @=@ CDS033
cnr:institutes @=@ https://ror.org/02n2bgz18 @=@ IFP - Istituto di fisica del plasma \"Piero Caldirola\"
cnr:institutes @=@ __CDS034__ @=@ CDS034
cnr:institutes @=@ __CDS034__ @=@ IFSI - Istituto di fisica dello spazio interplanetario
cnr:institutes @=@ https://ror.org/01kdj2848 @=@ CDS035
cnr:institutes @=@ https://ror.org/01kdj2848 @=@ IFC - Istituto di fisiologia clinica
cnr:institutes @=@ https://ror.org/049ebw417 @=@ CDS036
cnr:institutes @=@ https://ror.org/049ebw417 @=@ IFN - Istituto di fotonica e nanotecnologie
cnr:institutes @=@ https://ror.org/01f5tnx94 @=@ CDS037
cnr:institutes @=@ https://ror.org/01f5tnx94 @=@ IGI - Istituto gas ionizzati
cnr:institutes @=@ __CDS038__ @=@ CDS038
cnr:institutes @=@ __CDS038__ @=@ IGP - Istituto di genetica delle popolazioni
cnr:institutes @=@ https://ror.org/04hadk112 @=@ CDS039
cnr:institutes @=@ https://ror.org/04hadk112 @=@ IGB - Istituto di genetica e biofisica \"Adriano Buzzati Traverso\"
cnr:institutes @=@ https://ror.org/03qpd8w66 @=@ CDS040
cnr:institutes @=@ https://ror.org/03qpd8w66 @=@ IGM - Istituto di genetica molecolare \"Luigi Luca Cavalli Sforza\"
cnr:institutes @=@ https://ror.org/01gtsa866 @=@ CDS041
cnr:institutes @=@ https://ror.org/01gtsa866 @=@ IBBR - Istituto di Bioscienze e Biorisorse
cnr:institutes @=@ https://ror.org/00ytw6m58 @=@ CDS042
cnr:institutes @=@ https://ror.org/00ytw6m58 @=@ IGAG - Istituto di geologia ambientale e geoingegneria
cnr:institutes @=@ https://ror.org/015bmra78 @=@ CDS043
cnr:institutes @=@ https://ror.org/015bmra78 @=@ IGG - Istituto di geoscienze e georisorse
cnr:institutes @=@ https://ror.org/02gdcn153 @=@ CDS044
cnr:institutes @=@ https://ror.org/02gdcn153 @=@ IIT - Istituto di informatica e telematica
cnr:institutes @=@ __CDS045__ @=@ CDS045
cnr:institutes @=@ __CDS045__ @=@ ISIB - Istituto di ingegneria biomedica
cnr:institutes @=@ https://ror.org/05hky6p02 @=@ CDS046
cnr:institutes @=@ https://ror.org/05hky6p02 @=@ IIA - Istituto sull'inquinamento atmosferico
cnr:institutes @=@ https://ror.org/011n2hw53 @=@ CDS047
cnr:institutes @=@ https://ror.org/011n2hw53 @=@ ILIESI - Istituto per il lessico intellettuale europeo e storia delle idee
cnr:institutes @=@ https://ror.org/028g3pe33 @=@ CDS048
cnr:institutes @=@ https://ror.org/028g3pe33 @=@ ILC - Istituto di linguistica computazionale \"Antonio Zampolli\"
cnr:institutes @=@ __CDS049__ @=@ CDS049
cnr:institutes @=@ __CDS049__ @=@ IMAMOTER - Istituto per le macchine agricole e movimento terra
cnr:institutes @=@ https://ror.org/03m0n3c07 @=@ CDS050
cnr:institutes @=@ https://ror.org/03m0n3c07 @=@ IMATI - Istituto di matematica applicata e tecnologie informatiche \"Enrico Magenes\"
cnr:institutes @=@ __CDS051__ @=@ CDS051
|
||||
cnr:institutes @=@ __CDS051__ @=@ IMCB - Istituto per i materiali compositi e biomedici
|
||||
cnr:institutes @=@ https://ror.org/00z8ws214 @=@ CDS052
|
||||
cnr:institutes @=@ https://ror.org/00z8ws214 @=@ IMEM - Istituto dei materiali per l'elettronica ed il magnetismo
|
||||
cnr:institutes @=@ __CDS053__ @=@ CDS053
|
||||
cnr:institutes @=@ __CDS053__ @=@ ISB - Istituto per i Sistemi Biologici
|
||||
cnr:institutes @=@ https://ror.org/00bc51d88 @=@ CDS054
|
||||
cnr:institutes @=@ https://ror.org/00bc51d88 @=@ NANOTEC - Istituto di Nanotecnologia
|
||||
cnr:institutes @=@ https://ror.org/024ye7w89 @=@ CDS055
|
||||
cnr:institutes @=@ https://ror.org/024ye7w89 @=@ IMAA - Istituto di metodologie per l'analisi ambientale
|
||||
cnr:institutes @=@ __CDS056__ @=@ CDS056
|
||||
cnr:institutes @=@ __CDS056__ @=@ IMGC - Istituto di metrologia \"Gustavo Colonnetti\"
|
||||
cnr:institutes @=@ https://ror.org/05vk2g845 @=@ CDS057
|
||||
cnr:institutes @=@ https://ror.org/05vk2g845 @=@ IMM - Istituto per la microelettronica e microsistemi
|
||||
cnr:institutes @=@ https://ror.org/02qwy8e97 @=@ CDS058
|
||||
cnr:institutes @=@ https://ror.org/02qwy8e97 @=@ IM - Istituto motori
|
||||
cnr:institutes @=@ __CDS059__ @=@ CDS059
|
||||
cnr:institutes @=@ __CDS059__ @=@ INMM - Istituto di neurobiologia e medicina molecolare
|
||||
cnr:institutes @=@ https://ror.org/02dr63s31 @=@ CDS060
|
||||
cnr:institutes @=@ https://ror.org/02dr63s31 @=@ IRGB - Istituto di Ricerca Genetica e Biomedica
|
||||
cnr:institutes @=@ https://ror.org/0240rwx68 @=@ CDS061
|
||||
cnr:institutes @=@ https://ror.org/0240rwx68 @=@ IN - Istituto di neuroscienze
|
||||
cnr:institutes @=@ https://ror.org/02rzxrg25 @=@ CDS062
|
||||
cnr:institutes @=@ https://ror.org/02rzxrg25 @=@ OVI - Istituto opera del vocabolario italiano
|
||||
cnr:institutes @=@ https://ror.org/05patmk97 @=@ CDS063
|
||||
cnr:institutes @=@ https://ror.org/05patmk97 @=@ IPCF - Istituto per i processi chimico-fisici
|
||||
cnr:institutes @=@ __CDS064__ @=@ CDS064
|
||||
cnr:institutes @=@ __CDS064__ @=@ IPP - Istituto per la protezione delle piante
|
||||
cnr:institutes @=@ https://ror.org/029st3z03 @=@ CDS065
|
||||
cnr:institutes @=@ https://ror.org/029st3z03 @=@ IRA - Istituto di radioastronomia
|
||||
cnr:institutes @=@ https://ror.org/0040zx077 @=@ CDS066
|
||||
cnr:institutes @=@ https://ror.org/0040zx077 @=@ IRPI - Istituto di ricerca per la protezione idrogeologica
|
||||
cnr:institutes @=@ https://ror.org/044bfsy89 @=@ CDS067
|
||||
cnr:institutes @=@ https://ror.org/044bfsy89 @=@ IRCRES - Istituto di Ricerca sulla Crescita Economica Sostenibile
|
||||
cnr:institutes @=@ https://ror.org/01n1ayq61 @=@ CDS068
|
||||
cnr:institutes @=@ https://ror.org/01n1ayq61 @=@ IRPPS - Istituto di ricerche sulla popolazione e le politiche sociali
|
||||
cnr:institutes @=@ https://ror.org/02db0kh50 @=@ CDS069
|
||||
cnr:institutes @=@ https://ror.org/02db0kh50 @=@ IRSA - Istituto di ricerca sulle acque
|
||||
cnr:institutes @=@ https://ror.org/05813wx75 @=@ CDS070
|
||||
cnr:institutes @=@ https://ror.org/05813wx75 @=@ IRC - Istituto di ricerche sulla combustione
|
||||
cnr:institutes @=@ https://ror.org/04vnwke91 @=@ CDS071
|
||||
cnr:institutes @=@ https://ror.org/04vnwke91 @=@ IRISS - Istituto di Ricerca su Innovazione e Servizi per lo Sviluppo
|
||||
cnr:institutes @=@ https://ror.org/02wxw4x45 @=@ CDS072
|
||||
cnr:institutes @=@ https://ror.org/02wxw4x45 @=@ IREA - Istituto per il rilevamento elettromagnetico dell'ambiente
|
||||
cnr:institutes @=@ https://ror.org/01j6drw72 @=@ CDS073
|
||||
cnr:institutes @=@ https://ror.org/01j6drw72 @=@ ISTEC - Istituto di scienza e tecnologia dei materiali ceramici
|
||||
cnr:institutes @=@ https://ror.org/05kacka20 @=@ CDS074
|
||||
cnr:institutes @=@ https://ror.org/05kacka20 @=@ ISTI - Istituto di scienza e tecnologie dell'informazione \"Alessandro Faedo\"
|
||||
cnr:institutes @=@ https://ror.org/00n8ttd98 @=@ CDS075
|
||||
cnr:institutes @=@ https://ror.org/00n8ttd98 @=@ ISAC - Istituto di scienze dell'atmosfera e del clima
|
||||
cnr:institutes @=@ https://ror.org/0013zhk30 @=@ CDS076
|
||||
cnr:institutes @=@ https://ror.org/0013zhk30 @=@ ISA - Istituto di Scienze dell'Alimentazione
|
||||
cnr:institutes @=@ https://ror.org/03x7xkr71 @=@ CDS077
|
||||
cnr:institutes @=@ https://ror.org/03x7xkr71 @=@ ISPA - Istituto di scienze delle produzioni alimentari
|
||||
cnr:institutes @=@ https://ror.org/05w9g2j85 @=@ CDS078
|
||||
cnr:institutes @=@ https://ror.org/05w9g2j85 @=@ ISTC - Istituto di scienze e tecnologie della cognizione
|
||||
cnr:institutes @=@ https://ror.org/032tyv240 @=@ CDS079
|
||||
cnr:institutes @=@ https://ror.org/032tyv240 @=@ ISTM - Istituto di scienze e tecnologie molecolari
|
||||
cnr:institutes @=@ https://ror.org/02hdf6119 @=@ CDS080
|
||||
cnr:institutes @=@ https://ror.org/02hdf6119 @=@ ISMAR - Istituto di scienze marine
|
||||
cnr:institutes @=@ https://ror.org/01yg57d71 @=@ CDS081
|
||||
cnr:institutes @=@ https://ror.org/01yg57d71 @=@ ISN - Istituto di scienze neurologiche
|
||||
cnr:institutes @=@ https://ror.org/021z1mz76 @=@ CDS082
|
||||
cnr:institutes @=@ https://ror.org/021z1mz76 @=@ ISOF - Istituto per la sintesi organica e la fotoreattività
|
||||
cnr:institutes @=@ https://ror.org/01wqae691 @=@ CDS083
|
||||
cnr:institutes @=@ https://ror.org/01wqae691 @=@ ISPAAM - Istituto per il sistema produzione animale in ambiente Mediterraneo
|
||||
cnr:institutes @=@ __CDS084__ @=@ CDS084
|
||||
cnr:institutes @=@ __CDS084__ @=@ ISAFoM - Istituto per i sistemi agricoli e forestali del mediterraneo
|
||||
cnr:institutes @=@ https://ror.org/00awwz417 @=@ CDS085
|
||||
cnr:institutes @=@ https://ror.org/00awwz417 @=@ ISPF - Istituto per la storia del pensiero filosofico e scientifico moderno
|
||||
cnr:institutes @=@ https://ror.org/03a111314 @=@ CDS086
|
||||
cnr:institutes @=@ https://ror.org/03a111314 @=@ ISEM - Istituto di storia dell'Europa mediterranea
|
||||
cnr:institutes @=@ https://ror.org/01zz9wh30 @=@ CDS087
|
||||
cnr:institutes @=@ https://ror.org/01zz9wh30 @=@ ISM - Istituto di struttura della materia
|
||||
cnr:institutes @=@ https://ror.org/035y5td47 @=@ CDS088
|
||||
cnr:institutes @=@ https://ror.org/035y5td47 @=@ ISGI - Istituto di studi giuridici internazionali
|
||||
cnr:institutes @=@ __CDS089__ @=@ CDS089
|
||||
cnr:institutes @=@ __CDS089__ @=@ ISPRI - Istituto sperimentale di studi socio - economici sull'innovazione e le politiche della ricerca
|
||||
cnr:institutes @=@ https://ror.org/051t1q308 @=@ CDS090
|
||||
cnr:institutes @=@ https://ror.org/051t1q308 @=@ ISSIA - Istituto di studi sui sistemi intelligenti per l'automazione
|
||||
cnr:institutes @=@ https://ror.org/05k3cs357 @=@ CDS091
|
||||
cnr:institutes @=@ https://ror.org/05k3cs357 @=@ ISSIRFA - Istituto di studi sui sistemi regionali federali e sulle autonomie \"Massimo Severo Giannini\"
|
||||
cnr:institutes @=@ https://ror.org/02gcxw165 @=@ CDS092
|
||||
cnr:institutes @=@ https://ror.org/02gcxw165 @=@ ISMA - Istituto di Studi sul Mediterraneo Antico
|
||||
cnr:institutes @=@ https://ror.org/05db0es39 @=@ CDS093
|
||||
cnr:institutes @=@ https://ror.org/05db0es39 @=@ ISMed - Istituto di studi sul Mediterraneo
|
||||
cnr:institutes @=@ https://ror.org/029k6t707 @=@ CDS094
|
||||
cnr:institutes @=@ https://ror.org/029k6t707 @=@ ISE - Istituto per lo studio degli ecosistemi
|
||||
cnr:institutes @=@ https://ror.org/00w6r1881 @=@ CDS095
|
||||
cnr:institutes @=@ https://ror.org/00w6r1881 @=@ ISMN - Istituto per lo studio dei materiali nanostrutturati
|
||||
cnr:institutes @=@ https://ror.org/01mfmr054 @=@ CDS096
|
||||
cnr:institutes @=@ https://ror.org/01mfmr054 @=@ ISMAC - Istituto per lo studio delle macromolecole
|
||||
cnr:institutes @=@ https://ror.org/058nrs650 @=@ CDS097
|
||||
cnr:institutes @=@ https://ror.org/058nrs650 @=@ ITM - Istituto per la tecnologia delle membrane
|
||||
cnr:institutes @=@ https://ror.org/0331xj092 @=@ CDS098
|
||||
cnr:institutes @=@ https://ror.org/0331xj092 @=@ ITABC - Istituto per le tecnologie applicate ai beni culturali
|
||||
cnr:institutes @=@ https://ror.org/052q58629 @=@ CDS099
|
||||
cnr:institutes @=@ https://ror.org/052q58629 @=@ ITAE - Istituto di tecnologie avanzate per l'energia \"Nicola Giordano\"
|
||||
cnr:institutes @=@ https://ror.org/04ehykb85 @=@ CDS100
|
||||
cnr:institutes @=@ https://ror.org/04ehykb85 @=@ ITB - Istituto di tecnologie biomediche
|
||||
cnr:institutes @=@ https://ror.org/0221agg28 @=@ CDS101
|
||||
cnr:institutes @=@ https://ror.org/0221agg28 @=@ ITC - Istituto per le tecnologie della costruzione
|
||||
cnr:institutes @=@ https://ror.org/02xz4xc25 @=@ CDS102
|
||||
cnr:institutes @=@ https://ror.org/02xz4xc25 @=@ ITD - Istituto per le tecnologie didattiche
|
||||
cnr:institutes @=@ __CDS103__ @=@ CDS103
|
||||
cnr:institutes @=@ __CDS103__ @=@ STIIMA - Istituto di Sistemi e Tecnologie Industriali Intelligenti per il Manifatturiero Avanzato
|
||||
cnr:institutes @=@ https://ror.org/01as2bh37 @=@ CDS104
|
||||
cnr:institutes @=@ https://ror.org/01as2bh37 @=@ ITTIG - Istituto di teoria e tecniche dell'informazione giuridica
|
||||
cnr:institutes @=@ https://ror.org/01y5w6t76 @=@ CDS105
|
||||
cnr:institutes @=@ https://ror.org/01y5w6t76 @=@ ITOI - Istituto per i trapianti d'organo e immunocitologia
|
||||
cnr:institutes @=@ https://ror.org/04xy2mq71 @=@ CDS106
|
||||
cnr:institutes @=@ https://ror.org/04xy2mq71 @=@ IVALSA - Istituto per la valorizzazione del legno e delle specie arboree
|
||||
cnr:institutes @=@ __CDS107__ @=@ CDS107
|
||||
cnr:institutes @=@ __CDS107__ @=@ IVV - Istituto di virologia vegetale
|
||||
cnr:institutes @=@ https://ror.org/013nxtf56 @=@ CDS108
|
||||
cnr:institutes @=@ https://ror.org/013nxtf56 @=@ IRSIG - Istituto di ricerca sui sistemi giudiziari
|
||||
cnr:institutes @=@ https://ror.org/05rcgef49 @=@ CDS109
|
||||
cnr:institutes @=@ https://ror.org/05rcgef49 @=@ ISC - Istituto dei sistemi complessi
|
||||
cnr:institutes @=@ __CDS110__ @=@ CDS110
|
||||
cnr:institutes @=@ __CDS110__ @=@ INFM - Centro di responsabilità scientifica INFM
|
||||
cnr:institutes @=@ https://ror.org/02dp3a879 @=@ CDS111
|
||||
cnr:institutes @=@ https://ror.org/02dp3a879 @=@ INO - Istituto nazionale di ottica
|
||||
cnr:institutes @=@ __CDS112__ @=@ CDS112
|
||||
cnr:institutes @=@ __CDS112__ @=@ IDAIC - Centro di responsabilità di attività scientifica IDAIC
|
||||
cnr:institutes @=@ https://ror.org/00p03yg71 @=@ CDS113
|
||||
cnr:institutes @=@ https://ror.org/00p03yg71 @=@ SPIN - Istituto superconduttori, materiali innovativi e dispositivi
|
||||
cnr:institutes @=@ https://ror.org/00yfw2296 @=@ CDS114
|
||||
cnr:institutes @=@ https://ror.org/00yfw2296 @=@ IOM - Istituto officina dei materiali
|
||||
cnr:institutes @=@ https://ror.org/0042e5975 @=@ CDS115
|
||||
cnr:institutes @=@ https://ror.org/0042e5975 @=@ NANO - Istituto Nanoscienze
|
||||
cnr:institutes @=@ https://ror.org/03ta8pf33 @=@ CDS116
|
||||
cnr:institutes @=@ https://ror.org/03ta8pf33 @=@ IFT - Istituto di Farmacologia Traslazionale
|
||||
cnr:institutes @=@ https://ror.org/040xhth73 @=@ CDS117
|
||||
cnr:institutes @=@ https://ror.org/040xhth73 @=@ IBCN - Istituto di Biologia Cellulare e Neurobiologia
|
||||
cnr:institutes @=@ https://ror.org/02qnx8e75 @=@ CDS118
|
||||
cnr:institutes @=@ https://ror.org/02qnx8e75 @=@ INM - Istituto di iNgegneria del Mare
|
||||
cnr:institutes @=@ https://ror.org/05nr7xa08 @=@ CDS119
|
||||
cnr:institutes @=@ https://ror.org/05nr7xa08 @=@ IPCB - Istituto per i Polimeri, Compositi e Biomateriali
|
||||
cnr:institutes @=@ https://ror.org/008fjbg42 @=@ CDS121
|
||||
cnr:institutes @=@ https://ror.org/008fjbg42 @=@ IPSP - Istituto per la Protezione Sostenibile delle Piante
|
||||
cnr:institutes @=@ __CDS122__ @=@ CDS122
|
||||
cnr:institutes @=@ __CDS122__ @=@ IRBIM - Istituto per le Risorse Biologiche e le Biotecnologie Marine
|
||||
cnr:institutes @=@ __CDS123__ @=@ CDS123
|
||||
cnr:institutes @=@ __CDS123__ @=@ ISPC - Istituto di Scienze del Patrimonio Culturale
|
||||
cnr:institutes @=@ __CDS124__ @=@ CDS124
|
||||
cnr:institutes @=@ __CDS124__ @=@ IAS - Istituto per lo studio degli impatti Antropici e Sostenibilità in ambiente marino
|
||||
cnr:institutes @=@ __CDS125__ @=@ CDS125
|
||||
cnr:institutes @=@ __CDS125__ @=@ IRET - Istituto di Ricerca sugli Ecosistemi Terrestri
|
||||
cnr:institutes @=@ https://ror.org/03a0vt050 @=@ CDS126
|
||||
cnr:institutes @=@ https://ror.org/03a0vt050 @=@ ISTP - Istituto per la Scienza e Tecnologia dei Plasmi
|
||||
cnr:institutes @=@ __CDS127__ @=@ CDS127
|
||||
cnr:institutes @=@ __CDS127__ @=@ ISP - Istituto di Scienze Polari
|
||||
cnr:institutes @=@ https://ror.org/03byxpq91 @=@ CDS128
|
||||
cnr:institutes @=@ https://ror.org/03byxpq91 @=@ IRIB - Istituto per la Ricerca e l'Innovazione Biomedica
|
||||
cnr:institutes @=@ __CDS129__ @=@ CDS129
|
||||
cnr:institutes @=@ __CDS129__ @=@ IGSG - Istituto di Informatica Giuridica e Sistemi Giudiziari
|
||||
cnr:institutes @=@ __CDS130__ @=@ CDS130
|
||||
cnr:institutes @=@ __CDS130__ @=@ IBBC - Istituto di Biochimica e Biologia Cellulare
|
||||
cnr:institutes @=@ __CDS131__ @=@ CDS131
|
||||
cnr:institutes @=@ __CDS131__ @=@ IBE - Istituto per la BioEconomia
|
||||
cnr:institutes @=@ https://ror.org/0263zy895 @=@ CDS132
|
||||
cnr:institutes @=@ https://ror.org/0263zy895 @=@ SCITEC - Istituto di Scienze e Tecnologie Chimiche \"Giulio Natta\"
|
||||
cnr:institutes @=@ __CDS133__ @=@ CDS133
|
||||
cnr:institutes @=@ __CDS133__ @=@ STEMS - Istituto di Scienze e Tecnologie per l'Energia e la Mobilità Sostenibili
|
|
@@ -1077,4 +1077,137 @@ dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/DATASET/IS_SUPPLEMENTED

dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/AUTHOR/ORCID @=@ An Open Researcher and Contributor ID (ORCID) that can be associated to an author of your publications
dnet:review_levels @=@ dnet:review_levels @=@ 0000 @=@ Unknown
dnet:review_levels @=@ dnet:review_levels @=@ 0002 @=@ nonPeerReviewed
dnet:review_levels @=@ dnet:review_levels @=@ 0001 @=@ peerReviewed
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00brf2d87 @=@ https://ror.org/00brf2d87
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/006qkqr45 @=@ https://ror.org/006qkqr45
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/054ye0e45 @=@ https://ror.org/054ye0e45
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00ygy3d85 @=@ https://ror.org/00ygy3d85
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/000sy1f36 @=@ https://ror.org/000sy1f36
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00x5wpm25 @=@ https://ror.org/00x5wpm25
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03eqeqg74 @=@ https://ror.org/03eqeqg74
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/041xzk838 @=@ https://ror.org/041xzk838
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00s2j5046 @=@ https://ror.org/00s2j5046
cnr:institutes @=@ cnr:institutes @=@ __CDS010__ @=@ __CDS010__
cnr:institutes @=@ cnr:institutes @=@ __CDS011__ @=@ __CDS011__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02e5sbe24 @=@ https://ror.org/02e5sbe24
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01nyatq71 @=@ https://ror.org/01nyatq71
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01dy2q607 @=@ https://ror.org/01dy2q607
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05nzf7q96 @=@ https://ror.org/05nzf7q96
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05m1yqp60 @=@ https://ror.org/05m1yqp60
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03rqtqb02 @=@ https://ror.org/03rqtqb02
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/04r5fge26 @=@ https://ror.org/04r5fge26
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03wyf0g15 @=@ https://ror.org/03wyf0g15
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02fkw1114 @=@ https://ror.org/02fkw1114
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/0141vn777 @=@ https://ror.org/0141vn777
cnr:institutes @=@ cnr:institutes @=@ __CDS022__ @=@ __CDS022__
cnr:institutes @=@ cnr:institutes @=@ __CDS023__ @=@ __CDS023__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00be3zh53 @=@ https://ror.org/00be3zh53
cnr:institutes @=@ cnr:institutes @=@ __CDS025__ @=@ __CDS025__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02ynrme92 @=@ https://ror.org/02ynrme92
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05wba8r86 @=@ https://ror.org/05wba8r86
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03z58xd74 @=@ https://ror.org/03z58xd74
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00n4jbh84 @=@ https://ror.org/00n4jbh84
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/04sn06036 @=@ https://ror.org/04sn06036
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01rg40y89 @=@ https://ror.org/01rg40y89
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00dqega85 @=@ https://ror.org/00dqega85
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02n2bgz18 @=@ https://ror.org/02n2bgz18
cnr:institutes @=@ cnr:institutes @=@ __CDS034__ @=@ __CDS034__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01kdj2848 @=@ https://ror.org/01kdj2848
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/049ebw417 @=@ https://ror.org/049ebw417
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01f5tnx94 @=@ https://ror.org/01f5tnx94
cnr:institutes @=@ cnr:institutes @=@ __CDS038__ @=@ __CDS038__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/04hadk112 @=@ https://ror.org/04hadk112
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03qpd8w66 @=@ https://ror.org/03qpd8w66
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01gtsa866 @=@ https://ror.org/01gtsa866
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00ytw6m58 @=@ https://ror.org/00ytw6m58
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/015bmra78 @=@ https://ror.org/015bmra78
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02gdcn153 @=@ https://ror.org/02gdcn153
cnr:institutes @=@ cnr:institutes @=@ __CDS045__ @=@ __CDS045__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05hky6p02 @=@ https://ror.org/05hky6p02
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/011n2hw53 @=@ https://ror.org/011n2hw53
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/028g3pe33 @=@ https://ror.org/028g3pe33
cnr:institutes @=@ cnr:institutes @=@ __CDS049__ @=@ __CDS049__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03m0n3c07 @=@ https://ror.org/03m0n3c07
cnr:institutes @=@ cnr:institutes @=@ __CDS051__ @=@ __CDS051__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00z8ws214 @=@ https://ror.org/00z8ws214
cnr:institutes @=@ cnr:institutes @=@ __CDS053__ @=@ __CDS053__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00bc51d88 @=@ https://ror.org/00bc51d88
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/024ye7w89 @=@ https://ror.org/024ye7w89
cnr:institutes @=@ cnr:institutes @=@ __CDS056__ @=@ __CDS056__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05vk2g845 @=@ https://ror.org/05vk2g845
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02qwy8e97 @=@ https://ror.org/02qwy8e97
cnr:institutes @=@ cnr:institutes @=@ __CDS059__ @=@ __CDS059__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02dr63s31 @=@ https://ror.org/02dr63s31
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/0240rwx68 @=@ https://ror.org/0240rwx68
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02rzxrg25 @=@ https://ror.org/02rzxrg25
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05patmk97 @=@ https://ror.org/05patmk97
cnr:institutes @=@ cnr:institutes @=@ __CDS064__ @=@ __CDS064__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/029st3z03 @=@ https://ror.org/029st3z03
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/0040zx077 @=@ https://ror.org/0040zx077
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/044bfsy89 @=@ https://ror.org/044bfsy89
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01n1ayq61 @=@ https://ror.org/01n1ayq61
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02db0kh50 @=@ https://ror.org/02db0kh50
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05813wx75 @=@ https://ror.org/05813wx75
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/04vnwke91 @=@ https://ror.org/04vnwke91
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02wxw4x45 @=@ https://ror.org/02wxw4x45
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01j6drw72 @=@ https://ror.org/01j6drw72
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05kacka20 @=@ https://ror.org/05kacka20
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00n8ttd98 @=@ https://ror.org/00n8ttd98
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/0013zhk30 @=@ https://ror.org/0013zhk30
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03x7xkr71 @=@ https://ror.org/03x7xkr71
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05w9g2j85 @=@ https://ror.org/05w9g2j85
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/032tyv240 @=@ https://ror.org/032tyv240
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02hdf6119 @=@ https://ror.org/02hdf6119
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01yg57d71 @=@ https://ror.org/01yg57d71
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/021z1mz76 @=@ https://ror.org/021z1mz76
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01wqae691 @=@ https://ror.org/01wqae691
cnr:institutes @=@ cnr:institutes @=@ __CDS084__ @=@ __CDS084__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00awwz417 @=@ https://ror.org/00awwz417
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03a111314 @=@ https://ror.org/03a111314
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01zz9wh30 @=@ https://ror.org/01zz9wh30
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/035y5td47 @=@ https://ror.org/035y5td47
cnr:institutes @=@ cnr:institutes @=@ __CDS089__ @=@ __CDS089__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/051t1q308 @=@ https://ror.org/051t1q308
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05k3cs357 @=@ https://ror.org/05k3cs357
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02gcxw165 @=@ https://ror.org/02gcxw165
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05db0es39 @=@ https://ror.org/05db0es39
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/029k6t707 @=@ https://ror.org/029k6t707
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00w6r1881 @=@ https://ror.org/00w6r1881
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01mfmr054 @=@ https://ror.org/01mfmr054
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/058nrs650 @=@ https://ror.org/058nrs650
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/0331xj092 @=@ https://ror.org/0331xj092
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/052q58629 @=@ https://ror.org/052q58629
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/04ehykb85 @=@ https://ror.org/04ehykb85
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/0221agg28 @=@ https://ror.org/0221agg28
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02xz4xc25 @=@ https://ror.org/02xz4xc25
cnr:institutes @=@ cnr:institutes @=@ __CDS103__ @=@ __CDS103__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01as2bh37 @=@ https://ror.org/01as2bh37
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/01y5w6t76 @=@ https://ror.org/01y5w6t76
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/04xy2mq71 @=@ https://ror.org/04xy2mq71
cnr:institutes @=@ cnr:institutes @=@ __CDS107__ @=@ __CDS107__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/013nxtf56 @=@ https://ror.org/013nxtf56
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05rcgef49 @=@ https://ror.org/05rcgef49
cnr:institutes @=@ cnr:institutes @=@ __CDS110__ @=@ __CDS110__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02dp3a879 @=@ https://ror.org/02dp3a879
cnr:institutes @=@ cnr:institutes @=@ __CDS112__ @=@ __CDS112__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00p03yg71 @=@ https://ror.org/00p03yg71
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/00yfw2296 @=@ https://ror.org/00yfw2296
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/0042e5975 @=@ https://ror.org/0042e5975
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03ta8pf33 @=@ https://ror.org/03ta8pf33
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/040xhth73 @=@ https://ror.org/040xhth73
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/02qnx8e75 @=@ https://ror.org/02qnx8e75
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/05nr7xa08 @=@ https://ror.org/05nr7xa08
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/008fjbg42 @=@ https://ror.org/008fjbg42
cnr:institutes @=@ cnr:institutes @=@ __CDS122__ @=@ __CDS122__
cnr:institutes @=@ cnr:institutes @=@ __CDS123__ @=@ __CDS123__
cnr:institutes @=@ cnr:institutes @=@ __CDS124__ @=@ __CDS124__
cnr:institutes @=@ cnr:institutes @=@ __CDS125__ @=@ __CDS125__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03a0vt050 @=@ https://ror.org/03a0vt050
cnr:institutes @=@ cnr:institutes @=@ __CDS127__ @=@ __CDS127__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/03byxpq91 @=@ https://ror.org/03byxpq91
cnr:institutes @=@ cnr:institutes @=@ __CDS129__ @=@ __CDS129__
cnr:institutes @=@ cnr:institutes @=@ __CDS130__ @=@ __CDS130__
cnr:institutes @=@ cnr:institutes @=@ __CDS131__ @=@ __CDS131__
cnr:institutes @=@ cnr:institutes @=@ https://ror.org/0263zy895 @=@ https://ror.org/0263zy895
cnr:institutes @=@ cnr:institutes @=@ __CDS133__ @=@ __CDS133__

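The vocabulary file above uses a fixed " @=@ "-separated layout: three-field rows (vocabularyId @=@ termCode @=@ synonym) declare synonyms, while four-field rows (vocabularyId @=@ vocabularyName @=@ termCode @=@ termName) declare the terms themselves. A minimal sketch of how such rows could be parsed follows; the class and method names are hypothetical and not part of dnet-hadoop, only the delimiter and field layout are taken from the file itself:

import java.util.HashMap;
import java.util.Map;

// Hypothetical illustration of parsing the " @=@ "-separated vocabulary rows above.
public class VocabularyLineSketch {

    private static final String DELIMITER = " @=@ ";

    public static void main(String[] args) {
        Map<String, String> synonymToCode = new HashMap<>();
        String[] rows = {
            "cnr:institutes @=@ https://ror.org/02e5sbe24 @=@ CDS012",
            "cnr:institutes @=@ https://ror.org/02e5sbe24 @=@ IBBA - Istituto di biologia e biotecnologia agraria"
        };
        for (String row : rows) {
            String[] fields = row.split(DELIMITER);
            if (fields.length == 3) {
                // three fields: vocabulary id, term code, synonym
                synonymToCode.put(fields[2], fields[1]);
            }
        }
        // both the CDS code and the acronym+name resolve to the same ROR id
        System.out.println(synonymToCode);
    }
}
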
@@ -2,11 +2,14 @@ package eu.dnetlib.dhp.datacite

import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
import eu.dnetlib.dhp.schema.oaf.Oaf
import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _}
import org.apache.commons.io.FileUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.functions.{col, count}
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
import org.json4s.DefaultFormats
import org.json4s.JsonAST.{JField, JObject, JString}
import org.json4s.jackson.JsonMethods.parse
import org.junit.jupiter.api.Assertions._
import org.junit.jupiter.api.extension.ExtendWith
import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}

@@ -51,6 +54,9 @@ class DataciteToOAFTest extends AbstractVocabularyTest {

    val path = getClass.getResource("/eu/dnetlib/dhp/actionmanager/datacite/dataset").getPath

    val conf = new SparkConf()
    conf.set("spark.driver.host", "localhost")
    conf.set("spark.ui.enabled", "false")

    val spark: SparkSession = SparkSession
      .builder()
      .config(conf)

@@ -70,17 +76,15 @@ class DataciteToOAFTest extends AbstractVocabularyTest {

    assertEquals(100, nativeSize)

    spark.read.load(targetPath).printSchema();

    val result: Dataset[Oaf] = spark.read.load(targetPath).as[Oaf]
    val result: Dataset[String] =
      spark.read.text(targetPath).as[String].map(DataciteUtilityTest.convertToOAF)(Encoders.STRING)

    result
      .map(s => s.getClass.getSimpleName)
      .groupBy(col("value").alias("class"))
      .agg(count("value").alias("Total"))
      .show(false)

    val t = spark.read.load(targetPath).count()
    val t = spark.read.text(targetPath).as[String].count()

    assertTrue(t > 0)

@@ -107,4 +111,19 @@ class DataciteToOAFTest extends AbstractVocabularyTest {

  }

  @Test
  def testFilter(): Unit = {
    val record = Source
      .fromInputStream(
        getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/record_fairsharing.json")
      )
      .mkString

    val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
    val res: List[Oaf] = DataciteToOAFTransformation.generateOAF(record, 0L, 0L, vocabularies, true)

    assertTrue(res.isEmpty)

  }

}

@@ -0,0 +1,30 @@
package eu.dnetlib.dhp.datacite

import org.json4s.DefaultFormats
import org.json4s.JsonAST.{JField, JObject, JString}
import org.json4s.jackson.JsonMethods.parse

object DataciteUtilityTest {

  def convertToOAF(input: String): String = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val json = parse(input)

    val isRelation: String = (json \\ "source").extractOrElse("NULL")

    if (isRelation != "NULL") {
      return "Relation"
    }

    val iType: List[String] = for {
      JObject(instance)                            <- json \\ "instance"
      JField("instancetype", JObject(instancetype)) <- instance
      JField("classname", JString(classname))       <- instancetype
    } yield classname

    val l: String = iType.head.toLowerCase()
    l
  }

}

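For readers more at home on the Java side of the codebase, the classification rule in convertToOAF can be restated with Jackson (already used elsewhere in this commit). This standalone sketch is illustrative only and is not part of the test suite; it also simplifies the lookup to top-level fields, whereas the json4s version above does a recursive (\\) search:

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

// Illustrative Java restatement of DataciteUtilityTest.convertToOAF: a record with a
// "source" field is a Relation; otherwise the first instance type classname is returned.
public class ConvertToOafSketch {

    public static String convertToOAF(String input) throws Exception {
        JsonNode json = new ObjectMapper().readTree(input);
        if (json.has("source")) {
            return "Relation";
        }
        JsonNode classname = json
            .path("instance").path(0)
            .path("instancetype").path("classname");
        return classname.asText().toLowerCase();
    }

    public static void main(String[] args) throws Exception {
        System.out.println(convertToOAF("{\"source\":\"x\",\"target\":\"y\"}")); // Relation
        System.out.println(convertToOAF(
            "{\"instance\":[{\"instancetype\":{\"classname\":\"Dataset\"}}]}")); // dataset
    }
}
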
@@ -2,9 +2,10 @@ package eu.dnetlib.dhp.sx.bio

import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature}
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result}
import eu.dnetlib.dhp.schema.oaf.utils.PidType
import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Result}
import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved
import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMParser, PubMedToOaf}
import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMParser, PMSubject, PubMedToOaf}
import org.json4s.DefaultFormats
import org.json4s.JsonAST.{JField, JObject, JString}
import org.json4s.jackson.JsonMethods.parse

@@ -16,6 +17,7 @@ import org.mockito.junit.jupiter.MockitoExtension

import java.io.{BufferedReader, InputStream, InputStreamReader}
import java.util.zip.GZIPInputStream
import scala.collection.JavaConverters._
import scala.collection.mutable.ListBuffer
import scala.io.Source
import scala.xml.pull.XMLEventReader

@@ -74,6 +76,95 @@ class BioScholixTest extends AbstractVocabularyTest {

  }

  private def checkPMArticle(article: PMArticle): Unit = {
    assertNotNull(article.getPmid)
    assertNotNull(article.getTitle)
    assertNotNull(article.getAuthors)
    article.getAuthors.asScala.foreach { a =>
      assertNotNull(a)
      assertNotNull(a.getFullName)
    }
  }

  @Test
  def testParsingPubmedXML(): Unit = {
    val xml = new XMLEventReader(
      Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml"))
    )
    val parser = new PMParser(xml)
    parser.foreach(checkPMArticle)
  }

  private def checkPubmedPublication(o: Oaf): Unit = {
    assertTrue(o.isInstanceOf[Publication])
    val p: Publication = o.asInstanceOf[Publication]
    assertNotNull(p.getId)
    assertNotNull(p.getTitle)
    p.getTitle.asScala.foreach(t => assertNotNull(t.getValue))
    p.getAuthor.asScala.foreach(a => assertNotNull(a.getFullname))
    assertNotNull(p.getInstance())
    p.getInstance().asScala.foreach { i =>
      assertNotNull(i.getCollectedfrom)
      assertNotNull(i.getPid)
      assertNotNull(i.getInstancetype)
    }
    assertNotNull(p.getOriginalId)
    p.getOriginalId.asScala.foreach(oId => assertNotNull(oId))

    val hasPMC = p
      .getInstance()
      .asScala
      .exists(i => i.getPid.asScala.exists(pid => pid.getQualifier.getClassid.equalsIgnoreCase(PidType.pmc.toString)))

    if (hasPMC) {
      assertTrue(p.getOriginalId.asScala.exists(oId => oId.startsWith("od_______267::")))
    }
  }

  @Test
  def testPubmedOriginalID(): Unit = {
    val article: PMArticle = new PMArticle

    article.setPmid("1234")
    article.setTitle("a Title")

    // VERIFY PUBLICATION IS NOT NULL
    article.getPublicationTypes.add(new PMSubject("article", null, null))
    var publication = PubMedToOaf.convert(article, vocabularies).asInstanceOf[Publication]
    assertNotNull(publication)
    assertEquals("50|pmid________::81dc9bdb52d04dc20036dbd8313ed055", publication.getId)

    // VERIFY PUBLICATION ID DOES NOT CHANGE EVEN IF THE PMC IDENTIFIER IS SET
    article.setPmcId("PMC1517292")
    publication = PubMedToOaf.convert(article, vocabularies).asInstanceOf[Publication]
    assertNotNull(publication)
    assertEquals("50|pmid________::81dc9bdb52d04dc20036dbd8313ed055", publication.getId)

    // VERIFY THE ORIGINAL ID GENERATED IN THE OLD WAY, USING THE PMC IDENTIFIER, STILL EXISTS
    val oldOpenaireID = "od_______267::0000072375bc0e68fa09d4e6b7658248"

    val hasOldOpenAIREID = publication.getOriginalId.asScala.exists(o => o.equalsIgnoreCase(oldOpenaireID))

    assertTrue(hasOldOpenAIREID)
  }

  @Test
  def testPubmedMapping(): Unit = {

    val xml = new XMLEventReader(
      Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml"))
    )
    val parser = new PMParser(xml)
    val results = ListBuffer[Oaf]()
    parser.foreach(x => results += PubMedToOaf.convert(x, vocabularies))

    results.foreach(checkPubmedPublication)

  }

  @Test
  def testPDBToOAF(): Unit = {

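The identifier asserted in testPubmedOriginalID is consistent with an MD5-based scheme: 81dc9bdb52d04dc20036dbd8313ed055 is exactly md5("1234"), prefixed with the result type and a pid-type namespace. The sketch below reproduces the expected constant; how the production code actually builds ids is not shown in this diff, so treat this only as a check of the test's expected value:

import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;

// Reproduces the id asserted in testPubmedOriginalID: "50|pmid________::" + md5(pmid).
public class PmidIdSketch {

    static String md5(String s) throws Exception {
        byte[] digest = MessageDigest.getInstance("MD5").digest(s.getBytes(StandardCharsets.UTF_8));
        return String.format("%032x", new BigInteger(1, digest)); // zero-padded hex
    }

    public static void main(String[] args) throws Exception {
        // prints 50|pmid________::81dc9bdb52d04dc20036dbd8313ed055
        System.out.println("50|pmid________::" + md5("1234"));
    }
}
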
@@ -26,20 +26,7 @@ import eu.dnetlib.broker.objects.OaBrokerRelatedPublication;

import eu.dnetlib.broker.objects.OaBrokerRelatedSoftware;
import eu.dnetlib.broker.objects.OaBrokerTypedValue;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.ExternalReference;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.*;

public class ConversionUtils {

@@ -71,6 +58,10 @@ public class ConversionUtils {

        return sp != null ? new OaBrokerTypedValue(classId(sp.getQualifier()), sp.getValue()) : null;
    }

    public static OaBrokerTypedValue oafSubjectToBrokerTypedValue(final Subject sp) {
        return sp != null ? new OaBrokerTypedValue(classId(sp.getQualifier()), sp.getValue()) : null;
    }

    public static OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) {
        if (d == null) {
            return null;

@@ -115,7 +106,7 @@ public class ConversionUtils {

        res.setTitles(structPropList(result.getTitle()));
        res.setAbstracts(fieldList(result.getDescription()));
        res.setLanguage(classId(result.getLanguage()));
        res.setSubjects(structPropTypedList(result.getSubject()));
        res.setSubjects(subjectList(result.getSubject()));
        res.setCreators(mappedList(result.getAuthor(), ConversionUtils::oafAuthorToBrokerAuthor));
        res.setPublicationdate(fieldValue(result.getDateofacceptance()));
        res.setPublisher(fieldValue(result.getPublisher()));

@@ -304,6 +295,18 @@ public class ConversionUtils {

            .collect(Collectors.toList());
    }

    private static List<OaBrokerTypedValue> subjectList(final List<Subject> list) {
        if (list == null) {
            return new ArrayList<>();
        }

        return list
            .stream()
            .map(ConversionUtils::oafSubjectToBrokerTypedValue)
            .filter(Objects::nonNull)
            .collect(Collectors.toList());
    }

    private static <F, T> List<T> mappedList(final List<F> list, final Function<F, T> func) {
        if (list == null) {
            return new ArrayList<>();

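The new subjectList helper follows the same null-safe pattern as mappedList: a null input list becomes an empty list, and individual elements that map to null are filtered out, so callers never have to null-check. A self-contained sketch of the pattern, with stand-in types rather than the broker classes:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;
import java.util.stream.Collectors;

// Stand-alone sketch of the null-safe list mapping used by subjectList/mappedList.
public class NullSafeMappingSketch {

    static <F, T> List<T> mappedList(List<F> list, Function<F, T> func) {
        if (list == null) {
            return new ArrayList<>(); // a null source list maps to an empty result
        }
        return list.stream()
            .map(func)
            .filter(Objects::nonNull) // drop elements the mapper rejected
            .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        Function<String, String> toUpperOrNull = s -> s.isEmpty() ? null : s.toUpperCase();
        System.out.println(mappedList(null, toUpperOrNull));                        // []
        System.out.println(mappedList(Arrays.asList("a", "", "b"), toUpperOrNull)); // [A, B]
    }
}
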
@@ -3,6 +3,8 @@ package eu.dnetlib.doiboost.orcid;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.*;

@@ -13,6 +15,7 @@ import org.apache.hadoop.io.Text;

import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;

@@ -20,6 +23,7 @@ import org.apache.spark.util.LongAccumulator;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;
import com.google.gson.JsonElement;
import com.google.gson.JsonParser;

@@ -42,6 +46,7 @@ public class SparkDownloadOrcidWorks {

    public static final String ORCID_XML_DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
    public static final DateTimeFormatter ORCID_XML_DATETIMEFORMATTER = DateTimeFormatter
        .ofPattern(ORCID_XML_DATETIME_FORMAT);
    public static final String DOWNLOAD_WORKS_REQUEST_SEPARATOR = ",";

    public static void main(String[] args) throws Exception {

@@ -56,7 +61,6 @@ public class SparkDownloadOrcidWorks {

            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
        final String workingPath = parser.get("workingPath");
        logger.info("workingPath: {}", workingPath);
        final String outputPath = parser.get("outputPath");

@@ -69,32 +73,22 @@ public class SparkDownloadOrcidWorks {

            isSparkSessionManaged,
            spark -> {
                final String lastUpdateValue = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt");
                logger.info("lastUpdateValue: {}", lastUpdateValue);

                JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
                LongAccumulator updatedAuthorsAcc = spark.sparkContext().longAccumulator("updated_authors");
                LongAccumulator parsedAuthorsAcc = spark.sparkContext().longAccumulator("parsed_authors");
                LongAccumulator parsedWorksAcc = spark.sparkContext().longAccumulator("parsed_works");
                LongAccumulator modifiedWorksAcc = spark.sparkContext().longAccumulator("modified_works");
                LongAccumulator maxModifiedWorksLimitAcc = spark
                    .sparkContext()
                    .longAccumulator("max_modified_works_limit");
                LongAccumulator errorCodeFoundAcc = spark.sparkContext().longAccumulator("error_code_found");
                LongAccumulator errorLoadingJsonFoundAcc = spark
                    .sparkContext()
                    .longAccumulator("error_loading_json_found");
                LongAccumulator errorLoadingXMLFoundAcc = spark
                    .sparkContext()
                    .longAccumulator("error_loading_xml_found");
                LongAccumulator errorParsingXMLFoundAcc = spark
                    .sparkContext()
                    .longAccumulator("error_parsing_xml_found");
                LongAccumulator downloadedRecordsAcc = spark.sparkContext().longAccumulator("downloaded_records");
                LongAccumulator errorsAcc = spark.sparkContext().longAccumulator("errors");

                JavaPairRDD<Text, Text> updatedAuthorsRDD = sc
                    .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class);
                updatedAuthorsAcc.setValue(updatedAuthorsRDD.count());
                long authorsCount = updatedAuthorsRDD.count();
                updatedAuthorsAcc.setValue(authorsCount);

                FlatMapFunction<Tuple2<Text, Text>, String> retrieveWorkUrlFunction = data -> {
                    String orcidId = data._1().toString();

@@ -106,11 +100,10 @@ public class SparkDownloadOrcidWorks {

                    if (statusCode.equals("200")) {
                        String compressedData = getJsonValue(jElement, "compressedData");
                        if (StringUtils.isEmpty(compressedData)) {
                            errorLoadingJsonFoundAcc.add(1);
                        } else {
                            String authorSummary = ArgumentApplicationParser.decompressValue(compressedData);
                            if (StringUtils.isEmpty(authorSummary)) {
                                errorLoadingXMLFoundAcc.add(1);
                            } else {
                                try {
                                    workIdLastModifiedDate = XMLRecordParser

@@ -125,22 +118,38 @@ public class SparkDownloadOrcidWorks {

                        errorCodeFoundAcc.add(1);
                    }
                    parsedAuthorsAcc.add(1);

                    workIdLastModifiedDate.forEach((k, v) -> {
                        parsedWorksAcc.add(1);
                        if (isModified(orcidId, v, lastUpdateValue)) {
                            modifiedWorksAcc.add(1);
                            workIds.add(orcidId.concat("/work/").concat(k));
                            workIds.add(k);
                        }
                    });
                    if (workIdLastModifiedDate.size() > 50) {
                        maxModifiedWorksLimitAcc.add(1);
                    if (workIds.isEmpty()) {
                        return new ArrayList<String>().iterator();
                    }
                    return workIds.iterator();
                    List<String> worksDownloadUrls = new ArrayList<>();

                    // Creation of url for reading multiple works (up to 100) with ORCID API
                    // see this https://github.com/ORCID/ORCID-Source/blob/development/orcid-api-web/tutorial/works.md

                    List<List<String>> partitionedWorks = Lists.partition(workIds, 100);
                    partitionedWorks.stream().forEach(p -> {
                        String worksDownloadUrl = orcidId.concat("/works/");
                        final StringBuffer buffer = new StringBuffer(worksDownloadUrl);
                        p.forEach(id -> {
                            buffer.append(id).append(DOWNLOAD_WORKS_REQUEST_SEPARATOR);
                        });
                        String finalUrl = buffer.substring(0, buffer.lastIndexOf(DOWNLOAD_WORKS_REQUEST_SEPARATOR));
                        worksDownloadUrls.add(finalUrl);
                    });
                    return worksDownloadUrls.iterator();
                };

                Function<String, Tuple2<String, String>> downloadWorkFunction = data -> {
                    String relativeWorkUrl = data;
                    String orcidId = relativeWorkUrl.split("/")[0];
                Function<String, Tuple2<String, String>> downloadWorksFunction = data -> {
                    String relativeWorksUrl = data;
                    String orcidId = relativeWorksUrl.split("/")[0];
                    final DownloadedRecordData downloaded = new DownloadedRecordData();
                    downloaded.setOrcidId(orcidId);
                    downloaded.setLastModifiedDate(lastUpdateValue);

@@ -149,7 +158,7 @@ public class SparkDownloadOrcidWorks {

                    httpConnector.setAuthMethod(MultiAttemptsHttpConnector.BEARER);
                    httpConnector.setAcceptHeaderValue("application/vnd.orcid+xml");
                    httpConnector.setAuthToken(token);
                    String apiUrl = "https://api.orcid.org/v3.0/" + relativeWorkUrl;
                    String apiUrl = "https://api.orcid.org/v3.0/" + relativeWorksUrl;
                    DownloadsReport report = new DownloadsReport();
                    long startReq = System.currentTimeMillis();
                    boolean downloadCompleted = false;

@@ -167,7 +176,6 @@ public class SparkDownloadOrcidWorks {

                    } else {
                        downloaded.setStatusCode(-4);
                    }
                    errorsAcc.add(1);
                    }
                    long endReq = System.currentTimeMillis();
                    long reqTime = endReq - startReq;

@@ -176,7 +184,6 @@ public class SparkDownloadOrcidWorks {

                    }
                    if (downloadCompleted) {
                        downloaded.setStatusCode(200);
                        downloadedRecordsAcc.add(1);
                        downloaded
                            .setCompressedData(
                                ArgumentApplicationParser

@@ -185,24 +192,69 @@ public class SparkDownloadOrcidWorks {

                    return downloaded.toTuple2();
                };

                FlatMapFunction<Tuple2<String, String>, Tuple2<String, String>> splitWorksFunction = data -> {
                    List<Tuple2<String, String>> splittedDownloadedWorks = new ArrayList<>();
                    String jsonData = data._2().toString();
                    JsonElement jElement = new JsonParser().parse(jsonData);
                    String orcidId = data._1().toString();
                    String statusCode = getJsonValue(jElement, "statusCode");
                    String lastModifiedDate = getJsonValue(jElement, "lastModifiedDate");
                    String compressedData = getJsonValue(jElement, "compressedData");
                    String errorMessage = getJsonValue(jElement, "errorMessage");
                    String works = ArgumentApplicationParser.decompressValue(compressedData);

                    // split a single xml containing multiple works into multiple xml (a single work for each xml)
                    List<String> splittedWorks = null;
                    try {
                        splittedWorks = XMLRecordParser
                            .splitWorks(orcidId, works.getBytes(StandardCharsets.UTF_8));
                    } catch (Throwable t) {
                        final DownloadedRecordData errDownloaded = new DownloadedRecordData();
                        errDownloaded.setOrcidId(orcidId);
                        errDownloaded.setLastModifiedDate(lastModifiedDate);
                        errDownloaded.setStatusCode(-10);
                        errDownloaded.setErrorMessage(t.getMessage());
                        splittedDownloadedWorks.add(errDownloaded.toTuple2());
                        errorParsingXMLFoundAcc.add(1);
                        return splittedDownloadedWorks.iterator();
                    }
                    splittedWorks.forEach(w -> {
                        final DownloadedRecordData downloaded = new DownloadedRecordData();
                        downloaded.setOrcidId(orcidId);
                        downloaded.setLastModifiedDate(lastModifiedDate);
                        downloaded.setStatusCode(Integer.parseInt(statusCode));
                        downloaded.setErrorMessage(errorMessage);
                        try {
                            downloaded
                                .setCompressedData(
                                    ArgumentApplicationParser
                                        .compressArgument(w));
                        } catch (Throwable t) {
                            downloaded.setStatusCode(-11);
                            downloaded.setErrorMessage(t.getMessage());
                        }
                        splittedDownloadedWorks.add(downloaded.toTuple2());
                        downloadedRecordsAcc.add(1);
                    });

                    return splittedDownloadedWorks.iterator();
                };

                updatedAuthorsRDD
                    .flatMap(retrieveWorkUrlFunction)
                    .repartition(100)
                    .map(downloadWorkFunction)
                    .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2())))
                    .map(downloadWorksFunction)
                    .flatMap(splitWorksFunction)
                    .mapToPair(w -> new Tuple2<>(new Text(w._1()), new Text(w._2())))
                    .saveAsTextFile(workingPath.concat(outputPath), GzipCodec.class);

                logger.info("updatedAuthorsAcc: {}", updatedAuthorsAcc.value());
                logger.info("parsedAuthorsAcc: {}", parsedAuthorsAcc.value());
                logger.info("parsedWorksAcc: {}", parsedWorksAcc.value());
                logger.info("modifiedWorksAcc: {}", modifiedWorksAcc.value());
                logger.info("maxModifiedWorksLimitAcc: {}", maxModifiedWorksLimitAcc.value());
                logger.info("errorCodeFoundAcc: {}", errorCodeFoundAcc.value());
                logger.info("errorLoadingJsonFoundAcc: {}", errorLoadingJsonFoundAcc.value());
                logger.info("errorLoadingXMLFoundAcc: {}", errorLoadingXMLFoundAcc.value());
                logger.info("errorParsingXMLFoundAcc: {}", errorParsingXMLFoundAcc.value());
                logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value());
                logger.info("errorsAcc: {}", errorsAcc.value());
            });

    }

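retrieveWorkUrlFunction above batches the modified work ids into relative URLs of the form {orcid}/works/{id1},{id2},..., with at most 100 ids per request, which is what the linked ORCID works API tutorial allows. A condensed, standalone sketch of just that URL construction (the sample ORCID iD and put-codes are made up):

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

import com.google.common.collect.Lists;

// Condensed sketch of the bulk-works URL construction done in retrieveWorkUrlFunction.
public class WorksUrlSketch {

    public static void main(String[] args) {
        String orcidId = "0000-0002-1825-0097";                     // sample ORCID iD
        List<String> workIds = Arrays.asList("111", "222", "333");  // sample put-codes
        List<String> urls = Lists.partition(workIds, 100)           // at most 100 works per API call
            .stream()
            .map(p -> orcidId + "/works/" + String.join(",", p))
            .collect(Collectors.toList());
        // prints [0000-0002-1825-0097/works/111,222,333]
        System.out.println(urls);
    }
}
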
@@ -83,8 +83,6 @@ public class MultiAttemptsHttpConnector {

        throw new CollectorException(msg);
    }

    log.info("Request attempt {} [{}]", retryNumber, requestUrl);

    InputStream input = null;

    try {

@@ -104,9 +102,9 @@ public class MultiAttemptsHttpConnector {

            urlConn.addRequestProperty(HttpHeaders.AUTHORIZATION, String.format("Bearer %s", getAuthToken()));
        }

        if (log.isDebugEnabled()) {
            logHeaderFields(urlConn);
        }
//        if (log.isDebugEnabled()) {
//            logHeaderFields(urlConn);
//        }

        int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
        if (is2xx(urlConn.getResponseCode())) {

@ -1,7 +1,11 @@
|
|||
|
||||
package eu.dnetlib.doiboost.orcid.xml;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.mortbay.log.Log;
|
||||
|
@ -34,6 +38,33 @@ public class XMLRecordParser {
|
|||
private static final String NS_WORK_URL = "http://www.orcid.org/ns/work";
|
||||
private static final String NS_HISTORY = "history";
|
||||
private static final String NS_HISTORY_URL = "http://www.orcid.org/ns/history";
|
||||
private static final String NS_BULK_URL = "http://www.orcid.org/ns/bulk";
|
||||
private static final String NS_BULK = "bulk";
|
||||
|
||||
private static final String namespaceList = " xmlns:internal=\"http://www.orcid.org/ns/internal\"\n" +
|
||||
" xmlns:education=\"http://www.orcid.org/ns/education\"\n" +
|
||||
" xmlns:distinction=\"http://www.orcid.org/ns/distinction\"\n" +
|
||||
" xmlns:deprecated=\"http://www.orcid.org/ns/deprecated\"\n" +
|
||||
" xmlns:other-name=\"http://www.orcid.org/ns/other-name\"\n" +
|
||||
" xmlns:membership=\"http://www.orcid.org/ns/membership\"\n" +
|
||||
" xmlns:error=\"http://www.orcid.org/ns/error\" xmlns:common=\"http://www.orcid.org/ns/common\"\n" +
|
||||
" xmlns:record=\"http://www.orcid.org/ns/record\"\n" +
|
||||
" xmlns:personal-details=\"http://www.orcid.org/ns/personal-details\"\n" +
|
||||
" xmlns:keyword=\"http://www.orcid.org/ns/keyword\" xmlns:email=\"http://www.orcid.org/ns/email\"\n" +
|
||||
" xmlns:external-identifier=\"http://www.orcid.org/ns/external-identifier\"\n" +
|
||||
" xmlns:funding=\"http://www.orcid.org/ns/funding\"\n" +
|
||||
" xmlns:preferences=\"http://www.orcid.org/ns/preferences\"\n" +
|
||||
" xmlns:address=\"http://www.orcid.org/ns/address\"\n" +
|
||||
" xmlns:invited-position=\"http://www.orcid.org/ns/invited-position\"\n" +
|
||||
" xmlns:work=\"http://www.orcid.org/ns/work\" xmlns:history=\"http://www.orcid.org/ns/history\"\n" +
|
||||
" xmlns:employment=\"http://www.orcid.org/ns/employment\"\n" +
|
||||
" xmlns:qualification=\"http://www.orcid.org/ns/qualification\"\n" +
|
||||
" xmlns:service=\"http://www.orcid.org/ns/service\" xmlns:person=\"http://www.orcid.org/ns/person\"\n" +
|
||||
" xmlns:activities=\"http://www.orcid.org/ns/activities\"\n" +
|
||||
" xmlns:researcher-url=\"http://www.orcid.org/ns/researcher-url\"\n" +
|
||||
" xmlns:peer-review=\"http://www.orcid.org/ns/peer-review\"\n" +
|
||||
" xmlns:bulk=\"http://www.orcid.org/ns/bulk\"\n" +
|
||||
" xmlns:research-resource=\"http://www.orcid.org/ns/research-resource\"";
|
||||
|
||||
private static final String NS_ERROR = "error";
|
||||
|
||||
|
@@ -307,4 +338,65 @@ public class XMLRecordParser {
		}
		return authorHistory;
	}

	public static List<String> splitWorks(String orcidId, byte[] bytes)
		throws ParseException, XPathParseException, NavException, XPathEvalException, VtdException, ModifyException,
		IOException, TranscodeException {

		final VTDGen vg = new VTDGen();
		vg.setDoc(bytes);
		vg.parse(true);
		final VTDNav vn = vg.getNav();
		final AutoPilot ap = new AutoPilot(vn);
		ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
		ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL);
		ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);
		ap.declareXPathNameSpace(NS_BULK, NS_BULK_URL);

		List<String> works = new ArrayList<>();
		try {
			ap.selectXPath("//work:work");
			while (ap.evalXPath() != -1) {
				ByteArrayOutputStream bos = new ByteArrayOutputStream();
				// VTD packs the fragment descriptor into a long: lower 32 bits = offset, upper 32 bits = length
				long l = vn.getElementFragment();
				String xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>";
				bos.write(xmlHeader.getBytes(StandardCharsets.UTF_8));
				bos.write(vn.getXML().getBytes(), (int) l, (int) (l >> 32));
				works.add(bos.toString());
				bos.close();
			}
		} catch (Exception e) {
			throw new VtdException(e);
		}

		// single-element lists let the parser/modifier/buffer be reassigned inside the lambda below
		List<VTDGen> vgModifiers = Arrays.asList(new VTDGen());
		List<XMLModifier> xmModifiers = Arrays.asList(new XMLModifier());
		List<ByteArrayOutputStream> buffer = Arrays.asList(new ByteArrayOutputStream());
		List<String> updatedWorks = works.stream().map(work -> {
			vgModifiers.get(0).setDoc(work.getBytes());
			try {
				vgModifiers.get(0).parse(false);
				final VTDNav vnModifier = vgModifiers.get(0).getNav();
				xmModifiers.get(0).bind(vnModifier);
				vnModifier.toElement(VTDNav.ROOT);
				int attr = vnModifier.getAttrVal("put-code");
				if (attr > -1) {
					xmModifiers
						.get(0)
						.insertAttribute(
							" path=\"/" + orcidId + "/work/" + vnModifier.toNormalizedString(attr) + "\""
								+ " " + namespaceList);
				}
				buffer.set(0, new ByteArrayOutputStream());
				xmModifiers.get(0).output(buffer.get(0));
				buffer.get(0).close();
				return buffer.get(0).toString();
			} catch (NavException | ModifyException | IOException | TranscodeException | ParseException e) {
				e.printStackTrace();
				throw new RuntimeException(e);
			}
		}).collect(Collectors.toList());

		return updatedWorks;
	}
}
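For reference, splitWorks takes the raw bytes of an ORCID bulk works response and returns one self-contained XML document per work:work element, re-attaching the namespace declarations and a path attribute derived from put-code. A minimal driver sketch (the file name is illustrative, not part of this change):

    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.util.List;

    // Hypothetical caller: feed a downloaded bulk response into the splitter.
    byte[] bulkXml = Files.readAllBytes(Paths.get("multiple_downloaded_works.xml"));
    List<String> works = XMLRecordParser.splitWorks("0000-0001-7291-3210", bulkXml);
    works.forEach(w -> System.out.println(w.length() + " chars"));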
@@ -391,6 +391,28 @@ object DoiBoostMappingUtil {
		di
	}

	def createSubject(value: String, classId: String, schemeId: String): Subject = {
		val s = new Subject
		s.setQualifier(OafMapperUtils.qualifier(classId, classId, schemeId, schemeId))
		s.setValue(value)
		s
	}

	def createSubject(
		value: String,
		classId: String,
		className: String,
		schemeId: String,
		schemeName: String
	): Subject = {
		val s = new Subject
		s.setQualifier(OafMapperUtils.qualifier(classId, className, schemeId, schemeName))
		s.setValue(value)
		s
	}

	def createSP(
		value: String,
		classId: String,
@@ -59,52 +59,6 @@ object SparkGenerateDoiBoost {
	val workingDirPath = parser.get("workingPath")
	val openaireOrganizationPath = parser.get("openaireOrganizationPath")

	val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable {
		override def zero: Publication = new Publication

		override def reduce(b: Publication, a: (String, Publication)): Publication = {

			if (b == null) {
				if (a != null && a._2 != null) {
					a._2.setId(a._1)
					return a._2
				}
			} else {
				if (a != null && a._2 != null) {
					b.mergeFrom(a._2)
					b.setId(a._1)
					val authors = AuthorMerger.mergeAuthor(b.getAuthor, a._2.getAuthor)
					b.setAuthor(authors)
					return b
				}
			}
			new Publication
		}

		override def merge(b1: Publication, b2: Publication): Publication = {
			if (b1 == null) {
				if (b2 != null)
					return b2
			} else {
				if (b2 != null) {
					b1.mergeFrom(b2)
					val authors = AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor)
					b1.setAuthor(authors)
					if (b2.getId != null && b2.getId.nonEmpty)
						b1.setId(b2.getId)
					return b1
				}
			}
			new Publication
		}

		override def finish(reduction: Publication): Publication = reduction

		override def bufferEncoder: Encoder[Publication] = Encoders.kryo[Publication]

		override def outputEncoder: Encoder[Publication] = Encoders.kryo[Publication]
	}

	implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication]
	implicit val mapEncoderOrg: Encoder[Organization] = Encoders.kryo[Organization]
	implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset]

@@ -175,8 +129,33 @@ object SparkGenerateDoiBoost {
	.map(DoiBoostMappingUtil.fixPublication)
	.map(p => (p.getId, p))
	.groupByKey(_._1)
	.agg(crossrefAggregator.toColumn)
	.map(p => p._2)
	.reduceGroups((left, right) => {
		// Check that left is not null
		if (left != null && left._1 != null) {
			// If right is null, return left
			if (right == null || right._2 == null)
				left
			else {
				// Both left and right are non-null, so merge them
				val b1 = left._2
				val b2 = right._2
				b1.mergeFrom(b2)
				b1.mergeOAFDataInfo(b2)
				val authors = AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor)
				b1.setAuthor(authors)
				if (b2.getId != null && b2.getId.nonEmpty)
					b1.setId(b2.getId)
				// Return the merged publication
				(b1.getId, b1)
			}
		} else {
			// Left is null, so return right
			right
		}
	})
	.filter(s => s != null && s._2 != null)
	.map(s => s._2._2)
	.write
	.mode(SaveMode.Overwrite)
	.save(s"$workingDirPath/doiBoostPublicationFiltered")

@@ -305,4 +284,4 @@ object SparkGenerateDoiBoost {
	.save(s"$workingDirPath/doiBoostOrganization")
	}

	}
}
@@ -2,7 +2,7 @@ package eu.dnetlib.doiboost.crossref

import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf._
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils}
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils}
import eu.dnetlib.dhp.utils.DHPUtils
import eu.dnetlib.doiboost.DoiBoostMappingUtil
import eu.dnetlib.doiboost.DoiBoostMappingUtil._

@@ -201,7 +201,7 @@ case object Crossref2Oaf {

	if (subjectList.nonEmpty) {
		result.setSubject(
			subjectList.map(s => createSP(s, "keywords", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava
			subjectList.map(s => createSubject(s, "keyword", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava
		)
	}

@@ -280,10 +280,10 @@ case object Crossref2Oaf {
		instance.setDateofacceptance(asField(createdDate.getValue))
	}
	val s: List[String] = List("https://doi.org/" + doi)
	// val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct
	// if (links.nonEmpty) {
	//   instance.setUrl(links.asJava)
	// }
	if (s.nonEmpty) {
		instance.setUrl(s.asJava)
	}

@@ -576,14 +576,19 @@ case object Crossref2Oaf {

	def extractDate(dt: String, datePart: List[List[Int]]): String = {
		if (StringUtils.isNotBlank(dt))
			return dt
			return GraphCleaningFunctions.cleanDate(dt)
		if (datePart != null && datePart.size == 1) {
			val res = datePart.head
			if (res.size == 3) {
				val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d"
				if (dp.length == 10) {
					return dp
					return GraphCleaningFunctions.cleanDate(dp)
				}
			} else if (res.size == 2) {
				val dp = f"${res.head}-${res(1)}%02d-01"
				return GraphCleaningFunctions.cleanDate(dp)
			} else if (res.size == 1) {
				return GraphCleaningFunctions.cleanDate(s"${res.head}-01-01")
			}
		}
		null
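For reference, extractDate normalizes Crossref "date-parts" arrays to ISO-8601 strings by zero-padding month and day before handing the result to GraphCleaningFunctions.cleanDate. The equivalent padding logic, as a minimal Java sketch (values are illustrative):

    // [2017, 8] -> "2017-08-01"; [2017, 8, 22] -> "2017-08-22"
    String twoParts = String.format("%d-%02d-01", 2017, 8);
    String threeParts = String.format("%d-%02d-%02d", 2017, 8, 22);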
@@ -2,7 +2,7 @@ package eu.dnetlib.doiboost.mag

import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory
import eu.dnetlib.dhp.schema.oaf.{Instance, Journal, Publication, StructuredProperty}
import eu.dnetlib.dhp.schema.oaf.{Instance, Journal, Publication, StructuredProperty, Subject}
import eu.dnetlib.doiboost.DoiBoostMappingUtil
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
import org.json4s

@@ -210,8 +210,8 @@ case object ConversionUtil {
	val className = "Microsoft Academic Graph classification"
	val classid = "MAG"

	val p: List[StructuredProperty] = fieldOfStudy.subjects.flatMap(s => {
		val s1 = createSP(
	val p: List[Subject] = fieldOfStudy.subjects.flatMap(s => {
		val s1 = createSubject(
			s.DisplayName,
			classid,
			className,

@@ -219,10 +219,10 @@ case object ConversionUtil {
			ModelConstants.DNET_SUBJECT_TYPOLOGIES
		)
		val di = DoiBoostMappingUtil.generateDataInfo(s.Score.toString)
		var resList: List[StructuredProperty] = List(s1)
		var resList: List[Subject] = List(s1)
		if (s.MainType.isDefined) {
			val maintp = s.MainType.get
			val s2 = createSP(
			val s2 = createSubject(
				s.MainType.get,
				classid,
				className,

@@ -232,7 +232,7 @@ case object ConversionUtil {
			s2.setDataInfo(di)
			resList = resList ::: List(s2)
			if (maintp.contains(".")) {
				val s3 = createSP(
				val s3 = createSubject(
					maintp.split("\\.").head,
					classid,
					className,
@@ -161,13 +161,11 @@ public class OrcidClientTest {

	@Test
	@Disabled
	void testReadBase64CompressedRecord() throws Exception {
	void testReadBase64CompressedWork() throws Exception {
		final String base64CompressedRecord = IOUtils
			.toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64"));
			.toString(getClass().getResourceAsStream("0000-0001-7281-6306.compressed.base64"));
		final String recordFromSeqFile = ArgumentApplicationParser.decompressValue(base64CompressedRecord);
		logToFile(testPath, "\n\ndownloaded \n\n" + recordFromSeqFile);
		final String downloadedRecord = testDownloadRecord("0000-0003-3028-6161", REQUEST_TYPE_RECORD);
		assertEquals(recordFromSeqFile, downloadedRecord);
	}

	@Test

@@ -337,7 +335,7 @@ public class OrcidClientTest {
	@Ignore
	void testUpdatedRecord() throws Exception {
		final String base64CompressedRecord = IOUtils
			.toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64"));
			.toString(getClass().getResourceAsStream("0000-0001-7281-6306.compressed.base64"));
		final String record = ArgumentApplicationParser.decompressValue(base64CompressedRecord);
		logToFile(testPath, "\n\nrecord updated \n\n" + record);
	}
@@ -108,4 +108,12 @@ public class XMLRecordParserTest {
		work.setBase64CompressData(ArgumentApplicationParser.compressArgument(xml));
		OrcidClientTest.logToFile(testPath, JsonWriter.create(work));
	}

	@Test
	void testWorksSplit() throws Exception {
		String xml = IOUtils
			.toString(
				this.getClass().getResourceAsStream("multiple_downloaded_works.xml"));
		XMLRecordParser.splitWorks("0000-0001-7291-3210", xml.getBytes());
	}
}
@@ -0,0 +1,368 @@
{
  "indexed": {"date-parts": [[2022, 4, 22]], "date-time": "2022-04-22T15:30:54Z", "timestamp": 1650641454218},
  "reference-count": 31,
  "publisher": "Cambridge University Press (CUP)",
  "issue": "2",
  "license": [
    {"start": {"date-parts": [[2017, 8, 22]], "date-time": "2017-08-22T00:00:00Z", "timestamp": 1503360000000}, "content-version": "unspecified", "delay-in-days": 21, "URL": "https://www.cambridge.org/core/terms"}
  ],
  "content-domain": {"domain": [], "crossmark-restriction": false},
  "short-container-title": ["Dance Res. J."],
  "published-print": {"date-parts": [[2017, 8]]},
  "abstract": "<jats:p>Gaga, a practice developed by Israeli choreographer Ohad Naharin, is one of the most popular training methods on the global dance market. Structured as a metatechnique, or a system for negotiating techniques within one's body, Gaga teaches students to both draw on and reject multiple movement techniques to create their own movement. I consider how the paradigms of choreography, technique, and improvisation are blurred together in the pedagogical model of a metatechnique and how training dancers to shift between choreographer, dancer, and improviser has significant ramifications for understanding their agency. The metatechnique model of Gaga falls in line with neoliberal values of efficiency and a wide range of skills and knowledge; this analysis provides an understanding of recent trends in dance training in relation to contemporary political and socioeconomic structures.</jats:p>",
  "DOI": "10.1017/s0149767717000183",
  "type": "journal-article",
  "created": {"date-parts": [[2017, 8, 22]], "date-time": "2017-08-22T13:32:38Z", "timestamp": 1503408758000},
  "page": "26-43",
  "source": "Crossref",
  "is-referenced-by-count": 10,
  "title": ["Gaga as Metatechnique: Negotiating Choreography, Improvisation, and Technique in a Neoliberal Dance Market"],
  "prefix": "10.1017",
  "volume": "49",
  "author": [{"given": "Meghan", "family": "Quinlan", "sequence": "first", "affiliation": []}],
  "member": "56",
  "published-online": {"date-parts": [[2017, 8, 22]]},
  "reference": [
    {"key": "S0149767717000183_ref22", "first-page": "38", "article-title": "‘I Don't Want to do African … What About My Technique?’: Transforming Dancing Places into Spaces in the Academy", "volume": "4", "author": "Monroe", "year": "2011", "journal-title": "The Journal of Pan African Studies"},
    {"key": "S0149767717000183_ref8", "volume-title": "Discipline and Punish: The Birth of the Prison", "author": "Foucault", "year": "1995"},
    {"key": "S0149767717000183_ref24", "volume-title": "Sharing the Dance: Contact Improvisation and American Culture", "author": "Novack", "year": "1990"},
    {"key": "S0149767717000183_ref26", "doi-asserted-by": "publisher", "DOI": "10.1215/9780822387879"},
    {"key": "S0149767717000183_ref10", "doi-asserted-by": "publisher", "DOI": "10.1080/01472526.2015.1085759"},
    {"key": "S0149767717000183_ref4", "doi-asserted-by": "crossref", "first-page": "235", "volume-title": "Meaning in Motion", "author": "Foster", "year": "1997", "DOI": "10.1215/9780822397281-013"},
    {"key": "S0149767717000183_ref17", "doi-asserted-by": "publisher", "DOI": "10.1093/acprof:oso/9780199360369.001.0001"},
    {"key": "S0149767717000183_ref30", "volume-title": "The Precariat: The New Dangerous Class", "author": "Standing", "year": "2011"},
    {"key": "S0149767717000183_ref7", "volume-title": "Choreographing Empathy: Kinesthesia in Performance", "author": "Foster", "year": "2011"},
    {"key": "S0149767717000183_ref14", "volume-title": "A Brief History of Neoliberalism", "author": "Harvey", "year": "2007"},
    {"key": "S0149767717000183_ref27", "unstructured": "Quinlan Meghan . 2016. “Gaga as Politics: A Case Study of Contemporary Dance Training.” PhD diss., University of California, Riverside."},
    {"key": "S0149767717000183_ref11", "first-page": "xiii", "volume-title": "Taken by Surprise: A Dance Improvisation Reader", "author": "Gere", "year": "2003"},
    {"key": "S0149767717000183_ref13", "volume-title": "Declaration", "author": "Hardt", "year": "2012"},
    {"key": "S0149767717000183_ref29", "doi-asserted-by": "publisher", "DOI": "10.1057/9780230236844_9"},
    {"key": "S0149767717000183_ref5", "doi-asserted-by": "publisher", "DOI": "10.1057/9780230236844_6"},
    {"key": "S0149767717000183_ref28", "doi-asserted-by": "publisher", "DOI": "10.1017/S0149767700000528"},
    {"key": "S0149767717000183_ref12", "doi-asserted-by": "publisher", "DOI": "10.3998/mpub.287881"},
    {"key": "S0149767717000183_ref18", "first-page": "135", "volume-title": "Taken by Surprise: A Dance Improvisation Reader", "author": "Marks", "year": "2003"},
    {"key": "S0149767717000183_ref3", "doi-asserted-by": "crossref", "volume-title": "Bodies That Matter: On the Discursive Limits of Sex", "author": "Butler", "year": "2011", "DOI": "10.4324/9780203828274"},
    {"key": "S0149767717000183_ref32", "volume-title": "Choreographing Difference: The Body and Identity in Contemporary Dance", "author": "Albright", "year": "1997"},
    {"key": "S0149767717000183_ref16", "doi-asserted-by": "publisher", "DOI": "10.1017/S0149767714000163"},
    {"key": "S0149767717000183_ref15", "doi-asserted-by": "publisher", "DOI": "10.2307/1477803"},
    {"key": "S0149767717000183_ref21", "first-page": "455", "volume-title": "Incorporations", "author": "Mauss", "year": "1992"},
    {"key": "S0149767717000183_ref1", "volume-title": "The Body Eclectic: Evolving Practices in Dance Training", "author": "Bales", "year": "2008"},
    {"key": "S0149767717000183_ref9", "unstructured": "Gaga Movement Ltd. 2016. Gaga People. Dancers. Accessed June 28, 2016. http://gagapeople.com/english/."},
    {"key": "S0149767717000183_ref19", "volume-title": "Critical Moves: Dance Studies in Theory and Politics", "author": "Martin", "year": "1998"},
    {"key": "S0149767717000183_ref2", "volume-title": "Undoing the Demos: Neoliberalism's Stealth Revolution", "author": "Brown", "year": "2015"},
    {"key": "S0149767717000183_ref20", "doi-asserted-by": "publisher", "DOI": "10.1162/DRAM_a_00214"},
    {"key": "S0149767717000183_ref6", "doi-asserted-by": "publisher", "DOI": "10.1215/01610775-2009-016"},
    {"key": "S0149767717000183_ref23", "doi-asserted-by": "publisher", "DOI": "10.1093/acprof:oso/9780190201661.001.0001"},
    {"key": "S0149767717000183_ref25", "volume-title": "Flexible Citizenship: The Cultural Logics of Transnationality", "author": "Ong", "year": "1999"}
  ],
  "container-title": ["Dance Research Journal"],
  "original-title": [],
  "language": "en",
  "link": [
    {"URL": "https://www.cambridge.org/core/services/aop-cambridge-core/content/view/S0149767717000183", "content-type": "unspecified", "content-version": "vor", "intended-application": "similarity-checking"}
  ],
  "deposited": {"date-parts": [[2020, 10, 16]], "date-time": "2020-10-16T01:00:32Z", "timestamp": 1602810032000},
  "score": 1,
  "resource": {"primary": {"URL": "https://www.cambridge.org/core/product/identifier/S0149767717000183/type/journal_article"}},
  "subtitle": [],
  "short-title": [],
  "issued": {"date-parts": [[2017, 8]]},
  "references-count": 31,
  "journal-issue": {"issue": "2", "published-print": {"date-parts": [[2017, 8]]}},
  "alternative-id": ["S0149767717000183"],
  "URL": "http://dx.doi.org/10.1017/s0149767717000183",
  "relation": {},
  "ISSN": ["0149-7677", "1940-509X"],
  "issn-type": [{"value": "0149-7677", "type": "print"}, {"value": "1940-509X", "type": "electronic"}],
  "subject": ["Visual Arts and Performing Arts"],
  "published": {"date-parts": [[2017, 8]]}
}
@@ -0,0 +1,330 @@
{
  "indexed":{"date-parts":[[2022,4,14]],"date-time":"2022-04-14T11:27:30Z","timestamp":1649935650109},
  "reference-count":22,
  "publisher":"SAGE Publications",
  "issue":"2",
  "license":[
    {"start":{"date-parts":[[1980,4,1]],"date-time":"1980-04-01T00:00:00Z","timestamp":323395200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/journals.sagepub.com\/page\/policies\/text-and-data-mining-license"}
  ],
  "content-domain":{"domain":[],"crossmark-restriction":false},
  "short-container-title":["Perception"],
  "published-print":{"date-parts":[[1980,4]]},
  "abstract":"<jats:p> To answer the question \u2018What is suppressed during binocular rivalry?\u2019 a series of three experiments was performed. In the first experiment observers viewed binocular rivalry between orthogonally oriented patterns. When the dominant and suppressed patterns were interchanged between the eyes observers continued seeing with the dominant eye, indicating that an eye, not a pattern, is suppressed during rivalry. In a second experiment it was found that a suppressed eye was able to contribute to stereopsis. A third experiment demonstrated that the predominance of an eye could be influenced by prior adaptation of the other eye, indicating that binocular mechanisms participate in the rivalry process. <\/jats:p>",
  "DOI":"10.1068\/p090223",
  "type":"journal-article",
  "created":{"date-parts":[[2007,1,23]],"date-time":"2007-01-23T15:21:36Z","timestamp":1169565696000},
  "page":"223-231",
  "source":"Crossref",
  "is-referenced-by-count":123,
  "title":["What is Suppressed during Binocular Rivalry?"],
  "prefix":"10.1177",
  "volume":"9",
  "author":[
    {"given":"Randolph","family":"Blake","sequence":"first","affiliation":[{"name":"Cresap Neuroscience Laboratory, Northwestern University, Evanston, Illinois 60201, USA"}]},
    {"given":"David H","family":"Westendorf","sequence":"additional","affiliation":[{"name":"Department of Psychology, University of Arkansas, Fayetteville, Arkansas 72701, USA"}]},
    {"given":"Randall","family":"Overton","sequence":"additional","affiliation":[{"name":"Department of Psychology, Illinois State University, Normal, Illinois 61761, USA"}]}
  ],
  "member":"179",
  "published-online":{"date-parts":[[2016,6,25]]},
  "reference":[
    {"key":"bibr1-p090223","doi-asserted-by":"publisher","DOI":"10.1136\/bjo.37.1.37"},
    {"key":"bibr2-p090223","doi-asserted-by":"publisher","DOI":"10.1037\/0096-1523.5.2.315"},
    {"key":"bibr3-p090223","doi-asserted-by":"publisher","DOI":"10.1016\/0042-6989(74)90065-0"},
    {"key":"bibr4-p090223","doi-asserted-by":"publisher","DOI":"10.1068\/p080143"},
    {"key":"bibr5-p090223","doi-asserted-by":"publisher","DOI":"10.1016\/0042-6989(70)90036-2"},
    {"key":"bibr6-p090223","doi-asserted-by":"publisher","DOI":"10.1113\/jphysiol.1969.sp008862"},
    {"key":"bibr7-p090223","doi-asserted-by":"publisher","DOI":"10.1113\/jphysiol.1972.sp010006"},
    {"key":"bibr8-p090223","doi-asserted-by":"publisher","DOI":"10.1146\/annurev.ps.23.020172.002213"},
    {"key":"bibr9-p090223","doi-asserted-by":"publisher","DOI":"10.1126\/science.166.3902.245"},
    {"key":"bibr10-p090223","doi-asserted-by":"publisher","DOI":"10.1037\/h0075805"},
    {"key":"bibr11-p090223","doi-asserted-by":"publisher","DOI":"10.1113\/jphysiol.1968.sp008552"},
    {"key":"bibr12-p090223","doi-asserted-by":"publisher","DOI":"10.1113\/jphysiol.1965.sp007784"},
    {"key":"bibr13-p090223","doi-asserted-by":"publisher","DOI":"10.1037\/h0032455"},
    {"key":"bibr14-p090223","volume-title":"Treatise on Physiological Optics","volume":"3","author":"von Helmholtz H","year":"1866","edition":"3"},
    {"key":"bibr15-p090223","doi-asserted-by":"publisher","DOI":"10.1068\/p040125"},
    {"key":"bibr16-p090223","volume-title":"On Binocular Rivalry","author":"Levelt W J M","year":"1965"},
    {"key":"bibr17-p090223","doi-asserted-by":"publisher","DOI":"10.1001\/archopht.1935.00840020011001"},
    {"key":"bibr18-p090223","doi-asserted-by":"publisher","DOI":"10.3758\/BF03205796"},
    {"key":"bibr19-p090223","doi-asserted-by":"publisher","DOI":"10.3758\/BF03210180"},
    {"key":"bibr20-p090223","doi-asserted-by":"publisher","DOI":"10.1037\/0033-2909.85.2.376"},
    {"key":"bibr21-p090223","doi-asserted-by":"publisher","DOI":"10.1016\/0042-6989(79)90169-X"},
    {"key":"bibr22-p090223","doi-asserted-by":"publisher","DOI":"10.3758\/BF03210465"}
  ],
  "container-title":["Perception"],
  "original-title":[],
  "language":"en",
  "link":[
    {"URL":"http:\/\/journals.sagepub.com\/doi\/pdf\/10.1068\/p090223","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},
    {"URL":"http:\/\/journals.sagepub.com\/doi\/pdf\/10.1068\/p090223","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}
  ],
  "deposited":{"date-parts":[[2021,12,3]],"date-time":"2021-12-03T11:49:48Z","timestamp":1638532188000},
  "score":1,
  "resource":{"primary":{"URL":"http:\/\/journals.sagepub.com\/doi\/10.1068\/p090223"}},
  "subtitle":[],
  "short-title":[],
  "issued":{"date-parts":[[1980,4]]},
  "references-count":22,
  "journal-issue":{"issue":"2","published-print":{"date-parts":[[1980,4]]}},
  "alternative-id":["10.1068\/p090223"],
  "URL":"http:\/\/dx.doi.org\/10.1068\/p090223",
  "relation":{},
  "ISSN":["0301-0066","1468-4233"],
  "issn-type":[{"value":"0301-0066","type":"print"},{"value":"1468-4233","type":"electronic"}],
  "subject":["Artificial Intelligence","Sensory Systems","Experimental and Cognitive Psychology","Ophthalmology"],
  "published":{"date-parts":[[1980,4]]}
}
@@ -1443,7 +1443,7 @@
        17
      ]
    ],
    "date-time": "2021-05-17T15:08:12Z",
    "date-time": "3021-05-17T15:08:12Z",
    "timestamp": 1621264092000
  },
  "score": 1.0,

@@ -1461,7 +1461,7 @@
        22
      ]
    ],
    "date-time": "2021-05-17T15:08:12Z"
    "date-time": "3021-05-17T15:08:12Z"
  },
  "references-count": 83,
  "journal-issue": {
@@ -0,0 +1 @@
H4sIAAAAAAAAAN1Y23LbNhB971dg+NAnkiKpSJZUS2luTerESSd22pm+QSQkISEBFiAlqxn9exe8CaRExkmc4Uw9Y9rEnrO72MUCC14+votCtCVCUs7mhms7BiLM5wFl67nx4fY3a2IgmWAW4JAzMjf2RBqPF5c7Lj7N1APFONnMjYEDPxb8utaFN3Gt8dAZD5R8MHWHk+nF1DUQAlNMzihLiGA4nBubJIlng8Fut7O58GkAz/WAyUGJMH5CqGSRIPVxkjnZRqsgOi+gMqHM72ZqoBqXxIKAShJ0UCuMzuTJhgiL4Yi0M48YnRmRaAnZ2NC4nXnE1CIkBBcd0VFio8D6PIq6ApLLde0wSS464pDLdUYMLnLIohWQBNNQtnObSF3LJ7LfdRouAOXMSAQaOqKgxLWo3eVrzaIBYQldUdIVw1OwrmuVsrxu2vgFoBYlQVZEQMmRrgAdQToXB4EgsoNXAHQOZVsKi9WKuaTdRdFE6lpUZbczlbTMxwZKi4t9O7gA1HISxSHfRxDirkSWGJ35T4pDSMuXdooaTOdLIrbU7yjaAmDU1viXVnYtZ7DLQFxpV7qPmHoFSoKFrzaNVHQs8TquXpEwJsiWkl2XyxVI5y7TsCPjSnrOV1AkeSq6InoCNVCcJhYcQUA6Hh5bKumShjSBpRSny5D6xiIzqH4u8/1q5guidmIrgOfCczzXcoaWN711vdnQm7mPbGfs/X05OIc+0RVimVgRHIRQ5UeNnuWMLce9dUDdaOY59tgdHjWe4ZzozSd5HD+VWX5IYV3DJlNH6chU0IWKqISQHsOZE6uz2LNG04lnTaaTYeWiIrZqVWf5ooudAVrpGy6TReVNRcqG6/Md3GvCjbCoo3Jx4/M4lchCL0KpFqlo6spQZ9VgCdWrKt7igq6p+uN/fYzPNDrfENxz7IcO7n3m2xqbLIxXXG5SjJ7idL1pV1uPeCMfmiDrGROahC35yUXPOHR/UcwFFnskU9hutziEnjSIOfSFcoaeMFQ0iMoJkEG5rVJJ1KigTFIfxaCDMoLWIeURRoKs4ZBR6pI02FcONly5HJxzMPf6I8xFnfu58C1JBbfeQZsc8vW+4NUhDb5Pk8zbxsRrMivZx2SxpMuE3BU666IuLsQoJYtfMSTGD8nnLGOe416YmTtojj7/8LgezCIEylo9RAdzD3u8Glc+HcwtD9Mo88qdHkyWqnZWvcFLjNdEZhLvYmq53sQ5mDhNNlzkk4BLyN5EtzaCKwl6gxkx0ZP85SlMnoTSRB+Kd56uViQx0Yv8/SUPgwgzE90UZHBpr95e2MXIb1yQDPHWfp2P/IH9T0SY6L19VSgVnFHpq7HC7DWEB6Ztoiu7MHSzoRsTPbOtQu2zDUDwOo1iHGITXeejr6COcBhWc3nJkwSLgCvrL/Oh5xseYkGB86rg8NUqc/BNqRln4XhaRgCyrhzJ2RzeMvT7asJ+Ji7YVxBLqch/ltNPQxzQysO/sICe00Svy4ldc/aRKPHh0Fyg+fpr1tLpsi82AbWcy4Ip1mxZfrWVXu2d2Ymfm6ofqzpKLbKFWmFViWcjp1tTu7pSldbpy/PGNET7pq2B8hoOOK28OBHeS00eadexXWc6HDCScuYPGL9znYuzmhuZ6VLNuIigMf6XBCgRGCo+68ATkRLjKwwetdzPqiBhlgl1n11IEq7Oaq2hzp93rRn5vpQRGjxIyjxLerZjTUbO0L2YjkfjRz8yX/e09n9LFpWSPUyBjbzhaDIeI/jHm4zcH1tcYMxS1h4+RzFsrxZ/2DSdk8rTPRRunwvt1iezzt0G4YCyHRx1xTcjG3CPocjmp0v2ZxzFv6gZMCJ+fz6/fju5fffk/Y3Wb4cnnRZX3coyTbhobtxN+Zlo5hBBAprkbe2x4SiPNE3YCFm3/m8yXzY4vRjXGqp+7B8buF7saw1jP8nXG9RePKg1xL14oDfg/SxCveHvxYPaBaMXD7QLTS/2Ty5QvXihXdh62o70C2IvLugX0n5ycLwA97QSywt3TydyccHvJ/vaB4W+DsTyA0Yv9rUPJj0dx9UHml7s6x+E+jkKyw9Q32P9VFZcFAqBeiz+A4MY5OQYIQAA
@@ -0,0 +1,57 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<bulk:bulk xmlns:internal="http://www.orcid.org/ns/internal" xmlns:education="http://www.orcid.org/ns/education" xmlns:distinction="http://www.orcid.org/ns/distinction" xmlns:deprecated="http://www.orcid.org/ns/deprecated" xmlns:other-name="http://www.orcid.org/ns/other-name" xmlns:membership="http://www.orcid.org/ns/membership" xmlns:error="http://www.orcid.org/ns/error" xmlns:common="http://www.orcid.org/ns/common" xmlns:record="http://www.orcid.org/ns/record" xmlns:personal-details="http://www.orcid.org/ns/personal-details" xmlns:keyword="http://www.orcid.org/ns/keyword" xmlns:email="http://www.orcid.org/ns/email" xmlns:external-identifier="http://www.orcid.org/ns/external-identifier" xmlns:funding="http://www.orcid.org/ns/funding" xmlns:preferences="http://www.orcid.org/ns/preferences" xmlns:address="http://www.orcid.org/ns/address" xmlns:invited-position="http://www.orcid.org/ns/invited-position" xmlns:work="http://www.orcid.org/ns/work" xmlns:history="http://www.orcid.org/ns/history" xmlns:employment="http://www.orcid.org/ns/employment" xmlns:qualification="http://www.orcid.org/ns/qualification" xmlns:service="http://www.orcid.org/ns/service" xmlns:person="http://www.orcid.org/ns/person" xmlns:activities="http://www.orcid.org/ns/activities" xmlns:researcher-url="http://www.orcid.org/ns/researcher-url" xmlns:peer-review="http://www.orcid.org/ns/peer-review" xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:research-resource="http://www.orcid.org/ns/research-resource">
	<work:work put-code="16639612" visibility="public">
		<common:created-date>2015-05-23T18:56:52.486Z</common:created-date>
		<common:last-modified-date>2017-02-28T08:22:12.454Z</common:last-modified-date>
		<common:source>
			<common:source-orcid>
				<common:uri>https://orcid.org/0000-0001-7291-3210</common:uri>
				<common:path>0000-0001-7291-3210</common:path>
				<common:host>orcid.org</common:host>
			</common:source-orcid>
			<common:source-name>Paolo Manghi</common:source-name>
		</common:source>
		<work:title>
			<common:title>The Query Language TQL</common:title>
		</work:title>
		<work:journal-title>5th International Workshop on Web and Data Bases (WebDB02) in conjunction with ACM SIGMOD 2002</work:journal-title>
		<work:citation>
			<work:citation-type>bibtex</work:citation-type>
			<work:citation-value>@inproceedings{Conforti2002, Author= {Giovanni Conforti and Giorgio Ghelli and Antonio Albano and Dario Colazzo and Paolo Manghi and Carlo Sartiani}, Bibsource= {DBLP, http://dblp.uni-trier.de}, Booktitle= {5th International Workshop on Web and Data Bases (WebDB02) in conjunction with ACM SIGMOD 2002}, Ee= {http://www.db.ucsd.edu/webdb2002/papers/43.pdf}, Pages= {13-18}, Title= {The Query Language TQL}, Year= {2002}}
			</work:citation-value>
		</work:citation>
		<work:type>conference-paper</work:type>
		<common:publication-date>
			<common:year>2002</common:year>
		</common:publication-date>
		<common:external-ids/>
	</work:work>
	<work:work put-code="16639628" visibility="public">
		<common:created-date>2015-05-23T18:58:18.492Z</common:created-date>
		<common:last-modified-date>2017-02-28T08:22:12.455Z</common:last-modified-date>
		<common:source>
			<common:source-orcid>
				<common:uri>https://orcid.org/0000-0001-7291-3210</common:uri>
				<common:path>0000-0001-7291-3210</common:path>
				<common:host>orcid.org</common:host>
			</common:source-orcid>
			<common:source-name>Paolo Manghi</common:source-name>
		</common:source>
		<work:title>
			<common:title>The Query Language TQL - Demo Presentation</common:title>
		</work:title>
		<work:journal-title>X Convegno nazionale su Sistemi Evoluti per Basi di Dati (SEBD)</work:journal-title>
		<work:citation>
			<work:citation-type>bibtex</work:citation-type>
			<work:citation-value>@inproceedings{Conforti2002Demo, Address= {Portoferraio, Italy}, Author= {Giovanni Conforti and Giorgio Ghelli and Antonio Albano and Dario Colazzo and Paolo Manghi and Carlo Sartiani}, Bibsource= {DBLP, http://dblp.uni-trier.de}, Booktitle= {X Convegno nazionale su Sistemi Evoluti per Basi di Dati (SEBD)}, Month= {June}, Pages= {427-431}, Title= {The Query Language TQL - Demo Presentation}, Year= {2002}}
			</work:citation-value>
		</work:citation>
		<work:type>conference-paper</work:type>
		<common:publication-date>
			<common:year>2002</common:year>
		</common:publication-date>
		<common:external-ids/>
	</work:work>
</bulk:bulk>
@@ -7,5 +7,6 @@ log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.logger.org = ERROR
log4j.logger.eu.dnetlib = DEBUG
log4j.logger.eu.dnetlib.doiboost.orcid = INFO
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
@@ -73,6 +73,20 @@ class CrossrefMappingTest {

	}

	@Test
	def crossrefIssueDateTest(): Unit = {
		val json =
			Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/issue_date.json")).mkString
		assertNotNull(json)
		assertFalse(json.isEmpty)
		val resultList: List[Oaf] = Crossref2Oaf.convert(json)
		assertTrue(resultList.nonEmpty)

		val items = resultList.filter(p => p.isInstanceOf[Result])

		println(mapper.writeValueAsString(items.head))
	}

	@Test
	def testOrcidID(): Unit = {
		val json = Source

@@ -82,7 +96,7 @@ class CrossrefMappingTest {
		.mkString

	assertNotNull(json)
	assertFalse(json.isEmpty);
	assertFalse(json.isEmpty)

	val resultList: List[Oaf] = Crossref2Oaf.convert(json)

@@ -541,6 +555,31 @@ class CrossrefMappingTest {

	}

	@Test
	def testConvertFromCrossRef2OafIssue(): Unit = {
		val json = Source
			.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article_nojournal.json"))
			.mkString
		assertNotNull(json)

		assertFalse(json.isEmpty);

		val resultList: List[Oaf] = Crossref2Oaf.convert(json)

		assertTrue(resultList.nonEmpty)

		val items = resultList.filter(p => p.isInstanceOf[Publication])

		assert(items.nonEmpty)
		assert(items.size == 1)
		val pub: Publication = items.head.asInstanceOf[Publication]

		assertNotNull(pub.getJournal.getIssnPrinted)
		assertNotNull(pub.getJournal.getIssnOnline)
		assertNotNull(pub.getJournal.getName)

	}

	@Test
	def testSetDateOfAcceptanceCrossRef2Oaf(): Unit = {
@@ -5,6 +5,7 @@ import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.util.ArrayList;
import java.util.Objects;
import java.util.Optional;

import org.apache.commons.io.IOUtils;

@@ -96,6 +97,7 @@ public class SparkBulkTagJob {
	ResultTagger resultTagger = new ResultTagger();
	readPath(spark, inputPath, resultClazz)
		.map(patchResult(), Encoders.bean(resultClazz))
		.filter(Objects::nonNull)
		.map(
			(MapFunction<R, R>) value -> resultTagger
				.enrichContextCriteria(
@@ -5,6 +5,8 @@ import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

import org.apache.avro.generic.GenericData;

import com.google.gson.Gson;

/** Created by miriam on 01/08/2018. */

@@ -14,6 +16,7 @@ public class Community implements Serializable {
	private List<String> subjects = new ArrayList<>();
	private List<Provider> providers = new ArrayList<>();
	private List<ZenodoCommunity> zenodoCommunities = new ArrayList<>();
	private SelectionConstraints constraints = new SelectionConstraints();

	public String toJson() {
		final Gson g = new Gson();

@@ -57,4 +60,12 @@ public class Community implements Serializable {
	public void setZenodoCommunities(List<ZenodoCommunity> zenodoCommunities) {
		this.zenodoCommunities = zenodoCommunities;
	}

	public SelectionConstraints getConstraints() {
		return constraints;
	}

	public void setConstraints(SelectionConstraints constraints) {
		this.constraints = constraints;
	}
}
@@ -24,6 +24,8 @@ public class CommunityConfiguration implements Serializable {
	private Map<String, List<Pair<String, SelectionConstraints>>> datasourceMap = new HashMap<>();
	// map zenodocommunityid -> communityid
	private Map<String, List<Pair<String, SelectionConstraints>>> zenodocommunityMap = new HashMap<>();
	// map communityid -> selectionconstraints
	private Map<String, SelectionConstraints> selectionConstraintsMap = new HashMap<>();

	public Map<String, List<Pair<String, SelectionConstraints>>> getSubjectMap() {
		return subjectMap;

@@ -51,6 +53,14 @@ public class CommunityConfiguration implements Serializable {
		this.zenodocommunityMap = zenodocommunityMap;
	}

	public Map<String, SelectionConstraints> getSelectionConstraintsMap() {
		return selectionConstraintsMap;
	}

	public void setSelectionConstraintsMap(Map<String, SelectionConstraints> selectionConstraintsMap) {
		this.selectionConstraintsMap = selectionConstraintsMap;
	}

	CommunityConfiguration(final Map<String, Community> communities) {
		this.communities = communities;
		init();

@@ -67,6 +77,9 @@ public class CommunityConfiguration implements Serializable {
	if (zenodocommunityMap == null) {
		zenodocommunityMap = Maps.newHashMap();
	}
	if (selectionConstraintsMap == null) {
		selectionConstraintsMap = Maps.newHashMap();
	}

	for (Community c : getCommunities().values()) {
		// get subjects

@@ -87,6 +100,7 @@ public class CommunityConfiguration implements Serializable {
			new Pair<>(id, zc.getSelCriteria()),
			zenodocommunityMap);
		}
		selectionConstraintsMap.put(id, c.getConstraints());
	}
}
@@ -85,9 +85,22 @@ public class CommunityConfigurationFactory {
		c.setSubjects(parseSubjects(node));
		c.setProviders(parseDatasources(node));
		c.setZenodoCommunities(parseZenodoCommunities(node));
		c.setConstraints(parseConstrains(node));
		return c;
	}

	private static SelectionConstraints parseConstrains(Node node) {
		Node aconstraints = node.selectSingleNode("./advancedConstraints");
		if (aconstraints == null) {
			return null;
		}
		SelectionConstraints selectionConstraints = new Gson()
			.fromJson(aconstraints.getText(), SelectionConstraints.class);

		selectionConstraints.setSelection(resolver);
		return selectionConstraints;
	}

	private static List<String> parseSubjects(final Node node) {

		final List<String> subjects = Lists.newArrayList();
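For reference, parseConstrains deserializes the text of the advancedConstraints profile element as JSON via Gson and then wires the verbs to Selection implementations. A sketch of the call, where the payload shape is an assumption for illustration (the actual SelectionConstraints schema is defined elsewhere in the project):

    // Assumed payload shape: a list of criteria, each holding constraints with verb/field/value.
    String json = "{\"criteria\":[{\"constraint\":[{\"verb\":\"contains\",\"field\":\"subject\",\"value\":\"covid\"}]}]}";
    SelectionConstraints sc = new Gson().fromJson(json, SelectionConstraints.class);
    sc.setSelection(resolver); // resolve verbs to Selection predicates, as parseConstrains does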
@@ -11,6 +11,7 @@ public class Constraint implements Serializable {
	private String verb;
	private String field;
	private String value;
	// private String element;
	private Selection selection;

	public String getVerb() {

@@ -50,4 +51,12 @@ public class Constraint implements Serializable {
	public boolean verifyCriteria(String metadata) {
		return selection.apply(metadata);
	}

	// public String getElement() {
	//   return element;
	// }
	//
	// public void setElement(String element) {
	//   this.element = element;
	// }
}
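For reference, Constraint delegates verifyCriteria to a Selection predicate resolved from the verb. A minimal sketch of one such predicate, assuming Selection is a single-method interface over apply(String) (if it is not a functional interface, an anonymous class would be needed instead of a lambda, and the setter name below is assumed):

    // Hypothetical "contains" selection, illustrating how verifyCriteria evaluates.
    Selection contains = md -> md.toLowerCase().contains("binocular");
    Constraint c = new Constraint();
    c.setVerb("contains");
    c.setField("title");
    c.setValue("binocular");
    c.setSelection(contains); // assumed setter matching the private field above
    boolean hit = c.verifyCriteria("What is Suppressed during Binocular Rivalry?"); // true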
@@ -18,6 +18,8 @@ public class QueryInformationSystem {
	+ " let $datasources := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::contentproviders')]/concept "
	+ " let $organizations := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::resultorganizations')]/concept "
	+ " let $communities := $x//CONFIGURATION/context/category[./@id=concat($x//CONFIGURATION/context/@id,'::zenodocommunities')]/concept "
	+ " let $fos := $x//CONFIGURATION/context/param[./@name='fos']/text() "
	+ " let $sdg := $x//CONFIGURATION/context/param[./@name='sdg']/text() "
	+
	"let $zenodo := $x//param[./@name='zenodoCommunity']/text() "
	+ " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] and $x//context/param[./@name = 'status']/text() != 'hidden' "

@@ -28,6 +30,12 @@ public class QueryInformationSystem {
	+ " {for $y in tokenize($subj,',') "
	+ " return "
	+ " <subject>{$y}</subject>} "
	+ " {for $y in tokenize($fos,',') "
	+ " return "
	+ " <subject>{$y}</subject>} "
	+ " {for $y in tokenize($sdg,',') "
	+ " return "
	+ " <subject>{$y}</subject>} "
	+ " </subjects> "
	+ " <datasources> "
	+ " {for $d in $datasources "

@@ -61,6 +69,9 @@ public class QueryInformationSystem {
	+ " </selcriteria> "
	+ " </zenodocommunity>} "
	+ " </zenodocommunities> "
	+ "<advancedConstraint>"
	+ "{$x//CONFIGURATION/context/param[./@name='advancedConstraint']/text()} "
	+ "</advancedConstraint>"
	+ " </community>";

	public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl)
@@ -15,7 +15,10 @@ import com.google.gson.Gson;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;

import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;

/** Created by miriam on 02/08/2018. */
public class ResultTagger implements Serializable {

@@ -95,13 +98,6 @@ public class ResultTagger implements Serializable {

	}

	// result
	//   .getInstance()
	//   .stream()
	//   .map(i -> new Pair<>(i.getCollectedfrom().getKey(), i.getHostedby().getKey()))
	//   .flatMap(p -> Stream.of(p.getFst(), p.getSnd()))
	//   .map(s -> StringUtils.substringAfter(s, "|"))
	//   .collect(Collectors.toCollection(HashSet::new))
	tmp
		.forEach(
			dsId -> datasources

@@ -135,6 +131,25 @@ public class ResultTagger implements Serializable {

	communities.addAll(czenodo);

	/* Tagging for Advanced Constraints */
	final Set<String> aconstraints = new HashSet<>();

	conf
		.getSelectionConstraintsMap()
		.keySet()
		.forEach(communityId -> {
			if (conf.getSelectionConstraintsMap().get(communityId) != null &&
				conf
					.getSelectionConstraintsMap()
					.get(communityId)
					.getCriteria()
					.stream()
					.anyMatch(crit -> crit.verifyCriteria(param)))
				aconstraints.add(communityId);
		});

	communities.addAll(aconstraints);

	clearContext(result);

	/* Verify if there is something to bulktag */

@@ -143,7 +158,8 @@ public class ResultTagger implements Serializable {
	}

	result.getContext().forEach(c -> {
		if (communities.contains(c.getId())) {
		final String cId = c.getId();
		if (communities.contains(cId)) {
			Optional<List<DataInfo>> opt_dataInfoList = Optional.ofNullable(c.getDataInfo());
			List<DataInfo> dataInfoList;
			if (opt_dataInfoList.isPresent())

@@ -152,30 +168,51 @@ public class ResultTagger implements Serializable {
			dataInfoList = new ArrayList<>();
			c.setDataInfo(dataInfoList);
		}
		if (subjects.contains(c.getId()))
		if (subjects.contains(cId))
			dataInfoList
				.add(
					getDataInfo(
						BULKTAG_DATA_INFO_TYPE,
						CLASS_ID_SUBJECT,
						CLASS_NAME_BULKTAG_SUBJECT,
						TAGGING_TRUST));
		if (datasources.contains(c.getId()))
					OafMapperUtils
						.dataInfo(
							false, BULKTAG_DATA_INFO_TYPE, true, false,
							OafMapperUtils
								.qualifier(
									CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS,
									DNET_PROVENANCE_ACTIONS),
							TAGGING_TRUST));
		if (datasources.contains(cId))
			dataInfoList
				.add(
					getDataInfo(
						BULKTAG_DATA_INFO_TYPE,
						CLASS_ID_DATASOURCE,
						CLASS_NAME_BULKTAG_DATASOURCE,
						TAGGING_TRUST));
		if (czenodo.contains(c.getId()))
					OafMapperUtils
						.dataInfo(
							false, BULKTAG_DATA_INFO_TYPE, true, false,
							OafMapperUtils
								.qualifier(
									CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS,
									DNET_PROVENANCE_ACTIONS),
							TAGGING_TRUST));
		if (czenodo.contains(cId))
			dataInfoList
				.add(
					getDataInfo(
						BULKTAG_DATA_INFO_TYPE,
						CLASS_ID_CZENODO,
						CLASS_NAME_BULKTAG_ZENODO,
						TAGGING_TRUST));
					OafMapperUtils
						.dataInfo(
							false, BULKTAG_DATA_INFO_TYPE, true, false,
							OafMapperUtils
								.qualifier(
									CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS,
									DNET_PROVENANCE_ACTIONS),
							TAGGING_TRUST));
		if (aconstraints.contains(cId))
			dataInfoList
				.add(
					OafMapperUtils
						.dataInfo(
							false, BULKTAG_DATA_INFO_TYPE, true, false,
							OafMapperUtils
								.qualifier(
									CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT,
									DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
							TAGGING_TRUST));

		}
	});

@@ -196,27 +233,48 @@ public class ResultTagger implements Serializable {
		if (subjects.contains(c))
			dataInfoList
				.add(
					getDataInfo(
						BULKTAG_DATA_INFO_TYPE,
						CLASS_ID_SUBJECT,
						CLASS_NAME_BULKTAG_SUBJECT,
						TAGGING_TRUST));
					OafMapperUtils
						.dataInfo(
							false, BULKTAG_DATA_INFO_TYPE, true, false,
							OafMapperUtils
								.qualifier(
									CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT, DNET_PROVENANCE_ACTIONS,
									DNET_PROVENANCE_ACTIONS),
							TAGGING_TRUST));
		if (datasources.contains(c))
			dataInfoList
				.add(
					getDataInfo(
						BULKTAG_DATA_INFO_TYPE,
						CLASS_ID_DATASOURCE,
						CLASS_NAME_BULKTAG_DATASOURCE,
						TAGGING_TRUST));
					OafMapperUtils
						.dataInfo(
							false, BULKTAG_DATA_INFO_TYPE, true, false,
							OafMapperUtils
								.qualifier(
									CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE,
									DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
							TAGGING_TRUST));
		if (czenodo.contains(c))
			dataInfoList
				.add(
					getDataInfo(
						BULKTAG_DATA_INFO_TYPE,
						CLASS_ID_CZENODO,
						CLASS_NAME_BULKTAG_ZENODO,
						TAGGING_TRUST));
					OafMapperUtils
						.dataInfo(
							false, BULKTAG_DATA_INFO_TYPE, true, false,
							OafMapperUtils
								.qualifier(
									CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO, DNET_PROVENANCE_ACTIONS,
									DNET_PROVENANCE_ACTIONS),
							TAGGING_TRUST));
		if (aconstraints.contains(c))
			dataInfoList
				.add(
					OafMapperUtils
						.dataInfo(
							false, BULKTAG_DATA_INFO_TYPE, true, false,
							OafMapperUtils
								.qualifier(
									CLASS_ID_ADVANCED_CONSTRAINT, CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT,
									DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
							TAGGING_TRUST));

		context.setDataInfo(dataInfoList);
		return context;
	})

@@ -226,22 +284,4 @@ public class ResultTagger implements Serializable {
		return result;
	}

	public static DataInfo getDataInfo(
		String inference_provenance, String inference_class_id, String inference_class_name, String trust) {
		DataInfo di = new DataInfo();
		di.setInferred(true);
		di.setInferenceprovenance(inference_provenance);
		di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name));
		di.setTrust(trust);
		return di;
	}

	public static Qualifier getQualifier(String inference_class_id, String inference_class_name) {
		Qualifier pa = new Qualifier();
		pa.setClassid(inference_class_id);
		pa.setClassname(inference_class_name);
		pa.setSchemeid(DNET_PROVENANCE_ACTIONS);
		pa.setSchemename(DNET_PROVENANCE_ACTIONS);
		return pa;
	}
}
@@ -11,12 +11,14 @@ public class TaggingConstants {
	public static final String CLASS_ID_SUBJECT = "community:subject";
	public static final String CLASS_ID_DATASOURCE = "community:datasource";
	public static final String CLASS_ID_CZENODO = "community:zenodocommunity";
	public static final String CLASS_ID_ADVANCED_CONSTRAINT = "community:advconstraint";

	public static final String ZENODO_COMMUNITY_INDICATOR = "zenodo.org/communities/";

	public static final String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject";
	public static final String CLASS_NAME_BULKTAG_DATASOURCE = "Bulktagging for Community - Datasource";
	public static final String CLASS_NAME_BULKTAG_ZENODO = "Bulktagging for Community - Zenodo";
	public static final String CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT = "Bulktagging for Community - Advanced Constraints";

	public static final String TAGGING_TRUST = "0.8";
}
@@ -0,0 +1,29 @@

package eu.dnetlib.dhp.bulktag.eosc;

import java.io.Serializable;

/**
 * @author miriam.baglioni
 * @Date 21/07/22
 */
public class DatasourceMaster implements Serializable {
	private String datasource;
	private String master;

	public String getDatasource() {
		return datasource;
	}

	public void setDatasource(String datasource) {
		this.datasource = datasource;
	}

	public String getMaster() {
		return master;
	}

	public void setMaster(String master) {
		this.master = master;
	}
}
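For reference, DatasourceMaster is written out one JSON object per line by ReadMasterDatasourceFromDB.writeMap (below). A sketch of the resulting record, with hypothetical identifiers:

    ObjectMapper mapper = new ObjectMapper();
    DatasourceMaster dm = new DatasourceMaster();
    dm.setDatasource("eosc________::zenodo");           // hypothetical datasource id
    dm.setMaster("10|eosc________::0123456789abcdef");  // hypothetical OpenAIRE master id
    String line = mapper.writeValueAsString(dm);        // throws JsonProcessingException
    // line -> {"datasource":"eosc________::zenodo","master":"10|eosc________::0123456789abcdef"}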
@ -0,0 +1,136 @@

package eu.dnetlib.dhp.bulktag.eosc;

import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.List;
import java.util.function.Consumer;
import java.util.function.Function;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * @author miriam.baglioni
 * @Date 21/07/22
 */
import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.DbClient;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.common.RelationInverse;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;

public class ReadMasterDatasourceFromDB implements Closeable {

	private final DbClient dbClient;
	private static final Log log = LogFactory.getLog(ReadMasterDatasourceFromDB.class);

	private final BufferedWriter writer;
	private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static final String QUERY = "SELECT dso.id datasource, d.id master FROM " +
		"(SELECT id FROM dsm_services WHERE id like 'eosc%') dso " +
		"FULL JOIN " +
		"(SELECT id, duplicate FROM dsm_dedup_services WHERE duplicate like 'eosc%')d " +
		"ON dso.id = d.duplicate";

	public static void main(final String[] args) throws Exception {
		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
			IOUtils
				.toString(
					ReadMasterDatasourceFromDB.class
						.getResourceAsStream(
							"/eu/dnetlib/dhp/bulktag/datasourcemaster_parameters.json")));

		parser.parseArgument(args);

		final String dbUrl = parser.get("postgresUrl");
		final String dbUser = parser.get("postgresUser");
		final String dbPassword = parser.get("postgresPassword");
		final String hdfsPath = parser.get("hdfsPath");
		final String hdfsNameNode = parser.get("hdfsNameNode");

		try (
			final ReadMasterDatasourceFromDB rmd = new ReadMasterDatasourceFromDB(hdfsPath, hdfsNameNode, dbUrl, dbUser,
				dbPassword)) {

			log.info("Processing datasources...");
			rmd.execute(QUERY, rmd::datasourceMasterMap);

		}
	}

	public void execute(final String sql, final Function<ResultSet, DatasourceMaster> producer) {

		dbClient.processResults(sql, rs -> writeMap(producer.apply(rs)));
	}

	public DatasourceMaster datasourceMasterMap(ResultSet rs) {
		try {
			DatasourceMaster dm = new DatasourceMaster();
			String datasource = rs.getString("datasource");
			dm.setDatasource(datasource);
			String master = rs.getString("master");
			if (StringUtils.isNotBlank(master))
				dm.setMaster(OafMapperUtils.createOpenaireId(10, master, true));
			else
				dm.setMaster(OafMapperUtils.createOpenaireId(10, datasource, true));
			return dm;

		} catch (final SQLException e) {
			throw new RuntimeException(e);
		}
	}

	@Override
	public void close() throws IOException {
		dbClient.close();
		writer.close();
	}

	public ReadMasterDatasourceFromDB(
		final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword)
		throws IOException {

		this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);

		Configuration conf = new Configuration();
		conf.set("fs.defaultFS", hdfsNameNode);

		FileSystem fileSystem = FileSystem.get(conf);
		Path hdfsWritePath = new Path(hdfsPath);
		FSDataOutputStream fsDataOutputStream = null;
		if (fileSystem.exists(hdfsWritePath)) {
			fsDataOutputStream = fileSystem.append(hdfsWritePath);
		} else {
			fsDataOutputStream = fileSystem.create(hdfsWritePath);
		}

		this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
	}

	protected void writeMap(final DatasourceMaster dm) {
		try {
			writer.write(OBJECT_MAPPER.writeValueAsString(dm));
			writer.newLine();
		} catch (final IOException e) {
			throw new RuntimeException(e);
		}
	}

}
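The FULL JOIN above keeps EOSC datasources whether or not a dedup master exists, and datasourceMasterMap falls back to the datasource's own id when the master column comes back null. A toy reduction of that fallback rule, decoupled from the database and from OafMapperUtils (the "10|" prefixing is a simplification of createOpenaireId):

public class MasterFallbackSketch {
	static String resolveMaster(String datasource, String master) {
		// when the FULL JOIN finds no dedup record, master is null/blank and
		// the datasource acts as its own master
		String raw = (master == null || master.trim().isEmpty()) ? datasource : master;
		return "10|" + raw; // stand-in for OafMapperUtils.createOpenaireId(10, raw, true)
	}

	public static void main(String[] args) {
		System.out.println(resolveMaster("eosc____::a", null));          // 10|eosc____::a
		System.out.println(resolveMaster("eosc____::a", "dedup___::b")); // 10|dedup___::b
	}
}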
@ -0,0 +1,169 @@

package eu.dnetlib.dhp.bulktag.eosc;

import static eu.dnetlib.dhp.PropagationConstant.readPath;
import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir;
import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS;

import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.bulktag.SparkBulkTagJob;
import eu.dnetlib.dhp.bulktag.community.*;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;

/**
 * @author miriam.baglioni
 * @Date 21/07/22
 */
public class SparkEoscBulkTag implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(SparkEoscBulkTag.class);
	public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkEoscBulkTag.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/bulktag/input_eosc_bulkTag_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String workingPath = parser.get("workingPath");
		log.info("workingPath: {}", workingPath);

		String datasourceMapPath = parser.get("datasourceMapPath");
		log.info("datasourceMapPath: {}", datasourceMapPath);

		final String resultClassName = parser.get("resultTableName");
		log.info("resultTableName: {}", resultClassName);

		Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);

		SparkConf conf = new SparkConf();
		CommunityConfiguration cc;

		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				removeOutputDir(spark, workingPath);
				execBulkTag(spark, inputPath, workingPath, datasourceMapPath, resultClazz);
			});
	}

	private static <R extends Result> void execBulkTag(
		SparkSession spark,
		String inputPath,
		String workingPath,
		String datasourceMapPath,
		Class<R> resultClazz) {

		List<String> hostedByList = readPath(spark, datasourceMapPath, DatasourceMaster.class)
			.map((MapFunction<DatasourceMaster, String>) dm -> dm.getMaster(), Encoders.STRING())
			.collectAsList();

		readPath(spark, inputPath, resultClazz)
			.map(patchResult(), Encoders.bean(resultClazz))
			.filter(Objects::nonNull)
			.map(
				(MapFunction<R, R>) value -> enrich(value, hostedByList),
				Encoders.bean(resultClazz))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(workingPath);

		readPath(spark, workingPath, resultClazz)
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(inputPath);

	}

	private static <R extends Result> R enrich(R value, List<String> hostedByList) {
		if (value
			.getInstance()
			.stream()
			.anyMatch(
				i -> (hostedByList.contains(i.getHostedby().getKey())))
			&&
			!value.getContext().stream().anyMatch(c -> c.getId().equals("eosc"))) {
			Context context = new Context();
			context.setId("eosc");
			context
				.setDataInfo(
					Arrays
						.asList(
							OafMapperUtils
								.dataInfo(
									false, BULKTAG_DATA_INFO_TYPE, true, false,
									OafMapperUtils
										.qualifier(
											CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE,
											DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
									TAGGING_TRUST)));
			value.getContext().add(context);

		}
		return value;

	}

	public static <R> Dataset<R> readPath(
		SparkSession spark, String inputPath, Class<R> clazz) {
		return spark
			.read()
			.textFile(inputPath)
			.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
	}

	// TODO remove this hack as soon as the values fixed by this method are provided as non-null
	private static <R extends Result> MapFunction<R, R> patchResult() {
		return r -> {
			if (r.getDataInfo().getDeletedbyinference() == null) {
				r.getDataInfo().setDeletedbyinference(false);
			}
			if (r.getContext() == null) {
				r.setContext(new ArrayList<>());
			}
			return r;
		};
	}

}
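The guard in enrich() is the core of the job: a result acquires the "eosc" context when at least one of its instances is hosted by a datasource in the master list and the context is not already present. A toy reduction of that predicate over plain collections, with made-up ids:

import java.util.*;

public class EoscGuardSketch {
	// tag when some hosting datasource is an EOSC master and "eosc" is absent
	static boolean shouldTag(List<String> hostedBy, Set<String> contexts, Set<String> eoscMasters) {
		return hostedBy.stream().anyMatch(eoscMasters::contains)
			&& !contexts.contains("eosc");
	}

	public static void main(String[] args) {
		Set<String> masters = new HashSet<>(Arrays.asList("10|eosc____::a"));
		System.out.println(shouldTag(Arrays.asList("10|eosc____::a"), new HashSet<>(), masters)); // true
		System.out.println(shouldTag(Arrays.asList("10|other___::x"), new HashSet<>(), masters)); // false
		System.out.println(shouldTag(Arrays.asList("10|eosc____::a"),
			new HashSet<>(Arrays.asList("eosc")), masters)); // false: already tagged
	}
}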
@ -0,0 +1,237 @@

package eu.dnetlib.dhp.bulktag.eosc;

import static eu.dnetlib.dhp.PropagationConstant.readPath;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.util.*;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.*;

public class SparkEoscTag {
	private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class);
	public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
	public static final String EOSC_GALAXY_WORKFLOW = "EOSC::Galaxy Workflow";
	public static final String EOSC_TWITTER_DATA = "EOSC::Twitter Data";
	public static final String EOSC_JUPYTER_NOTEBOOK = "EOSC::Jupyter Notebook";
	public static final String COMPLIES_WITH = "compliesWith";

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkEoscTag.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/bulktag/input_eoscTag_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String workingPath = parser.get("workingPath");
		log.info("workingPath: {}", workingPath);

		SparkConf conf = new SparkConf();

		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				execEoscTag(spark, inputPath, workingPath);

			});
	}

	public static EoscIfGuidelines newInstance(String code, String label, String url, String semantics) {
		EoscIfGuidelines eig = new EoscIfGuidelines();
		eig.setCode(code);
		eig.setLabel(label);
		eig.setUrl(url);
		eig.setSemanticRelation(semantics);
		return eig;

	}

	private static void execEoscTag(SparkSession spark, String inputPath, String workingPath) {

		readPath(spark, inputPath + "/software", Software.class)
			.map((MapFunction<Software, Software>) s -> {

				if (containsCriteriaNotebook(s)) {
					if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
						s.setEoscifguidelines(new ArrayList<>());
					addEIG(
						s.getEoscifguidelines(), EOSC_JUPYTER_NOTEBOOK, EOSC_JUPYTER_NOTEBOOK, "",
						COMPLIES_WITH);

				}
				if (containsCriteriaGalaxy(s)) {
					if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
						s.setEoscifguidelines(new ArrayList<>());

					addEIG(
						s.getEoscifguidelines(), EOSC_GALAXY_WORKFLOW, EOSC_GALAXY_WORKFLOW, "", COMPLIES_WITH);
				}
				return s;
			}, Encoders.bean(Software.class))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(workingPath + "/software");

		readPath(spark, workingPath + "/software", Software.class)
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(inputPath + "/software");

		readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class)
			.map((MapFunction<OtherResearchProduct, OtherResearchProduct>) orp -> {

				if (!Optional.ofNullable(orp.getEoscifguidelines()).isPresent())
					orp.setEoscifguidelines(new ArrayList<>());

				if (containsCriteriaGalaxy(orp)) {
					addEIG(
						orp.getEoscifguidelines(), EOSC_GALAXY_WORKFLOW, EOSC_GALAXY_WORKFLOW, "",
						COMPLIES_WITH);
				}
				if (containsCriteriaTwitter(orp)) {
					addEIG(orp.getEoscifguidelines(), EOSC_TWITTER_DATA, EOSC_TWITTER_DATA, "", COMPLIES_WITH);
				}
				return orp;
			}, Encoders.bean(OtherResearchProduct.class))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(workingPath + "/otherresearchproduct");

		readPath(spark, workingPath + "/otherresearchproduct", OtherResearchProduct.class)
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(inputPath + "/otherresearchproduct");

		readPath(spark, inputPath + "/dataset", Dataset.class)
			.map((MapFunction<Dataset, Dataset>) d -> {

				if (!Optional.ofNullable(d.getEoscifguidelines()).isPresent())
					d.setEoscifguidelines(new ArrayList<>());
				if (containsCriteriaTwitter(d)) {
					addEIG(d.getEoscifguidelines(), EOSC_TWITTER_DATA, EOSC_TWITTER_DATA, "", COMPLIES_WITH);
				}
				return d;
			}, Encoders.bean(Dataset.class))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(workingPath + "/dataset");

		readPath(spark, workingPath + "/dataset", Dataset.class)
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(inputPath + "/dataset");
	}

	private static void addEIG(List<EoscIfGuidelines> eoscifguidelines, String code, String label, String url,
		String sem) {
		if (!eoscifguidelines.stream().anyMatch(eig -> eig.getCode().equals(code)))
			eoscifguidelines.add(newInstance(code, label, url, sem));
	}

	private static boolean containsCriteriaTwitter(Result r) {
		Set<String> words = getWordsSP(r.getTitle());
		words.addAll(getWordsF(r.getDescription()));

		if (words.contains("twitter") &&
			(words.contains("data") || words.contains("dataset")))
			return true;

		return Optional
			.ofNullable(r.getSubject())
			.map(
				s -> s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("twitter")) &&
					s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("data")))
			.orElse(false);
	}

	private static boolean containsCriteriaGalaxy(Result r) {
		Set<String> words = getWordsSP(r.getTitle());
		words.addAll(getWordsF(r.getDescription()));
		if (words.contains("galaxy") &&
			words.contains("workflow"))
			return true;

		return Optional
			.ofNullable(r.getSubject())
			.map(
				s -> s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
					s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow")))
			.orElse(false);
	}

	private static boolean containsCriteriaNotebook(Software s) {
		if (!Optional.ofNullable(s.getSubject()).isPresent())
			return false;
		if (s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("jupyter")))
			return true;
		if (s
			.getSubject()
			.stream()
			.anyMatch(
				sbj -> sbj.getValue().toLowerCase().contains("python") &&
					sbj.getValue().toLowerCase().contains("notebook")))
			return true;
		if (s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("python")) &&
			s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("notebook")))
			return true;
		return false;
	}

	private static Set<String> getWordsSP(List<StructuredProperty> elem) {
		Set<String> words = new HashSet<>();
		Optional
			.ofNullable(elem)
			.ifPresent(
				e -> e
					.forEach(
						t -> words
							.addAll(
								Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
		return words;
	}

	private static Set<String> getWordsF(List<Field<String>> elem) {
		Set<String> words = new HashSet<>();
		Optional
			.ofNullable(elem)
			.ifPresent(
				e -> e
					.forEach(
						t -> words
							.addAll(
								Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));

		return words;
	}
}
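The keyword matching above lowercases titles and descriptions, strips every character that is not a letter or a space, and splits on single spaces. A standalone check of that tokenization, useful for seeing why hyphenated forms fuse into one token:

import java.util.*;

public class TokenizeSketch {
	static Set<String> words(String value) {
		// mirrors getWordsSP/getWordsF: lowercase, keep [a-zA-Z ], split on spaces
		return new HashSet<>(Arrays.asList(value.toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")));
	}

	public static void main(String[] args) {
		Set<String> w = words("Galaxy-based workflow, v2.1");
		System.out.println(w.contains("workflow"));    // true: trailing comma is stripped
		System.out.println(w.contains("galaxy"));      // false: "Galaxy-based" fuses to "galaxybased"
		System.out.println(w.contains("galaxybased")); // true
	}
}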
@ -6,6 +6,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

 import java.util.Arrays;
 import java.util.List;
+import java.util.Optional;

 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;

@ -85,6 +86,8 @@ public class PrepareDatasourceCountryAssociation {
 		Dataset<Datasource> datasource = readPath(spark, inputPath + "/datasource", Datasource.class)
 			.filter(
 				(FilterFunction<Datasource>) ds -> !ds.getDataInfo().getDeletedbyinference() &&
+					Optional.ofNullable(ds.getDatasourcetype()).isPresent() &&
+					Optional.ofNullable(ds.getDatasourcetype().getClassid()).isPresent() &&
 					(allowedtypes.contains(ds.getDatasourcetype().getClassid()) ||
 						whitelist.contains(ds.getId())));

@ -108,21 +108,28 @@ public class SparkCountryPropagationJob {
 	private static <R extends Result> MapFunction<Tuple2<R, ResultCountrySet>, R> getCountryMergeFn() {
 		return t -> {
 			Optional.ofNullable(t._2()).ifPresent(r -> {
-				t._1().getCountry().addAll(merge(t._1().getCountry(), r.getCountrySet()));
+				if (Optional.ofNullable(t._1().getCountry()).isPresent())
+					t._1().getCountry().addAll(merge(t._1().getCountry(), r.getCountrySet()));
+				else
+					t._1().setCountry(merge(null, t._2().getCountrySet()));
 			});
 			return t._1();
 		};
 	}

 	private static List<Country> merge(List<Country> c1, List<CountrySbs> c2) {
-		HashSet<String> countries = c1
-			.stream()
-			.map(Qualifier::getClassid)
-			.collect(Collectors.toCollection(HashSet::new));
+		HashSet<String> countries = new HashSet<>();
+		if (Optional.ofNullable(c1).isPresent()) {
+			countries = c1
+				.stream()
+				.map(Qualifier::getClassid)
+				.collect(Collectors.toCollection(HashSet::new));
+		}
+
+		HashSet<String> finalCountries = countries;
 		return c2
 			.stream()
-			.filter(c -> !countries.contains(c.getClassid()))
+			.filter(c -> !finalCountries.contains(c.getClassid()))
 			.map(c -> getCountry(c.getClassid(), c.getClassname()))
 			.collect(Collectors.toList());
 	}

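The net effect of the change above is a null-safe merge: a result with no country list at all now receives the propagated countries instead of throwing. A compact illustration over plain strings instead of the Country/CountrySbs beans:

import java.util.*;

public class CountryMergeSketch {
	static List<String> merge(List<String> existing, List<String> candidates) {
		// an absent list behaves like an empty one
		Set<String> seen = existing == null ? new HashSet<>() : new HashSet<>(existing);
		List<String> out = new ArrayList<>();
		for (String c : candidates)
			if (!seen.contains(c)) // only genuinely new countries are added
				out.add(c);
		return out;
	}

	public static void main(String[] args) {
		System.out.println(merge(null, Arrays.asList("NL", "IT")));                // [NL, IT]
		System.out.println(merge(Arrays.asList("NL"), Arrays.asList("NL", "IT"))); // [IT]
	}
}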
@ -0,0 +1,32 @@
[
	{
		"paramName": "p",
		"paramLongName": "hdfsPath",
		"paramDescription": "the path where the sequential file is stored",
		"paramRequired": true
	},
	{
		"paramName": "nn",
		"paramLongName": "hdfsNameNode",
		"paramDescription": "the name node on hdfs",
		"paramRequired": true
	},
	{
		"paramName": "pgurl",
		"paramLongName": "postgresUrl",
		"paramDescription": "postgres url, example: jdbc:postgresql://localhost:5432/testdb",
		"paramRequired": true
	},
	{
		"paramName": "pguser",
		"paramLongName": "postgresUser",
		"paramDescription": "postgres user",
		"paramRequired": false
	},
	{
		"paramName": "pgpasswd",
		"paramLongName": "postgresPassword",
		"paramDescription": "postgres password",
		"paramRequired": false
	}
]
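Each paramLongName above maps one-to-one to a command-line flag, as the workflow actions further down show. A minimal sketch of how such a spec drives parsing, reusing the project's ArgumentApplicationParser exactly as it is used elsewhere in this diff (the spec here is trimmed to two parameters for brevity):

import eu.dnetlib.dhp.application.ArgumentApplicationParser;

public class ParamParseSketch {
	public static void main(String[] args) throws Exception {
		// trimmed spec: only the two HDFS parameters from the file above
		String spec = "[{\"paramName\":\"p\",\"paramLongName\":\"hdfsPath\",\"paramDescription\":\"out\",\"paramRequired\":true},"
			+ "{\"paramName\":\"nn\",\"paramLongName\":\"hdfsNameNode\",\"paramDescription\":\"nn\",\"paramRequired\":true}]";
		ArgumentApplicationParser parser = new ArgumentApplicationParser(spec);
		parser.parseArgument(new String[] {
			"--hdfsPath", "/tmp/out", "--hdfsNameNode", "hdfs://nn:8020"
		});
		System.out.println(parser.get("hdfsPath")); // /tmp/out
	}
}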
@ -0,0 +1,21 @@
[
	{
		"paramName": "s",
		"paramLongName": "sourcePath",
		"paramDescription": "the path of the sequential file to read",
		"paramRequired": true
	},
	{
		"paramName": "wp",
		"paramLongName": "workingPath",
		"paramDescription": "the path used to store temporary output files",
		"paramRequired": true
	},
	{
		"paramName": "ssm",
		"paramLongName": "isSparkSessionManaged",
		"paramDescription": "true if the spark session is managed, false otherwise",
		"paramRequired": false
	}

]
@ -0,0 +1,34 @@
[

	{
		"paramName": "s",
		"paramLongName": "sourcePath",
		"paramDescription": "the path of the sequential file to read",
		"paramRequired": true
	},
	{
		"paramName": "dmp",
		"paramLongName": "datasourceMapPath",
		"paramDescription": "the path where the datasource-master association has been stored",
		"paramRequired": true
	},
	{
		"paramName": "tn",
		"paramLongName": "resultTableName",
		"paramDescription": "the name of the result table we are currently working on",
		"paramRequired": true
	},
	{
		"paramName": "wp",
		"paramLongName": "workingPath",
		"paramDescription": "the path used to store temporary output files",
		"paramRequired": true
	},
	{
		"paramName": "ssm",
		"paramLongName": "isSparkSessionManaged",
		"paramDescription": "true if the spark session is managed, false otherwise",
		"paramRequired": false
	}

]
@ -16,6 +16,21 @@
 			<name>outputPath</name>
 			<description>the output path</description>
 		</property>
+
+
+		<property>
+			<name>postgresURL</name>
+			<description>the URL of the postgres server to query</description>
+		</property>
+		<property>
+			<name>postgresUser</name>
+			<description>the username to access the postgres db</description>
+		</property>
+		<property>
+			<name>postgresPassword</name>
+			<description>the postgres password</description>
+		</property>
+
 	</parameters>

 	<global>
@ -151,8 +166,154 @@
 			<error to="Kill"/>
 		</action>

-		<join name="wait" to="End"/>
+		<join name="wait" to="eosc_tag"/>
+
+		<action name="eosc_tag">
+			<spark xmlns="uri:oozie:spark-action:0.2">
+				<master>yarn-cluster</master>
+				<mode>cluster</mode>
+				<name>EOSC_tagging</name>
+				<class>eu.dnetlib.dhp.bulktag.eosc.SparkEoscTag</class>
+				<jar>dhp-enrichment-${projectVersion}.jar</jar>
+				<spark-opts>
+					--num-executors=${sparkExecutorNumber}
+					--executor-memory=${sparkExecutorMemory}
+					--executor-cores=${sparkExecutorCores}
+					--driver-memory=${sparkDriverMemory}
+					--conf spark.extraListeners=${spark2ExtraListeners}
+					--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+					--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+					--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+				</spark-opts>
+				<arg>--sourcePath</arg><arg>${outputPath}</arg>
+				<arg>--workingPath</arg><arg>${workingDir}/eoscTag</arg>
+			</spark>
+			<ok to="eosc_get_datasource_master"/>
+			<error to="Kill"/>
+		</action>
+
+		<action name="eosc_get_datasource_master">
+			<java>
+				<main-class>eu.dnetlib.dhp.bulktag.eosc.ReadMasterDatasourceFromDB</main-class>
+				<arg>--hdfsPath</arg><arg>${workingDir}/datasourcemaster</arg>
+				<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
+				<arg>--postgresUrl</arg><arg>${postgresURL}</arg>
+				<arg>--postgresUser</arg><arg>${postgresUser}</arg>
+				<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
+			</java>
+			<ok to="fork_eosc_context_tag"/>
+			<error to="Kill"/>
+		</action>
+
+		<fork name="fork_eosc_context_tag">
+			<path start="eosc_context_tag_publication"/>
+			<path start="eosc_context_tag_dataset"/>
+			<path start="eosc_context_tag_otherresearchproduct"/>
+			<path start="eosc_context_tag_software"/>
+		</fork>
+
+		<action name="eosc_context_tag_publication">
+			<spark xmlns="uri:oozie:spark-action:0.2">
+				<master>yarn-cluster</master>
+				<mode>cluster</mode>
+				<name>EOSC_tagging</name>
+				<class>eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag</class>
+				<jar>dhp-enrichment-${projectVersion}.jar</jar>
+				<spark-opts>
+					--num-executors=${sparkExecutorNumber}
+					--executor-memory=${sparkExecutorMemory}
+					--executor-cores=${sparkExecutorCores}
+					--driver-memory=${sparkDriverMemory}
+					--conf spark.extraListeners=${spark2ExtraListeners}
+					--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+					--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+					--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+				</spark-opts>
+				<arg>--sourcePath</arg><arg>${outputPath}/publication</arg>
+				<arg>--workingPath</arg><arg>${workingDir}/eoscContextTag/publication</arg>
+				<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
+				<arg>--datasourceMapPath</arg><arg>${workingDir}/datasourcemaster</arg>
+			</spark>
+			<ok to="wait_eosc_context_tag"/>
+			<error to="Kill"/>
+		</action>
+
+		<action name="eosc_context_tag_dataset">
+			<spark xmlns="uri:oozie:spark-action:0.2">
+				<master>yarn-cluster</master>
+				<mode>cluster</mode>
+				<name>EOSC_tagging</name>
+				<class>eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag</class>
+				<jar>dhp-enrichment-${projectVersion}.jar</jar>
+				<spark-opts>
+					--num-executors=${sparkExecutorNumber}
+					--executor-memory=${sparkExecutorMemory}
+					--executor-cores=${sparkExecutorCores}
+					--driver-memory=${sparkDriverMemory}
+					--conf spark.extraListeners=${spark2ExtraListeners}
+					--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+					--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+					--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+				</spark-opts>
+				<arg>--sourcePath</arg><arg>${outputPath}/dataset</arg>
+				<arg>--workingPath</arg><arg>${workingDir}/eoscContextTag/dataset</arg>
+				<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
+				<arg>--datasourceMapPath</arg><arg>${workingDir}/datasourcemaster</arg>
+			</spark>
+			<ok to="wait_eosc_context_tag"/>
+			<error to="Kill"/>
+		</action>
+		<action name="eosc_context_tag_software">
+			<spark xmlns="uri:oozie:spark-action:0.2">
+				<master>yarn-cluster</master>
+				<mode>cluster</mode>
+				<name>EOSC_tagging</name>
+				<class>eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag</class>
+				<jar>dhp-enrichment-${projectVersion}.jar</jar>
+				<spark-opts>
+					--num-executors=${sparkExecutorNumber}
+					--executor-memory=${sparkExecutorMemory}
+					--executor-cores=${sparkExecutorCores}
+					--driver-memory=${sparkDriverMemory}
+					--conf spark.extraListeners=${spark2ExtraListeners}
+					--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+					--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+					--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+				</spark-opts>
+				<arg>--sourcePath</arg><arg>${outputPath}/software</arg>
+				<arg>--workingPath</arg><arg>${workingDir}/eoscContextTag/software</arg>
+				<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
+				<arg>--datasourceMapPath</arg><arg>${workingDir}/datasourcemaster</arg>
+			</spark>
+			<ok to="wait_eosc_context_tag"/>
+			<error to="Kill"/>
+		</action>
+		<action name="eosc_context_tag_otherresearchproduct">
+			<spark xmlns="uri:oozie:spark-action:0.2">
+				<master>yarn-cluster</master>
+				<mode>cluster</mode>
+				<name>EOSC_tagging</name>
+				<class>eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag</class>
+				<jar>dhp-enrichment-${projectVersion}.jar</jar>
+				<spark-opts>
+					--num-executors=${sparkExecutorNumber}
+					--executor-memory=${sparkExecutorMemory}
+					--executor-cores=${sparkExecutorCores}
+					--driver-memory=${sparkDriverMemory}
+					--conf spark.extraListeners=${spark2ExtraListeners}
+					--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+					--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+					--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+				</spark-opts>
+				<arg>--sourcePath</arg><arg>${outputPath}/otherresearchproduct</arg>
+				<arg>--workingPath</arg><arg>${workingDir}/eoscContextTag/otherresearchproduct</arg>
+				<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
+				<arg>--datasourceMapPath</arg><arg>${workingDir}/datasourcemaster</arg>
+			</spark>
+			<ok to="wait_eosc_context_tag"/>
+			<error to="Kill"/>
+		</action>
+		<join name="wait_eosc_context_tag" to="End"/>
 		<end name="End"/>

 </workflow-app>
@ -6,6 +6,10 @@ import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;

 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;

@ -23,11 +27,12 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.gson.Gson;
 import com.jayway.jsonpath.DocumentContext;
 import com.jayway.jsonpath.JsonPath;

 import eu.dnetlib.dhp.schema.oaf.Dataset;
 import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
 import eu.dnetlib.dhp.schema.oaf.Publication;
 import eu.dnetlib.dhp.schema.oaf.Software;
 import eu.dnetlib.dhp.bulktag.community.ProtoMap;
 import eu.dnetlib.dhp.schema.oaf.*;

 public class BulkTagJobTest {

@ -39,7 +44,8 @@ public class BulkTagJobTest {
 		+ " \"title\" : \"$['title'][*]['value']\","
 		+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
 		+ " \"contributor\" : \"$['contributor'][*]['value']\","
-		+ " \"description\" : \"$['description'][*]['value']\"}";
+		+ " \"description\" : \"$['description'][*]['value']\", "
+		+ " \"subject\" :\"$['subject'][*]['value']\" }";

 	private static SparkSession spark;

@ -763,10 +769,28 @@ public class BulkTagJobTest {
 		org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);

 		idExplodeCommunity.show(false);
-		Assertions.assertEquals(3, idExplodeCommunity.count());
+		Assertions.assertEquals(4, idExplodeCommunity.count());

 		Assertions
 			.assertEquals(
 				3, idExplodeCommunity.filter("provenance = 'community:datasource'").count());
+		Assertions
+			.assertEquals(
+				1, idExplodeCommunity.filter("provenance = 'community:advconstraint'").count());
 	}

+//	@Test
+//	void test1(){
+//		ProtoMap params = new Gson().fromJson(pathMap, ProtoMap.class);
+//		HashMap<String, String> param = new HashMap<>();
+//		for (String key : params.keySet()) {
+//			try {
+//				param.put(key, jsonContext.read(params.get(key)));
+//			} catch (com.jayway.jsonpath.PathNotFoundException e) {
+//				param.put(key, new ArrayList<>());
+//			}
+//		}
+//		return param;
+//	}
+//	}
 }
@ -0,0 +1,162 @@

package eu.dnetlib.dhp.bulktag;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * @author miriam.baglioni
 * @Date 22/07/22
 */
import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

//"50|475c1990cbb2::0fecfb874d9395aa69d2f4d7cd1acbea" has instance hostedby eosc
//"50|475c1990cbb2::3185cd5d8a2b0a06bb9b23ef11748eb1" has instance hostedby eosc
//"50|475c1990cbb2::449f28eefccf9f70c04ad70d61e041c7" has two instances, one hostedby eosc
//"50|475c1990cbb2::3894c94123e96df8a21249957cf160cb" has EoscTag

public class EOSCContextTaggingTest {
	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static SparkSession spark;

	private static Path workingDir;

	private static final Logger log = LoggerFactory.getLogger(EOSCContextTaggingTest.class);

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files.createTempDirectory(EOSCContextTaggingTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		SparkConf conf = new SparkConf();
		conf.setAppName(EOSCContextTaggingTest.class.getSimpleName());

		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			.appName(EOSCContextTaggingTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	@AfterAll
	public static void afterAll() throws IOException {
		FileUtils.deleteDirectory(workingDir.toFile());
		spark.stop();
	}

	@Test
	void EoscContextTagTest() throws Exception {

		spark
			.read()
			.textFile(getClass().getResource("/eu/dnetlib/dhp/bulktag/eosc/dataset/dataset_10.json").getPath())
			.map(
				(MapFunction<String, Dataset>) value -> OBJECT_MAPPER.readValue(value, Dataset.class),
				Encoders.bean(Dataset.class))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(workingDir.toString() + "/input/dataset");

		SparkEoscBulkTag
			.main(
				new String[] {
					"-isSparkSessionManaged", Boolean.FALSE.toString(),
					"-sourcePath",
					workingDir.toString() + "/input/dataset",
					"-workingPath", workingDir.toString() + "/working/dataset",
					"-datasourceMapPath",
					getClass()
						.getResource("/eu/dnetlib/dhp/bulktag/eosc/datasourceMasterAssociation/datasourceMaster")
						.getPath(),
					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset"
				});

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<Dataset> tmp = sc
			.textFile(workingDir.toString() + "/input/dataset")
			.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));

		Assertions.assertEquals(10, tmp.count());

		Assertions
			.assertEquals(
				4,
				tmp
					.filter(
						d -> d.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
					.count());

		Assertions
			.assertEquals(
				1,
				tmp
					.filter(
						d -> d.getId().equals("50|475c1990cbb2::0fecfb874d9395aa69d2f4d7cd1acbea")
							&&
							d.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
					.count());
		Assertions
			.assertEquals(
				1,
				tmp
					.filter(
						d -> d.getId().equals("50|475c1990cbb2::3185cd5d8a2b0a06bb9b23ef11748eb1")
							&&
							d.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
					.count());

		Assertions
			.assertEquals(
				1,
				tmp
					.filter(
						d -> d.getId().equals("50|475c1990cbb2::449f28eefccf9f70c04ad70d61e041c7")
							&&
							d.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
					.count());

		Assertions
			.assertEquals(
				1,
				tmp
					.filter(
						d -> d.getId().equals("50|475c1990cbb2::3894c94123e96df8a21249957cf160cb")
							&&
							d.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
					.count());
	}

}
@ -0,0 +1,713 @@
|
|||
|
||||
package eu.dnetlib.dhp.bulktag;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.eosc.SparkEoscTag;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
||||
public class EOSCTagJobTest {
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(EOSCTagJobTest.class);
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files.createTempDirectory(EOSCTagJobTest.class.getSimpleName());
|
||||
log.info("using work dir {}", workingDir);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(EOSCTagJobTest.class.getSimpleName());
|
||||
|
||||
conf.setMaster("local[*]");
|
||||
conf.set("spark.driver.host", "localhost");
|
||||
conf.set("hive.metastore.local", "true");
|
||||
conf.set("spark.ui.enabled", "false");
|
||||
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
||||
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(EOSCTagJobTest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void afterAll() throws IOException {
|
||||
FileUtils.deleteDirectory(workingDir.toFile());
|
||||
spark.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
void jupyterUpdatesTest() throws Exception {
|
||||
|
||||
spark
|
||||
.read()
|
||||
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/jupyter/software").getPath())
|
||||
.map(
|
||||
(MapFunction<String, Software>) value -> OBJECT_MAPPER.readValue(value, Software.class),
|
||||
Encoders.bean(Software.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir.toString() + "/input/software");
|
||||
|
||||
spark
|
||||
.read()
|
||||
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/jupyter/dataset").getPath())
|
||||
.map(
|
||||
(MapFunction<String, Dataset>) value -> OBJECT_MAPPER.readValue(value, Dataset.class),
|
||||
Encoders.bean(Dataset.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir.toString() + "/input/dataset");
|
||||
|
||||
spark
|
||||
.read()
|
||||
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/jupyter/otherresearchproduct").getPath())
|
||||
.map(
|
||||
(MapFunction<String, OtherResearchProduct>) value -> OBJECT_MAPPER
|
||||
.readValue(value, OtherResearchProduct.class),
|
||||
Encoders.bean(OtherResearchProduct.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir.toString() + "/input/otherresearchproduct");
|
||||
|
||||
SparkEoscTag
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
workingDir.toString() + "/input",
|
||||
"-workingPath", workingDir.toString() + "/working"
|
||||
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Software> tmp = sc
|
||||
.textFile(workingDir.toString() + "/input/software")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
|
||||
|
||||
Assertions.assertEquals(10, tmp.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
4,
|
||||
tmp
|
||||
.filter(s -> s.getEoscifguidelines() != null)
|
||||
.filter(
|
||||
s -> s
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.size());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
|
||||
|
||||
Assertions
|
||||
.assertFalse(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
5, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.size());
|
||||
Assertions
|
||||
.assertFalse(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||
|
||||
Assertions
|
||||
.assertTrue(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines() == null);
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
8, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.size());
|
||||
Assertions
|
||||
.assertFalse(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
5, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.size());
|
||||
Assertions
|
||||
.assertFalse(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||
Assertions
|
||||
.assertTrue(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines() == null);
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
8, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.size());
|
||||
Assertions
|
||||
.assertFalse(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
|
||||
|
||||
List<Subject> subjects = tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::6e7a9b21a2feef45673890432af34244"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject();
|
||||
Assertions.assertEquals(7, subjects.size());
|
||||
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("jupyter")));
|
||||
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("Modeling and Simulation")));
|
||||
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("structure granulaire")));
|
||||
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("algorithme")));
|
||||
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("simulation numérique")));
|
||||
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("flux de gaz")));
|
||||
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("flux de liquide")));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
10, sc
|
||||
.textFile(workingDir.toString() + "/input/dataset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
0, sc
|
||||
.textFile(workingDir.toString() + "/input/dataset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
|
||||
.filter(
|
||||
ds -> ds.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
0, sc
|
||||
.textFile(workingDir.toString() + "/input/dataset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
|
||||
.filter(
|
||||
ds -> ds
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
10, sc
|
||||
.textFile(workingDir.toString() + "/input/otherresearchproduct")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
0, sc
|
||||
.textFile(workingDir.toString() + "/input/otherresearchproduct")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
|
||||
.filter(
|
||||
orp -> orp
|
||||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
0, sc
|
||||
.textFile(workingDir.toString() + "/input/otherresearchproduct")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
|
||||
.filter(
|
||||
orp -> orp
|
||||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getValue().equals("EOSC::Jupyter Notebook")))
|
||||
.count());
|
||||
|
||||
// spark.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
void galaxyUpdatesTest() throws Exception {
|
||||
spark
|
||||
.read()
|
||||
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/galaxy/software").getPath())
|
||||
.map(
|
||||
(MapFunction<String, Software>) value -> OBJECT_MAPPER.readValue(value, Software.class),
|
||||
Encoders.bean(Software.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir.toString() + "/input/software");
|
||||
|
||||
spark
|
||||
.read()
|
||||
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/galaxy/dataset").getPath())
|
||||
.map(
|
||||
(MapFunction<String, Dataset>) value -> OBJECT_MAPPER.readValue(value, Dataset.class),
|
||||
Encoders.bean(Dataset.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir.toString() + "/input/dataset");
|
||||
|
||||
spark
|
||||
.read()
|
||||
.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/galaxy/otherresearchproduct").getPath())
|
||||
.map(
|
||||
(MapFunction<String, OtherResearchProduct>) value -> OBJECT_MAPPER
|
||||
.readValue(value, OtherResearchProduct.class),
|
||||
Encoders.bean(OtherResearchProduct.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir.toString() + "/input/otherresearchproduct");
|
||||
|
||||
SparkEoscTag
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
workingDir.toString() + "/input",
|
||||
"-workingPath", workingDir.toString() + "/working"
|
||||
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Software> tmp = sc
|
||||
.textFile(workingDir.toString() + "/input/software")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
|
||||
|
||||
Assertions.assertEquals(11, tmp.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
0,
|
||||
tmp
|
||||
.filter(
|
||||
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
tmp
|
||||
.filter(
|
||||
s -> s.getEoscifguidelines() != null)
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
tmp
|
||||
.filter(
|
||||
s -> s.getEoscifguidelines() != null)
|
||||
.filter(
|
||||
s -> s
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.size());
|
||||
Assertions
|
||||
.assertFalse(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
5, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.size());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
8, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.size());
|
||||
Assertions
|
||||
.assertFalse(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
|
||||
|
||||
		JavaRDD<OtherResearchProduct> orp = sc
			.textFile(workingDir.toString() + "/input/otherresearchproduct")
			.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class));

		Assertions.assertEquals(10, orp.count());

		Assertions
			.assertEquals(
				0,
				orp
					.filter(
						s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
					.count());

		Assertions
			.assertEquals(
				1, orp
					.filter(o -> o.getEoscifguidelines() != null)
					.filter(
						o -> o
							.getEoscifguidelines()
							.stream()
							.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
					.count());

		Assertions
			.assertEquals(
				2, orp
					.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
					.collect()
					.get(0)
					.getSubject()
					.size());
		Assertions
			.assertFalse(
				orp
					.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
					.collect()
					.get(0)
					.getSubject()
					.stream()
					.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
		Assertions
			.assertEquals(
				1, orp
					.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
					.collect()
					.get(0)
					.getEoscifguidelines()
					.size());
		Assertions
			.assertTrue(
				orp
					.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
					.collect()
					.get(0)
					.getEoscifguidelines()
					.stream()
					.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")));

		Assertions
			.assertEquals(
				2, orp
					.filter(sw -> sw.getId().equals("50|od______2017::1bd97baef19dbd2db3203b112bb83bc5"))
					.collect()
					.get(0)
					.getSubject()
					.size());
		Assertions
			.assertFalse(
				orp
					.filter(sw -> sw.getId().equals("50|od______2017::1bd97baef19dbd2db3203b112bb83bc5"))
					.collect()
					.get(0)
					.getSubject()
					.stream()
					.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));

		Assertions
			.assertEquals(
				2, orp
					.filter(sw -> sw.getId().equals("50|od______2017::1e400f1747487fd15998735c41a55c72"))
					.collect()
					.get(0)
					.getSubject()
					.size());
		Assertions
			.assertFalse(
				orp
					.filter(sw -> sw.getId().equals("50|od______2017::1e400f1747487fd15998735c41a55c72"))
					.collect()
					.get(0)
					.getSubject()
					.stream()
					.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
	}

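	// Verifies that "EOSC::Twitter Data" is never left as a plain subject and is only
	// recorded as an EOSC Interoperability Framework guideline on the matching records.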
	@Test
	void twitterUpdatesTest() throws Exception {
		spark
			.read()
			.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/twitter/software").getPath())
			.map(
				(MapFunction<String, Software>) value -> OBJECT_MAPPER.readValue(value, Software.class),
				Encoders.bean(Software.class))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(workingDir.toString() + "/input/software");

		spark
			.read()
			.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/twitter/dataset").getPath())
			.map(
				(MapFunction<String, Dataset>) value -> OBJECT_MAPPER.readValue(value, Dataset.class),
				Encoders.bean(Dataset.class))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(workingDir.toString() + "/input/dataset");

		spark
			.read()
			.textFile(getClass().getResource("/eu/dnetlib/dhp/eosctag/twitter/otherresearchproduct").getPath())
			.map(
				(MapFunction<String, OtherResearchProduct>) value -> OBJECT_MAPPER
					.readValue(value, OtherResearchProduct.class),
				Encoders.bean(OtherResearchProduct.class))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(workingDir.toString() + "/input/otherresearchproduct");

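		// Run the EOSC tagging job on the prepared input.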
		SparkEoscTag
			.main(
				new String[] {
					"-isSparkSessionManaged", Boolean.FALSE.toString(),
					"-sourcePath", workingDir.toString() + "/input",
					"-workingPath", workingDir.toString() + "/working"
				});

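		// Read the tagged records back and verify the expected counts.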
		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<Software> tmp = sc
			.textFile(workingDir.toString() + "/input/software")
			.map(item -> OBJECT_MAPPER.readValue(item, Software.class));

		Assertions.assertEquals(10, tmp.count());

		Assertions
			.assertEquals(
				0,
				tmp
					.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
					.count());

		JavaRDD<OtherResearchProduct> orp = sc
			.textFile(workingDir.toString() + "/input/otherresearchproduct")
			.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class));

		Assertions.assertEquals(10, orp.count());

		Assertions
			.assertEquals(
				0,
				orp
					.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
					.count());
		Assertions
			.assertEquals(
				3,
				orp
					.filter(
						s -> s
							.getEoscifguidelines()
							.stream()
							.anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
					.count());

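		// Datasets: 3 of the 11 records are expected to carry the "EOSC::Twitter Data" guideline.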
		JavaRDD<Dataset> dats = sc
			.textFile(workingDir.toString() + "/input/dataset")
			.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));

		Assertions.assertEquals(11, dats.count());

		Assertions
			.assertEquals(
				3,
				dats
					.filter(
						s -> s
							.getEoscifguidelines()
							.stream()
							.anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
					.count());
	}
}
@@ -1193,6 +1193,9 @@
			<organizations/>
		</community>
		<community id="science-innovation-policy">
			<advancedConstraints>{"criteria":[{"constraint":[{"verb":"equals_ignorecase","field":"subject","value":"ciencias de la comunicación"},
				{"verb":"equals","field":"subject","value":"Miriam"}]},
				{"constraint":[{"verb":"equals","field":"subject","value":"miriam"}]}]}</advancedConstraints>
			<subjects>
				<subject>Sustainability-oriented science policy</subject>
				<subject> STI policies</subject>
@@ -1316,7 +1319,7 @@
			<openaireId>opendoar____::358aee4cc897452c00244351e4d91f69</openaireId>
			<selcriteria>{"criteria":[{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"COVID-19"}]},
			{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"SARS-CoV-2"}]},
			{"constraint":[{"verb":"contains_ignorecase","field":"title","value":"2019-nCoV"}]}]}
			</selcriteria>
		</datasource>
		<datasource>
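For context, the advancedConstraints and selcriteria values above are JSON selection criteria: each "constraint" lists verb/field/value triples, and the structure reads naturally as AND within a constraint list and OR across the "criteria" entries. Below is a minimal, hypothetical sketch of such an evaluator under that reading of the semantics; the Constraint record and matches helper are illustrative and are not the dnet-hadoop implementation.

import java.util.List;
import java.util.Map;

public class CriteriaSketch {

	// One verb/field/value triple from the criteria JSON (hypothetical model).
	record Constraint(String verb, String field, String value) {
		boolean holds(Map<String, String> fields) {
			String actual = fields.getOrDefault(field, "");
			switch (verb) {
				case "equals":
					return actual.equals(value);
				case "equals_ignorecase":
					return actual.equalsIgnoreCase(value);
				case "contains_ignorecase":
					return actual.toLowerCase().contains(value.toLowerCase());
				default:
					return false; // unknown verbs never match in this sketch
			}
		}
	}

	// A record matches if every constraint of at least one "criteria" entry holds.
	static boolean matches(List<List<Constraint>> criteria, Map<String, String> fields) {
		return criteria
			.stream()
			.anyMatch(constraints -> constraints.stream().allMatch(c -> c.holds(fields)));
	}

	public static void main(String[] args) {
		List<List<Constraint>> criteria = List
			.of(
				List.of(new Constraint("contains_ignorecase", "title", "COVID-19")),
				List.of(new Constraint("contains_ignorecase", "title", "SARS-CoV-2")));
		// Prints true: the second criteria entry matches case-insensitively.
		System.out.println(matches(criteria, Map.of("title", "A Sars-CoV-2 spike protein study")));
	}
}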
File diff suppressed because one or more lines are too long
@@ -0,0 +1,318 @@
{"datasource":"eosc________::100percentit::100percentit.100_percent_it_trusted_cloud","master":"10|eosc________::7ef2576047f040612b983a27347471fc"}
|
||||
{"datasource":"eosc________::altec::altec.space-vis_adn_service","master":"10|eosc________::2946c48bbcc514ad76bbbf727d5d8fbc"}
|
||||
{"datasource":"eosc________::astron::astron.","master":"10|eosc________::acb262d4bfdeb6aa9b463a4a6d0d662a"}
|
||||
{"datasource":"eosc________::athena::athena.atmo-flud","master":"10|eosc________::ac448975e1d7f8b0266c8bb3b3992029"}
|
||||
{"datasource":"eosc________::athena::athena.uw-map","master":"10|eosc________::5f2a401cf8ce9dc22a3776cea519b594"}
|
||||
{"datasource":"eosc________::athena::athena.verbal_aggression_analyser_va_analyser","master":"10|eosc________::8b26233e89a50e3754972b1341130494"}
|
||||
{"datasource":"eosc________::authenix::authenix.authenix","master":"10|eosc________::3cd84764da5728473593a580efb29a40"}
|
||||
{"datasource":"eosc________::bineo::bineo.cos4bio","master":"10|eosc________::903e0526a6e56eeaf0e4561aa862ecb8"}
|
||||
{"datasource":"eosc________::blue-cloud::blue-cloud.phytoplankton_eovs","master":"10|eosc________::c2438d79b48baf817956f3856877b3b8"}
|
||||
{"datasource":"eosc________::bsc-es::bsc-es.bdrc_-_barcelona_dust_regional_center","master":"10|eosc________::756664ca614118315840eb8e985e4377"}
|
||||
{"datasource":"eosc________::bsc-es::bsc-es.openebench","master":"10|eosc________::69ed72b873b803feed5ba6ae47548419"}
|
||||
{"datasource":"eosc________::capsh::capsh.dissemin","master":"10|eosc________::e81587742e4107ce83723df17c27cb40"}
|
||||
{"datasource":"eosc________::carlzeissm::carlzeissm.aper","master":"10|eosc________::f3beb9ee5ee293b723e2edd6f990fde3"}
|
||||
{"datasource":"eosc________::ccsd::ccsd.episciences","master":"10|eosc________::e1e9de0dbf4bce79c49338d7cf9327e2"}
|
||||
{"datasource":"eosc________::cds::cds.simbad_simbad_astronomical_database_provides_basic_data_cross-identifications_bibliography_and_measurements_for_astronomical_objects_outside_the_solar_system","master":"10|eosc________::a1e41e71453ac32161f4ac3f5c0e0421"}
|
||||
{"datasource":"eosc________::centerdata::centerdata.surveycodingsorg","master":"10|eosc________::72db73ab253727c889905da50f506d10"}
|
||||
{"datasource":"eosc________::cesga::cesga.finisterrae","master":"10|eosc________::6af4303d93f72744cc4c3c815ed2c9a0"}
|
||||
{"datasource":"eosc________::cesnet::cesnet.metacentrum_cloud","master":"10|eosc________::cebfaa2d0b93502d56a8fbeb6b66cfbe"}
|
||||
{"datasource":"eosc________::cesnet::cesnet.object_based_storage","master":"10|eosc________::1c5b55339bb86ff997a256d42d7be4b0"}
|
||||
{"datasource":"eosc________::cesnet::cesnet.umsa_-_untargeted_mass_spectrometry_data_analysis","master":"10|eosc________::d928868211759352cb1604713e0347ec"}
|
||||
{"datasource":"eosc________::cessda-eric::cessda-eric.cessda_data_catalogue","master":"10|fairsharing_::936824c0191953647ec609b4f49bc964"}
|
||||
{"datasource":"eosc________::cessda-eric::cessda-eric.data_management_expert_guide_dmeg","master":"10|eosc________::22c14aaf31fc64424fa97adffe6380b9"}
|
||||
{"datasource":"eosc________::cessda-eric::cessda-eric.elsst__european_language_social_science_thesaurus","master":"10|eosc________::5b30e057381cf0200dc2cdc7b562f570"}
|
||||
{"datasource":"eosc________::cines::cines.etdr","master":"10|eosc________::3b7f7d6aafb0154025330183d59ce670"}
|
||||
{"datasource":"eosc________::clarin-eric::clarin-eric.language_resource_switchboard","master":"10|eosc________::3531aa80dbe2b1018133b510a933de40"}
|
||||
{"datasource":"eosc________::clarin-eric::clarin-eric.virtual_collection_registry","master":"10|eosc________::454e4f7f9f53d9dacf9dc3ba27902f16"}
|
||||
{"datasource":"eosc________::clarin-eric::clarin-eric.virtual_language_observatory","master":"10|eosc________::4db0c877190783461728c6714cb66cbc"}
|
||||
{"datasource":"eosc________::cloudferro::cloudferro.data_collections_catalog","master":"10|eosc________::eba1540eb9e87231fdf366eb23d16c3a"}
|
||||
{"datasource":"eosc________::cloudferro::cloudferro.data_related_services_-_eo_browser","master":"10|eosc________::c24ebda20485c08293b72561ee3c634b"}
|
||||
{"datasource":"eosc________::cloudferro::cloudferro.data_related_services_-_eo_finder","master":"10|eosc________::3d68186239b6c0f0d677ff55d9b549d1"}
|
||||
{"datasource":"eosc________::cloudferro::cloudferro.infrastructure","master":"10|eosc________::ac7e3c0151fa3f11d3a7739dddaa3416"}
|
||||
{"datasource":"eosc________::cmcc::cmcc.enes_data_space","master":"10|eosc________::2925e4df4147819e5b5d2f886f40e3a2"}
|
||||
{"datasource":"eosc________::cnb-csic::cnb-csic.3dbionotes-ws_web_application_to_annotate_biochemical_and_biomedical_information_onto_structural_models","master":"10|eosc________::77fe0a66415f2440ab60d47dcee678a5"}
|
||||
{"datasource":"eosc________::cnb-csic::cnb-csic.scipioncloud","master":"10|eosc________::7f09b7fee99363813f24aca9ebdecf61"}
|
||||
{"datasource":"eosc________::cnr-iia::cnr-iia.geo_dab","master":"10|eosc________::108b0148352c15ee1ce935699e09add3"}
|
||||
{"datasource":"eosc________::collabwith::collabwith.collabwith_marketplace","master":"10|eosc________::894a0ffa7768b228c1b46793670c85e6"}
|
||||
{"datasource":"eosc________::coronis_computing_sl::coronis_computing_sl.uw-mos","master":"10|eosc________::9cbf0a75d817e291771b8bce6440f5f4"}
|
||||
{"datasource":"eosc________::coronis_computing_sl::coronis_computing_sl.vd-maps","master":"10|eosc________::b5af1514b39d8e021554a73076a694d9"}
|
||||
{"datasource":"eosc________::creaf::creaf.nimmbus_geospatial_user_feedback","master":"10|eosc________::86c325db16448760b3390dda7e46631a"}
|
||||
{"datasource":"eosc________::creatis::creatis.virtual_imaging_platform","master":"10|eosc________::01a45ac2677f89414af91e651735846d"}
|
||||
{"datasource":"eosc________::cs_group::cs_group.ai4geo_engine","master":"10|eosc________::c61211295d27e5e08f4c64f3e3098294"}
|
||||
{"datasource":"eosc________::csc-fi::csc-fi.chipster","master":"10|eosc________::61549f785a2c93939be011b0453a6981"}
|
||||
{"datasource":"eosc________::csc-fi::csc-fi.cpouta","master":"10|eosc________::d71c843b4e00eff17db07bf9d10769f9"}
|
||||
{"datasource":"eosc________::csc-fi::csc-fi.csc_epouta","master":"10|eosc________::4493bd6a93e5b8465fda8cf7ab2dfdea"}
|
||||
{"datasource":"eosc________::csc-fi::csc-fi.rahti_container_cloud","master":"10|eosc________::cc60eb9fc76f9598ee581eff0792573b"}
|
||||
{"datasource":"eosc________::cscs::cscs.object_storage","master":"10|eosc________::3da6a817fe85ef43f7d97ef07e467d45"}
|
||||
{"datasource":"eosc________::csi_piemonte::csi_piemonte.nivola2","master":"10|eosc________::ac6483be3e556c8652b8595680795983"}
|
||||
{"datasource":"eosc________::csic::csic.csic_cloud_infrastructure","master":"10|eosc________::05ea2eb193382e22f32b32fbe9a4d961"}
|
||||
{"datasource":"eosc________::cyberbotics::cyberbotics.robotbenchmark","master":"10|eosc________::27ee094c68b7a758ca2915aca6215a1d"}
|
||||
{"datasource":"eosc________::d4science::d4science.alien_and_invasive_species_vre","master":"10|eosc________::b5cff6d55dcf6c20e78a0f1f847b3005"}
|
||||
{"datasource":"eosc________::d4science::d4science.rprototypinglab_virtual_research_environment","master":"10|eosc________::8073ab0dbb22dc3b9f17627a7b25903f"}
|
||||
{"datasource":"eosc________::d4science::d4science.visual_media_service_vre","master":"10|eosc________::eabf459f53c2bfe6247f006fcc0f4db7"}
|
||||
{"datasource":"eosc________::dariah_eric::dariah_eric.dariah-campus","master":"10|eosc________::9c63075d6642a2d269776c2b90c2f976"}
|
||||
{"datasource":"eosc________::dariah_eric::dariah_eric.ssh_open_marketplace","master":"10|eosc________::91fe494a3c21805febb03353152f1212"}
|
||||
{"datasource":"eosc________::datacite::datacite.datacite_doi_registration_service","master":"10|eosc________::c146a470f01ee7ded3b55acda9362e7f"}
|
||||
{"datasource":"eosc________::dcc-uk::dcc-uk.dmponline","master":"10|eosc________::fe480090e0739dab86b24a11177eeffd"}
|
||||
{"datasource":"eosc________::denbi::denbi.cloud","master":"10|eosc________::59399e560967488c0ae0329e0d37f5b4"}
|
||||
{"datasource":"eosc________::desy::desy.pan_data","master":"10|eosc________::52008fe404bf2e939140109162f9233f"}
|
||||
{"datasource":"eosc________::desy::desy.pan_faas","master":"10|eosc________::026939c4b12d7d71e2b05bc5acde804e"}
|
||||
{"datasource":"eosc________::desy::desy.pan_gitlab","master":"10|eosc________::f13cefc9f3207cb82f3285b05f190f78"}
|
||||
{"datasource":"eosc________::desy::desy.pan_notebook","master":"10|eosc________::500fe61cce6562797cd43797aab12be5"}
|
||||
{"datasource":"eosc________::digitalglobe::digitalglobe.earthwatch","master":"10|eosc________::020d905260267066c1926f526bb86f30"}
|
||||
{"datasource":"eosc________::dkrz::dkrz.enes_climate_analytics_service","master":"10|eosc________::1d7a1fea6694d15d9e67f08e1e77082b"}
|
||||
{"datasource":"eosc________::doabf::doabf.operas_certification","master":"10|eosc________::79b9748edeffb872a28660a9d238dcec"}
|
||||
{"datasource":"eosc________::ds-wizard::ds-wizard.data_stewardship_wizard","master":"10|eosc________::fc6bad963e15e218efc62c7befd122af"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.check-in","master":"10|eosc________::baa3c497b9499b3d8c87ea8d2b37a44f"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.cloud_compute","master":"10|eosc________::b1179384a336d409fc909fe3711d3d1f"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.cloud_container_compute","master":"10|eosc________::a66bb1ac56a3bcf2c24b0ef85ed2bdfc"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.data_transfer","master":"10|eosc________::6c0bf38e885c42161b88093517f6cd3e"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.egi_datahub","master":"10|eosc________::5a260dae80795584ac08df133adb1fad"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.fitsm_training","master":"10|eosc________::927b4455c0a21692d2a9f634bccd8309"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.high-throughput_compute","master":"10|eosc________::e27ec11ac7b7d6ffbbce668b7d1f81d5"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.iso_27001_training","master":"10|eosc________::98a6655b6421166c5c29baa2f5815de3"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.notebook","master":"10|eosc________::1d37909a6a31147a09ee9f2e579a6706"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.online_storage","master":"10|eosc________::d8b94284582d3e2185a782ae2ba42186"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.training_infrastructure","master":"10|eosc________::38cdb8e44638f2e561c466f0dd26cf96"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.workload_manager","master":"10|eosc________::ff515071cd88afb40599edcb6637f47e"}
|
||||
{"datasource":"eosc________::ehri::ehri.begrenzte_flucht","master":"10|eosc________::01d1445605fc1d25e6a7f21ba995d724"}
|
||||
{"datasource":"eosc________::ehri::ehri.diplomatic_reports","master":"10|eosc________::11714353d2ed069ca30b177d4b4d9e0f"}
|
||||
{"datasource":"eosc________::ehri::ehri.early_holocaust_testimony","master":"10|eosc________::0a4974b0bb295b98f88cb7c793f91c17"}
|
||||
{"datasource":"eosc________::ehri::ehri.ehri_document_blog","master":"10|eosc________::fb9291f8dac099986eafe957b169ed97"}
|
||||
{"datasource":"eosc________::ehri::ehri.international_research_portal_for_records_related_to_nazi-era_cultural_property","master":"10|eosc________::01c5b10e57f9cbb4f3125f427375487e"}
|
||||
{"datasource":"eosc________::ehri::ehri.the_ehri_portal","master":"10|eosc________::6ad4d5352fd192b5fecd76bbd7a7e8b7"}
|
||||
{"datasource":"eosc________::eiscat::eiscat.eiscat_data_access_portal","master":"10|eosc________::0f06a55c8333ae4d197c1d263b2be6ba"}
|
||||
{"datasource":"eosc________::elixir-italy::elixir-italy.laniakea_recas","master":"10|eosc________::01e84abe377339ea57ed521ac39130e9"}
|
||||
{"datasource":"eosc________::elixir-uk::elixir-uk.cyverse_uk","master":"10|eosc________::6a6a05847befec6587bef7673112f5e5"}
|
||||
{"datasource":"eosc________::elixir-uk::elixir-uk.workflowhub","master":"10|fairsharing_::c8cd63e1bf13c5016881652983fb615a"}
|
||||
{"datasource":"eosc________::elsevier::elsevier.digital_commons","master":"10|eosc________::67d38b6a1f43184676b113369554676b"}
|
||||
{"datasource":"eosc________::embl-ebi::embl-ebi.embassy_cloud","master":"10|eosc________::7f8b24797312b851916ee1be0f836de6"}
|
||||
{"datasource":"eosc________::embl-ebi::embl-ebi.identifiersorg","master":"10|eosc________::564e9f467aad251143e12e2e6ec19768"}
|
||||
{"datasource":"eosc________::embl-ebi::embl-ebi.identifiersorg_central_registry","master":"10|eosc________::441caf7eaa4a6602aceae36b2697b924"}
|
||||
{"datasource":"eosc________::embl-ebi::embl-ebi.identifiersorg_resolution_services","master":"10|eosc________::8df6273a1cb2289dbbe3a4b5fe05aa53"}
|
||||
{"datasource":"eosc________::emso_eric::emso_eric.emso_eric_data_portal","master":"10|eosc________::94a41630bd9ddea4a88ec0bfba1b9d95"}
|
||||
{"datasource":"eosc________::enermaps::enermaps.enermaps_data_management_tool","master":"10|eosc________::11496ee8a69b4b955200da7f2c12fe3b"}
|
||||
{"datasource":"eosc________::enhancer::enhancer.openrdmeu","master":"10|eosc________::04820bece2545235144903dec056bcbd"}
|
||||
{"datasource":"eosc________::enhancer::enhancer.swiss_escience_grid_certificates","master":"10|eosc________::4968516eb3b1ad6d883e74a84827e963"}
|
||||
{"datasource":"eosc________::eodc::eodc.data_catalogue_service","master":"10|eosc________::21c44a2b6946e02300dbe36a8edec650"}
|
||||
{"datasource":"eosc________::eodc::eodc.jupyterhub_for_global_copernicus_data","master":"10|eosc________::f99ccd68bf3de6a0a3b0db3441a41bbd"}
|
||||
{"datasource":"eosc________::eosc-dih::eosc-dih.piloting_and_co-design_of_the_business_pilots","master":"10|eosc________::178f3e4832afe9e477d761d2f3d95f85"}
|
||||
{"datasource":"eosc________::eox::eox.edc_eoxhub_workspace","master":"10|eosc________::d71468878e069cf484fc988d276c6d9a"}
|
||||
{"datasource":"eosc________::esa-int::esa-int.geoss_web_portal","master":"10|eosc________::d7bac1ce234c20e3ab43a74eefa34782"}
|
||||
{"datasource":"eosc________::esrf::esrf.the_european_synchrotron_radiation_facility_data_portal","master":"10|fairsharing_::2996962656838a97af4c5f926fe6f1b0"}
|
||||
{"datasource":"eosc________::ess::ess.pan-learning-org","master":"10|eosc________::1298286d3a7cc48fa525b118218c7836"}
|
||||
{"datasource":"eosc________::ess_eric::ess_eric.european_social_survey_ess_as_a_service","master":"10|eosc________::faa60b95b602690861be9305812a5c07"}
|
||||
{"datasource":"eosc________::eudat::eudat.b2access","master":"10|eosc________::4dee0695b946b545dc8d52c56598fbbf"}
|
||||
{"datasource":"eosc________::eudat::eudat.b2drop","master":"10|eosc________::4c6a514f1392ac1d159214e61785849a"}
|
||||
{"datasource":"eosc________::eudat::eudat.b2find","master":"10|eosc________::6069f46dfcc89ccf8043581c9034558e"}
|
||||
{"datasource":"eosc________::eudat::eudat.b2handle","master":"10|eosc________::a23be7f6265fd1ad957eed16b5c8bdc4"}
|
||||
{"datasource":"eosc________::eudat::eudat.b2note","master":"10|eosc________::dfd1d6816b4182e25e84f6cf10d108ed"}
|
||||
{"datasource":"eosc________::eudat::eudat.b2safe","master":"10|re3data_____::a632666349a0bb9a36096c9e152d34cc"}
|
||||
{"datasource":"eosc________::eudat::eudat.b2share","master":"10|eosc________::f959324bdb00f052d547b95da205062f"}
|
||||
{"datasource":"eosc________::eurac::eurac.edp-portal_-_metadata_catalogue_of_eurac_research","master":"10|eosc________::274d73061a925a29d8743b3e1022d0dc"}
|
||||
{"datasource":"eosc________::europeana::europeana.europeana_apis","master":"10|eosc________::91de8c90ebde3dc1c8d41f339fe3fac7"}
|
||||
{"datasource":"eosc________::exoscale::exoscale.european_cloud_hosting","master":"10|eosc________::12b7e6fef784084b817a42f2990fe3f2"}
|
||||
{"datasource":"eosc________::expertai::expertai.document_enrichment_api","master":"10|eosc________::6812b902471f12506c8e6441195aff57"}
|
||||
{"datasource":"eosc________::expertai::expertai.recommender_api","master":"10|eosc________::c40634543c1217686f0a8f5e8592d100"}
|
||||
{"datasource":"eosc________::expertai::expertai.search_api","master":"10|eosc________::79440bc8082949f56cbabef796cec7f1"}
|
||||
{"datasource":"eosc________::fairdi::fairdi.nomad_repository","master":"10|eosc________::b9000c95a6fde9930ae74f4071e14cb2"}
|
||||
{"datasource":"eosc________::figshare::figshare.figshare","master":"10|eosc________::5e6bd062c6b85e2d176b2e61636b8971"}
|
||||
{"datasource":"eosc________::forschungsdaten::forschungsdaten.forschungsdateninfo","master":"10|eosc________::c9185fdb68af7d515e56054da546bc94"}
|
||||
{"datasource":"eosc________::forth::forth.openbioeu","master":"10|eosc________::2db71171816e994877fb960b9fcd89f2"}
|
||||
{"datasource":"eosc________::fssda::fssda.data_service_portal_aila","master":"10|eosc________::ef1f75ea6d244563bc6cfb0c3d3affa4"}
|
||||
{"datasource":"eosc________::fssda::fssda.kuha2_metadata_server","master":"10|eosc________::b6af28d7c292dbbe816cd0d6a9a66f16"}
|
||||
{"datasource":"eosc________::gbif-es::gbif-es.collections_registry","master":"10|eosc________::ac6da0cfbd07f8605c57a799c41dc947"}
|
||||
{"datasource":"eosc________::gbif-es::gbif-es.e-Learning_platform","master":"10|eosc________::9059ca88ca8292881ffba9ad8d943d04"}
|
||||
{"datasource":"eosc________::gbif-es::gbif-es.images_portal","master":"10|eosc________::6991e5dd230956156129669934798cd8"}
|
||||
{"datasource":"eosc________::gbif-es::gbif-es.occurrence_records","master":"10|eosc________::948a9a53e2a9c94d32f99785eccff662"}
|
||||
{"datasource":"eosc________::gbif-es::gbif-es.regions_module","master":"10|eosc________::11189c308854c8d8113161edc7fbd3de"}
|
||||
{"datasource":"eosc________::gbif-es::gbif-es.spatial_portal","master":"10|eosc________::665f73f5e4b6a3693fec9426a6ce6ae8"}
|
||||
{"datasource":"eosc________::gbif-es::gbif-es.species_portal","master":"10|eosc________::9fe2f2ccb3d17452bd6e7424f60340ce"}
|
||||
{"datasource":"eosc________::gbif::gbif.gbif_species_occurrence_data","master":"10|fairsharing_::6e5025ccc7d638ae4e724da8938450a6"}
|
||||
{"datasource":"eosc________::gbif_portugal::gbif_portugal.gbif_portugal_occurrence_records","master":"10|eosc________::fcd4f4efdecb4e675fdee043043f69fc"}
|
||||
{"datasource":"eosc________::gcc_umcg::gcc_umcg.molgenis","master":"10|eosc________::7f255ebbb3715f258e8d7c470209e675"}
|
||||
{"datasource":"eosc________::geant::geant.clouds_service_infrastructure_as_a_service","master":"10|eosc________::7debc69506a8019515d350707e8c82d7"}
|
||||
{"datasource":"eosc________::geant::geant.edugain","master":"10|eosc________::3ded12106e7e870242f7ec39345b3b97"}
|
||||
{"datasource":"eosc________::geant::geant.edumeet_-_webbased_videoconferencing_platform","master":"10|eosc________::dcf8b262f7f61d44eedf409a29d30abc"}
|
||||
{"datasource":"eosc________::geant::geant.eduroam","master":"10|eosc________::e7fd04aab1f224aaa2b5d3478694748b"}
|
||||
{"datasource":"eosc________::geant::geant.eduteams","master":"10|eosc________::f3b04fa1e741f17a842fcbea35e04318"}
|
||||
{"datasource":"eosc________::geant::geant.eduvpn_-_access_your_institutes_network_or_the_internet_using_an_encrypted_connection","master":"10|eosc________::aeb7c573f2742ec5ef8b7332b6b614cb"}
|
||||
{"datasource":"eosc________::geant::geant.inacademia","master":"10|eosc________::26cb3be539a5bbb25533d3b1bdb9d6aa"}
|
||||
{"datasource":"eosc________::geant::geant.ip","master":"10|eosc________::59cd8dbce2703f4eea69a54a959aae89"}
|
||||
{"datasource":"eosc________::geant::geant.l3vpn","master":"10|eosc________::1e70cff61071ce42baffa6dafaf3165e"}
|
||||
{"datasource":"eosc________::geant::geant.lambda","master":"10|eosc________::20a8114b376bf4c455c034b7b4513805"}
|
||||
{"datasource":"eosc________::geant::geant.mdvpn","master":"10|eosc________::54fbf0ac4e42a2ce51e400d9783b51ba"}
|
||||
{"datasource":"eosc________::geant::geant.open","master":"10|eosc________::9ae24d8c63e9ff986fbd20705b334919"}
|
||||
{"datasource":"eosc________::geant::geant.perfsonar","master":"10|eosc________::1bdda4f743377914fabd0f365a8b6ee2"}
|
||||
{"datasource":"eosc________::geant::geant.plus","master":"10|eosc________::eef45e860d52aff4932f254599d5b713"}
|
||||
{"datasource":"eosc________::geant::geant.transits_training","master":"10|eosc________::831e2b596060c60d7d4bc79c200a2254"}
|
||||
{"datasource":"eosc________::geant::geant.trusted_certificate_service","master":"10|eosc________::30817adfb6c625d7fd36b657e2fabc74"}
|
||||
{"datasource":"eosc________::geant::geant.wifimon","master":"10|eosc________::6116f3b14f34658593529f6810068c4e"}
|
||||
{"datasource":"eosc________::genias::genias.e-irg_knowledge_base","master":"10|eosc________::ddc5ab67fed353917716eb2d5c86ce68"}
|
||||
{"datasource":"eosc________::gesis::gesis.doi_registration_service","master":"10|eosc________::71f37a7ebd8495a59c46e637ee5463da"}
|
||||
{"datasource":"eosc________::grnet::grnet.agora_resource_portfolio_management_tool","master":"10|eosc________::461aa754c52b7eed605f9e0955470de5"}
|
||||
{"datasource":"eosc________::grnet::grnet.argo_monitoring_engine","master":"10|eosc________::e91a3b4dfb62113b9b67b0ac97e566b4"}
|
||||
{"datasource":"eosc________::grnet::grnet.aris","master":"10|eosc________::6b381464ec768e3cf55ccacdb00b5988"}
|
||||
{"datasource":"eosc________::grnet::grnet.aris_-_archival_service","master":"10|eosc________::32158f91e33cf6fb6c63561cbc7ffd24"}
|
||||
{"datasource":"eosc________::grnet::grnet.ni4os-europe_login","master":"10|eosc________::aeaa8f7fc2948930bfa4f970cd96837e"}
|
||||
{"datasource":"eosc________::grnet::grnet.ni4os-europe_repository_service","master":"10|eosc________::d6933cb7acd6fa7a2f7a42562c432fb5"}
|
||||
{"datasource":"eosc________::grycap::grycap.elastic_cloud_compute_cluster","master":"10|eosc________::c6d3c380ce5499d8d20cc9bbeb3b43ff"}
|
||||
{"datasource":"eosc________::grycap::grycap.infrastructure_manager","master":"10|eosc________::e8a2eeb06a205c3299af49f5c233ce16"}
|
||||
{"datasource":"eosc________::grycap::grycap.saps_surface_energy_balance_automated_processing_service","master":"10|eosc________::a7ae875b2487576c35f1bc8e1c857c14"}
|
||||
{"datasource":"eosc________::hn::hn.isidore","master":"10|re3data_____::fabe5c1aaa2e2d4c847e01647b87bf60"}
|
||||
{"datasource":"eosc________::hostkey::hostkey.gpu_servers_grant_program","master":"10|eosc________::d45f87107eb536b4be97e112fac15787"}
|
||||
{"datasource":"eosc________::icos_eric::icos_eric.data_discovery_and_access_portal","master":"10|eosc________::84ada2e91828ce72fa6d02736cdd90f1"}
|
||||
{"datasource":"eosc________::ifca-csic::ifca-csic.deepaas_training_facility","master":"10|eosc________::5414e2342e67d64b11b835e7fd58869d"}
|
||||
{"datasource":"eosc________::ifca-csic::ifca-csic.ifca-csic_cloud_infrastructure","master":"10|eosc________::838e5c334e8115e4831d5f21435aa19b"}
|
||||
{"datasource":"eosc________::ifca-csic::ifca-csic.plant_classification","master":"10|eosc________::32c26f83acaef8d89cc6c7a2f8abd198"}
|
||||
{"datasource":"eosc________::ifca-csic::ifca-csic.remote_monitoring_and_smart_sensing","master":"10|eosc________::0335d29ec68ef9ebad8326cba79455f2"}
|
||||
{"datasource":"eosc________::ifin-hh::ifin-hh.cloudifin","master":"10|eosc________::04d791df0b61b0f5060f241c70924991"}
|
||||
{"datasource":"eosc________::iisas::iisas.dynamic_dns_service","master":"10|eosc________::2381e3b55d048130f2dffd437123d501"}
|
||||
{"datasource":"eosc________::iisas::iisas.fedcloudclient_egi_fedcloud_client","master":"10|eosc________::3668885b6512a039673b9f4638c88600"}
|
||||
{"datasource":"eosc________::iisas::iisas.modelling_service_for_water_supply_systems","master":"10|eosc________::b1d6d2cebddf52f6647102a30690fba9"}
|
||||
{"datasource":"eosc________::ill::ill.ill_data_portal","master":"10|eosc________::714498cf1efec13c2206db4b1e4f1c30"}
|
||||
{"datasource":"eosc________::ill::ill.panosc_software_catalogue","master":"10|eosc________::bc63c5a78abd38a7d9df043e0853a9ce"}
|
||||
{"datasource":"eosc________::inaf::inaf.space-ml_caesar_service","master":"10|eosc________::ba42c5e4332ff16c6cd28573012bc2f9"}
|
||||
{"datasource":"eosc________::inaf::inaf.space-vis_vialactea_service","master":"10|eosc________::ce2ca563bceae686b763326ed53e7b54"}
|
||||
{"datasource":"eosc________::infn::infn.dynamic_on_demand_analysis_service","master":"10|eosc________::f884894e05c5a54646f0b5715e5495d6"}
|
||||
{"datasource":"eosc________::infn::infn.fgsg_science_software_on_demand","master":"10|eosc________::452af4e76a64b6ee7e4bdc86527687f7"}
|
||||
{"datasource":"eosc________::infn::infn.indigo_identity_and_access_management","master":"10|eosc________::d23115c40a4e256725f140330d001861"}
|
||||
{"datasource":"eosc________::infn::infn.infn-cloud_object_storage_dice","master":"10|eosc________::fe0c28e8657cb84e3b775156106c03d1"}
|
||||
{"datasource":"eosc________::infn::infn.paas_orchestrator","master":"10|eosc________::146240bb16057a93e11631edee570f76"}
|
||||
{"datasource":"eosc________::infrafrontier::infrafrontier.training_in_mouse_functional_genomics","master":"10|eosc________::64d6597d10f4e617152f4a612a87eaba"}
|
||||
{"datasource":"eosc________::inria::inria.software_heritage_archive","master":"10|fairsharing_::2c758933af02c0b301906f2819ae1268"}
|
||||
{"datasource":"eosc________::jelastic::jelastic.platform-as-a-service","master":"10|eosc________::bfcae4ab00df41a3c43efbb879586e8f"}
|
||||
{"datasource":"eosc________::kit::kit.eosc-performance","master":"10|eosc________::e52ab75587c1dd98db80568197f04586"}
|
||||
{"datasource":"eosc________::kit::kit.o3as_ozone_assessment","master":"10|eosc________::aaf27a5f35a790617247abecd84b100f"}
|
||||
{"datasource":"eosc________::komanord::komanord.guardomic","master":"10|eosc________::b1e06c9d2c472e9441ee72e83a934d40"}
|
||||
{"datasource":"eosc________::lago::lago.onedatasim","master":"10|eosc________::2b2163e8b82320fed69a017a3e5fb657"}
|
||||
{"datasource":"eosc________::lifewatch-eric::lifewatch-eric.plants_identification_app","master":"10|eosc________::6fc6ed0894391496d3c4967d45933d1a"}
|
||||
{"datasource":"eosc________::lindatclariah-cz::lindatclariah-cz.elixirfm","master":"10|eosc________::6dd7c323776a028cef0619cb34bdf48c"}
|
||||
{"datasource":"eosc________::lindatclariah-cz::lindatclariah-cz.ker_-_keyword_extractor","master":"10|eosc________::09915f038900aa43cb0c76aa89f10cda"}
|
||||
{"datasource":"eosc________::lindatclariah-cz::lindatclariah-cz.lindatclariah-cz_repository","master":"10|eosc________::3daee6a29fb1d9a0f624cdd5973c33ea"}
|
||||
{"datasource":"eosc________::lindatclariah-cz::lindatclariah-cz.machine_translation","master":"10|eosc________::3ae4551729381cfd03c433fb0de0c971"}
|
||||
{"datasource":"eosc________::lindatclariah-cz::lindatclariah-cz.morphodita","master":"10|eosc________::f2ceebdc1a41d65504ff27f7297c833b"}
|
||||
{"datasource":"eosc________::lindatclariah-cz::lindatclariah-cz.nametag","master":"10|eosc________::71e3226e7a868e2215335ffb29073285"}
|
||||
{"datasource":"eosc________::lindatclariah-cz::lindatclariah-cz.udpipe_tool_for_lemmatization_morphological_analysis_pos_tagging_and_dependency_parsing_in_multiple_languages","master":"10|eosc________::2dfc64c2951d9be3f1e2b576633ea425"}
|
||||
{"datasource":"eosc________::lnec::lnec-pt.opencoasts_portal","master":"10|eosc________::7e99655aeda0b5f06efb3eea424dff54"}
|
||||
{"datasource":"eosc________::lnec::lnec.worsica_-_water_monitoring_sentinel_cloud_platform","master":"10|eosc________::c2f55ab774c3cbbd9a330eebaa74dc36"}
|
||||
{"datasource":"eosc________::materialscloud::materialscloud.aiiDA_lab","master":"10|eosc________::dfd970a812cf2e0298eb28c681bc109f"}
|
||||
{"datasource":"eosc________::materialscloud::materialscloud.materials_cloud_archive","master":"10|fairsharing_::a431d70133ef6cf688bc4f6093922b48"}
|
||||
{"datasource":"eosc________::meeo::meeo.adam_platform","master":"10|eosc________::b17fedb87dd9985b6a5e51db593446d6"}
|
||||
{"datasource":"eosc________::meeo::meeo.adam_space","master":"10|eosc________::24bfbca4cf4fedc5a4a662fe67a30d7e"}
|
||||
{"datasource":"eosc________::mobile_observation_integration_service::mobile_observation_integration_service.dark_sky_meter_datasource","master":"10|eosc________::160638e73224aeb7e4f98fd237672919"}
|
||||
{"datasource":"eosc________::msw::msw.polaris_os","master":"10|eosc________::12348ba5b2c5902fd400cb3f1ab773ee"}
|
||||
{"datasource":"eosc________::obp::obp.thoth","master":"10|eosc________::680198ec3f51a744de8a7603d542a0e1"}
|
||||
{"datasource":"eosc________::openaire::openaire.amnesia","master":"10|eosc________::ac57e2dd5b3ee01909d7a592523bb96f"}
|
||||
{"datasource":"eosc________::openaire::openaire.argos","master":"10|eosc________::92145beb3257af0510ee61ef10d16870"}
|
||||
{"datasource":"eosc________::openaire::openaire.broker","master":"10|eosc________::c8c6e8d211d6df4ee8a187fa1134bd92"}
|
||||
{"datasource":"eosc________::openaire::openaire.data_provider_dashboard","master":"10|eosc________::809d4c77a7acf9ac0cc2990d4264ae51"}
|
||||
{"datasource":"eosc________::openaire::openaire.digital_humanities_and_cultural_heritage_openaire_community_gateway","master":"10|eosc________::b9110e9735dd467abc969fe8e2f1efa3"}
|
||||
{"datasource":"eosc________::openaire::openaire.discovery_portal","master":"10|eosc________::992052173b689c8cea94e8e8d99f0238"}
|
||||
{"datasource":"eosc________::openaire::openaire.european_marine_science_openaire_dashboard","master":"10|eosc________::950a99851df85c90ec2e933e1d55e164"}
|
||||
{"datasource":"eosc________::openaire::openaire.funder_dashboard","master":"10|eosc________::196eea80ab9d73766cd2e8b6ab85872f"}
|
||||
{"datasource":"eosc________::openaire::openaire.graph","master":"10|eosc________::c122caed52a88b57732b814a74141000"}
|
||||
{"datasource":"eosc________::openaire::openaire.greek_sustainable_development_solutions_network_sdsn_openaire_dashboard","master":"10|eosc________::8100e41e3a5b18170bc5ede2cc393331"}
|
||||
{"datasource":"eosc________::openaire::openaire.inference","master":"10|eosc________::c491811e9a6afa69cdcab0f92fca6f7b"}
|
||||
{"datasource":"eosc________::openaire::openaire.neuroinformatics_openaire_dashboard","master":"10|eosc________::6e3adcce4d0d4229a9749584dfd5e7a8"}
|
||||
{"datasource":"eosc________::openaire::openaire.open_science_helpdesk","master":"10|eosc________::d66db88d4c6c354fe7ebcd4c3dce334e"}
|
||||
{"datasource":"eosc________::openaire::openaire.open_science_observatory","master":"10|eosc________::441ee64860eb79808b7cf0bb08262be6"}
|
||||
{"datasource":"eosc________::openaire::openaire.open_science_training","master":"10|eosc________::99847506cdff50afa4945d60a9661ea3"}
|
||||
{"datasource":"eosc________::openaire::openaire.openaire_login","master":"10|eosc________::818973a9375c0fa545499e1bb9ad0ab2"}
|
||||
{"datasource":"eosc________::openaire::openaire.openapc","master":"10|eosc________::a28cc193bc938573e892b8aad0017702"}
|
||||
{"datasource":"eosc________::openaire::openaire.research_community_dashboard","master":"10|eosc________::e1a866322f76407fb161a253dc5b539c"}
|
||||
{"datasource":"eosc________::openaire::openaire.scholexplorer","master":"10|eosc________::6b34adede04121175566ef8c70f1e520"}
|
||||
{"datasource":"eosc________::openaire::openaire.technical_support_towards_openaire_compliance","master":"10|eosc________::cdb8e94b386f9b6780a47194bd1bc7f7"}
|
||||
{"datasource":"eosc________::openaire::openaire.topos_observatory_for_organisations","master":"10|eosc________::a7d2b95257273b5ea3f3a23fd8a60d48"}
|
||||
{"datasource":"eosc________::openaire::openaire.usage_statistics","master":"10|eosc________::8aa345dc7321fc97906bf4c193a05a8f"}
|
||||
{"datasource":"eosc________::openaire::openaire.validator","master":"10|eosc________::f2c13efbaa2a33af3e4e6a54805ac379"}
|
||||
{"datasource":"eosc________::openaire::openaire.zenodo","master":"10|opendoar____::358aee4cc897452c00244351e4d91f69"}
|
||||
{"datasource":"eosc________::openbiomaps::openbiomaps.openbiomaps","master":"10|eosc________::32edf5a4edbdea0899d6ba588d083efd"}
|
||||
{"datasource":"eosc________::openedition::openedition.operas_research_for_society","master":"10|eosc________::2cdf4f57007b990b7ad7a884796f9b15"}
|
||||
{"datasource":"eosc________::openknowledgemaps::openknowledgemaps.open_knowledge_maps","master":"10|eosc________::f3819d0f8e8bf57d383b23d31a3c0099"}
|
||||
{"datasource":"eosc________::openminted::openminted.builder_of_tdm_applications","master":"10|eosc________::fdd26c19dd490260bc6c48b5813f4ac3"}
|
||||
{"datasource":"eosc________::openminted::openminted.catalogue_of_ancillary_resources","master":"10|eosc________::ab4e37e85a1975b204b66683ed3888a8"}
|
||||
{"datasource":"eosc________::openminted::openminted.catalogue_of_corpora","master":"10|eosc________::2cf744a594ea30fd31e976bffa8f2b71"}
|
||||
{"datasource":"eosc________::openminted::openminted.catalogue_of_tdm_applications","master":"10|eosc________::ef5f343c5cf11fa2d40407ec308bb34a"}
|
||||
{"datasource":"eosc________::openminted::openminted.catalogue_of_tdm_components","master":"10|eosc________::4275243a94677f19a5b74e5afb1f94cf"}
|
||||
{"datasource":"eosc________::openminted::openminted.consulting_on_licences_for_tdm","master":"10|eosc________::522000b4c90b209aa7be961449ca910f"}
|
||||
{"datasource":"eosc________::openminted::openminted.corpus_builder_for_scholarly_works","master":"10|eosc________::c64725d47af63bc2114b4214b684a392"}
|
||||
{"datasource":"eosc________::openminted::openminted.support_and_training","master":"10|eosc________::84501ff99e5e429f5f083ab8ca0be7e4"}
|
||||
{"datasource":"eosc________::openminted::openminted.tdm_applications_executor","master":"10|eosc________::e9ae655ce2ff1eaa19d0b3475ce5e660"}
|
||||
{"datasource":"eosc________::operas::operas.gotriple_discovery_platform","master":"10|eosc________::f687e24dc56aaeeb561c95865a5071cc"}
|
||||
{"datasource":"eosc________::operas::operas.operas_metrics_service","master":"10|eosc________::5960e1289f623625210f720c6173592d"}
|
||||
{"datasource":"eosc________::oslo_university::oslo_university.services_for_sensitive_data_tsd","master":"10|eosc________::743b01351510f88e24be1c700c581f68"}
|
||||
{"datasource":"eosc________::osmooc::osmooc.open_science_mooc","master":"10|eosc________::e101101e8653b6607a3ad9fea3b7d1fe"}
|
||||
{"datasource":"eosc________::oxford_e-research_centre::oxford_e-research_centre.fairsharing","master":"10|openaire____::bf5a61cc330e21ffa90eed3eb1533466"}
|
||||
{"datasource":"eosc________::phenomenal::phenomenal.phenomenal","master":"10|eosc________::79e19b14aeee0d94e9a79110a6e6ad32"}
|
||||
{"datasource":"eosc________::plantnet::plantnet.plntnet_identification_service","master":"10|eosc________::5ce89743eafdd8578591d84150f547e4"}
|
||||
{"datasource":"eosc________::prace::prace.application_enabling_support","master":"10|eosc________::c87fd74ed685337fdbcff504373fc513"}
|
||||
{"datasource":"eosc________::prace::prace.code_vault","master":"10|eosc________::dbab7889c81b59ec753040a762f6569a"}
|
||||
{"datasource":"eosc________::prace::prace.deci_access","master":"10|eosc________::c7cedb82b1beea5382601d48807212aa"}
|
||||
{"datasource":"eosc________::prace::prace.mooc","master":"10|eosc________::d6ff8167d31dccebe33a272513422b53"}
|
||||
{"datasource":"eosc________::prace::prace.patc","master":"10|eosc________::1ab1b123bd559ee7f7c7ec2ee353f0c0"}
|
||||
{"datasource":"eosc________::prace::prace.preparatory_access","master":"10|eosc________::39430adf529f1ab9e33da444b3708fcf"}
|
||||
{"datasource":"eosc________::prace::prace.project_access","master":"10|eosc________::b58e957946983b686c76ee19dfab8d70"}
|
||||
{"datasource":"eosc________::prace::prace.ptc","master":"10|eosc________::b3ca18e8884bfe2422d3723313fef79c"}
|
||||
{"datasource":"eosc________::prace::prace.seasonal_schools_and_international_summer_school","master":"10|eosc________::590c71318d9d94c32981e3195567d546"}
|
||||
{"datasource":"eosc________::prace::prace.shape","master":"10|eosc________::38b5a26f74e4808270a2d4f305d2f3a5"}
|
||||
{"datasource":"eosc________::prace::prace.training_portal","master":"10|eosc________::25966a269ab2343ac9c4d982c341d87f"}
|
||||
{"datasource":"eosc________::predictia::predictia.climadjust","master":"10|eosc________::14743eb22da3524893784faf409aac70"}
|
||||
{"datasource":"eosc________::psi::psi.psi_public_data_repository","master":"10|re3data_____::1e55174ff77ed2d804871281201dbb50"}
|
||||
{"datasource":"eosc________::psi::psi.remote_desktop_service","master":"10|eosc________::c82e26eb6e65d008de03b349dffc11fc"}
|
||||
{"datasource":"eosc________::psnc::psnc.rohub","master":"10|eosc________::c87f08707b5235172e85b374e39a82dc"}
|
||||
{"datasource":"eosc________::psnc::psnc.symbiote","master":"10|eosc________::ef0cd965a0d0a3df80ecfae4b3b08aad"}
|
||||
{"datasource":"eosc________::rasdaman::rasdaman.datacube","master":"10|eosc________::bb1678f7b15d8c15fde6e240a4f95f93"}
|
||||
{"datasource":"eosc________::rbi::rbi.dariah_science_gateway","master":"10|eosc________::b51b448421d926293b3781f4ac90f4f4"}
|
||||
{"datasource":"eosc________::readcoop::readcoop.transkribus","master":"10|eosc________::a80411026809e6eaa896439e1b9764f4"}
|
||||
{"datasource":"eosc________::rli::rli.open_energy_platform","master":"10|fairsharing_::0cbed40c0d920b94126eaf5e707be1f5"}
|
||||
{"datasource":"eosc________::ror-org::ror-org.identifier","master":"10|eosc________::6fe92c2346db22322ddf6b677d449b0e"}
|
||||
{"datasource":"eosc________::sciences_po::sciences_po.ethnic_and_migrant_minority_survey_registry","master":"10|eosc________::0cde986dc2bf015912e407f0f83ee402"}
|
||||
{"datasource":"eosc________::sciences_po::sciences_po.wpss_for_ess","master":"10|eosc________::9a5bb11c495443aad944b04f5fcb5c07"}
|
||||
{"datasource":"eosc________::scigne::scigne.cloud_compute","master":"10|eosc________::7c63e3284c36b5977c553192dce506b3"}
|
||||
{"datasource":"eosc________::scipedia::scipedia.scipedia","master":"10|eosc________::850abcddc76069f2c3c1cf77ad4beec9"}
|
||||
{"datasource":"eosc________::scipedia::scipedia.topos_for_individuals","master":"10|eosc________::e6214b58f39a25b53eecda340f95ee7b"}
|
||||
{"datasource":"eosc________::seadatanet::seadatanet.doi_minting_service","master":"10|eosc________::f87f72147a3c82c4f77684e40101e90e"}
|
||||
{"datasource":"eosc________::seadatanet::seadatanet.european_directory_of_marine_environmental_data_edmed","master":"10|eosc________::d79706389f0b864306feb47aac1f5766"}
|
||||
{"datasource":"eosc________::seadatanet::seadatanet.european_directory_of_marine_environmental_research_projects","master":"10|eosc________::baa9d2d6cdd8507fcbf76242e4c25d76"}
|
||||
{"datasource":"eosc________::seadatanet::seadatanet.european_directory_of_marine_organisations_edmo","master":"10|eosc________::5d23c66c26e0df209fc415c1e9ad0316"}
|
||||
{"datasource":"eosc________::seadatanet::seadatanet.european_directory_of_the_cruise_summary_reports_csr","master":"10|eosc________::fd70912c66037dc11f710587e281eeaf"}
|
||||
{"datasource":"eosc________::seadatanet::seadatanet.european_directory_of_the_initial_ocean-observing_systems_edios","master":"10|eosc________::846016e987d1feaf2a36083f88dba1f2"}
|
||||
{"datasource":"eosc________::seadatanet::seadatanet.seadatanet_cdi","master":"10|eosc________::36cd158d6b1bbdbfb443c68b8da00335"}
|
||||
{"datasource":"eosc________::seadatanet::seadatanet.vocabulary_services_-_underpinned_by_the_nerc_vocabulary_server_nvs","master":"10|eosc________::4416d18ec7a57e553979fbfa4d862483"}
|
||||
{"datasource":"eosc________::sinergise::sinergise.sentinel_hub","master":"10|eosc________::d36ae944fa207461bcb7b2b3a6c94de8"}
|
||||
{"datasource":"eosc________::sixsq::sixsq.nuvla_multi-cloud_application_management_platform","master":"10|eosc________::38438cc3190a3815359efb53b9dd98eb"}
|
||||
{"datasource":"eosc________::sks::sks.digital_production_for_conferences_workshops_roundtables_and_other_academic_and_professional_events","master":"10|eosc________::f6b51bef4a5f1478e980673339f2b2f3"}
|
||||
{"datasource":"eosc________::smartsmear::smartsmear.smartsmear","master":"10|eosc________::d17a9325ca64ffad59e04659ed5404f7"}
|
||||
{"datasource":"eosc________::sobigdata::sobigdata.tagme","master":"10|eosc________::0c3b8b80d9d6d38effd28bfa6a140a12"}
|
||||
{"datasource":"eosc________::suite5::suite5.furniture_enterprise_analytics","master":"10|eosc________::29ed60070bd91bdc19c9f278b104465c"}
|
||||
{"datasource":"eosc________::switch::switch.switchengines","master":"10|eosc________::d4143918a810115206640cfeb11e0ba6"}
|
||||
{"datasource":"eosc________::t-systems::t-systems.open_telekom_cloud","master":"10|eosc________::c489ef6564a47922359f7b833919d642"}
|
||||
{"datasource":"eosc________::terradue::terradue.eo_services_for_earthquake_response_and_landslides_analysis","master":"10|eosc________::ab3140d145deb5fdb02eeefbc5ebc471"}
|
||||
{"datasource":"eosc________::tib::tib.open_research_knowledge_graph_orkg","master":"10|eosc________::ed6bd695c7a99297f360bc2fc915be90"}
|
||||
{"datasource":"eosc________::ubora::ubora.ubora","master":"10|eosc________::bacf05aff1c6dcf3133a0352d5eb14c4"}
|
||||
{"datasource":"eosc________::ubora::ubora.ubora_e-platform","master":"10|eosc________::947fde33605ba61216a07135ee1551f2"}
|
||||
{"datasource":"eosc________::ugr-es::ugr-es.glacier_lagoons_of_sierra_nevada","master":"10|eosc________::8a966c0efca298ad5ec130d323c29935"}
|
||||
{"datasource":"eosc________::uit::uit.dataverseno","master":"10|eosc________::92b76aa81a5b8443fcf17d3ae3c34211"}
|
||||
{"datasource":"eosc________::uit::uit.the_troms_repository_of_language_and_linguistics_trolling","master":"10|fairsharing_::a36b0dcd1e6384abc0e1867860ad3ee3"}
|
||||
{"datasource":"eosc________::ukaea::ukaea.prominence","master":"10|eosc________::06ce999c7cf77ea5a65f87bb563cd625"}
|
||||
{"datasource":"eosc________::ukri_-_stfc::ukri_-_stfc.cvmfs_test","master":"10|eosc________::53aaa0a24d0edc47c23e722135c29dde"}
|
||||
{"datasource":"eosc________::ukri_-_stfc::ukri_-_stfc.rucio","master":"10|eosc________::c19a8251c6bf563365c555572ace903e"}
|
||||
{"datasource":"eosc________::uni-freiburg::uni-freiburg.european_galaxy_server","master":"10|eosc________::cc00fc2385475b80accec001dfb85efb"}
|
||||
{"datasource":"eosc________::unibo::unibo.opencitations","master":"10|eosc________::573c29ecaf76ab961743bfc8a7d911ec"}
|
||||
{"datasource":"eosc________::unifl::unifl.snap4city","master":"10|eosc________::9a55c40c3c082b7a8352ecbc56a87996"}
|
||||
{"datasource":"eosc________::unige::unige.astronomical_online_data_analysis_astrooda","master":"10|eosc________::63f6119d3170cccf979daada3c5b524e"}
|
||||
{"datasource":"eosc________::unitartu::unitartu.ut.rocket","master":"10|eosc________::da3450589a9d56212963b20cf729974c"}
|
||||
{"datasource":"eosc________::upv-es::upv-es.lemonade","master":"10|eosc________::afdd227beada491f77d7944d7a0eafc9"}
|
||||
{"datasource":"eosc________::vamdc::vamdc.portal","master":"10|eosc________::4dab2bb6e9a9ad223cd63c62c2ea804e"}
|
||||
{"datasource":"eosc________::vamdc::vamdc.query_store","master":"10|eosc________::33f18bfe544c3c84ac28be6a3292d166"}
|
||||
{"datasource":"eosc________::vamdc::vamdc.species_database","master":"10|eosc________::ae3587682dec5663a1b3b625036d15d0"}
|
||||
{"datasource":"eosc________::vilnius-university::vilnius-university.the_national_open_access_research_data_archive_midas","master":"10|eosc________::4987ee0d071f68cf88f6b1a834b6733f"}
|
||||
{"datasource":"eosc________::wenmr::wenmr.amber-based_portal_server_for_nmr_structures_amps-nmr","master":"10|eosc________::c6cca9747ef3ce296bd626bcbc4e480a"}
|
||||
{"datasource":"eosc________::wenmr::wenmr.disvis_web_portal","master":"10|eosc________::2539ec693b683284c4e243b969ae3fc0"}
|
||||
{"datasource":"eosc________::wenmr::wenmr.fanten_finding_anisotropy_tensor","master":"10|eosc________::99c793e3f3b856c48eaaa36682038b28"}
|
||||
{"datasource":"eosc________::wenmr::wenmr.haddock24_web_portal","master":"10|eosc________::0f198f6a0885105809f420be23614be3"}
|
||||
{"datasource":"eosc________::wenmr::wenmr.metalpdb","master":"10|eosc________::84676bc3d2ce17de70309dc58f428296"}
|
||||
{"datasource":"eosc________::wenmr::wenmr.pdb-tools_web","master":"10|eosc________::b37eed45624ac30f3476f71640e59a61"}
|
||||
{"datasource":"eosc________::wenmr::wenmr.powerfit_web_portal","master":"10|eosc________::93d4d621ed1da378c0e7dc891cefc007"}
|
||||
{"datasource":"eosc________::wenmr::wenmr.spoton","master":"10|eosc________::76e7e0552f9c6b89db94b31ddc366b9f"}
File diff suppressed because one or more lines are too long