1
0
Fork 0
This commit is contained in:
Miriam Baglioni 2022-06-28 11:06:32 +02:00
commit ee1f1eeca2
30 changed files with 1968 additions and 114 deletions

View File

@ -1,18 +1,18 @@
package eu.dnetlib.dhp.common;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.text.WordUtils;
import com.ctc.wstx.dtd.LargePrefixedNameSet;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.hash.Hashing;
@ -29,7 +29,19 @@ public class PacePerson {
private List<String> fullname = Lists.newArrayList();
private final String original;
private static Set<String> particles = null;
private static Set<String> particles;
static {
try {
particles = new HashSet<>(IOUtils
.readLines(
PacePerson.class
.getResourceAsStream(
"/eu/dnetlib/dhp/common/name_particles.txt")));
} catch (IOException e) {
throw new RuntimeException(e);
}
}
/**
* Capitalizes a string
@ -37,29 +49,20 @@ public class PacePerson {
* @param s the string to capitalize
* @return the input string with capital letter
*/
public static final String capitalize(final String s) {
public static String capitalize(final String s) {
if (particles.contains(s)) {
return s;
}
return WordUtils.capitalize(s.toLowerCase(), ' ', '-');
}
/**
* Adds a dot to a string with length equals to 1
*/
public static final String dotAbbreviations(final String s) {
public static String dotAbbreviations(final String s) {
return s.length() == 1 ? s + "." : s;
}
public static Set<String> loadFromClasspath(final String classpath) {
final Set<String> h = new HashSet<>();
try {
for (final String s : IOUtils.readLines(PacePerson.class.getResourceAsStream(classpath))) {
h.add(s);
}
} catch (final Throwable e) {
return new HashSet<>();
}
return h;
}
/**
* The constructor of the class. It fills the fields of the class basing on the input fullname.
*
@ -128,10 +131,6 @@ public class PacePerson {
}
private List<String> splitTerms(final String s) {
if (particles == null) {
particles = loadFromClasspath("/eu/dnetlib/dhp/oa/graph/pace/name_particles.txt");
}
final List<String> list = Lists.newArrayList();
for (final String part : Splitter.on(" ").omitEmptyStrings().split(s)) {
if (!particles.contains(part.toLowerCase())) {
@ -187,17 +186,36 @@ public class PacePerson {
}
public List<String> getCapitalFirstnames() {
return Lists
.newArrayList(
Iterables.transform(getNameWithAbbreviations(), PacePerson::capitalize));
return Optional
.ofNullable(getNameWithAbbreviations())
.map(
name -> name
.stream()
.map(PacePerson::capitalize)
.collect(Collectors.toList()))
.orElse(new ArrayList<>());
}
public List<String> getCapitalSurname() {
return Lists.newArrayList(Iterables.transform(surname, PacePerson::capitalize));
return Optional
.ofNullable(getSurname())
.map(
surname -> surname
.stream()
.map(PacePerson::capitalize)
.collect(Collectors.toList()))
.orElse(new ArrayList<>());
}
public List<String> getNameWithAbbreviations() {
return Lists.newArrayList(Iterables.transform(name, PacePerson::dotAbbreviations));
return Optional
.ofNullable(getName())
.map(
name -> name
.stream()
.map(PacePerson::dotAbbreviations)
.collect(Collectors.toList()))
.orElse(new ArrayList<>());
}
public boolean isAccurate() {

View File

@ -3,7 +3,6 @@ package eu.dnetlib.dhp.actionmanager.ror;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORG_ORG_RELTYPE;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
@ -39,7 +38,6 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.ror.model.ExternalIdType;
import eu.dnetlib.dhp.actionmanager.ror.model.Relationship;
import eu.dnetlib.dhp.actionmanager.ror.model.RorOrganization;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
@ -51,7 +49,6 @@ import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2;
@ -168,38 +165,10 @@ public class GenerateRorActionSetJob {
final List<AtomicAction<? extends Oaf>> res = new ArrayList<>();
res.add(new AtomicAction<>(Organization.class, o));
for (final Relationship rorRel : r.getRelationships()) {
if (rorRel.getType().equalsIgnoreCase("parent")) {
final String orgId1 = calculateOpenaireId(r.getId());
final String orgId2 = calculateOpenaireId(rorRel.getId());
res
.add(
new AtomicAction<>(Relation.class,
calculateHierarchyRel(orgId1, orgId2, ModelConstants.IS_PARENT_OF)));
res
.add(
new AtomicAction<>(Relation.class,
calculateHierarchyRel(orgId2, orgId1, ModelConstants.IS_CHILD_OF)));
}
}
return res;
}
private static Relation calculateHierarchyRel(final String source, final String target, final String relClass) {
final Relation rel = new Relation();
rel.setSource(source);
rel.setTarget(target);
rel.setRelType(ORG_ORG_RELTYPE);
rel.setSubRelType(ModelConstants.RELATIONSHIP);
rel.setRelClass(relClass);
rel.setCollectedfrom(ROR_COLLECTED_FROM);
rel.setDataInfo(ROR_DATA_INFO);
rel.setLastupdatetimestamp(System.currentTimeMillis());
return rel;
}
private static String calculateOpenaireId(final String rorId) {
return String.format("20|%s::%s", ROR_NS_PREFIX, DHPUtils.md5(rorId));
}

View File

@ -19,6 +19,8 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.aggregation.common.ReporterCallback;
import eu.dnetlib.dhp.aggregation.common.ReportingJob;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.file.FileCollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.file.FileGZipCollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.mongodb.MDStoreCollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
@ -114,6 +116,10 @@ public class CollectorWorker extends ReportingJob {
return new OaiCollectorPlugin(clientParams);
case rest_json2xml:
return new RestCollectorPlugin(clientParams);
case file:
return new FileCollectorPlugin(fileSystem);
case fileGZip:
return new FileGZipCollectorPlugin(fileSystem);
case other:
final CollectorPlugin.NAME.OTHER_NAME plugin = Optional
.ofNullable(api.getParams().get("other_plugin_type"))

View File

@ -10,7 +10,7 @@ import eu.dnetlib.dhp.common.collection.CollectorException;
public interface CollectorPlugin {
enum NAME {
oai, other, rest_json2xml;
oai, other, rest_json2xml, file, fileGZip;
public enum OTHER_NAME {
mdstore_mongodb_dump, mdstore_mongodb

View File

@ -0,0 +1,80 @@
package eu.dnetlib.dhp.collection.plugin.file;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.Optional;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.utils.XMLIterator;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
public abstract class AbstractSplittedRecordPlugin implements CollectorPlugin {
private static final Logger log = LoggerFactory.getLogger(AbstractSplittedRecordPlugin.class);
public static final String SPLIT_ON_ELEMENT = "splitOnElement";
private final FileSystem fileSystem;
public AbstractSplittedRecordPlugin(FileSystem fileSystem) {
this.fileSystem = fileSystem;
}
@Override
public Stream<String> collect(ApiDescriptor api, AggregatorReport report) throws CollectorException {
// get path to file
final Path filePath = Optional
.ofNullable(api.getBaseUrl())
.map(Path::new)
.orElseThrow(() -> new CollectorException("missing baseUrl"));
log.info("baseUrl: {}", filePath);
// check that path to file exists
try {
if (!fileSystem.exists(filePath)) {
throw new CollectorException("path does not exist: " + filePath);
}
} catch (IOException e) {
throw new CollectorException(e);
}
// get split element
final String splitOnElement = Optional
.ofNullable(api.getParams().get(SPLIT_ON_ELEMENT))
.orElseThrow(
() -> new CollectorException(String
.format("missing parameter '%s', required by the AbstractSplittedRecordPlugin", SPLIT_ON_ELEMENT)));
log.info("splitOnElement: {}", splitOnElement);
final BufferedInputStream bis = getBufferedInputStream(filePath);
Iterator<String> xmlIterator = new XMLIterator(splitOnElement, bis);
return StreamSupport
.stream(
Spliterators.spliteratorUnknownSize(xmlIterator, Spliterator.ORDERED),
false);
}
abstract protected BufferedInputStream getBufferedInputStream(final Path filePath) throws CollectorException;
public FileSystem getFileSystem() {
return fileSystem;
}
}

View File

@ -0,0 +1,33 @@
package eu.dnetlib.dhp.collection.plugin.file;
import java.io.BufferedInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.common.collection.CollectorException;
public class FileCollectorPlugin extends AbstractSplittedRecordPlugin {
private static final Logger log = LoggerFactory.getLogger(FileCollectorPlugin.class);
public FileCollectorPlugin(FileSystem fileSystem) {
super(fileSystem);
}
@Override
protected BufferedInputStream getBufferedInputStream(final Path filePath) throws CollectorException {
log.info("filePath: {}", filePath);
try {
FileSystem fs = super.getFileSystem();
return new BufferedInputStream(fs.open(filePath));
} catch (Exception e) {
throw new CollectorException("Error reading file " + filePath, e);
}
}
}

View File

@ -0,0 +1,35 @@
package eu.dnetlib.dhp.collection.plugin.file;
import java.io.BufferedInputStream;
import java.util.zip.GZIPInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.common.collection.CollectorException;
public class FileGZipCollectorPlugin extends AbstractSplittedRecordPlugin {
private static final Logger log = LoggerFactory.getLogger(FileGZipCollectorPlugin.class);
public FileGZipCollectorPlugin(FileSystem fileSystem) {
super(fileSystem);
}
@Override
protected BufferedInputStream getBufferedInputStream(final Path filePath) throws CollectorException {
log.info("filePath: {}", filePath);
try {
FileSystem fs = super.getFileSystem();
GZIPInputStream stream = new GZIPInputStream(fs.open(filePath));
return new BufferedInputStream(stream);
} catch (Exception e) {
throw new CollectorException("Error reading file " + filePath, e);
}
}
}

View File

@ -19,7 +19,7 @@ import org.dom4j.io.XMLWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.collection.XmlCleaner;
import eu.dnetlib.dhp.collection.plugin.utils.XmlCleaner;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpConnector2;

View File

@ -30,7 +30,7 @@ import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import eu.dnetlib.dhp.collection.JsonUtils;
import eu.dnetlib.dhp.collection.plugin.utils.JsonUtils;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpClientParams;

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.collection;
package eu.dnetlib.dhp.collection.plugin.utils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

View File

@ -0,0 +1,177 @@
package eu.dnetlib.dhp.collection.plugin.utils;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringWriter;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.Iterator;
import javax.xml.stream.XMLEventFactory;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
public class XMLIterator implements Iterator<String> {
private static final Log log = LogFactory.getLog(XMLIterator.class);
private ThreadLocal<XMLInputFactory> inputFactory = new ThreadLocal<XMLInputFactory>() {
@Override
protected XMLInputFactory initialValue() {
return XMLInputFactory.newInstance();
}
};
private ThreadLocal<XMLOutputFactory> outputFactory = new ThreadLocal<XMLOutputFactory>() {
@Override
protected XMLOutputFactory initialValue() {
return XMLOutputFactory.newInstance();
}
};
private ThreadLocal<XMLEventFactory> eventFactory = new ThreadLocal<XMLEventFactory>() {
@Override
protected XMLEventFactory initialValue() {
return XMLEventFactory.newInstance();
}
};
public static final String UTF_8 = "UTF-8";
final XMLEventReader parser;
private XMLEvent current = null;
private String element;
private InputStream inputStream;
public XMLIterator(final String element, final InputStream inputStream) {
super();
this.element = element;
this.inputStream = inputStream;
this.parser = getParser();
try {
this.current = findElement(parser);
} catch (XMLStreamException e) {
log.warn("cannot init parser position. No element found: " + element);
current = null;
}
}
@Override
public boolean hasNext() {
return current != null;
}
@Override
public String next() {
String result = null;
try {
result = copy(parser);
current = findElement(parser);
return result;
} catch (XMLStreamException e) {
throw new RuntimeException(String.format("error copying xml, built so far: '%s'", result), e);
}
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
@SuppressWarnings("finally")
private String copy(final XMLEventReader parser) throws XMLStreamException {
final StringWriter result = new StringWriter();
try {
final XMLEventWriter writer = outputFactory.get().createXMLEventWriter(result);
final StartElement start = current.asStartElement();
final StartElement newRecord = eventFactory
.get()
.createStartElement(start.getName(), start.getAttributes(), start.getNamespaces());
// new root record
writer.add(newRecord);
// copy the rest as it is
while (parser.hasNext()) {
final XMLEvent event = parser.nextEvent();
// TODO: replace with depth tracking instead of close tag tracking.
if (event.isEndElement() && event.asEndElement().getName().getLocalPart().equals(element)) {
writer.add(event);
break;
}
writer.add(event);
}
writer.close();
} finally {
return result.toString();
}
}
/**
* Looks for the next occurrence of the splitter element.
*
* @param parser
* @return
* @throws XMLStreamException
*/
private XMLEvent findElement(final XMLEventReader parser) throws XMLStreamException {
/*
* if (current != null && element.equals(current.asStartElement().getName().getLocalPart())) { return current; }
*/
XMLEvent peek = parser.peek();
if (peek != null && peek.isStartElement()) {
String name = peek.asStartElement().getName().getLocalPart();
if (element.equals(name)) {
return peek;
}
}
while (parser.hasNext()) {
final XMLEvent event = parser.nextEvent();
if (event != null && event.isStartElement()) {
String name = event.asStartElement().getName().getLocalPart();
if (element.equals(name)) {
return event;
}
}
}
return null;
}
private XMLEventReader getParser() {
try {
return inputFactory.get().createXMLEventReader(sanitize(inputStream));
} catch (XMLStreamException e) {
throw new RuntimeException(e);
}
}
private Reader sanitize(final InputStream in) {
final CharsetDecoder charsetDecoder = Charset.forName(UTF_8).newDecoder();
charsetDecoder.onMalformedInput(CodingErrorAction.REPLACE);
charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
return new InputStreamReader(in, charsetDecoder);
}
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.collection;
package eu.dnetlib.dhp.collection.plugin.utils;
import java.util.HashMap;
import java.util.HashSet;

View File

@ -47,13 +47,18 @@ object DataciteToOAFTransformation {
}
/** This method should skip record if json contains invalid text
* defined in gile datacite_filter
* defined in file datacite_filter
*
* @param json
* @param record : unparsed datacite record
* @param json : parsed record
* @return True if the record should be skipped
*/
def skip_record(json: String): Boolean = {
datacite_filter.exists(f => json.contains(f))
def skip_record(record: String, json: org.json4s.JValue): Boolean = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
datacite_filter.exists(f => record.contains(f)) || (json \\ "publisher")
.extractOrElse[String]("")
.equalsIgnoreCase("FAIRsharing")
}
@deprecated("this method will be removed", "dhp")
@ -304,12 +309,13 @@ object DataciteToOAFTransformation {
vocabularies: VocabularyGroup,
exportLinks: Boolean
): List[Oaf] = {
if (skip_record(input))
return List()
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json = parse(input)
if (skip_record(input, json))
return List()
val resourceType = (json \ "attributes" \ "types" \ "resourceType").extractOrElse[String](null)
val resourceTypeGeneral =
(json \ "attributes" \ "types" \ "resourceTypeGeneral").extractOrElse[String](null)

View File

@ -0,0 +1,61 @@
package eu.dnetlib.dhp.collection.plugin.file;
import java.io.IOException;
import java.util.HashMap;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
import net.bytebuddy.asm.Advice;
public class FileCollectorPluginTest {
private static final Logger log = LoggerFactory.getLogger(FileGZipCollectorPluginTest.class);
private final ApiDescriptor api = new ApiDescriptor();
private FileCollectorPlugin plugin;
private static final String SPLIT_ON_ELEMENT = "repository";
@BeforeEach
public void setUp() throws IOException {
final String gzipFile = this
.getClass()
.getResource("/eu/dnetlib/dhp/collection/plugin/file/opendoar.xml")
.getFile();
api.setBaseUrl(gzipFile);
HashMap<String, String> params = new HashMap<>();
params.put("splitOnElement", SPLIT_ON_ELEMENT);
api.setParams(params);
FileSystem fs = FileSystem.get(new Configuration());
plugin = new FileCollectorPlugin(fs);
}
@Test
void test() throws CollectorException {
final Stream<String> stream = plugin.collect(api, new AggregatorReport());
stream.limit(10).forEach(s -> {
Assertions.assertTrue(s.length() > 0);
log.info(s);
});
}
}

View File

@ -0,0 +1,68 @@
package eu.dnetlib.dhp.collection.plugin.file;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.HashMap;
import java.util.Objects;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.junit.jupiter.api.*;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
@ExtendWith(MockitoExtension.class)
public class FileGZipCollectorPluginTest {
private static final Logger log = LoggerFactory.getLogger(FileGZipCollectorPluginTest.class);
private final ApiDescriptor api = new ApiDescriptor();
private FileGZipCollectorPlugin plugin;
private static final String SPLIT_ON_ELEMENT = "repository";
@BeforeEach
public void setUp() throws IOException {
final String gzipFile = Objects
.requireNonNull(
this
.getClass()
.getResource("/eu/dnetlib/dhp/collection/plugin/file/opendoar.xml.gz"))
.getFile();
api.setBaseUrl(gzipFile);
HashMap<String, String> params = new HashMap<>();
params.put("splitOnElement", SPLIT_ON_ELEMENT);
api.setParams(params);
FileSystem fs = FileSystem.get(new Configuration());
plugin = new FileGZipCollectorPlugin(fs);
}
@Test
void test() throws CollectorException {
final Stream<String> stream = plugin.collect(api, new AggregatorReport());
stream.limit(10).forEach(s -> {
Assertions.assertTrue(s.length() > 0);
log.info(s);
});
}
}

View File

@ -107,4 +107,19 @@ class DataciteToOAFTest extends AbstractVocabularyTest {
}
@Test
def testFilter(): Unit = {
val record = Source
.fromInputStream(
getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/record_fairsharing.json")
)
.mkString
val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
val res: List[Oaf] = DataciteToOAFTransformation.generateOAF(record, 0L, 0L, vocabularies, true)
assertTrue(res.isEmpty)
}
}

View File

@ -18,14 +18,7 @@ import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.oaiIProvenance;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.*;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.*;
@ -35,6 +28,7 @@ import com.google.common.collect.Sets;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.AccessRight;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Context;
@ -199,8 +193,13 @@ public abstract class AbstractMdRecordToOafMapper {
final List<Oaf> oafs = Lists.newArrayList(entity);
if (!oafs.isEmpty()) {
oafs.addAll(addProjectRels(doc, entity));
oafs.addAll(addOtherResultRels(doc, entity));
Set<Oaf> rels = Sets.newHashSet();
rels.addAll(addProjectRels(doc, entity));
rels.addAll(addOtherResultRels(doc, entity));
rels.addAll(addRelations(doc, entity));
oafs.addAll(rels);
}
return oafs;
@ -278,6 +277,46 @@ public abstract class AbstractMdRecordToOafMapper {
return res;
}
private List<Oaf> addRelations(Document doc, OafEntity entity) {
final List<Oaf> rels = Lists.newArrayList();
for (Object o : doc.selectNodes("//oaf:relation")) {
Element element = (Element) o;
final String target = StringUtils.trim(element.getText());
final String relType = element.attributeValue("relType");
final String subRelType = element.attributeValue("subRelType");
final String relClass = element.attributeValue("relClass");
if (StringUtils.isNotBlank(target) && StringUtils.isNotBlank(relType) && StringUtils.isNotBlank(subRelType)
&& StringUtils.isNotBlank(relClass)) {
final String relClassInverse = ModelSupport
.findInverse(ModelSupport.rel(relType, subRelType, relClass))
.getInverseRelClass();
final String validationdDate = ((Node) o).valueOf("@validationDate");
if (StringUtils.isNotBlank(target)) {
final String targetType = element.attributeValue("targetType");
if (StringUtils.isNotBlank(targetType)) {
final String targetId = createOpenaireId(targetType, target, true);
rels
.add(
getRelation(
entity.getId(), targetId, relType, subRelType, relClass, entity, validationdDate));
rels
.add(
getRelation(
targetId, entity.getId(), relType, subRelType, relClassInverse, entity,
validationdDate));
}
}
}
}
return rels;
}
protected Relation getRelation(final String source,
final String target,
final String relType,

View File

@ -57,14 +57,10 @@ class MappersTest {
final List<Oaf> list = new OafToOafMapper(vocs, false, true).processMdRecord(xml);
assertEquals(3, list.size());
assertTrue(list.get(0) instanceof Publication);
assertTrue(list.get(1) instanceof Relation);
assertTrue(list.get(2) instanceof Relation);
assertEquals(1, list.stream().filter(o -> o instanceof Publication).count());
assertEquals(4, list.stream().filter(o -> o instanceof Relation).count());
final Publication p = (Publication) list.get(0);
final Relation r1 = (Relation) list.get(1);
final Relation r2 = (Relation) list.get(2);
Publication p = (Publication) list.stream().filter(o -> o instanceof Publication).findFirst().get();
assertValidId(p.getId());
@ -125,26 +121,58 @@ class MappersTest {
assertNotNull(p.getBestaccessright());
assertEquals("OPEN", p.getBestaccessright().getClassid());
assertValidId(r1.getSource());
assertValidId(r1.getTarget());
assertValidId(r2.getSource());
assertValidId(r2.getTarget());
assertValidId(r1.getCollectedfrom().get(0).getKey());
assertValidId(r2.getCollectedfrom().get(0).getKey());
assertNotNull(r1.getDataInfo());
assertNotNull(r2.getDataInfo());
assertNotNull(r1.getDataInfo().getTrust());
assertNotNull(r2.getDataInfo().getTrust());
assertEquals(r1.getSource(), r2.getTarget());
assertEquals(r2.getSource(), r1.getTarget());
assertTrue(StringUtils.isNotBlank(r1.getRelClass()));
assertTrue(StringUtils.isNotBlank(r2.getRelClass()));
assertTrue(StringUtils.isNotBlank(r1.getRelType()));
assertTrue(StringUtils.isNotBlank(r2.getRelType()));
assertTrue(r1.getValidated());
assertTrue(r2.getValidated());
assertEquals("2020-01-01", r1.getValidationDate());
assertEquals("2020-01-01", r2.getValidationDate());
// RESULT PROJECT
List<Relation> resultProject = list
.stream()
.filter(o -> o instanceof Relation)
.map(o -> (Relation) o)
.filter(r -> ModelConstants.RESULT_PROJECT.equals(r.getRelType()))
.collect(Collectors.toList());
assertEquals(2, resultProject.size());
final Relation rp1 = resultProject.get(0);
final Relation rp2 = resultProject.get(1);
verifyRelation(rp1);
verifyRelation(rp2);
assertTrue(rp1.getValidated());
assertTrue(rp2.getValidated());
assertEquals("2020-01-01", rp1.getValidationDate());
assertEquals("2020-01-01", rp2.getValidationDate());
assertEquals(rp1.getSource(), rp2.getTarget());
assertEquals(rp2.getSource(), rp1.getTarget());
// AFFILIATIONS
List<Relation> affiliation = list
.stream()
.filter(o -> o instanceof Relation)
.map(o -> (Relation) o)
.filter(r -> ModelConstants.RESULT_ORGANIZATION.equals(r.getRelType()))
.collect(Collectors.toList());
assertEquals(2, affiliation.size());
final Relation aff1 = affiliation.get(0);
final Relation aff2 = affiliation.get(1);
verifyRelation(aff1);
verifyRelation(aff2);
assertEquals(aff1.getSource(), aff2.getTarget());
assertEquals(aff2.getSource(), aff1.getTarget());
}
private void verifyRelation(Relation r) {
assertValidId(r.getSource());
assertValidId(r.getTarget());
assertValidId(r.getCollectedfrom().get(0).getKey());
assertNotNull(r.getDataInfo());
assertNotNull(r.getDataInfo().getTrust());
assertTrue(StringUtils.isNotBlank(r.getRelClass()));
assertTrue(StringUtils.isNotBlank(r.getRelType()));
}
@Test
@ -734,6 +762,51 @@ class MappersTest {
assertFalse(p_cleaned.getTitle().isEmpty());
}
@Test
void testZenodo() throws IOException, DocumentException {
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_zenodo.xml")));
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
System.out.println("***************");
System.out.println(new ObjectMapper().writeValueAsString(list));
System.out.println("***************");
final Publication p = (Publication) list.get(0);
assertValidId(p.getId());
assertValidId(p.getCollectedfrom().get(0).getKey());
assertNotNull(p.getTitle());
assertFalse(p.getTitle().isEmpty());
assertEquals(1, p.getTitle().size());
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
assertNotNull(p.getAuthor());
assertEquals(2, p.getAuthor().size());
Author author = p
.getAuthor()
.stream()
.filter(a -> a.getPid().stream().anyMatch(pi -> pi.getValue().equals("0000-0003-3272-8007")))
.findFirst()
.get();
assertNotNull(author);
assertTrue(StringUtils.isBlank(author.getSurname()));
assertTrue(StringUtils.isBlank(author.getName()));
assertEquals("Anne van Weerden", author.getFullname());
author = p
.getAuthor()
.stream()
.filter(a -> a.getPid().stream().anyMatch(pi -> pi.getValue().equals("0000-0003-3272-8008")))
.findFirst()
.get();
assertNotNull(author);
assertFalse(StringUtils.isBlank(author.getSurname()));
assertFalse(StringUtils.isBlank(author.getName()));
assertFalse(StringUtils.isBlank(author.getFullname()));
}
@Test
void testOdfFromHdfs() throws IOException, DocumentException {
final String xml = IOUtils
@ -835,6 +908,20 @@ class MappersTest {
assertEquals("EUR", p.getProcessingchargecurrency().getValue());
}
@Test
void testROHub() throws IOException, DocumentException {
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("rohub.xml")));
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
System.out.println("***************");
System.out.println(new ObjectMapper().writeValueAsString(list));
System.out.println("***************");
// final Dataset p = (Dataset) list.get(0);
// assertValidId(p.getId());
// assertValidId(p.getCollectedfrom().get(0).getKey());
// System.out.println(p.getTitle().get(0).getValue());
// assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
}
private void assertValidId(final String id) {
// System.out.println(id);

View File

@ -497,6 +497,7 @@ dnet:publication_resource @=@ 0044 @=@ Graduate diploma
dnet:publication_resource @=@ 0044 @=@ Undergraduate diploma
dnet:publication_resource @=@ 0000 @=@ UNKNOWN
dnet:publication_resource @=@ 0042 @=@ EGI Virtual Appliance
dnet:publication_resource @=@ 0048 @=@ RO-crate
dnet:languages @=@ abk @=@ ab
dnet:languages @=@ aar @=@ aa
dnet:languages @=@ afr @=@ af

View File

@ -164,6 +164,7 @@ dnet:publication_resource @=@ dnet:publication_resource @=@ 0030 @=@ Sound
dnet:publication_resource @=@ dnet:publication_resource @=@ 0044 @=@ Thesis
dnet:publication_resource @=@ dnet:publication_resource @=@ 0000 @=@ Unknown
dnet:publication_resource @=@ dnet:publication_resource @=@ 0042 @=@ Virtual Appliance
dnet:publication_resource @=@ dnet:publication_resource @=@ 0048 @=@ Research Object
ec:funding_typologies @=@ ec:funding_typologies @=@ ec:frameworkprogram @=@ frameworkprogram
ec:funding_typologies @=@ ec:funding_typologies @=@ ec:program @=@ program
ec:funding_typologies @=@ ec:funding_typologies @=@ ec:specificprogram @=@ specificprogram

View File

@ -60,6 +60,15 @@
<oaf:fulltext>https://oneecosystem.pensoft.net/article/13718/</oaf:fulltext>
<oaf:journal eissn="2367-8194" issn="">One Ecosystem</oaf:journal>
<oaf:refereed>0001</oaf:refereed>
<oaf:relation relClass="hasAuthorInstitution"
relType="resultOrganization"
subRelType="affiliation"
targetType="organization">ror_________::https://ror.org/02gdcn153</oaf:relation>
<oaf:relation relClass="isProducedBy"
relType="resultProject"
subRelType="outcome"
targetType="project"
validationDate="2020-01-01">corda_______::226852</oaf:relation>
</metadata>
<about xmlns:oai="http://www.openarchives.org/OAI/2.0/">
<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">

View File

@ -0,0 +1,69 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:datacite="http://datacite.org/schema/kernel-3"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:dri="http://www.driver-repository.eu/namespace/dri">>
<header xmlns="http://www.openarchives.org/OAI/2.0/">
<identifier>oai:zenodo.org:3406824</identifier>
<datestamp>2020-01-20T16:45:20Z</datestamp>
<setSpec>openaire</setSpec>
<dr:dateOfTransformation>2022-06-07T10:21:24.06Z</dr:dateOfTransformation>
<dri:objIdentifier>test________::92fe3efa47883b2f3401e6a4bd92e9d7</dri:objIdentifier>
<dri:dateOfCollection>2020-05-21T05:26:15.93Z</dri:dateOfCollection>
<dri:dateOfTransformation>2020-08-01T11:06:26.977Z</dri:dateOfTransformation>
</header>
<metadata>
<resource xmlns="http://datacite.org/schema/kernel-4"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.1/metadata.xsd">
<identifier identifierType="DOI">10.5281/zenodo.3406824</identifier>
<alternateIdentifiers xmlns="http://datacite.org/schema/kernel-3">
<alternateIdentifier alternateIdentifierType="URL">http://dx.doi.org/10.5281/zenodo.3406824</alternateIdentifier>
</alternateIdentifiers>
<creators>
<creator>
<creatorName>Anne van Weerden</creatorName>
<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0003-3272-8007</nameIdentifier>
<affiliation>Utrecht University Library</affiliation>
</creator>
<creator>
<creatorName>Anne van, Weerden</creatorName>
<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0003-3272-8008</nameIdentifier>
<affiliation>Utrecht University Library</affiliation>
</creator>
</creators>
<titles>
<title>Helen Bayly and Catherine Disney as influences in the life of Sir William Rowan Hamilton</title>
</titles>
<publisher>Zenodo</publisher>
<publicationYear>2018</publicationYear>
<subjects>
<subject>Sir William Rowan Hamilton, Lady Helena Maria Hamilton Bayly, Catherine Disney, Ireland, history, biography, nineteenth century</subject>
</subjects>
<dates>
<date dateType="Issued">2018-12-28</date>
</dates>
<language>en</language>
<resourceType resourceTypeGeneral="JournalArticle"/>
<relatedIdentifiers>
<relatedIdentifier relatedIdentifierType="DOI" relationType="IsVersionOf">10.5281/zenodo.3406823</relatedIdentifier>
</relatedIdentifiers>
<rightsList>
<rights rightsURI="https://creativecommons.org/licenses/by/4.0/legalcode">Creative Commons Attribution 4.0 International</rights>
<rights rightsURI="info:eu-repo/semantics/openAccess">Open Access</rights>
</rightsList>
<descriptions>
<description descriptionType="Abstract"><p>In the 1880s Robert Graves published a biography about Sir William Rowan Hamilton (1805-1865), to which in a 1980 biography Thomas Hankins added further information. From these biographies a picture emerged of a man who was unhappily married because he had lost the love of his life, which raised the question how such an unhappy man could produce so much beautiful mathematics. In this article it is stated that a main cause for the unhappy picture is that Graves ignored the influence on one another of Hamilton and his wife Helen Bayly, and Hankins that of Hamilton and his first and lost love Catherine Disney. It is then shown that if these influences are taken into account a very different view on Hamilton;s private life arises, in which he was happily married to a wife who enabled him to work as he needed to.</p></description>
</descriptions>
</resource>
<oaf:identifier identifierType="doi">10.5281/zenodo.3406824</oaf:identifier>
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
<oaf:dateAccepted>2018-12-28</oaf:dateAccepted>
<oaf:accessrights>OPEN</oaf:accessrights>
<oaf:license>https://creativecommons.org/licenses/by/4.0/legalcode</oaf:license>
<oaf:language>eng</oaf:language>
<oaf:hostedBy name="ZENODO" id="opendoar____::2659"/>
<oaf:collectedFrom name="ZENODO" id="opendoar____::2659"/>
</metadata>
</record>

View File

@ -0,0 +1,103 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<header xmlns="http://www.openarchives.org/OAI/2.0/">
<dri:objIdentifier>eosca5322f5f::4dd1aaf93ae136b65dc9ee4e6f76eac9</dri:objIdentifier>
<dri:recordIdentifier>53aa90bf-c593-4e6d-923f-d4711ac4b0e1</dri:recordIdentifier>
<dri:dateOfCollection>2022-05-25T15:35:48.262Z</dri:dateOfCollection>
<oaf:datasourceprefix>eosca5322f5f</oaf:datasourceprefix>
<identifier>53aa90bf-c593-4e6d-923f-d4711ac4b0e1</identifier>
<datestamp>2022-05-25T15:35:38Z</datestamp>
<setSpec>rohub_data</setSpec>
<setSpec>ro-crate_data</setSpec>
<dr:dateOfTransformation>2022-05-25T15:36:11.094Z</dr:dateOfTransformation>
</header>
<metadata>
<oaire:resource xmlns="http://namespace.openaire.eu/schema/oaire/">
<datacite:identifier identifierType="landingPage">https://w3id.org/ro-id/53aa90bf-c593-4e6d-923f-d4711ac4b0e1</datacite:identifier>
<datacite:alternateIdentifiers>
<datacite:alternateIdentifier alternateIdentifierType="URL">http://api.rohub.org/api/ros/53aa90bf-c593-4e6d-923f-d4711ac4b0e1/</datacite:alternateIdentifier>
</datacite:alternateIdentifiers>
<datacite:relatedIdentifiers>
<datacite:relatedIdentifier relatedIdentifierType="" relationType="">
https://github.com/NordicESMhub/RELIANCE/blob/main/content/science/notebooks/air_quality_lockdown.ipynb
</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="URI" relationType="IsPartOf">https://github.com/NordicESMhub/RELIANCE/blob/main/content/science/notebooks/air_quality_lockdown.ipynb</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="" relationType="">
https://nordicesmhub.github.io/RELIANCE/science/notebooks/air_quality_lockdown.html
</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="URI" relationType="IsPartOf">https://nordicesmhub.github.io/RELIANCE/science/notebooks/air_quality_lockdown.html</datacite:relatedIdentifier>
</datacite:relatedIdentifiers>
<creators xmlns="http://datacite.org/schema/kernel-4">
<creator>
<creator>
<creatorName>Anne Fouilloux</creatorName>
</creator>
</creator>
</creators>
<dates xmlns="http://datacite.org/schema/kernel-4">
<date dateType="Created">2021-12-19T21:18:33Z</date>
</dates>
<dc:descriptions>
<dc:description descriptionType="Abstract">The COVID-19 pandemic has led to significant reductions in economic activity, especially during lockdowns. Several studies has shown that the concentration of nitrogen dioxyde and particulate matter levels have reduced during lockdown events. Reductions in transportation sector emissions are most likely largely responsible for the NO2 anomalies. In this study, we analyze the impact of lockdown events on the air quality using data from Copernicus Atmosphere Monitoring Service over Europe and at selected locations.</dc:description>
</dc:descriptions>
<oaire:fundingReferences>
<oaire:fundingReference>
<oaire:funderName>European Commission</oaire:funderName>
<oaire:funderIdentifier funderIdentifierType="Crossref Funder ID">10.13039/501100000781</oaire:funderIdentifier>
<oaire:awardNumber awardURI="">101017502</oaire:awardNumber>
<oaire:awardTitle>Research Lifecycle Management for Earth Science Communities and Copernicus Users</oaire:awardTitle>
</oaire:fundingReference>
</oaire:fundingReferences>
<oaire:licenseCondition uri="https://opensource.org/licenses/MIT">MIT License</oaire:licenseCondition>
<dc:publisher>University of Oslo</dc:publisher>
<dc:publicationYear>2021</dc:publicationYear>
<oaire:resourceType resourceTypeGeneral="other research product" uri="http://purl.org/coar/resource_type/c_1843">RO-crate</oaire:resourceType>
<rightsList xmlns="http://datacite.org/schema/kernel-4">
<rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</rights>
</rightsList>
<sizes xmlns="http://datacite.org/schema/kernel-4">
<size>11.971 MB</size>
</sizes>
<subjects xmlns="http://datacite.org/schema/kernel-4">
<subject>Applied sciences</subject>
<subject>Meteorology</subject>
<subject>EOSC::RO-crate</subject>
</subjects>
<titles xmlns="http://datacite.org/schema/kernel-4">
<title>Impact of the Covid-19 Lockdown on Air quality over Europe</title>
</titles>
</oaire:resource>
<oaf:identifier identifierType="URL">https://w3id.org/ro-id/53aa90bf-c593-4e6d-923f-d4711ac4b0e1</oaf:identifier>
<dr:CobjCategory type="other">0048</dr:CobjCategory>
<oaf:dateAccepted/>
<oaf:accessrights>OPEN</oaf:accessrights>
<oaf:license>https://opensource.org/licenses/MIT</oaf:license>
<oaf:language>und</oaf:language>
<oaf:hostedBy id="eosc________::psnc::psnc.rohub" name="ROHub"/>
<oaf:collectedFrom id="eosc________::psnc::psnc.rohub" name="ROHub"/>
</metadata>
<about xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
<originDescription altered="true" harvestDate="2022-05-25T15:35:48.262Z">
<baseURL>https%3A%2F%2Fapi.rohub.org%2Fapi%2Foai2d%2F</baseURL>
<identifier>53aa90bf-c593-4e6d-923f-d4711ac4b0e1</identifier>
<datestamp>2022-05-25T15:35:38Z</datestamp>
<metadataNamespace/>
</originDescription>
</provenance>
<oaf:datainfo>
<oaf:inferred>false</oaf:inferred>
<oaf:deletedbyinference>false</oaf:deletedbyinference>
<oaf:trust>0.9</oaf:trust>
<oaf:inferenceprovenance/>
<oaf:provenanceaction classid="sysimport:crosswalk"
classname="Harvested" schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
</oaf:datainfo>
</about>
</record>

View File

@ -107,11 +107,6 @@ compute stats TARGET.result_sources;
create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.result_topics;
create table TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
compute stats TARGET.result_apc;
create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result);
create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result);
create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou;

View File

@ -127,6 +127,7 @@ CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'resultOrganization'
and r.target like '50|%'
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS

View File

@ -93,7 +93,7 @@ where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false;
CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.target like '20|%' and r.datainfo.invisible=false;
-- datasource sources:
-- where the datasource info have been collected from.