forked from D-Net/dnet-hadoop
Merge commit 'efd96e7e664e4139321e35e8d172b884ba4b61a1' into beta2master_sept_2022
This commit is contained in:
commit
cbd48bc645
|
@ -419,4 +419,62 @@ public class OafMapperUtils {
|
|||
m.setUnit(Arrays.asList(newKeyValueInstance(key, value, dataInfo)));
|
||||
return m;
|
||||
}
|
||||
|
||||
public static Relation getRelation(final String source,
|
||||
final String target,
|
||||
final String relType,
|
||||
final String subRelType,
|
||||
final String relClass,
|
||||
final OafEntity entity) {
|
||||
return getRelation(source, target, relType, subRelType, relClass, entity, null);
|
||||
}
|
||||
|
||||
public static Relation getRelation(final String source,
|
||||
final String target,
|
||||
final String relType,
|
||||
final String subRelType,
|
||||
final String relClass,
|
||||
final OafEntity entity,
|
||||
final String validationDate) {
|
||||
return getRelation(
|
||||
source, target, relType, subRelType, relClass, entity.getCollectedfrom(), entity.getDataInfo(),
|
||||
entity.getLastupdatetimestamp(), validationDate, null);
|
||||
}
|
||||
|
||||
public static Relation getRelation(final String source,
|
||||
final String target,
|
||||
final String relType,
|
||||
final String subRelType,
|
||||
final String relClass,
|
||||
final List<KeyValue> collectedfrom,
|
||||
final DataInfo dataInfo,
|
||||
final Long lastupdatetimestamp) {
|
||||
return getRelation(
|
||||
source, target, relType, subRelType, relClass, collectedfrom, dataInfo, lastupdatetimestamp, null, null);
|
||||
}
|
||||
|
||||
public static Relation getRelation(final String source,
|
||||
final String target,
|
||||
final String relType,
|
||||
final String subRelType,
|
||||
final String relClass,
|
||||
final List<KeyValue> collectedfrom,
|
||||
final DataInfo dataInfo,
|
||||
final Long lastupdatetimestamp,
|
||||
final String validationDate,
|
||||
final List<KeyValue> properties) {
|
||||
final Relation rel = new Relation();
|
||||
rel.setRelType(relType);
|
||||
rel.setSubRelType(subRelType);
|
||||
rel.setRelClass(relClass);
|
||||
rel.setSource(source);
|
||||
rel.setTarget(target);
|
||||
rel.setCollectedfrom(collectedfrom);
|
||||
rel.setDataInfo(dataInfo);
|
||||
rel.setLastupdatetimestamp(lastupdatetimestamp);
|
||||
rel.setValidated(StringUtils.isNotBlank(validationDate));
|
||||
rel.setValidationDate(StringUtils.isNotBlank(validationDate) ? validationDate : null);
|
||||
rel.setProperties(properties);
|
||||
return rel;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -118,7 +118,7 @@ public class CollectorWorker extends ReportingJob {
|
|||
return new RestCollectorPlugin(clientParams);
|
||||
case file:
|
||||
return new FileCollectorPlugin(fileSystem);
|
||||
case fileGZip:
|
||||
case fileGzip:
|
||||
return new FileGZipCollectorPlugin(fileSystem);
|
||||
case other:
|
||||
final CollectorPlugin.NAME.OTHER_NAME plugin = Optional
|
||||
|
|
|
@ -10,7 +10,7 @@ import eu.dnetlib.dhp.common.collection.CollectorException;
|
|||
public interface CollectorPlugin {
|
||||
|
||||
enum NAME {
|
||||
oai, other, rest_json2xml, file, fileGZip;
|
||||
oai, other, rest_json2xml, file, fileGzip;
|
||||
|
||||
public enum OTHER_NAME {
|
||||
mdstore_mongodb_dump, mdstore_mongodb
|
||||
|
|
|
@ -17,6 +17,9 @@ public class PMArticle implements Serializable {
|
|||
* the Pubmed Identifier
|
||||
*/
|
||||
private String pmid;
|
||||
|
||||
private String pmcId;
|
||||
|
||||
/**
|
||||
* the DOI
|
||||
*/
|
||||
|
@ -122,7 +125,7 @@ public class PMArticle implements Serializable {
|
|||
|
||||
/**
|
||||
* The full journal title (taken from NLM cataloging data following NLM rules for how to compile a serial name) is exported in this element.
|
||||
* Some characters that are not part of the NLM MEDLINE/PubMed Character Set reside in a relatively small number of full journal titles.
|
||||
* Some characters that are not part of the NLM MEDLINE/PubMed Character Set reside in a relatively small number of full journal titles.
|
||||
* The NLM journal title abbreviation is exported in the <MedlineTA> element.
|
||||
*
|
||||
* @return the pubmed Journal Extracted
|
||||
|
@ -140,10 +143,11 @@ public class PMArticle implements Serializable {
|
|||
}
|
||||
|
||||
/**
|
||||
* English-language abstracts are taken directly from the published article.
|
||||
* If the article does not have a published abstract, the National Library of Medicine does not create one,
|
||||
* thus the record lacks the <Abstract> and <AbstractText> elements. However, in the absence of a formally
|
||||
* labeled abstract in the published article, text from a substantive "summary", "summary and conclusions" or "conclusions and summary" may be used.
|
||||
* <ArticleTitle> contains the entire title of the journal article. <ArticleTitle> is always in English;
|
||||
* those titles originally published in a non-English language and translated for <ArticleTitle> are enclosed in square brackets.
|
||||
* All titles end with a period unless another punctuation mark such as a question mark or bracket is present.
|
||||
* Explanatory information about the title itself is enclosed in parentheses, e.g.: (author's transl).
|
||||
* Corporate/collective authors may appear at the end of <ArticleTitle> for citations up to about the year 2000.
|
||||
*
|
||||
* @return the extracted pubmed Title
|
||||
*/
|
||||
|
@ -250,4 +254,13 @@ public class PMArticle implements Serializable {
|
|||
public List<PMGrant> getGrants() {
|
||||
return grants;
|
||||
}
|
||||
|
||||
/**
 * Gets the PMC identifier of this article.
 *
 * @return the current value of the {@code pmcId} field
 */
public String getPmcId() {
|
||||
return pmcId;
|
||||
}
|
||||
|
||||
/**
 * Sets the PMC identifier of this article.
 *
 * @param pmcId the value to store in the {@code pmcId} field
 * @return this instance, so that calls can be chained
 */
public PMArticle setPmcId(String pmcId) {
|
||||
this.pmcId = pmcId;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -584,7 +584,12 @@ object DataciteToOAFTransformation {
|
|||
JField("awardUri", JString(awardUri)) <- fundingReferences
|
||||
} yield awardUri
|
||||
|
||||
val oid = result.getId
|
||||
result.setId(IdentifierFactory.createIdentifier(result))
|
||||
if (!result.getId.equalsIgnoreCase(oid)) {
|
||||
result.setOriginalId((oid :: List(doi)).asJava)
|
||||
}
|
||||
|
||||
var relations: List[Relation] =
|
||||
awardUris.flatMap(a => get_projectRelation(a, result.getId)).filter(r => r != null)
|
||||
|
||||
|
|
|
@ -98,6 +98,7 @@ class PMParser(xml: XMLEventReader) extends Iterator[PMArticle] {
|
|||
case "PMID" => currentArticle.setPmid(text.trim)
|
||||
case "ArticleId" =>
|
||||
if ("doi".equalsIgnoreCase(currentArticleType)) currentArticle.setDoi(text.trim)
|
||||
if ("pmc".equalsIgnoreCase(currentArticleType)) currentArticle.setPmcId(text.trim)
|
||||
case "Language" => currentArticle.setLanguage(text.trim)
|
||||
case "ISSN" => currentJournal.setIssn(text.trim)
|
||||
case "GrantID" => currentGrant.setGrantID(text.trim)
|
||||
|
|
|
@ -4,9 +4,12 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
|||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType}
|
||||
import eu.dnetlib.dhp.schema.oaf._
|
||||
import collection.JavaConverters._
|
||||
import eu.dnetlib.dhp.utils.DHPUtils
|
||||
import org.apache.commons.lang3.StringUtils
|
||||
|
||||
import collection.JavaConverters._
|
||||
import java.util.regex.Pattern
|
||||
import scala.collection.mutable.ListBuffer
|
||||
|
||||
/**
|
||||
*/
|
||||
|
@ -14,6 +17,9 @@ object PubMedToOaf {
|
|||
|
||||
val SUBJ_CLASS = "keywords"
|
||||
|
||||
val OAI_HEADER = "oai:pubmedcentral.nih.gov:"
|
||||
val OLD_PMC_PREFIX = "od_______267::"
|
||||
|
||||
val urlMap = Map(
|
||||
"pmid" -> "https://pubmed.ncbi.nlm.nih.gov/",
|
||||
"doi" -> "https://dx.doi.org/"
|
||||
|
@ -50,6 +56,15 @@ object PubMedToOaf {
|
|||
null
|
||||
}
|
||||
|
||||
/** Compute the identifier derived from the PMC id of the article.
  *
  * The PMC id, with every "PMC" occurrence removed, is appended to OAI_HEADER;
  * the md5 of that string is then prefixed with OLD_PMC_PREFIX.
  *
  * @param article the PubMed article whose PMC id is read
  * @return the prefixed md5 identifier, or null when the article has no (or an empty) PMC id
  */
def createOriginalOpenaireId(article: PMArticle): String = {
  val pmcId = article.getPmcId
  if (StringUtils.isEmpty(pmcId))
    null
  else {
    val numericPart = pmcId.replace("PMC", "")
    val oaiIdentifier = s"$OAI_HEADER$numericPart"
    val digest = DHPUtils.md5(oaiIdentifier)
    s"$OLD_PMC_PREFIX$digest"
  }
}
|
||||
|
||||
/** Create an instance of class extends Result
|
||||
* starting from OAF instanceType value
|
||||
*
|
||||
|
@ -122,16 +137,27 @@ object PubMedToOaf {
|
|||
return null
|
||||
|
||||
// MAP PMID into pid with classid = classname = pmid
|
||||
val pidList: List[StructuredProperty] = List(
|
||||
OafMapperUtils.structuredProperty(
|
||||
article.getPmid,
|
||||
PidType.pmid.toString,
|
||||
PidType.pmid.toString,
|
||||
val pidList = ListBuffer[StructuredProperty]()
|
||||
|
||||
pidList += OafMapperUtils.structuredProperty(
|
||||
article.getPmid,
|
||||
PidType.pmid.toString,
|
||||
PidType.pmid.toString,
|
||||
ModelConstants.DNET_PID_TYPES,
|
||||
ModelConstants.DNET_PID_TYPES,
|
||||
dataInfo
|
||||
)
|
||||
|
||||
if (StringUtils.isNotBlank(article.getPmcId)) {
|
||||
pidList += OafMapperUtils.structuredProperty(
|
||||
article.getPmcId,
|
||||
PidType.pmc.toString,
|
||||
PidType.pmc.toString,
|
||||
ModelConstants.DNET_PID_TYPES,
|
||||
ModelConstants.DNET_PID_TYPES,
|
||||
dataInfo
|
||||
)
|
||||
)
|
||||
}
|
||||
if (pidList == null)
|
||||
return null
|
||||
|
||||
|
@ -186,6 +212,7 @@ object PubMedToOaf {
|
|||
val urlLists: List[String] = pidList
|
||||
.map(s => (urlMap.getOrElse(s.getQualifier.getClassid, ""), s.getValue))
|
||||
.filter(t => t._1.nonEmpty)
|
||||
.toList
|
||||
.map(t => t._1 + t._2)
|
||||
if (urlLists != null)
|
||||
pubmedInstance.setUrl(urlLists.asJava)
|
||||
|
@ -262,7 +289,14 @@ object PubMedToOaf {
|
|||
|
||||
if (authors != null && authors.nonEmpty)
|
||||
result.setAuthor(authors.asJava)
|
||||
result.setOriginalId(pidList.map(s => s.getValue).asJava)
|
||||
|
||||
if (StringUtils.isNotEmpty(article.getPmcId)) {
|
||||
val originalIDS = ListBuffer[String]()
|
||||
originalIDS += createOriginalOpenaireId(article)
|
||||
pidList.map(s => s.getValue).foreach(p => originalIDS += p)
|
||||
result.setOriginalId(originalIDS.asJava)
|
||||
} else
|
||||
result.setOriginalId(pidList.map(s => s.getValue).asJava)
|
||||
|
||||
result.setId(article.getPmid)
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
package eu.dnetlib.dhp.actionmanager.ror;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.util.List;
|
||||
|
@ -38,25 +39,20 @@ class GenerateRorActionSetJobTest {
|
|||
.readValue(IOUtils.toString(getClass().getResourceAsStream("ror_org.json")), RorOrganization.class);
|
||||
final List<AtomicAction<? extends Oaf>> aas = GenerateRorActionSetJob.convertRorOrg(r);
|
||||
|
||||
Assertions.assertEquals(3, aas.size());
|
||||
Assertions.assertEquals(1, aas.size());
|
||||
assertEquals(Organization.class, aas.get(0).getClazz());
|
||||
assertEquals(Relation.class, aas.get(1).getClazz());
|
||||
assertEquals(Relation.class, aas.get(2).getClazz());
|
||||
|
||||
final Organization o = (Organization) aas.get(0).getPayload();
|
||||
final Relation r1 = (Relation) aas.get(1).getPayload();
|
||||
final Relation r2 = (Relation) aas.get(2).getPayload();
|
||||
|
||||
assertEquals(o.getId(), r1.getSource());
|
||||
assertEquals(r1.getSource(), r2.getTarget());
|
||||
assertEquals(r2.getSource(), r1.getTarget());
|
||||
assertEquals(ModelConstants.IS_PARENT_OF, r1.getRelClass());
|
||||
assertEquals(ModelConstants.IS_CHILD_OF, r2.getRelClass());
|
||||
assertNotNull(o);
|
||||
|
||||
assertNotNull(o.getCountry());
|
||||
assertEquals("AU", o.getCountry().getClassid());
|
||||
|
||||
assertNotNull(o.getLegalname());
|
||||
assertEquals("Mount Stromlo Observatory", o.getLegalname().getValue());
|
||||
|
||||
System.out.println(mapper.writeValueAsString(o));
|
||||
System.out.println(mapper.writeValueAsString(r1));
|
||||
System.out.println(mapper.writeValueAsString(r2));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -195,7 +195,9 @@
|
|||
<Title>Biochemical and biophysical research communications</Title>
|
||||
<ISOAbbreviation>Biochem Biophys Res Commun</ISOAbbreviation>
|
||||
</Journal>
|
||||
<ArticleTitle>Delineation of the intimate details of the backbone conformation of pyridine nucleotide coenzymes in aqueous solution.</ArticleTitle>
|
||||
<ArticleTitle>Delineation of the intimate details of the backbone conformation of pyridine nucleotide
|
||||
coenzymes in aqueous solution.
|
||||
</ArticleTitle>
|
||||
<Pagination>
|
||||
<MedlinePgn>1173-9</MedlinePgn>
|
||||
</Pagination>
|
||||
|
@ -473,7 +475,9 @@
|
|||
<Title>Biochemical and biophysical research communications</Title>
|
||||
<ISOAbbreviation>Biochem Biophys Res Commun</ISOAbbreviation>
|
||||
</Journal>
|
||||
<ArticleTitle>Effect of chloroquine on cultured fibroblasts: release of lysosomal hydrolases and inhibition of their uptake.</ArticleTitle>
|
||||
<ArticleTitle>Effect of chloroquine on cultured fibroblasts: release of lysosomal hydrolases and
|
||||
inhibition of their uptake.
|
||||
</ArticleTitle>
|
||||
<Pagination>
|
||||
<MedlinePgn>1338-43</MedlinePgn>
|
||||
</Pagination>
|
||||
|
@ -657,7 +661,8 @@
|
|||
<Title>Biochemical and biophysical research communications</Title>
|
||||
<ISOAbbreviation>Biochem Biophys Res Commun</ISOAbbreviation>
|
||||
</Journal>
|
||||
<ArticleTitle>Atomic models for the polypeptide backbones of myohemerythrin and hemerythrin.</ArticleTitle>
|
||||
<ArticleTitle>Atomic models for the polypeptide backbones of myohemerythrin and hemerythrin.
|
||||
</ArticleTitle>
|
||||
<Pagination>
|
||||
<MedlinePgn>1349-56</MedlinePgn>
|
||||
</Pagination>
|
||||
|
@ -1627,7 +1632,9 @@
|
|||
<Title>Biochemical pharmacology</Title>
|
||||
<ISOAbbreviation>Biochem Pharmacol</ISOAbbreviation>
|
||||
</Journal>
|
||||
<ArticleTitle>Comparison between procaine and isocarboxazid metabolism in vitro by a liver microsomal amidase-esterase.</ArticleTitle>
|
||||
<ArticleTitle>Comparison between procaine and isocarboxazid metabolism in vitro by a liver microsomal
|
||||
amidase-esterase.
|
||||
</ArticleTitle>
|
||||
<Pagination>
|
||||
<MedlinePgn>1517-21</MedlinePgn>
|
||||
</Pagination>
|
||||
|
@ -2030,7 +2037,9 @@
|
|||
<Title>Biochemical pharmacology</Title>
|
||||
<ISOAbbreviation>Biochem Pharmacol</ISOAbbreviation>
|
||||
</Journal>
|
||||
<ArticleTitle>Radiochemical assay of glutathione S-epoxide transferase and its enhancement by phenobarbital in rat liver in vivo.</ArticleTitle>
|
||||
<ArticleTitle>Radiochemical assay of glutathione S-epoxide transferase and its enhancement by
|
||||
phenobarbital in rat liver in vivo.
|
||||
</ArticleTitle>
|
||||
<Pagination>
|
||||
<MedlinePgn>1569-72</MedlinePgn>
|
||||
</Pagination>
|
||||
|
@ -2350,7 +2359,9 @@
|
|||
<Title>Biochemical pharmacology</Title>
|
||||
<ISOAbbreviation>Biochem Pharmacol</ISOAbbreviation>
|
||||
</Journal>
|
||||
<ArticleTitle>Identification of adenylate cyclase-coupled beta-adrenergic receptors with radiolabeled beta-adrenergic antagonists.</ArticleTitle>
|
||||
<ArticleTitle>Identification of adenylate cyclase-coupled beta-adrenergic receptors with radiolabeled
|
||||
beta-adrenergic antagonists.
|
||||
</ArticleTitle>
|
||||
<Pagination>
|
||||
<MedlinePgn>1651-8</MedlinePgn>
|
||||
</Pagination>
|
||||
|
@ -2598,7 +2609,9 @@
|
|||
<Title>Biochemical pharmacology</Title>
|
||||
<ISOAbbreviation>Biochem Pharmacol</ISOAbbreviation>
|
||||
</Journal>
|
||||
<ArticleTitle>The effect of adrenaline and of alpha- and beta-adrenergic blocking agents on ATP concentration and on incorporation of 32Pi into ATP in rat fat cells.</ArticleTitle>
|
||||
<ArticleTitle>The effect of adrenaline and of alpha- and beta-adrenergic blocking agents on ATP
|
||||
concentration and on incorporation of 32Pi into ATP in rat fat cells.
|
||||
</ArticleTitle>
|
||||
<Pagination>
|
||||
<MedlinePgn>1659-62</MedlinePgn>
|
||||
</Pagination>
|
||||
|
@ -2851,7 +2864,9 @@
|
|||
<Title>Biochemical pharmacology</Title>
|
||||
<ISOAbbreviation>Biochem Pharmacol</ISOAbbreviation>
|
||||
</Journal>
|
||||
<ArticleTitle>Action of propranolol on mitochondrial functions--effects on energized ion fluxes in the presence of valinomycin.</ArticleTitle>
|
||||
<ArticleTitle>Action of propranolol on mitochondrial functions--effects on energized ion fluxes in the
|
||||
presence of valinomycin.
|
||||
</ArticleTitle>
|
||||
<Pagination>
|
||||
<MedlinePgn>1701-5</MedlinePgn>
|
||||
</Pagination>
|
||||
|
@ -3265,7 +3280,8 @@
|
|||
</Chemical>
|
||||
<Chemical>
|
||||
<RegistryNumber>EC 2.6.1.16</RegistryNumber>
|
||||
<NameOfSubstance UI="D005945">Glutamine-Fructose-6-Phosphate Transaminase (Isomerizing)</NameOfSubstance>
|
||||
<NameOfSubstance UI="D005945">Glutamine-Fructose-6-Phosphate Transaminase (Isomerizing)
|
||||
</NameOfSubstance>
|
||||
</Chemical>
|
||||
<Chemical>
|
||||
<RegistryNumber>EC 2.7.-</RegistryNumber>
|
||||
|
@ -3324,7 +3340,9 @@
|
|||
<DescriptorName UI="D005944" MajorTopicYN="N">Glucosamine</DescriptorName>
|
||||
</MeshHeading>
|
||||
<MeshHeading>
|
||||
<DescriptorName UI="D005945" MajorTopicYN="N">Glutamine-Fructose-6-Phosphate Transaminase (Isomerizing)</DescriptorName>
|
||||
<DescriptorName UI="D005945" MajorTopicYN="N">Glutamine-Fructose-6-Phosphate Transaminase
|
||||
(Isomerizing)
|
||||
</DescriptorName>
|
||||
<QualifierName UI="Q000378" MajorTopicYN="N">metabolism</QualifierName>
|
||||
</MeshHeading>
|
||||
<MeshHeading>
|
||||
|
@ -3463,7 +3481,8 @@
|
|||
<Title>Biochemical pharmacology</Title>
|
||||
<ISOAbbreviation>Biochem Pharmacol</ISOAbbreviation>
|
||||
</Journal>
|
||||
<ArticleTitle>Inhibition of aldehyde reductase by acidic metabolites of the biogenic amines.</ArticleTitle>
|
||||
<ArticleTitle>Inhibition of aldehyde reductase by acidic metabolites of the biogenic amines.
|
||||
</ArticleTitle>
|
||||
<Pagination>
|
||||
<MedlinePgn>1731-3</MedlinePgn>
|
||||
</Pagination>
|
||||
|
@ -3696,7 +3715,9 @@
|
|||
<Title>Biochemical pharmacology</Title>
|
||||
<ISOAbbreviation>Biochem Pharmacol</ISOAbbreviation>
|
||||
</Journal>
|
||||
<ArticleTitle>Effects of 5,6-dihydroxytryptamine on tyrosine-hydroxylase activity in central catecholaminergic neurons of the rat.</ArticleTitle>
|
||||
<ArticleTitle>Effects of 5,6-dihydroxytryptamine on tyrosine-hydroxylase activity in central
|
||||
catecholaminergic neurons of the rat.
|
||||
</ArticleTitle>
|
||||
<Pagination>
|
||||
<MedlinePgn>1739-42</MedlinePgn>
|
||||
</Pagination>
|
||||
|
@ -4602,12 +4623,19 @@
|
|||
<Title>Arzneimittel-Forschung</Title>
|
||||
<ISOAbbreviation>Arzneimittelforschung</ISOAbbreviation>
|
||||
</Journal>
|
||||
<ArticleTitle>[Biochemical studies on camomile components/III. In vitro studies about the antipeptic activity of (--)-alpha-bisabolol (author's transl)].</ArticleTitle>
|
||||
<ArticleTitle>[Biochemical studies on camomile components/III. In vitro studies about the antipeptic
|
||||
activity of (--)-alpha-bisabolol (author's transl)].
|
||||
</ArticleTitle>
|
||||
<Pagination>
|
||||
<MedlinePgn>1352-4</MedlinePgn>
|
||||
</Pagination>
|
||||
<Abstract>
|
||||
<AbstractText>(--)-alpha-Bisabolol has a primary antipeptic action depending on dosage, which is not caused by an alteration of the pH-value. The proteolytic activity of pepsin is reduced by 50 percent through addition of bisabolol in the ratio of 1/0.5. The antipeptic action of bisabolol only occurs in case of direct contact. In case of a previous contact with the substrate, the inhibiting effect is lost.</AbstractText>
|
||||
<AbstractText>(--)-alpha-Bisabolol has a primary antipeptic action depending on dosage, which is not
|
||||
caused by an alteration of the pH-value. The proteolytic activity of pepsin is reduced by 50
|
||||
percent through addition of bisabolol in the ratio of 1/0.5. The antipeptic action of bisabolol
|
||||
only occurs in case of direct contact. In case of a previous contact with the substrate, the
|
||||
inhibiting effect is lost.
|
||||
</AbstractText>
|
||||
</Abstract>
|
||||
<AuthorList CompleteYN="Y">
|
||||
<Author ValidYN="Y">
|
||||
|
@ -4626,7 +4654,9 @@
|
|||
<PublicationType UI="D004740">English Abstract</PublicationType>
|
||||
<PublicationType UI="D016428">Journal Article</PublicationType>
|
||||
</PublicationTypeList>
|
||||
<VernacularTitle>Biochemische Untersuchungen von Kamilleninhaltsstoffen. III. In-vitro-Versuche über die antipeptische Wirkung des (-)-alpha-Bisabolols</VernacularTitle>
|
||||
<VernacularTitle>Biochemische Untersuchungen von Kamilleninhaltsstoffen. III. In-vitro-Versuche über die
|
||||
antipeptische Wirkung des (-)-alpha-Bisabolols
|
||||
</VernacularTitle>
|
||||
</Article>
|
||||
<MedlineJournalInfo>
|
||||
<Country>Germany</Country>
|
||||
|
@ -4753,12 +4783,37 @@
|
|||
<Title>Arzneimittel-Forschung</Title>
|
||||
<ISOAbbreviation>Arzneimittelforschung</ISOAbbreviation>
|
||||
</Journal>
|
||||
<ArticleTitle>[Demonstration of tumor inhibiting properties of a strongly immunostimulating low-molecular weight substance. Comparative studies with ifosfamide on the immuno-labile DS carcinosarcoma. Stimulation of the autoimmune activity for approx. 20 days by BA 1, a N-(2-cyanoethylene)-urea. Novel prophylactic possibilities].</ArticleTitle>
|
||||
<ArticleTitle>[Demonstration of tumor inhibiting properties of a strongly immunostimulating
|
||||
low-molecular weight substance. Comparative studies with ifosfamide on the immuno-labile DS
|
||||
carcinosarcoma. Stimulation of the autoimmune activity for approx. 20 days by BA 1, a
|
||||
N-(2-cyanoethylene)-urea. Novel prophylactic possibilities].
|
||||
</ArticleTitle>
|
||||
<Pagination>
|
||||
<MedlinePgn>1369-79</MedlinePgn>
|
||||
</Pagination>
|
||||
<Abstract>
|
||||
<AbstractText>A report is given on the recent discovery of outstanding immunological properties in BA 1 [N-(2-cyanoethylene)-urea] having a (low) molecular mass M = 111.104. Experiments in 214 DS carcinosarcoma bearing Wistar rats have shown that BA 1, at a dosage of only about 12 percent LD50 (150 mg kg) and negligible lethality (1.7 percent), results in a recovery rate of 40 percent without hyperglycemia and, in one test, of 80 percent with hyperglycemia. Under otherwise unchanged conditions the reference substance ifosfamide (IF) -- a further development of cyclophosphamide -- applied without hyperglycemia in its most efficient dosage of 47 percent LD50 (150 mg kg) brought about a recovery rate of 25 percent at a lethality of 18 percent. (Contrary to BA 1, 250-min hyperglycemia caused no further improvement of the recovery rate.) However this comparison is characterized by the fact that both substances exhibit two quite different (complementary) mechanisms of action. Leucocyte counts made after application of the said cancerostatics and dosages have shown a pronounced stimulation with BA 1 and with ifosfamide, the known suppression in the post-therapeutic interval usually found with standard cancerostatics. In combination with the cited plaque test for BA 1, blood pictures then allow conclusions on the immunity status. Since IF can be taken as one of the most efficient cancerostatics--there is no other chemotherapeutic known up to now that has a more significant effect on the DS carcinosarcoma in rats -- these findings are of special importance. Finally, the total amount of leucocytes and lymphocytes as well as their time behaviour was determined from the blood picture of tumour-free rats after i.v. application of BA 1. The thus obtained numerical values clearly show that further research work on the prophylactic use of this substance seems to be necessary and very promising.</AbstractText>
|
||||
<AbstractText>A report is given on the recent discovery of outstanding immunological properties in
|
||||
BA 1 [N-(2-cyanoethylene)-urea] having a (low) molecular mass M = 111.104. Experiments in 214 DS
|
||||
carcinosarcoma bearing Wistar rats have shown that BA 1, at a dosage of only about 12 percent
|
||||
LD50 (150 mg kg) and negligible lethality (1.7 percent), results in a recovery rate of 40
|
||||
percent without hyperglycemia and, in one test, of 80 percent with hyperglycemia. Under
|
||||
otherwise unchanged conditions the reference substance ifosfamide (IF) -- a further development
|
||||
of cyclophosphamide -- applied without hyperglycemia in its most efficient dosage of 47 percent
|
||||
LD50 (150 mg kg) brought about a recovery rate of 25 percent at a lethality of 18 percent.
|
||||
(Contrary to BA 1, 250-min hyperglycemia caused no further improvement of the recovery rate.)
|
||||
However this comparison is characterized by the fact that both substances exhibit two quite
|
||||
different (complementary) mechanisms of action. Leucocyte counts made after application of the
|
||||
said cancerostatics and dosages have shown a pronounced stimulation with BA 1 and with
|
||||
ifosfamide, the known suppression in the post-therapeutic interval usually found with standard
|
||||
cancerostatics. In combination with the cited plaque test for BA 1, blood pictures then allow
|
||||
conclusions on the immunity status. Since IF can be taken as one of the most efficient
|
||||
cancerostatics--there is no other chemotherapeutic known up to now that has a more significant
|
||||
effect on the DS carcinosarcoma in rats -- these findings are of special importance. Finally,
|
||||
the total amount of leucocytes and lymphocytes as well as their time behaviour was determined
|
||||
from the blood picture of tumour-free rats after i.v. application of BA 1. The thus obtained
|
||||
numerical values clearly show that further research work on the prophylactic use of this
|
||||
substance seems to be necessary and very promising.
|
||||
</AbstractText>
|
||||
</Abstract>
|
||||
<AuthorList CompleteYN="Y">
|
||||
<Author ValidYN="Y">
|
||||
|
@ -4778,7 +4833,11 @@
|
|||
<PublicationType UI="D004740">English Abstract</PublicationType>
|
||||
<PublicationType UI="D016428">Journal Article</PublicationType>
|
||||
</PublicationTypeList>
|
||||
<VernacularTitle>Nachweis krebshemmender Eigenschaften einer stark immunstimulierenden Verbindung kleiner Molekülmasse. Versuche am immunlabilen DS-Karzinosarkom im Vergleich mit Ifosfamid. Stimulierung der körpereigenen Abwehr über etwa 20 Tage durch BA 1, einen N-(2-Cyanthylen)-harnstoff. Neue prophylaktische Möglichkeiten</VernacularTitle>
|
||||
<VernacularTitle>Nachweis krebshemmender Eigenschaften einer stark immunstimulierenden Verbindung
|
||||
kleiner Molekülmasse. Versuche am immunlabilen DS-Karzinosarkom im Vergleich mit Ifosfamid.
|
||||
Stimulierung der körpereigenen Abwehr über etwa 20 Tage durch BA 1, einen
|
||||
N-(2-Cyanthylen)-harnstoff. Neue prophylaktische Möglichkeiten
|
||||
</VernacularTitle>
|
||||
</Article>
|
||||
<MedlineJournalInfo>
|
||||
<Country>Germany</Country>
|
||||
|
@ -5016,7 +5075,20 @@
|
|||
<MedlinePgn>1400-3</MedlinePgn>
|
||||
</Pagination>
|
||||
<Abstract>
|
||||
<AbstractText>The distribution of blood flow to the subendocardial, medium and subepicardial layers of the left ventricular free wall was studied in anaesthetized dogs under normoxic (A), hypoxic (B) conditions and under pharmacologically induced (etafenone) coronary vasodilation (C). Regional myocardial blood flow was determined by means of the particle distribution method. In normoxia a transmural gradient of flow was observed, with the subendocardial layers receiving a significantly higher flow rate compared with the subepicardial layers. In hypoxia induced vasodilation this transmural gradient of flow was persistent. In contrast a marked redistribution of regional flow was observed under pharmacologically induced vasodilation. The transmural gradient decreased. In contrast to some findings these experiments demonstrate that a considerable vasodilatory capacity exists in all layers of the myocardium and can be utilized by drugs. The differences observed for the intramural distribution pattern of flow under hypoxia and drug induced vasodilation support the hypothesis that this pattern reflects corresponding gradients of regional myocardial metabolism.</AbstractText>
|
||||
<AbstractText>The distribution of blood flow to the subendocardial, medium and subepicardial layers
|
||||
of the left ventricular free wall was studied in anaesthetized dogs under normoxic (A), hypoxic
|
||||
(B) conditions and under pharmacologically induced (etafenone) coronary vasodilation (C).
|
||||
Regional myocardial blood flow was determined by means of the particle distribution method. In
|
||||
normoxia a transmural gradient of flow was observed, with the subendocardial layers receiving a
|
||||
significantly higher flow rate compared with the subepicardial layers. In hypoxia induced
|
||||
vasodilation this transmural gradient of flow was persistent. In contrast a marked
|
||||
redistribution of regional flow was observed under pharmacologically induced vasodilation. The
|
||||
transmural gradient decreased. In contrast to some findings these experiments demonstrate that a
|
||||
considerable vasodilatory capacity exists in all layers of the myocardium and can be utilized by
|
||||
drugs. The differences observed for the intramural distribution pattern of flow under hypoxia
|
||||
and drug induced vasodilation support the hypothesis that this pattern reflects corresponding
|
||||
gradients of regional myocardial metabolism.
|
||||
</AbstractText>
|
||||
</Abstract>
|
||||
<AuthorList CompleteYN="Y">
|
||||
<Author ValidYN="Y">
|
||||
|
@ -5185,4 +5257,151 @@
|
|||
</ReferenceList>
|
||||
</PubmedData>
|
||||
</PubmedArticle>
|
||||
<PubmedArticle>
|
||||
<MedlineCitation Status="MEDLINE" Owner="NLM">
|
||||
<PMID Version="1">4917185</PMID>
|
||||
<DateCompleted>
|
||||
<Year>1970</Year>
|
||||
<Month>10</Month>
|
||||
<Day>27</Day>
|
||||
</DateCompleted>
|
||||
<DateRevised>
|
||||
<Year>2018</Year>
|
||||
<Month>11</Month>
|
||||
<Day>13</Day>
|
||||
</DateRevised>
|
||||
<Article PubModel="Print">
|
||||
<Journal>
|
||||
<ISSN IssnType="Print">0003-6919</ISSN>
|
||||
<JournalIssue CitedMedium="Print">
|
||||
<Volume>19</Volume>
|
||||
<Issue>6</Issue>
|
||||
<PubDate>
|
||||
<Year>1970</Year>
|
||||
<Month>Jun</Month>
|
||||
</PubDate>
|
||||
</JournalIssue>
|
||||
<Title>Applied microbiology</Title>
|
||||
<ISOAbbreviation>Appl Microbiol</ISOAbbreviation>
|
||||
</Journal>
|
||||
<ArticleTitle>Bactericidal activity of a broad-spectrum illumination source.</ArticleTitle>
|
||||
<Pagination>
|
||||
<MedlinePgn>1013-4</MedlinePgn>
|
||||
</Pagination>
|
||||
<Abstract>
|
||||
|
||||
<AbstractText>Several hours of exposure to Vita-Lite lamps, which have a unique spectral
|
||||
distribution, give significant killing of cells of Staphylococcus aureus.
|
||||
</AbstractText>
|
||||
</Abstract>
|
||||
<AuthorList CompleteYN="Y">
|
||||
<Author ValidYN="Y">
|
||||
<LastName>Himmelfarb</LastName>
|
||||
<ForeName>P</ForeName>
|
||||
<Initials>P</Initials>
|
||||
</Author>
|
||||
<Author ValidYN="Y">
|
||||
<LastName>Scott</LastName>
|
||||
<ForeName>A</ForeName>
|
||||
<Initials>A</Initials>
|
||||
</Author>
|
||||
<Author ValidYN="Y">
|
||||
<LastName>Thayer</LastName>
|
||||
<ForeName>P S</ForeName>
|
||||
<Initials>PS</Initials>
|
||||
</Author>
|
||||
</AuthorList>
|
||||
<Language>eng</Language>
|
||||
<PublicationTypeList>
|
||||
<PublicationType UI="D016428">Journal Article</PublicationType>
|
||||
</PublicationTypeList>
|
||||
</Article>
|
||||
<MedlineJournalInfo>
|
||||
<Country>United States</Country>
|
||||
<MedlineTA>Appl Microbiol</MedlineTA>
|
||||
<NlmUniqueID>7605802</NlmUniqueID>
|
||||
<ISSNLinking>0003-6919</ISSNLinking>
|
||||
</MedlineJournalInfo>
|
||||
<CitationSubset>IM</CitationSubset>
|
||||
<MeshHeadingList>
|
||||
<MeshHeading>
|
||||
<DescriptorName UI="D001431" MajorTopicYN="N">Bacteriological Techniques</DescriptorName>
|
||||
<QualifierName UI="Q000295" MajorTopicYN="Y">instrumentation</QualifierName>
|
||||
</MeshHeading>
|
||||
<MeshHeading>
|
||||
<DescriptorName UI="D008027" MajorTopicYN="Y">Light</DescriptorName>
|
||||
</MeshHeading>
|
||||
<MeshHeading>
|
||||
<DescriptorName UI="D011830" MajorTopicYN="N">Radiation Effects</DescriptorName>
|
||||
</MeshHeading>
|
||||
<MeshHeading>
|
||||
<DescriptorName UI="D012706" MajorTopicYN="N">Serratia marcescens</DescriptorName>
|
||||
<QualifierName UI="Q000254" MajorTopicYN="N">growth &amp; development</QualifierName>
|
||||
<QualifierName UI="Q000528" MajorTopicYN="Y">radiation effects</QualifierName>
|
||||
</MeshHeading>
|
||||
<MeshHeading>
|
||||
<DescriptorName UI="D013210" MajorTopicYN="N">Staphylococcus</DescriptorName>
|
||||
<QualifierName UI="Q000254" MajorTopicYN="N">growth &amp; development</QualifierName>
|
||||
<QualifierName UI="Q000528" MajorTopicYN="Y">radiation effects</QualifierName>
|
||||
</MeshHeading>
|
||||
<MeshHeading>
|
||||
<DescriptorName UI="D013242" MajorTopicYN="N">Sterilization</DescriptorName>
|
||||
</MeshHeading>
|
||||
</MeshHeadingList>
|
||||
</MedlineCitation>
|
||||
<PubmedData>
|
||||
<History>
|
||||
<PubMedPubDate PubStatus="pubmed">
|
||||
<Year>1970</Year>
|
||||
<Month>6</Month>
|
||||
<Day>1</Day>
|
||||
</PubMedPubDate>
|
||||
<PubMedPubDate PubStatus="medline">
|
||||
<Year>1970</Year>
|
||||
<Month>6</Month>
|
||||
<Day>1</Day>
|
||||
<Hour>0</Hour>
|
||||
<Minute>1</Minute>
|
||||
</PubMedPubDate>
|
||||
<PubMedPubDate PubStatus="entrez">
|
||||
<Year>1970</Year>
|
||||
<Month>6</Month>
|
||||
<Day>1</Day>
|
||||
<Hour>0</Hour>
|
||||
<Minute>0</Minute>
|
||||
</PubMedPubDate>
|
||||
</History>
|
||||
<PublicationStatus>ppublish</PublicationStatus>
|
||||
<ArticleIdList>
|
||||
<ArticleId IdType="pubmed">4917185</ArticleId>
|
||||
<ArticleId IdType="pmc">PMC376844</ArticleId>
|
||||
</ArticleIdList>
|
||||
<ReferenceList>
|
||||
<Reference>
|
||||
<Citation>Photochem Photobiol. 1969 Jan;9(1):99-102</Citation>
|
||||
<ArticleIdList>
|
||||
<ArticleId IdType="pubmed">4889809</ArticleId>
|
||||
</ArticleIdList>
|
||||
</Reference>
|
||||
<Reference>
|
||||
<Citation>Endocrinology. 1969 Dec;85(6):1218-21</Citation>
|
||||
<ArticleIdList>
|
||||
<ArticleId IdType="pubmed">5347623</ArticleId>
|
||||
</ArticleIdList>
|
||||
</Reference>
|
||||
<Reference>
|
||||
<Citation>Arch Mikrobiol. 1956;24(1):60-79</Citation>
|
||||
<ArticleIdList>
|
||||
<ArticleId IdType="pubmed">13327987</ArticleId>
|
||||
</ArticleIdList>
|
||||
</Reference>
|
||||
<Reference>
|
||||
<Citation>J Bacteriol. 1941 Sep;42(3):353-66</Citation>
|
||||
<ArticleIdList>
|
||||
<ArticleId IdType="pubmed">16560457</ArticleId>
|
||||
</ArticleIdList>
|
||||
</Reference>
|
||||
</ReferenceList>
|
||||
</PubmedData>
|
||||
</PubmedArticle>
|
||||
</PubmedArticleSet>
|
|
@ -2,11 +2,14 @@ package eu.dnetlib.dhp.datacite
|
|||
|
||||
import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}
|
||||
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf
|
||||
import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _}
|
||||
import org.apache.commons.io.FileUtils
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.sql.functions.{col, count}
|
||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.JsonAST.{JField, JObject, JString}
|
||||
import org.json4s.jackson.JsonMethods.parse
|
||||
import org.junit.jupiter.api.Assertions._
|
||||
import org.junit.jupiter.api.extension.ExtendWith
|
||||
import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
|
||||
|
@ -70,17 +73,15 @@ class DataciteToOAFTest extends AbstractVocabularyTest {
|
|||
|
||||
assertEquals(100, nativeSize)
|
||||
|
||||
spark.read.load(targetPath).printSchema();
|
||||
|
||||
val result: Dataset[Oaf] = spark.read.load(targetPath).as[Oaf]
|
||||
val result: Dataset[String] =
|
||||
spark.read.text(targetPath).as[String].map(DataciteUtilityTest.convertToOAF)(Encoders.STRING)
|
||||
|
||||
result
|
||||
.map(s => s.getClass.getSimpleName)
|
||||
.groupBy(col("value").alias("class"))
|
||||
.agg(count("value").alias("Total"))
|
||||
.show(false)
|
||||
|
||||
val t = spark.read.load(targetPath).count()
|
||||
val t = spark.read.text(targetPath).as[String].count()
|
||||
|
||||
assertTrue(t > 0)
|
||||
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
package eu.dnetlib.dhp.datacite
|
||||
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.JsonAST.{JField, JObject, JString}
|
||||
import org.json4s.jackson.JsonMethods.parse
|
||||
|
||||
object DataciteUtilityTest {

  /** Classifies a JSON-serialized OAF record by inspecting its fields.
    *
    * A record in which a `source` field can be extracted as a string is reported
    * as `"Relation"`. Any other record is reported with the lower-cased
    * `classname` of the first `instance.instancetype` found in the document.
    *
    * @param input the JSON text of a single record
    * @return `"Relation"`, or the lower-cased instance type class name
    * @throws NoSuchElementException if the record has no `source` field and no
    *                                `instance.instancetype.classname` either
    */
  def convertToOAF(input: String): String = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val json = parse(input)

    // "NULL" is the sentinel used when no string-valued `source` can be extracted.
    val source: String = (json \\ "source").extractOrElse("NULL")

    if (source != "NULL") {
      "Relation"
    } else {
      val instanceTypes: List[String] = for {
        JObject(instance)                             <- json \\ "instance"
        JField("instancetype", JObject(instancetype)) <- instance
        JField("classname", JString(classname))       <- instancetype
      } yield classname

      // Fail with the same exception type as `List.head`, but say which record was at fault.
      instanceTypes.headOption
        .map(_.toLowerCase())
        .getOrElse(
          throw new NoSuchElementException(
            s"No instance.instancetype.classname found in record: ${input.take(200)}"
          )
        )
    }
  }

}
|
|
@ -2,9 +2,10 @@ package eu.dnetlib.dhp.sx.bio
|
|||
|
||||
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature}
|
||||
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
|
||||
import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result}
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.PidType
|
||||
import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Result}
|
||||
import eu.dnetlib.dhp.sx.bio.BioDBToOAF.ScholixResolved
|
||||
import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMParser, PubMedToOaf}
|
||||
import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMParser, PMSubject, PubMedToOaf}
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.JsonAST.{JField, JObject, JString}
|
||||
import org.json4s.jackson.JsonMethods.parse
|
||||
|
@ -16,6 +17,7 @@ import org.mockito.junit.jupiter.MockitoExtension
|
|||
import java.io.{BufferedReader, InputStream, InputStreamReader}
|
||||
import java.util.zip.GZIPInputStream
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.collection.mutable.ListBuffer
|
||||
import scala.io.Source
|
||||
import scala.xml.pull.XMLEventReader
|
||||
|
||||
|
@ -74,6 +76,95 @@ class BioScholixTest extends AbstractVocabularyTest {
|
|||
|
||||
}
|
||||
|
||||
/** Asserts that a parsed PubMed article carries a pmid, a title and an author
  * list, and that every author in that list is non-null and has a full name.
  */
private def checkPMArticle(article: PMArticle): Unit = {
  assertNotNull(article.getPmid)
  assertNotNull(article.getTitle)
  assertNotNull(article.getAuthors)

  for (author <- article.getAuthors.asScala) {
    assertNotNull(author)
    assertNotNull(author.getFullName)
  }
}
|
||||
|
||||
/** Parses the bundled PubMed XML sample and validates every article produced
  * by the parser with `checkPMArticle`.
  */
@Test
def testParsingPubmedXML(): Unit = {
  val stream = getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml")
  // Fail with a readable message instead of a NullPointerException when the resource is missing.
  assertNotNull(stream, "test resource pubmed.xml not found on the classpath")

  val source = Source.fromInputStream(stream)
  try {
    val parser = new PMParser(new XMLEventReader(source))
    var parsed = 0
    parser.foreach { article =>
      checkPMArticle(article)
      parsed += 1
    }
    // Guard against a vacuous pass: the sample file must yield at least one article.
    assertTrue(parsed > 0, "no article was parsed from pubmed.xml")
  } finally {
    // Release the underlying stream whether or not the assertions succeeded.
    source.close()
  }
}
|
||||
|
||||
/** Asserts the structural invariants expected of a record converted from PubMed:
  * it is a `Publication` with an id, titles with values, authors with full names,
  * instances carrying collectedfrom / pid / instancetype, and non-null original ids.
  * When any instance pid has the `pmc` class id, at least one original id must
  * start with the `od_______267::` prefix.
  */
private def checkPubmedPublication(o: Oaf): Unit = {
  assertTrue(o.isInstanceOf[Publication])
  val publication: Publication = o.asInstanceOf[Publication]

  assertNotNull(publication.getId)
  assertNotNull(publication.getTitle)
  for (title <- publication.getTitle.asScala) assertNotNull(title.getValue)
  for (author <- publication.getAuthor.asScala) assertNotNull(author.getFullname)

  assertNotNull(publication.getInstance())
  val instances = publication.getInstance().asScala
  for (instance <- instances) {
    assertNotNull(instance.getCollectedfrom)
    assertNotNull(instance.getPid)
    assertNotNull(instance.getInstancetype)
  }

  assertNotNull(publication.getOriginalId)
  val originalIds = publication.getOriginalId.asScala
  originalIds.foreach(id => assertNotNull(id))

  val pmcClassId = PidType.pmc.toString
  val hasPMC = instances.exists { instance =>
    instance.getPid.asScala.exists(pid => pid.getQualifier.getClassid.equalsIgnoreCase(pmcClassId))
  }

  if (hasPMC) {
    assertTrue(originalIds.exists(id => id.startsWith("od_______267::")))
  }
}
|
||||
|
||||
/** Checks that the publication id is derived from the pmid only, i.e. it stays
  * the same once a PMC identifier is set, and that the identifier generated the
  * old way from the PMC id is still listed among the original ids.
  */
@Test
def testPubmedOriginalID(): Unit = {
  val expectedId = "50|pmid________::81dc9bdb52d04dc20036dbd8313ed055"
  val oldOpenaireID = "od_______267::0000072375bc0e68fa09d4e6b7658248"

  val article: PMArticle = new PMArticle
  article.setPmid("1234")
  article.setTitle("a Title")
  // A publication type is registered before converting; the conversion result is then checked for non-null.
  article.getPublicationTypes.add(new PMSubject("article", null, null))

  // Conversion with the pmid only.
  val withoutPmc = PubMedToOaf.convert(article, vocabularies).asInstanceOf[Publication]
  assertNotNull(withoutPmc)
  assertEquals(expectedId, withoutPmc.getId)

  // Setting the PMC identifier must not change the publication id.
  article.setPmcId("PMC1517292")
  val withPmc = PubMedToOaf.convert(article, vocabularies).asInstanceOf[Publication]
  assertNotNull(withPmc)
  assertEquals(expectedId, withPmc.getId)

  // The original id generated in the old way from the PMC identifier must be present.
  val hasOldOpenAIREID = withPmc.getOriginalId.asScala.exists(id => id.equalsIgnoreCase(oldOpenaireID))
  assertTrue(hasOldOpenAIREID)
}
|
||||
|
||||
/** Converts every article of the bundled PubMed XML sample to OAF and validates
  * each converted record with `checkPubmedPublication`.
  */
@Test
def testPubmedMapping(): Unit = {
  val stream = getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml")
  // Fail with a readable message instead of a NullPointerException when the resource is missing.
  assertNotNull(stream, "test resource pubmed.xml not found on the classpath")

  val source = Source.fromInputStream(stream)
  try {
    val parser = new PMParser(new XMLEventReader(source))
    val results = ListBuffer[Oaf]()
    parser.foreach(x => results += PubMedToOaf.convert(x, vocabularies))

    // Guard against a vacuous pass: the sample file must yield at least one record.
    assertTrue(results.nonEmpty, "no record was produced from pubmed.xml")
    results.foreach(checkPubmedPublication)
  } finally {
    // Release the underlying stream whether or not the assertions succeeded.
    source.close()
  }
}
|
||||
|
||||
@Test
|
||||
def testPDBToOAF(): Unit = {
|
||||
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
|
||||
package eu.dnetlib.dhp.bulktag.eosc;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 21/07/22
|
||||
*/
|
||||
public class DatasourceMaster implements Serializable {

	/** Value stored through {@link #setDatasource(String)}. */
	private String datasource;

	/** Value stored through {@link #setMaster(String)}. */
	private String master;

	/**
	 * @return the datasource value, or {@code null} when it was never set
	 */
	public String getDatasource() {
		return this.datasource;
	}

	/**
	 * @param datasource the datasource value to store
	 */
	public void setDatasource(final String datasource) {
		this.datasource = datasource;
	}

	/**
	 * @return the master value, or {@code null} when it was never set
	 */
	public String getMaster() {
		return this.master;
	}

	/**
	 * @param master the master value to store
	 */
	public void setMaster(final String master) {
		this.master = master;
	}
}
|
|
@ -0,0 +1,136 @@
|
|||
|
||||
package eu.dnetlib.dhp.bulktag.eosc;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Function;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 21/07/22
|
||||
*/
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.DbClient;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.common.RelationInverse;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||
|
||||
public class ReadMasterDatasourceFromDB implements Closeable {
|
||||
|
||||
private final DbClient dbClient;
|
||||
private static final Log log = LogFactory.getLog(ReadMasterDatasourceFromDB.class);
|
||||
|
||||
private final BufferedWriter writer;
|
||||
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static final String QUERY = "SELECT dso.id datasource, d.id master FROM " +
|
||||
"(SELECT id FROM dsm_services WHERE id like 'eosc%') dso " +
|
||||
"FULL JOIN " +
|
||||
"(SELECT id, duplicate FROM dsm_dedup_services WHERE duplicate like 'eosc%')d " +
|
||||
"ON dso.id = d.duplicate";
|
||||
|
||||
public static void main(final String[] args) throws Exception {
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
ReadMasterDatasourceFromDB.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/bulktag/datasourcemaster_parameters.json")));
|
||||
|
||||
parser.parseArgument(args);
|
||||
|
||||
final String dbUrl = parser.get("postgresUrl");
|
||||
final String dbUser = parser.get("postgresUser");
|
||||
final String dbPassword = parser.get("postgresPassword");
|
||||
final String hdfsPath = parser.get("hdfsPath");
|
||||
final String hdfsNameNode = parser.get("hdfsNameNode");
|
||||
|
||||
try (
|
||||
final ReadMasterDatasourceFromDB rmd = new ReadMasterDatasourceFromDB(hdfsPath, hdfsNameNode, dbUrl, dbUser,
|
||||
dbPassword)) {
|
||||
|
||||
log.info("Processing datasources...");
|
||||
rmd.execute(QUERY, rmd::datasourceMasterMap);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public void execute(final String sql, final Function<ResultSet, DatasourceMaster> producer) {
|
||||
|
||||
dbClient.processResults(sql, rs -> writeMap(producer.apply(rs)));
|
||||
}
|
||||
|
||||
public DatasourceMaster datasourceMasterMap(ResultSet rs) {
|
||||
try {
|
||||
DatasourceMaster dm = new DatasourceMaster();
|
||||
String datasource = rs.getString("datasource");
|
||||
dm.setDatasource(datasource);
|
||||
String master = rs.getString("master");
|
||||
if (StringUtils.isNotBlank(master))
|
||||
dm.setMaster(OafMapperUtils.createOpenaireId(10, master, true));
|
||||
else
|
||||
dm.setMaster(OafMapperUtils.createOpenaireId(10, datasource, true));
|
||||
return dm;
|
||||
|
||||
} catch (final SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
dbClient.close();
|
||||
writer.close();
|
||||
}
|
||||
|
||||
public ReadMasterDatasourceFromDB(
|
||||
final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword)
|
||||
throws IOException {
|
||||
|
||||
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
conf.set("fs.defaultFS", hdfsNameNode);
|
||||
|
||||
FileSystem fileSystem = FileSystem.get(conf);
|
||||
Path hdfsWritePath = new Path(hdfsPath);
|
||||
FSDataOutputStream fsDataOutputStream = null;
|
||||
if (fileSystem.exists(hdfsWritePath)) {
|
||||
fsDataOutputStream = fileSystem.append(hdfsWritePath);
|
||||
} else {
|
||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
||||
}
|
||||
|
||||
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
protected void writeMap(final DatasourceMaster dm) {
|
||||
try {
|
||||
writer.write(OBJECT_MAPPER.writeValueAsString(dm));
|
||||
writer.newLine();
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,170 @@
|
|||
|
||||
package eu.dnetlib.dhp.bulktag.eosc;
|
||||
|
||||
import static eu.dnetlib.dhp.PropagationConstant.readPath;
|
||||
import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir;
|
||||
import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.*;
|
||||
import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.TAGGING_TRUST;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import javax.print.attribute.DocAttributeSet;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.ForeachFunction;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.bulktag.SparkBulkTagJob;
|
||||
import eu.dnetlib.dhp.bulktag.community.*;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 21/07/22
|
||||
*/
|
||||
public class SparkEoscBulkTag implements Serializable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(SparkEoscBulkTag.class);
|
||||
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
SparkEoscBulkTag.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/bulktag/input_eosc_bulkTag_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String inputPath = parser.get("sourcePath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
final String workingPath = parser.get("workingPath");
|
||||
log.info("workingPath: {}", workingPath);
|
||||
|
||||
String datasourceMapPath = parser.get("datasourceMapPath");
|
||||
log.info("datasourceMapPath: {}", datasourceMapPath);
|
||||
|
||||
final String resultClassName = parser.get("resultTableName");
|
||||
log.info("resultTableName: {}", resultClassName);
|
||||
|
||||
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
CommunityConfiguration cc;
|
||||
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
removeOutputDir(spark, workingPath);
|
||||
execBulkTag(spark, inputPath, workingPath, datasourceMapPath, resultClazz);
|
||||
});
|
||||
}
|
||||
|
||||
private static <R extends Result> void execBulkTag(
|
||||
SparkSession spark,
|
||||
String inputPath,
|
||||
String workingPath,
|
||||
String datasourceMapPath,
|
||||
Class<R> resultClazz) {
|
||||
|
||||
List<String> hostedByList = readPath(spark, datasourceMapPath, DatasourceMaster.class)
|
||||
.map((MapFunction<DatasourceMaster, String>) dm -> dm.getMaster(), Encoders.STRING())
|
||||
.collectAsList();
|
||||
|
||||
readPath(spark, inputPath, resultClazz)
|
||||
.map(patchResult(), Encoders.bean(resultClazz))
|
||||
.filter(Objects::nonNull)
|
||||
.map(
|
||||
(MapFunction<R, R>) value -> enrich(value, hostedByList),
|
||||
Encoders.bean(resultClazz))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(workingPath);
|
||||
|
||||
readPath(spark, workingPath, resultClazz)
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(inputPath);
|
||||
|
||||
}
|
||||
|
||||
private static <R extends Result> R enrich(R value, List<String> hostedByList) {
|
||||
if (value
|
||||
.getInstance()
|
||||
.stream()
|
||||
.anyMatch(
|
||||
i -> (hostedByList.contains(i.getHostedby().getKey())) ||
|
||||
(value.getEoscifguidelines() != null && value.getEoscifguidelines().size() > 0))
|
||||
&&
|
||||
!value.getContext().stream().anyMatch(c -> c.getId().equals("eosc"))) {
|
||||
Context context = new Context();
|
||||
context.setId("eosc");
|
||||
context
|
||||
.setDataInfo(
|
||||
Arrays
|
||||
.asList(
|
||||
OafMapperUtils
|
||||
.dataInfo(
|
||||
false, BULKTAG_DATA_INFO_TYPE, true, false,
|
||||
OafMapperUtils
|
||||
.qualifier(
|
||||
CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE,
|
||||
DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
|
||||
TAGGING_TRUST)));
|
||||
value.getContext().add(context);
|
||||
|
||||
}
|
||||
return value;
|
||||
|
||||
}
|
||||
|
||||
public static <R> Dataset<R> readPath(
|
||||
SparkSession spark, String inputPath, Class<R> clazz) {
|
||||
return spark
|
||||
.read()
|
||||
.textFile(inputPath)
|
||||
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
|
||||
}
|
||||
|
||||
// TODO remove this hack as soon as the values fixed by this method will be provided as NON null
|
||||
private static <R extends Result> MapFunction<R, R> patchResult() {
|
||||
return r -> {
|
||||
if (r.getDataInfo().getDeletedbyinference() == null) {
|
||||
r.getDataInfo().setDeletedbyinference(false);
|
||||
}
|
||||
if (r.getContext() == null) {
|
||||
r.setContext(new ArrayList<>());
|
||||
}
|
||||
return r;
|
||||
};
|
||||
}
|
||||
|
||||
}
|
|
@ -1,13 +1,10 @@
|
|||
|
||||
package eu.dnetlib.dhp.bulktag;
|
||||
package eu.dnetlib.dhp.bulktag.eosc;
|
||||
|
||||
import static eu.dnetlib.dhp.PropagationConstant.readPath;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
|
@ -21,35 +18,15 @@ import org.slf4j.LoggerFactory;
|
|||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||
|
||||
public class SparkEoscTag {
|
||||
private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class);
|
||||
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
public static final Qualifier EOSC_QUALIFIER = OafMapperUtils
|
||||
.qualifier(
|
||||
"EOSC",
|
||||
"European Open Science Cloud",
|
||||
ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES);
|
||||
public static final DataInfo EOSC_DATAINFO = OafMapperUtils
|
||||
.dataInfo(
|
||||
false, "propagation", true, false,
|
||||
OafMapperUtils
|
||||
.qualifier(
|
||||
"propagation:subject", "Inferred by OpenAIRE",
|
||||
ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
|
||||
"0.9");
|
||||
public final static StructuredProperty EOSC_NOTEBOOK = OafMapperUtils
|
||||
.structuredProperty(
|
||||
"EOSC::Jupyter Notebook", EOSC_QUALIFIER, EOSC_DATAINFO);
|
||||
public final static StructuredProperty EOSC_GALAXY = OafMapperUtils
|
||||
.structuredProperty(
|
||||
"EOSC::Galaxy Workflow", EOSC_QUALIFIER, EOSC_DATAINFO);
|
||||
public final static StructuredProperty EOSC_TWITTER = OafMapperUtils
|
||||
.structuredProperty(
|
||||
"EOSC::Twitter Data", EOSC_QUALIFIER, EOSC_DATAINFO);
|
||||
public static final String EOSC_GALAXY_WORKFLOW = "EOSC::Galaxy Workflow";
|
||||
public static final String EOSC_TWITTER_DATA = "EOSC::Twitter Data";
|
||||
public static final String EOSC_JUPYTER_NOTEBOOK = "EOSC::Jupyter Notebook";
|
||||
public static final String COMPLIES_WITH = "compliesWith";
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
|
@ -84,29 +61,35 @@ public class SparkEoscTag {
|
|||
});
|
||||
}
|
||||
|
||||
public static EoscIfGuidelines newInstance(String code, String label, String url, String semantics) {
|
||||
EoscIfGuidelines eig = new EoscIfGuidelines();
|
||||
eig.setCode(code);
|
||||
eig.setLabel(label);
|
||||
eig.setUrl(url);
|
||||
eig.setSemanticRelation(semantics);
|
||||
return eig;
|
||||
|
||||
}
|
||||
|
||||
private static void execEoscTag(SparkSession spark, String inputPath, String workingPath) {
|
||||
|
||||
readPath(spark, inputPath + "/software", Software.class)
|
||||
.map((MapFunction<Software, Software>) s -> {
|
||||
List<StructuredProperty> sbject;
|
||||
if (!Optional.ofNullable(s.getSubject()).isPresent())
|
||||
s.setSubject(new ArrayList<>());
|
||||
sbject = s.getSubject();
|
||||
|
||||
if (containsCriteriaNotebook(s)) {
|
||||
sbject.add(EOSC_NOTEBOOK);
|
||||
if (sbject.stream().anyMatch(sb -> sb.getValue().equals("EOSC Jupyter Notebook"))) {
|
||||
sbject = sbject.stream().map(sb -> {
|
||||
if (sb.getValue().equals("EOSC Jupyter Notebook")) {
|
||||
return null;
|
||||
}
|
||||
return sb;
|
||||
}).filter(Objects::nonNull).collect(Collectors.toList());
|
||||
s.setSubject(sbject);
|
||||
}
|
||||
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
|
||||
s.setEoscifguidelines(new ArrayList<>());
|
||||
addEIG(
|
||||
s.getEoscifguidelines(), EOSC_JUPYTER_NOTEBOOK, EOSC_JUPYTER_NOTEBOOK, "",
|
||||
COMPLIES_WITH);
|
||||
|
||||
}
|
||||
if (containsCriteriaGalaxy(s)) {
|
||||
sbject.add(EOSC_GALAXY);
|
||||
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
|
||||
s.setEoscifguidelines(new ArrayList<>());
|
||||
|
||||
addEIG(
|
||||
s.getEoscifguidelines(), EOSC_GALAXY_WORKFLOW, EOSC_GALAXY_WORKFLOW, "", COMPLIES_WITH);
|
||||
}
|
||||
return s;
|
||||
}, Encoders.bean(Software.class))
|
||||
|
@ -123,15 +106,17 @@ public class SparkEoscTag {
|
|||
|
||||
readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class)
|
||||
.map((MapFunction<OtherResearchProduct, OtherResearchProduct>) orp -> {
|
||||
List<StructuredProperty> sbject;
|
||||
if (!Optional.ofNullable(orp.getSubject()).isPresent())
|
||||
orp.setSubject(new ArrayList<>());
|
||||
sbject = orp.getSubject();
|
||||
|
||||
if (!Optional.ofNullable(orp.getEoscifguidelines()).isPresent())
|
||||
orp.setEoscifguidelines(new ArrayList<>());
|
||||
|
||||
if (containsCriteriaGalaxy(orp)) {
|
||||
sbject.add(EOSC_GALAXY);
|
||||
addEIG(
|
||||
orp.getEoscifguidelines(), EOSC_GALAXY_WORKFLOW, EOSC_GALAXY_WORKFLOW, "",
|
||||
COMPLIES_WITH);
|
||||
}
|
||||
if (containscriteriaTwitter(orp)) {
|
||||
sbject.add(EOSC_TWITTER);
|
||||
addEIG(orp.getEoscifguidelines(), EOSC_TWITTER_DATA, EOSC_TWITTER_DATA, "", COMPLIES_WITH);
|
||||
}
|
||||
return orp;
|
||||
}, Encoders.bean(OtherResearchProduct.class))
|
||||
|
@ -148,12 +133,11 @@ public class SparkEoscTag {
|
|||
|
||||
readPath(spark, inputPath + "/dataset", Dataset.class)
|
||||
.map((MapFunction<Dataset, Dataset>) d -> {
|
||||
List<StructuredProperty> sbject;
|
||||
if (!Optional.ofNullable(d.getSubject()).isPresent())
|
||||
d.setSubject(new ArrayList<>());
|
||||
sbject = d.getSubject();
|
||||
|
||||
if (!Optional.ofNullable(d.getEoscifguidelines()).isPresent())
|
||||
d.setEoscifguidelines(new ArrayList<>());
|
||||
if (containscriteriaTwitter(d)) {
|
||||
sbject.add(EOSC_TWITTER);
|
||||
addEIG(d.getEoscifguidelines(), EOSC_TWITTER_DATA, EOSC_TWITTER_DATA, "", COMPLIES_WITH);
|
||||
}
|
||||
return d;
|
||||
}, Encoders.bean(Dataset.class))
|
||||
|
@ -169,6 +153,12 @@ public class SparkEoscTag {
|
|||
.json(inputPath + "/dataset");
|
||||
}
|
||||
|
||||
private static void addEIG(List<EoscIfGuidelines> eoscifguidelines, String code, String label, String url,
|
||||
String sem) {
|
||||
if (!eoscifguidelines.stream().anyMatch(eig -> eig.getCode().equals(code)))
|
||||
eoscifguidelines.add(newInstance(code, label, url, sem));
|
||||
}
|
||||
|
||||
private static boolean containscriteriaTwitter(Result r) {
|
||||
Set<String> words = getWordsSP(r.getTitle());
|
||||
words.addAll(getWordsF(r.getDescription()));
|
||||
|
@ -177,10 +167,12 @@ public class SparkEoscTag {
|
|||
(words.contains("data") || words.contains("dataset")))
|
||||
return true;
|
||||
|
||||
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("twitter")) &&
|
||||
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("data")))
|
||||
return true;
|
||||
return false;
|
||||
return Optional
|
||||
.ofNullable(r.getSubject())
|
||||
.map(
|
||||
s -> s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("twitter")) &&
|
||||
s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("data")))
|
||||
.orElse(false);
|
||||
}
|
||||
|
||||
private static boolean containsCriteriaGalaxy(Result r) {
|
||||
|
@ -190,13 +182,17 @@ public class SparkEoscTag {
|
|||
words.contains("workflow"))
|
||||
return true;
|
||||
|
||||
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
|
||||
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow")))
|
||||
return true;
|
||||
return false;
|
||||
return Optional
|
||||
.ofNullable(r.getSubject())
|
||||
.map(
|
||||
s -> s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
|
||||
s.stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow")))
|
||||
.orElse(false);
|
||||
}
|
||||
|
||||
private static boolean containsCriteriaNotebook(Software s) {
|
||||
if (!Optional.ofNullable(s.getSubject()).isPresent())
|
||||
return false;
|
||||
if (s.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("jupyter")))
|
||||
return true;
|
||||
if (s
|
||||
|
@ -212,13 +208,6 @@ public class SparkEoscTag {
|
|||
return false;
|
||||
}
|
||||
|
||||
private static Set<String> getSubjects(List<StructuredProperty> s) {
|
||||
Set<String> subjects = new HashSet<>();
|
||||
s.stream().forEach(sbj -> subjects.addAll(Arrays.asList(sbj.getValue().toLowerCase().split(" "))));
|
||||
s.stream().forEach(sbj -> subjects.add(sbj.getValue().toLowerCase()));
|
||||
return subjects;
|
||||
}
|
||||
|
||||
private static Set<String> getWordsSP(List<StructuredProperty> elem) {
|
||||
Set<String> words = new HashSet<>();
|
||||
Optional
|
||||
|
@ -242,10 +231,7 @@ public class SparkEoscTag {
|
|||
t -> words
|
||||
.addAll(
|
||||
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
|
||||
// elem
|
||||
// .forEach(
|
||||
// t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
|
||||
return words;
|
||||
|
||||
return words;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
[
|
||||
{
|
||||
"paramName": "p",
|
||||
"paramLongName": "hdfsPath",
|
||||
"paramDescription": "the path where storing the sequential file",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "nn",
|
||||
"paramLongName": "hdfsNameNode",
|
||||
"paramDescription": "the name node on hdfs",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "pgurl",
|
||||
"paramLongName": "postgresUrl",
|
||||
"paramDescription": "postgres url, example: jdbc:postgresql://localhost:5432/testdb",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "pguser",
|
||||
"paramLongName": "postgresUser",
|
||||
"paramDescription": "postgres user",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "pgpasswd",
|
||||
"paramLongName": "postgresPassword",
|
||||
"paramDescription": "postgres password",
|
||||
"paramRequired": false
|
||||
}
|
||||
]
|
|
@ -0,0 +1,34 @@
|
|||
[
|
||||
|
||||
{
|
||||
"paramName":"s",
|
||||
"paramLongName":"sourcePath",
|
||||
"paramDescription": "the path of the sequencial file to read",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "dmp",
|
||||
"paramLongName":"datasourceMapPath",
|
||||
"paramDescription": "the path where the association datasource master has been stored",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"tn",
|
||||
"paramLongName":"resultTableName",
|
||||
"paramDescription": "the name of the result table we are currently working on",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "wp",
|
||||
"paramLongName": "workingPath",
|
||||
"paramDescription": "the path used to store temporary output files",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "ssm",
|
||||
"paramLongName": "isSparkSessionManaged",
|
||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||
"paramRequired": false
|
||||
}
|
||||
|
||||
]
|
|
@ -16,6 +16,21 @@
|
|||
<name>outputPath</name>
|
||||
<description>the output path</description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>postgresURL</name>
|
||||
<description>the url of the postgres server to query</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>postgresUser</name>
|
||||
<description>the username to access the postgres db</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>postgresPassword</name>
|
||||
<description>the postgres password</description>
|
||||
</property>
|
||||
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
|
@ -211,7 +226,7 @@
|
|||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>EOSC_tagging</name>
|
||||
<class>eu.dnetlib.dhp.bulktag.SparkEoscTag</class>
|
||||
<class>eu.dnetlib.dhp.bulktag.eosc.SparkEoscTag</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--num-executors=${sparkExecutorNumber}
|
||||
|
@ -226,10 +241,132 @@
|
|||
<arg>--sourcePath</arg><arg>${outputPath}</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/eoscTag</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<ok to="eosc_get_datasource_master"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="eosc_get_datasource_master">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.bulktag.eosc.ReadMasterDatasourceFromDB</main-class>
|
||||
<arg>--hdfsPath</arg><arg>${workingDir}/datasourcemaster</arg>
|
||||
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--postgresUrl</arg><arg>${postgresURL}</arg>
|
||||
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
|
||||
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
|
||||
</java>
|
||||
<ok to="fork_eosc_context_tag"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="fork_eosc_context_tag">
|
||||
<path start="eosc_context_tag_publication"/>
|
||||
<path start="eosc_context_tag_dataset"/>
|
||||
<path start="eosc_context_tag_otherresearchproduct"/>
|
||||
<path start="eosc_context_tag_software"/>
|
||||
</fork>
|
||||
|
||||
<action name="eosc_context_tag_publication">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>EOSC_tagging</name>
|
||||
<class>eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--num-executors=${sparkExecutorNumber}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${outputPath}/publication</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/eoscContextTag/publication</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--datasourceMapPath</arg><arg>${workingDir}/datasourcemaster</arg>
|
||||
</spark>
|
||||
<ok to="wait_eosc_context_tag"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="eosc_context_tag_dataset">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>EOSC_tagging</name>
|
||||
<class>eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--num-executors=${sparkExecutorNumber}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${outputPath}/dataset</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/eoscContextTag/dataset</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--datasourceMapPath</arg><arg>${workingDir}/datasourcemaster</arg>
|
||||
</spark>
|
||||
<ok to="wait_eosc_context_tag"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<action name="eosc_context_tag_software">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>EOSC_tagging</name>
|
||||
<class>eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--num-executors=${sparkExecutorNumber}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${outputPath}/software</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/eoscContextTag/software</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--datasourceMapPath</arg><arg>${workingDir}/datasourcemaster</arg>
|
||||
</spark>
|
||||
<ok to="wait_eosc_context_tag"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<action name="eosc_context_tag_otherresearchproduct">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>EOSC_tagging</name>
|
||||
<class>eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--num-executors=${sparkExecutorNumber}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/eoscContextTag/otherresearchproduct</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--datasourceMapPath</arg><arg>${workingDir}/datasourcemaster</arg>
|
||||
</spark>
|
||||
<ok to="wait_eosc_context_tag"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<join name="wait_eosc_context_tag" to="End"/>
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
|
@ -0,0 +1,162 @@
|
|||
|
||||
package eu.dnetlib.dhp.bulktag;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 22/07/22
|
||||
*/
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
|
||||
//"50|475c1990cbb2::0fecfb874d9395aa69d2f4d7cd1acbea" has instance hostedby eosc
|
||||
//"50|475c1990cbb2::3185cd5d8a2b0a06bb9b23ef11748eb1" has instance hostedby eosc
|
||||
//"50|475c1990cbb2::449f28eefccf9f70c04ad70d61e041c7" has two instances, one hostedby eosc
|
||||
//"50|475c1990cbb2::3894c94123e96df8a21249957cf160cb" has EoscTag
|
||||
|
||||
public class EOSCContextTaggingTest {
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(EOSCContextTaggingTest.class);
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files.createTempDirectory(EOSCContextTaggingTest.class.getSimpleName());
|
||||
log.info("using work dir {}", workingDir);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(EOSCContextTaggingTest.class.getSimpleName());
|
||||
|
||||
conf.setMaster("local[*]");
|
||||
conf.set("spark.driver.host", "localhost");
|
||||
conf.set("hive.metastore.local", "true");
|
||||
conf.set("spark.ui.enabled", "false");
|
||||
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
||||
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(EOSCTagJobTest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void afterAll() throws IOException {
|
||||
FileUtils.deleteDirectory(workingDir.toFile());
|
||||
spark.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
void EoscContextTagTest() throws Exception {
|
||||
|
||||
spark
|
||||
.read()
|
||||
.textFile(getClass().getResource("/eu/dnetlib/dhp/bulktag/eosc/dataset/dataset_10.json").getPath())
|
||||
.map(
|
||||
(MapFunction<String, Dataset>) value -> OBJECT_MAPPER.readValue(value, Dataset.class),
|
||||
Encoders.bean(Dataset.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir.toString() + "/input/dataset");
|
||||
|
||||
SparkEoscBulkTag
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
workingDir.toString() + "/input/dataset",
|
||||
"-workingPath", workingDir.toString() + "/working/dataset",
|
||||
"-datasourceMapPath",
|
||||
getClass()
|
||||
.getResource("/eu/dnetlib/dhp/bulktag/eosc/datasourceMasterAssociation/datasourceMaster")
|
||||
.getPath(),
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Dataset> tmp = sc
|
||||
.textFile(workingDir.toString() + "/input/dataset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
|
||||
|
||||
Assertions.assertEquals(10, tmp.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
4,
|
||||
tmp
|
||||
.filter(
|
||||
s -> s.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
tmp
|
||||
.filter(
|
||||
d -> d.getId().equals("50|475c1990cbb2::0fecfb874d9395aa69d2f4d7cd1acbea")
|
||||
&&
|
||||
d.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
tmp
|
||||
.filter(
|
||||
d -> d.getId().equals("50|475c1990cbb2::3185cd5d8a2b0a06bb9b23ef11748eb1")
|
||||
&&
|
||||
d.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
tmp
|
||||
.filter(
|
||||
d -> d.getId().equals("50|475c1990cbb2::3894c94123e96df8a21249957cf160cb")
|
||||
&&
|
||||
d.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
tmp
|
||||
.filter(
|
||||
d -> d.getId().equals("50|475c1990cbb2::3894c94123e96df8a21249957cf160cb")
|
||||
&&
|
||||
d.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
|
||||
.count());
|
||||
}
|
||||
|
||||
}
|
|
@ -1,21 +1,17 @@
|
|||
|
||||
package eu.dnetlib.dhp.bulktag;
|
||||
|
||||
import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY_INDICATOR;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
|
@ -27,6 +23,7 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.eosc.SparkEoscTag;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
||||
public class EOSCTagJobTest {
|
||||
|
@ -126,10 +123,23 @@ public class EOSCTagJobTest {
|
|||
.assertEquals(
|
||||
4,
|
||||
tmp
|
||||
.filter(s -> s.getEoscifguidelines() != null)
|
||||
.filter(
|
||||
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
|
||||
s -> s
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.size());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
|
@ -140,6 +150,16 @@ public class EOSCTagJobTest {
|
|||
.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
|
||||
|
||||
Assertions
|
||||
.assertFalse(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
|
@ -166,16 +186,24 @@ public class EOSCTagJobTest {
|
|||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||
|
||||
Assertions
|
||||
.assertTrue(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines() == null);
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
9, tmp
|
||||
8, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
.assertFalse(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
||||
.collect()
|
||||
|
@ -183,6 +211,23 @@ public class EOSCTagJobTest {
|
|||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
|
@ -201,17 +246,24 @@ public class EOSCTagJobTest {
|
|||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||
Assertions
|
||||
.assertTrue(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines() == null);
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
9, tmp
|
||||
8, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
.assertFalse(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
||||
.collect()
|
||||
|
@ -219,14 +271,31 @@ public class EOSCTagJobTest {
|
|||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
|
||||
|
||||
List<StructuredProperty> subjects = tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::6e7a9b21a2feef45673890432af34244"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject();
|
||||
Assertions.assertEquals(8, subjects.size());
|
||||
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||
Assertions.assertEquals(7, subjects.size());
|
||||
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("jupyter")));
|
||||
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("Modeling and Simulation")));
|
||||
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("structure granulaire")));
|
||||
|
@ -250,6 +319,17 @@ public class EOSCTagJobTest {
|
|||
.filter(
|
||||
ds -> ds.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
0, sc
|
||||
.textFile(workingDir.toString() + "/input/dataset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
|
||||
.filter(
|
||||
ds -> ds
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
|
@ -264,7 +344,22 @@ public class EOSCTagJobTest {
|
|||
.textFile(workingDir.toString() + "/input/otherresearchproduct")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
|
||||
.filter(
|
||||
ds -> ds.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
|
||||
orp -> orp
|
||||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
0, sc
|
||||
.textFile(workingDir.toString() + "/input/otherresearchproduct")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
|
||||
.filter(
|
||||
orp -> orp
|
||||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getValue().equals("EOSC::Jupyter Notebook")))
|
||||
.count());
|
||||
|
||||
// spark.stop();
|
||||
|
@ -326,22 +421,41 @@ public class EOSCTagJobTest {
|
|||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
0,
|
||||
tmp
|
||||
.filter(
|
||||
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
tmp
|
||||
.filter(
|
||||
s -> s.getEoscifguidelines() != null)
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
tmp
|
||||
.filter(
|
||||
s -> s.getEoscifguidelines() != null)
|
||||
.filter(
|
||||
s -> s
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
2, tmp
|
||||
1, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
.assertFalse(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
|
@ -350,6 +464,24 @@ public class EOSCTagJobTest {
|
|||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
5, tmp
|
||||
|
@ -385,22 +517,34 @@ public class EOSCTagJobTest {
|
|||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
0,
|
||||
orp
|
||||
.filter(
|
||||
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
|
||||
.count());
|
||||
orp.foreach(o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o)));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
3, orp
|
||||
1, orp
|
||||
.filter(o -> o.getEoscifguidelines() != null)
|
||||
.filter(
|
||||
o -> o
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
2, orp
|
||||
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
.assertFalse(
|
||||
orp
|
||||
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
|
||||
.collect()
|
||||
|
@ -408,6 +552,23 @@ public class EOSCTagJobTest {
|
|||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, orp
|
||||
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
orp
|
||||
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getCode().equals("EOSC::Galaxy Workflow")));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
|
@ -516,10 +677,20 @@ public class EOSCTagJobTest {
|
|||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
3,
|
||||
0,
|
||||
orp
|
||||
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
3,
|
||||
orp
|
||||
.filter(
|
||||
s -> s
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
|
||||
.count());
|
||||
|
||||
JavaRDD<Dataset> dats = sc
|
||||
.textFile(workingDir.toString() + "/input/dataset")
|
||||
|
@ -531,7 +702,11 @@ public class EOSCTagJobTest {
|
|||
.assertEquals(
|
||||
3,
|
||||
dats
|
||||
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
|
||||
.filter(
|
||||
s -> s
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
|
||||
.count());
|
||||
|
||||
}
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,318 @@
|
|||
{"datasource":"eosc________::100percentit::100percentit.100_percent_it_trusted_cloud","master":"10|eosc________::7ef2576047f040612b983a27347471fc"}
|
||||
{"datasource":"eosc________::altec::altec.space-vis_adn_service","master":"10|eosc________::2946c48bbcc514ad76bbbf727d5d8fbc"}
|
||||
{"datasource":"eosc________::astron::astron.","master":"10|eosc________::acb262d4bfdeb6aa9b463a4a6d0d662a"}
|
||||
{"datasource":"eosc________::athena::athena.atmo-flud","master":"10|eosc________::ac448975e1d7f8b0266c8bb3b3992029"}
|
||||
{"datasource":"eosc________::athena::athena.uw-map","master":"10|eosc________::5f2a401cf8ce9dc22a3776cea519b594"}
|
||||
{"datasource":"eosc________::athena::athena.verbal_aggression_analyser_va_analyser","master":"10|eosc________::8b26233e89a50e3754972b1341130494"}
|
||||
{"datasource":"eosc________::authenix::authenix.authenix","master":"10|eosc________::3cd84764da5728473593a580efb29a40"}
|
||||
{"datasource":"eosc________::bineo::bineo.cos4bio","master":"10|eosc________::903e0526a6e56eeaf0e4561aa862ecb8"}
|
||||
{"datasource":"eosc________::blue-cloud::blue-cloud.phytoplankton_eovs","master":"10|eosc________::c2438d79b48baf817956f3856877b3b8"}
|
||||
{"datasource":"eosc________::bsc-es::bsc-es.bdrc_-_barcelona_dust_regional_center","master":"10|eosc________::756664ca614118315840eb8e985e4377"}
|
||||
{"datasource":"eosc________::bsc-es::bsc-es.openebench","master":"10|eosc________::69ed72b873b803feed5ba6ae47548419"}
|
||||
{"datasource":"eosc________::capsh::capsh.dissemin","master":"10|eosc________::e81587742e4107ce83723df17c27cb40"}
|
||||
{"datasource":"eosc________::carlzeissm::carlzeissm.aper","master":"10|eosc________::f3beb9ee5ee293b723e2edd6f990fde3"}
|
||||
{"datasource":"eosc________::ccsd::ccsd.episciences","master":"10|eosc________::e1e9de0dbf4bce79c49338d7cf9327e2"}
|
||||
{"datasource":"eosc________::cds::cds.simbad_simbad_astronomical_database_provides_basic_data_cross-identifications_bibliography_and_measurements_for_astronomical_objects_outside_the_solar_system","master":"10|eosc________::a1e41e71453ac32161f4ac3f5c0e0421"}
|
||||
{"datasource":"eosc________::centerdata::centerdata.surveycodingsorg","master":"10|eosc________::72db73ab253727c889905da50f506d10"}
|
||||
{"datasource":"eosc________::cesga::cesga.finisterrae","master":"10|eosc________::6af4303d93f72744cc4c3c815ed2c9a0"}
|
||||
{"datasource":"eosc________::cesnet::cesnet.metacentrum_cloud","master":"10|eosc________::cebfaa2d0b93502d56a8fbeb6b66cfbe"}
|
||||
{"datasource":"eosc________::cesnet::cesnet.object_based_storage","master":"10|eosc________::1c5b55339bb86ff997a256d42d7be4b0"}
|
||||
{"datasource":"eosc________::cesnet::cesnet.umsa_-_untargeted_mass_spectrometry_data_analysis","master":"10|eosc________::d928868211759352cb1604713e0347ec"}
|
||||
{"datasource":"eosc________::cessda-eric::cessda-eric.cessda_data_catalogue","master":"10|fairsharing_::936824c0191953647ec609b4f49bc964"}
|
||||
{"datasource":"eosc________::cessda-eric::cessda-eric.data_management_expert_guide_dmeg","master":"10|eosc________::22c14aaf31fc64424fa97adffe6380b9"}
|
||||
{"datasource":"eosc________::cessda-eric::cessda-eric.elsst__european_language_social_science_thesaurus","master":"10|eosc________::5b30e057381cf0200dc2cdc7b562f570"}
|
||||
{"datasource":"eosc________::cines::cines.etdr","master":"10|eosc________::3b7f7d6aafb0154025330183d59ce670"}
|
||||
{"datasource":"eosc________::clarin-eric::clarin-eric.language_resource_switchboard","master":"10|eosc________::3531aa80dbe2b1018133b510a933de40"}
|
||||
{"datasource":"eosc________::clarin-eric::clarin-eric.virtual_collection_registry","master":"10|eosc________::454e4f7f9f53d9dacf9dc3ba27902f16"}
|
||||
{"datasource":"eosc________::clarin-eric::clarin-eric.virtual_language_observatory","master":"10|eosc________::4db0c877190783461728c6714cb66cbc"}
|
||||
{"datasource":"eosc________::cloudferro::cloudferro.data_collections_catalog","master":"10|eosc________::eba1540eb9e87231fdf366eb23d16c3a"}
|
||||
{"datasource":"eosc________::cloudferro::cloudferro.data_related_services_-_eo_browser","master":"10|eosc________::c24ebda20485c08293b72561ee3c634b"}
|
||||
{"datasource":"eosc________::cloudferro::cloudferro.data_related_services_-_eo_finder","master":"10|eosc________::3d68186239b6c0f0d677ff55d9b549d1"}
|
||||
{"datasource":"eosc________::cloudferro::cloudferro.infrastructure","master":"10|eosc________::ac7e3c0151fa3f11d3a7739dddaa3416"}
|
||||
{"datasource":"eosc________::cmcc::cmcc.enes_data_space","master":"10|eosc________::2925e4df4147819e5b5d2f886f40e3a2"}
|
||||
{"datasource":"eosc________::cnb-csic::cnb-csic.3dbionotes-ws_web_application_to_annotate_biochemical_and_biomedical_information_onto_structural_models","master":"10|eosc________::77fe0a66415f2440ab60d47dcee678a5"}
|
||||
{"datasource":"eosc________::cnb-csic::cnb-csic.scipioncloud","master":"10|eosc________::7f09b7fee99363813f24aca9ebdecf61"}
|
||||
{"datasource":"eosc________::cnr-iia::cnr-iia.geo_dab","master":"10|eosc________::108b0148352c15ee1ce935699e09add3"}
|
||||
{"datasource":"eosc________::collabwith::collabwith.collabwith_marketplace","master":"10|eosc________::894a0ffa7768b228c1b46793670c85e6"}
|
||||
{"datasource":"eosc________::coronis_computing_sl::coronis_computing_sl.uw-mos","master":"10|eosc________::9cbf0a75d817e291771b8bce6440f5f4"}
|
||||
{"datasource":"eosc________::coronis_computing_sl::coronis_computing_sl.vd-maps","master":"10|eosc________::b5af1514b39d8e021554a73076a694d9"}
|
||||
{"datasource":"eosc________::creaf::creaf.nimmbus_geospatial_user_feedback","master":"10|eosc________::86c325db16448760b3390dda7e46631a"}
|
||||
{"datasource":"eosc________::creatis::creatis.virtual_imaging_platform","master":"10|eosc________::01a45ac2677f89414af91e651735846d"}
|
||||
{"datasource":"eosc________::cs_group::cs_group.ai4geo_engine","master":"10|eosc________::c61211295d27e5e08f4c64f3e3098294"}
|
||||
{"datasource":"eosc________::csc-fi::csc-fi.chipster","master":"10|eosc________::61549f785a2c93939be011b0453a6981"}
|
||||
{"datasource":"eosc________::csc-fi::csc-fi.cpouta","master":"10|eosc________::d71c843b4e00eff17db07bf9d10769f9"}
|
||||
{"datasource":"eosc________::csc-fi::csc-fi.csc_epouta","master":"10|eosc________::4493bd6a93e5b8465fda8cf7ab2dfdea"}
|
||||
{"datasource":"eosc________::csc-fi::csc-fi.rahti_container_cloud","master":"10|eosc________::cc60eb9fc76f9598ee581eff0792573b"}
|
||||
{"datasource":"eosc________::cscs::cscs.object_storage","master":"10|eosc________::3da6a817fe85ef43f7d97ef07e467d45"}
|
||||
{"datasource":"eosc________::csi_piemonte::csi_piemonte.nivola2","master":"10|eosc________::ac6483be3e556c8652b8595680795983"}
|
||||
{"datasource":"eosc________::csic::csic.csic_cloud_infrastructure","master":"10|eosc________::05ea2eb193382e22f32b32fbe9a4d961"}
|
||||
{"datasource":"eosc________::cyberbotics::cyberbotics.robotbenchmark","master":"10|eosc________::27ee094c68b7a758ca2915aca6215a1d"}
|
||||
{"datasource":"eosc________::d4science::d4science.alien_and_invasive_species_vre","master":"10|eosc________::b5cff6d55dcf6c20e78a0f1f847b3005"}
|
||||
{"datasource":"eosc________::d4science::d4science.rprototypinglab_virtual_research_environment","master":"10|eosc________::8073ab0dbb22dc3b9f17627a7b25903f"}
|
||||
{"datasource":"eosc________::d4science::d4science.visual_media_service_vre","master":"10|eosc________::eabf459f53c2bfe6247f006fcc0f4db7"}
|
||||
{"datasource":"eosc________::dariah_eric::dariah_eric.dariah-campus","master":"10|eosc________::9c63075d6642a2d269776c2b90c2f976"}
|
||||
{"datasource":"eosc________::dariah_eric::dariah_eric.ssh_open_marketplace","master":"10|eosc________::91fe494a3c21805febb03353152f1212"}
|
||||
{"datasource":"eosc________::datacite::datacite.datacite_doi_registration_service","master":"10|eosc________::c146a470f01ee7ded3b55acda9362e7f"}
|
||||
{"datasource":"eosc________::dcc-uk::dcc-uk.dmponline","master":"10|eosc________::fe480090e0739dab86b24a11177eeffd"}
|
||||
{"datasource":"eosc________::denbi::denbi.cloud","master":"10|eosc________::59399e560967488c0ae0329e0d37f5b4"}
|
||||
{"datasource":"eosc________::desy::desy.pan_data","master":"10|eosc________::52008fe404bf2e939140109162f9233f"}
|
||||
{"datasource":"eosc________::desy::desy.pan_faas","master":"10|eosc________::026939c4b12d7d71e2b05bc5acde804e"}
|
||||
{"datasource":"eosc________::desy::desy.pan_gitlab","master":"10|eosc________::f13cefc9f3207cb82f3285b05f190f78"}
|
||||
{"datasource":"eosc________::desy::desy.pan_notebook","master":"10|eosc________::500fe61cce6562797cd43797aab12be5"}
|
||||
{"datasource":"eosc________::digitalglobe::digitalglobe.earthwatch","master":"10|eosc________::020d905260267066c1926f526bb86f30"}
|
||||
{"datasource":"eosc________::dkrz::dkrz.enes_climate_analytics_service","master":"10|eosc________::1d7a1fea6694d15d9e67f08e1e77082b"}
|
||||
{"datasource":"eosc________::doabf::doabf.operas_certification","master":"10|eosc________::79b9748edeffb872a28660a9d238dcec"}
|
||||
{"datasource":"eosc________::ds-wizard::ds-wizard.data_stewardship_wizard","master":"10|eosc________::fc6bad963e15e218efc62c7befd122af"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.check-in","master":"10|eosc________::baa3c497b9499b3d8c87ea8d2b37a44f"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.cloud_compute","master":"10|eosc________::b1179384a336d409fc909fe3711d3d1f"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.cloud_container_compute","master":"10|eosc________::a66bb1ac56a3bcf2c24b0ef85ed2bdfc"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.data_transfer","master":"10|eosc________::6c0bf38e885c42161b88093517f6cd3e"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.egi_datahub","master":"10|eosc________::5a260dae80795584ac08df133adb1fad"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.fitsm_training","master":"10|eosc________::927b4455c0a21692d2a9f634bccd8309"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.high-throughput_compute","master":"10|eosc________::e27ec11ac7b7d6ffbbce668b7d1f81d5"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.iso_27001_training","master":"10|eosc________::98a6655b6421166c5c29baa2f5815de3"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.notebook","master":"10|eosc________::1d37909a6a31147a09ee9f2e579a6706"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.online_storage","master":"10|eosc________::d8b94284582d3e2185a782ae2ba42186"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.training_infrastructure","master":"10|eosc________::38cdb8e44638f2e561c466f0dd26cf96"}
|
||||
{"datasource":"eosc________::egi-fed::egi-fed.workload_manager","master":"10|eosc________::ff515071cd88afb40599edcb6637f47e"}
|
||||
{"datasource":"eosc________::ehri::ehri.begrenzte_flucht","master":"10|eosc________::01d1445605fc1d25e6a7f21ba995d724"}
|
||||
{"datasource":"eosc________::ehri::ehri.diplomatic_reports","master":"10|eosc________::11714353d2ed069ca30b177d4b4d9e0f"}
|
||||
{"datasource":"eosc________::ehri::ehri.early_holocaust_testimony","master":"10|eosc________::0a4974b0bb295b98f88cb7c793f91c17"}
|
||||
{"datasource":"eosc________::ehri::ehri.ehri_document_blog","master":"10|eosc________::fb9291f8dac099986eafe957b169ed97"}
|
||||
{"datasource":"eosc________::ehri::ehri.international_research_portal_for_records_related_to_nazi-era_cultural_property","master":"10|eosc________::01c5b10e57f9cbb4f3125f427375487e"}
|
||||
{"datasource":"eosc________::ehri::ehri.the_ehri_portal","master":"10|eosc________::6ad4d5352fd192b5fecd76bbd7a7e8b7"}
|
||||
{"datasource":"eosc________::eiscat::eiscat.eiscat_data_access_portal","master":"10|eosc________::0f06a55c8333ae4d197c1d263b2be6ba"}
|
||||
{"datasource":"eosc________::elixir-italy::elixir-italy.laniakea_recas","master":"10|eosc________::01e84abe377339ea57ed521ac39130e9"}
|
||||
{"datasource":"eosc________::elixir-uk::elixir-uk.cyverse_uk","master":"10|eosc________::6a6a05847befec6587bef7673112f5e5"}
|
||||
{"datasource":"eosc________::elixir-uk::elixir-uk.workflowhub","master":"10|fairsharing_::c8cd63e1bf13c5016881652983fb615a"}
|
||||
{"datasource":"eosc________::elsevier::elsevier.digital_commons","master":"10|eosc________::67d38b6a1f43184676b113369554676b"}
|
||||
{"datasource":"eosc________::embl-ebi::embl-ebi.embassy_cloud","master":"10|eosc________::7f8b24797312b851916ee1be0f836de6"}
|
||||
{"datasource":"eosc________::embl-ebi::embl-ebi.identifiersorg","master":"10|eosc________::564e9f467aad251143e12e2e6ec19768"}
|
||||
{"datasource":"eosc________::embl-ebi::embl-ebi.identifiersorg_central_registry","master":"10|eosc________::441caf7eaa4a6602aceae36b2697b924"}
|
||||
{"datasource":"eosc________::embl-ebi::embl-ebi.identifiersorg_resolution_services","master":"10|eosc________::8df6273a1cb2289dbbe3a4b5fe05aa53"}
|
||||
{"datasource":"eosc________::emso_eric::emso_eric.emso_eric_data_portal","master":"10|eosc________::94a41630bd9ddea4a88ec0bfba1b9d95"}
|
||||
{"datasource":"eosc________::enermaps::enermaps.enermaps_data_management_tool","master":"10|eosc________::11496ee8a69b4b955200da7f2c12fe3b"}
|
||||
{"datasource":"eosc________::enhancer::enhancer.openrdmeu","master":"10|eosc________::04820bece2545235144903dec056bcbd"}
|
||||
{"datasource":"eosc________::enhancer::enhancer.swiss_escience_grid_certificates","master":"10|eosc________::4968516eb3b1ad6d883e74a84827e963"}
|
||||
{"datasource":"eosc________::eodc::eodc.data_catalogue_service","master":"10|eosc________::21c44a2b6946e02300dbe36a8edec650"}
|
||||
{"datasource":"eosc________::eodc::eodc.jupyterhub_for_global_copernicus_data","master":"10|eosc________::f99ccd68bf3de6a0a3b0db3441a41bbd"}
|
||||
{"datasource":"eosc________::eosc-dih::eosc-dih.piloting_and_co-design_of_the_business_pilots","master":"10|eosc________::178f3e4832afe9e477d761d2f3d95f85"}
|
||||
{"datasource":"eosc________::eox::eox.edc_eoxhub_workspace","master":"10|eosc________::d71468878e069cf484fc988d276c6d9a"}
|
||||
{"datasource":"eosc________::esa-int::esa-int.geoss_web_portal","master":"10|eosc________::d7bac1ce234c20e3ab43a74eefa34782"}
|
||||
{"datasource":"eosc________::esrf::esrf.the_european_synchrotron_radiation_facility_data_portal","master":"10|fairsharing_::2996962656838a97af4c5f926fe6f1b0"}
|
||||
{"datasource":"eosc________::ess::ess.pan-learning-org","master":"10|eosc________::1298286d3a7cc48fa525b118218c7836"}
|
||||
{"datasource":"eosc________::ess_eric::ess_eric.european_social_survey_ess_as_a_service","master":"10|eosc________::faa60b95b602690861be9305812a5c07"}
|
||||
{"datasource":"eosc________::eudat::eudat.b2access","master":"10|eosc________::4dee0695b946b545dc8d52c56598fbbf"}
|
||||
{"datasource":"eosc________::eudat::eudat.b2drop","master":"10|eosc________::4c6a514f1392ac1d159214e61785849a"}
|
||||
{"datasource":"eosc________::eudat::eudat.b2find","master":"10|eosc________::6069f46dfcc89ccf8043581c9034558e"}
|
||||
{"datasource":"eosc________::eudat::eudat.b2handle","master":"10|eosc________::a23be7f6265fd1ad957eed16b5c8bdc4"}
|
||||
{"datasource":"eosc________::eudat::eudat.b2note","master":"10|eosc________::dfd1d6816b4182e25e84f6cf10d108ed"}
|
||||
{"datasource":"eosc________::eudat::eudat.b2safe","master":"10|re3data_____::a632666349a0bb9a36096c9e152d34cc"}
|
||||
{"datasource":"eosc________::eudat::eudat.b2share","master":"10|eosc________::f959324bdb00f052d547b95da205062f"}
|
||||
{"datasource":"eosc________::eurac::eurac.edp-portal_-_metadata_catalogue_of_eurac_research","master":"10|eosc________::274d73061a925a29d8743b3e1022d0dc"}
|
||||
{"datasource":"eosc________::europeana::europeana.europeana_apis","master":"10|eosc________::91de8c90ebde3dc1c8d41f339fe3fac7"}
|
||||
{"datasource":"eosc________::exoscale::exoscale.european_cloud_hosting","master":"10|eosc________::12b7e6fef784084b817a42f2990fe3f2"}
|
||||
{"datasource":"eosc________::expertai::expertai.document_enrichment_api","master":"10|eosc________::6812b902471f12506c8e6441195aff57"}
|
||||
{"datasource":"eosc________::expertai::expertai.recommender_api","master":"10|eosc________::c40634543c1217686f0a8f5e8592d100"}
|
||||
{"datasource":"eosc________::expertai::expertai.search_api","master":"10|eosc________::79440bc8082949f56cbabef796cec7f1"}
|
||||
{"datasource":"eosc________::fairdi::fairdi.nomad_repository","master":"10|eosc________::b9000c95a6fde9930ae74f4071e14cb2"}
|
||||
{"datasource":"eosc________::figshare::figshare.figshare","master":"10|eosc________::5e6bd062c6b85e2d176b2e61636b8971"}
|
||||
{"datasource":"eosc________::forschungsdaten::forschungsdaten.forschungsdateninfo","master":"10|eosc________::c9185fdb68af7d515e56054da546bc94"}
|
||||
{"datasource":"eosc________::forth::forth.openbioeu","master":"10|eosc________::2db71171816e994877fb960b9fcd89f2"}
|
||||
{"datasource":"eosc________::fssda::fssda.data_service_portal_aila","master":"10|eosc________::ef1f75ea6d244563bc6cfb0c3d3affa4"}
|
||||
{"datasource":"eosc________::fssda::fssda.kuha2_metadata_server","master":"10|eosc________::b6af28d7c292dbbe816cd0d6a9a66f16"}
|
||||
{"datasource":"eosc________::gbif-es::gbif-es.collections_registry","master":"10|eosc________::ac6da0cfbd07f8605c57a799c41dc947"}
|
||||
{"datasource":"eosc________::gbif-es::gbif-es.e-Learning_platform","master":"10|eosc________::9059ca88ca8292881ffba9ad8d943d04"}
|
||||
{"datasource":"eosc________::gbif-es::gbif-es.images_portal","master":"10|eosc________::6991e5dd230956156129669934798cd8"}
|
||||
{"datasource":"eosc________::gbif-es::gbif-es.occurrence_records","master":"10|eosc________::948a9a53e2a9c94d32f99785eccff662"}
|
||||
{"datasource":"eosc________::gbif-es::gbif-es.regions_module","master":"10|eosc________::11189c308854c8d8113161edc7fbd3de"}
|
||||
{"datasource":"eosc________::gbif-es::gbif-es.spatial_portal","master":"10|eosc________::665f73f5e4b6a3693fec9426a6ce6ae8"}
|
||||
{"datasource":"eosc________::gbif-es::gbif-es.species_portal","master":"10|eosc________::9fe2f2ccb3d17452bd6e7424f60340ce"}
|
||||
{"datasource":"eosc________::gbif::gbif.gbif_species_occurrence_data","master":"10|fairsharing_::6e5025ccc7d638ae4e724da8938450a6"}
|
||||
{"datasource":"eosc________::gbif_portugal::gbif_portugal.gbif_portugal_occurrence_records","master":"10|eosc________::fcd4f4efdecb4e675fdee043043f69fc"}
|
||||
{"datasource":"eosc________::gcc_umcg::gcc_umcg.molgenis","master":"10|eosc________::7f255ebbb3715f258e8d7c470209e675"}
|
||||
{"datasource":"eosc________::geant::geant.clouds_service_infrastructure_as_a_service","master":"10|eosc________::7debc69506a8019515d350707e8c82d7"}
|
||||
{"datasource":"eosc________::geant::geant.edugain","master":"10|eosc________::3ded12106e7e870242f7ec39345b3b97"}
|
||||
{"datasource":"eosc________::geant::geant.edumeet_-_webbased_videoconferencing_platform","master":"10|eosc________::dcf8b262f7f61d44eedf409a29d30abc"}
|
||||
{"datasource":"eosc________::geant::geant.eduroam","master":"10|eosc________::e7fd04aab1f224aaa2b5d3478694748b"}
|
||||
{"datasource":"eosc________::geant::geant.eduteams","master":"10|eosc________::f3b04fa1e741f17a842fcbea35e04318"}
|
||||
{"datasource":"eosc________::geant::geant.eduvpn_-_access_your_institutes_network_or_the_internet_using_an_encrypted_connection","master":"10|eosc________::aeb7c573f2742ec5ef8b7332b6b614cb"}
|
||||
{"datasource":"eosc________::geant::geant.inacademia","master":"10|eosc________::26cb3be539a5bbb25533d3b1bdb9d6aa"}
|
||||
{"datasource":"eosc________::geant::geant.ip","master":"10|eosc________::59cd8dbce2703f4eea69a54a959aae89"}
|
||||
{"datasource":"eosc________::geant::geant.l3vpn","master":"10|eosc________::1e70cff61071ce42baffa6dafaf3165e"}
|
||||
{"datasource":"eosc________::geant::geant.lambda","master":"10|eosc________::20a8114b376bf4c455c034b7b4513805"}
|
||||
{"datasource":"eosc________::geant::geant.mdvpn","master":"10|eosc________::54fbf0ac4e42a2ce51e400d9783b51ba"}
|
||||
{"datasource":"eosc________::geant::geant.open","master":"10|eosc________::9ae24d8c63e9ff986fbd20705b334919"}
|
||||
{"datasource":"eosc________::geant::geant.perfsonar","master":"10|eosc________::1bdda4f743377914fabd0f365a8b6ee2"}
|
||||
{"datasource":"eosc________::geant::geant.plus","master":"10|eosc________::eef45e860d52aff4932f254599d5b713"}
|
||||
{"datasource":"eosc________::geant::geant.transits_training","master":"10|eosc________::831e2b596060c60d7d4bc79c200a2254"}
|
||||
{"datasource":"eosc________::geant::geant.trusted_certificate_service","master":"10|eosc________::30817adfb6c625d7fd36b657e2fabc74"}
|
||||
{"datasource":"eosc________::geant::geant.wifimon","master":"10|eosc________::6116f3b14f34658593529f6810068c4e"}
|
||||
{"datasource":"eosc________::genias::genias.e-irg_knowledge_base","master":"10|eosc________::ddc5ab67fed353917716eb2d5c86ce68"}
|
||||
{"datasource":"eosc________::gesis::gesis.doi_registration_service","master":"10|eosc________::71f37a7ebd8495a59c46e637ee5463da"}
|
||||
{"datasource":"eosc________::grnet::grnet.agora_resource_portfolio_management_tool","master":"10|eosc________::461aa754c52b7eed605f9e0955470de5"}
|
||||
{"datasource":"eosc________::grnet::grnet.argo_monitoring_engine","master":"10|eosc________::e91a3b4dfb62113b9b67b0ac97e566b4"}
|
||||
{"datasource":"eosc________::grnet::grnet.aris","master":"10|eosc________::6b381464ec768e3cf55ccacdb00b5988"}
|
||||
{"datasource":"eosc________::grnet::grnet.aris_-_archival_service","master":"10|eosc________::32158f91e33cf6fb6c63561cbc7ffd24"}
|
||||
{"datasource":"eosc________::grnet::grnet.ni4os-europe_login","master":"10|eosc________::aeaa8f7fc2948930bfa4f970cd96837e"}
|
||||
{"datasource":"eosc________::grnet::grnet.ni4os-europe_repository_service","master":"10|eosc________::d6933cb7acd6fa7a2f7a42562c432fb5"}
|
||||
{"datasource":"eosc________::grycap::grycap.elastic_cloud_compute_cluster","master":"10|eosc________::c6d3c380ce5499d8d20cc9bbeb3b43ff"}
|
||||
{"datasource":"eosc________::grycap::grycap.infrastructure_manager","master":"10|eosc________::e8a2eeb06a205c3299af49f5c233ce16"}
|
||||
{"datasource":"eosc________::grycap::grycap.saps_surface_energy_balance_automated_processing_service","master":"10|eosc________::a7ae875b2487576c35f1bc8e1c857c14"}
|
||||
{"datasource":"eosc________::hn::hn.isidore","master":"10|re3data_____::fabe5c1aaa2e2d4c847e01647b87bf60"}
|
||||
{"datasource":"eosc________::hostkey::hostkey.gpu_servers_grant_program","master":"10|eosc________::d45f87107eb536b4be97e112fac15787"}
|
||||
{"datasource":"eosc________::icos_eric::icos_eric.data_discovery_and_access_portal","master":"10|eosc________::84ada2e91828ce72fa6d02736cdd90f1"}
|
||||
{"datasource":"eosc________::ifca-csic::ifca-csic.deepaas_training_facility","master":"10|eosc________::5414e2342e67d64b11b835e7fd58869d"}
|
||||
{"datasource":"eosc________::ifca-csic::ifca-csic.ifca-csic_cloud_infrastructure","master":"10|eosc________::838e5c334e8115e4831d5f21435aa19b"}
|
||||
{"datasource":"eosc________::ifca-csic::ifca-csic.plant_classification","master":"10|eosc________::32c26f83acaef8d89cc6c7a2f8abd198"}
|
||||
{"datasource":"eosc________::ifca-csic::ifca-csic.remote_monitoring_and_smart_sensing","master":"10|eosc________::0335d29ec68ef9ebad8326cba79455f2"}
|
||||
{"datasource":"eosc________::ifin-hh::ifin-hh.cloudifin","master":"10|eosc________::04d791df0b61b0f5060f241c70924991"}
|
||||
{"datasource":"eosc________::iisas::iisas.dynamic_dns_service","master":"10|eosc________::2381e3b55d048130f2dffd437123d501"}
|
||||
{"datasource":"eosc________::iisas::iisas.fedcloudclient_egi_fedcloud_client","master":"10|eosc________::3668885b6512a039673b9f4638c88600"}
|
||||
{"datasource":"eosc________::iisas::iisas.modelling_service_for_water_supply_systems","master":"10|eosc________::b1d6d2cebddf52f6647102a30690fba9"}
|
||||
{"datasource":"eosc________::ill::ill.ill_data_portal","master":"10|eosc________::714498cf1efec13c2206db4b1e4f1c30"}
|
||||
{"datasource":"eosc________::ill::ill.panosc_software_catalogue","master":"10|eosc________::bc63c5a78abd38a7d9df043e0853a9ce"}
|
||||
{"datasource":"eosc________::inaf::inaf.space-ml_caesar_service","master":"10|eosc________::ba42c5e4332ff16c6cd28573012bc2f9"}
|
||||
{"datasource":"eosc________::inaf::inaf.space-vis_vialactea_service","master":"10|eosc________::ce2ca563bceae686b763326ed53e7b54"}
|
||||
{"datasource":"eosc________::infn::infn.dynamic_on_demand_analysis_service","master":"10|eosc________::f884894e05c5a54646f0b5715e5495d6"}
|
||||
{"datasource":"eosc________::infn::infn.fgsg_science_software_on_demand","master":"10|eosc________::452af4e76a64b6ee7e4bdc86527687f7"}
|
||||
{"datasource":"eosc________::infn::infn.indigo_identity_and_access_management","master":"10|eosc________::d23115c40a4e256725f140330d001861"}
|
||||
{"datasource":"eosc________::infn::infn.infn-cloud_object_storage_dice","master":"10|eosc________::fe0c28e8657cb84e3b775156106c03d1"}
|
||||
{"datasource":"eosc________::infn::infn.paas_orchestrator","master":"10|eosc________::146240bb16057a93e11631edee570f76"}
|
||||
{"datasource":"eosc________::infrafrontier::infrafrontier.training_in_mouse_functional_genomics","master":"10|eosc________::64d6597d10f4e617152f4a612a87eaba"}
|
||||
{"datasource":"eosc________::inria::inria.software_heritage_archive","master":"10|fairsharing_::2c758933af02c0b301906f2819ae1268"}
|
||||
{"datasource":"eosc________::jelastic::jelastic.platform-as-a-service","master":"10|eosc________::bfcae4ab00df41a3c43efbb879586e8f"}
|
||||
{"datasource":"eosc________::kit::kit.eosc-performance","master":"10|eosc________::e52ab75587c1dd98db80568197f04586"}
|
||||
{"datasource":"eosc________::kit::kit.o3as_ozone_assessment","master":"10|eosc________::aaf27a5f35a790617247abecd84b100f"}
|
||||
{"datasource":"eosc________::komanord::komanord.guardomic","master":"10|eosc________::b1e06c9d2c472e9441ee72e83a934d40"}
|
||||
{"datasource":"eosc________::lago::lago.onedatasim","master":"10|eosc________::2b2163e8b82320fed69a017a3e5fb657"}
|
||||
{"datasource":"eosc________::lifewatch-eric::lifewatch-eric.plants_identification_app","master":"10|eosc________::6fc6ed0894391496d3c4967d45933d1a"}
|
||||
{"datasource":"eosc________::lindatclariah-cz::lindatclariah-cz.elixirfm","master":"10|eosc________::6dd7c323776a028cef0619cb34bdf48c"}
|
||||
{"datasource":"eosc________::lindatclariah-cz::lindatclariah-cz.ker_-_keyword_extractor","master":"10|eosc________::09915f038900aa43cb0c76aa89f10cda"}
|
||||
{"datasource":"eosc________::lindatclariah-cz::lindatclariah-cz.lindatclariah-cz_repository","master":"10|eosc________::3daee6a29fb1d9a0f624cdd5973c33ea"}
|
||||
{"datasource":"eosc________::lindatclariah-cz::lindatclariah-cz.machine_translation","master":"10|eosc________::3ae4551729381cfd03c433fb0de0c971"}
|
||||
{"datasource":"eosc________::lindatclariah-cz::lindatclariah-cz.morphodita","master":"10|eosc________::f2ceebdc1a41d65504ff27f7297c833b"}
|
||||
{"datasource":"eosc________::lindatclariah-cz::lindatclariah-cz.nametag","master":"10|eosc________::71e3226e7a868e2215335ffb29073285"}
|
||||
{"datasource":"eosc________::lindatclariah-cz::lindatclariah-cz.udpipe_tool_for_lemmatization_morphological_analysis_pos_tagging_and_dependency_parsing_in_multiple_languages","master":"10|eosc________::2dfc64c2951d9be3f1e2b576633ea425"}
|
||||
{"datasource":"eosc________::lnec::lnec-pt.opencoasts_portal","master":"10|eosc________::7e99655aeda0b5f06efb3eea424dff54"}
|
||||
{"datasource":"eosc________::lnec::lnec.worsica_-_water_monitoring_sentinel_cloud_platform","master":"10|eosc________::c2f55ab774c3cbbd9a330eebaa74dc36"}
|
||||
{"datasource":"eosc________::materialscloud::materialscloud.aiiDA_lab","master":"10|eosc________::dfd970a812cf2e0298eb28c681bc109f"}
|
||||
{"datasource":"eosc________::materialscloud::materialscloud.materials_cloud_archive","master":"10|fairsharing_::a431d70133ef6cf688bc4f6093922b48"}
|
||||
{"datasource":"eosc________::meeo::meeo.adam_platform","master":"10|eosc________::b17fedb87dd9985b6a5e51db593446d6"}
|
||||
{"datasource":"eosc________::meeo::meeo.adam_space","master":"10|eosc________::24bfbca4cf4fedc5a4a662fe67a30d7e"}
|
||||
{"datasource":"eosc________::mobile_observation_integration_service::mobile_observation_integration_service.dark_sky_meter_datasource","master":"10|eosc________::160638e73224aeb7e4f98fd237672919"}
|
||||
{"datasource":"eosc________::msw::msw.polaris_os","master":"10|eosc________::12348ba5b2c5902fd400cb3f1ab773ee"}
|
||||
{"datasource":"eosc________::obp::obp.thoth","master":"10|eosc________::680198ec3f51a744de8a7603d542a0e1"}
|
||||
{"datasource":"eosc________::openaire::openaire.amnesia","master":"10|eosc________::ac57e2dd5b3ee01909d7a592523bb96f"}
|
||||
{"datasource":"eosc________::openaire::openaire.argos","master":"10|eosc________::92145beb3257af0510ee61ef10d16870"}
|
||||
{"datasource":"eosc________::openaire::openaire.broker","master":"10|eosc________::c8c6e8d211d6df4ee8a187fa1134bd92"}
|
||||
{"datasource":"eosc________::openaire::openaire.data_provider_dashboard","master":"10|eosc________::809d4c77a7acf9ac0cc2990d4264ae51"}
|
||||
{"datasource":"eosc________::openaire::openaire.digital_humanities_and_cultural_heritage_openaire_community_gateway","master":"10|eosc________::b9110e9735dd467abc969fe8e2f1efa3"}
|
||||
{"datasource":"eosc________::openaire::openaire.discovery_portal","master":"10|eosc________::992052173b689c8cea94e8e8d99f0238"}
|
||||
{"datasource":"eosc________::openaire::openaire.european_marine_science_openaire_dashboard","master":"10|eosc________::950a99851df85c90ec2e933e1d55e164"}
|
||||
{"datasource":"eosc________::openaire::openaire.funder_dashboard","master":"10|eosc________::196eea80ab9d73766cd2e8b6ab85872f"}
|
||||
{"datasource":"eosc________::openaire::openaire.graph","master":"10|eosc________::c122caed52a88b57732b814a74141000"}
|
||||
{"datasource":"eosc________::openaire::openaire.greek_sustainable_development_solutions_network_sdsn_openaire_dashboard","master":"10|eosc________::8100e41e3a5b18170bc5ede2cc393331"}
|
||||
{"datasource":"eosc________::openaire::openaire.inference","master":"10|eosc________::c491811e9a6afa69cdcab0f92fca6f7b"}
|
||||
{"datasource":"eosc________::openaire::openaire.neuroinformatics_openaire_dashboard","master":"10|eosc________::6e3adcce4d0d4229a9749584dfd5e7a8"}
|
||||
{"datasource":"eosc________::openaire::openaire.open_science_helpdesk","master":"10|eosc________::d66db88d4c6c354fe7ebcd4c3dce334e"}
|
||||
{"datasource":"eosc________::openaire::openaire.open_science_observatory","master":"10|eosc________::441ee64860eb79808b7cf0bb08262be6"}
|
||||
{"datasource":"eosc________::openaire::openaire.open_science_training","master":"10|eosc________::99847506cdff50afa4945d60a9661ea3"}
|
||||
{"datasource":"eosc________::openaire::openaire.openaire_login","master":"10|eosc________::818973a9375c0fa545499e1bb9ad0ab2"}
|
||||
{"datasource":"eosc________::openaire::openaire.openapc","master":"10|eosc________::a28cc193bc938573e892b8aad0017702"}
|
||||
{"datasource":"eosc________::openaire::openaire.research_community_dashboard","master":"10|eosc________::e1a866322f76407fb161a253dc5b539c"}
|
||||
{"datasource":"eosc________::openaire::openaire.scholexplorer","master":"10|eosc________::6b34adede04121175566ef8c70f1e520"}
|
||||
{"datasource":"eosc________::openaire::openaire.technical_support_towards_openaire_compliance","master":"10|eosc________::cdb8e94b386f9b6780a47194bd1bc7f7"}
|
||||
{"datasource":"eosc________::openaire::openaire.topos_observatory_for_organisations","master":"10|eosc________::a7d2b95257273b5ea3f3a23fd8a60d48"}
|
||||
{"datasource":"eosc________::openaire::openaire.usage_statistics","master":"10|eosc________::8aa345dc7321fc97906bf4c193a05a8f"}
|
||||
{"datasource":"eosc________::openaire::openaire.validator","master":"10|eosc________::f2c13efbaa2a33af3e4e6a54805ac379"}
|
||||
{"datasource":"eosc________::openaire::openaire.zenodo","master":"10|opendoar____::358aee4cc897452c00244351e4d91f69"}
|
||||
{"datasource":"eosc________::openbiomaps::openbiomaps.openbiomaps","master":"10|eosc________::32edf5a4edbdea0899d6ba588d083efd"}
|
||||
{"datasource":"eosc________::openedition::openedition.operas_research_for_society","master":"10|eosc________::2cdf4f57007b990b7ad7a884796f9b15"}
|
||||
{"datasource":"eosc________::openknowledgemaps::openknowledgemaps.open_knowledge_maps","master":"10|eosc________::f3819d0f8e8bf57d383b23d31a3c0099"}
|
||||
{"datasource":"eosc________::openminted::openminted.builder_of_tdm_applications","master":"10|eosc________::fdd26c19dd490260bc6c48b5813f4ac3"}
|
||||
{"datasource":"eosc________::openminted::openminted.catalogue_of_ancillary_resources","master":"10|eosc________::ab4e37e85a1975b204b66683ed3888a8"}
|
||||
{"datasource":"eosc________::openminted::openminted.catalogue_of_corpora","master":"10|eosc________::2cf744a594ea30fd31e976bffa8f2b71"}
|
||||
{"datasource":"eosc________::openminted::openminted.catalogue_of_tdm_applications","master":"10|eosc________::ef5f343c5cf11fa2d40407ec308bb34a"}
|
||||
{"datasource":"eosc________::openminted::openminted.catalogue_of_tdm_components","master":"10|eosc________::4275243a94677f19a5b74e5afb1f94cf"}
|
||||
{"datasource":"eosc________::openminted::openminted.consulting_on_licences_for_tdm","master":"10|eosc________::522000b4c90b209aa7be961449ca910f"}
|
||||
{"datasource":"eosc________::openminted::openminted.corpus_builder_for_scholarly_works","master":"10|eosc________::c64725d47af63bc2114b4214b684a392"}
|
||||
{"datasource":"eosc________::openminted::openminted.support_and_training","master":"10|eosc________::84501ff99e5e429f5f083ab8ca0be7e4"}
|
||||
{"datasource":"eosc________::openminted::openminted.tdm_applications_executor","master":"10|eosc________::e9ae655ce2ff1eaa19d0b3475ce5e660"}
|
||||
{"datasource":"eosc________::operas::operas.gotriple_discovery_platform","master":"10|eosc________::f687e24dc56aaeeb561c95865a5071cc"}
|
||||
{"datasource":"eosc________::operas::operas.operas_metrics_service","master":"10|eosc________::5960e1289f623625210f720c6173592d"}
|
||||
{"datasource":"eosc________::oslo_university::oslo_university.services_for_sensitive_data_tsd","master":"10|eosc________::743b01351510f88e24be1c700c581f68"}
|
||||
{"datasource":"eosc________::osmooc::osmooc.open_science_mooc","master":"10|eosc________::e101101e8653b6607a3ad9fea3b7d1fe"}
|
||||
{"datasource":"eosc________::oxford_e-research_centre::oxford_e-research_centre.fairsharing","master":"10|openaire____::bf5a61cc330e21ffa90eed3eb1533466"}
|
||||
{"datasource":"eosc________::phenomenal::phenomenal.phenomenal","master":"10|eosc________::79e19b14aeee0d94e9a79110a6e6ad32"}
|
||||
{"datasource":"eosc________::plantnet::plantnet.plntnet_identification_service","master":"10|eosc________::5ce89743eafdd8578591d84150f547e4"}
|
||||
{"datasource":"eosc________::prace::prace.application_enabling_support","master":"10|eosc________::c87fd74ed685337fdbcff504373fc513"}
|
||||
{"datasource":"eosc________::prace::prace.code_vault","master":"10|eosc________::dbab7889c81b59ec753040a762f6569a"}
|
||||
{"datasource":"eosc________::prace::prace.deci_access","master":"10|eosc________::c7cedb82b1beea5382601d48807212aa"}
|
||||
{"datasource":"eosc________::prace::prace.mooc","master":"10|eosc________::d6ff8167d31dccebe33a272513422b53"}
|
||||
{"datasource":"eosc________::prace::prace.patc","master":"10|eosc________::1ab1b123bd559ee7f7c7ec2ee353f0c0"}
|
||||
{"datasource":"eosc________::prace::prace.preparatory_access","master":"10|eosc________::39430adf529f1ab9e33da444b3708fcf"}
|
||||
{"datasource":"eosc________::prace::prace.project_access","master":"10|eosc________::b58e957946983b686c76ee19dfab8d70"}
|
||||
{"datasource":"eosc________::prace::prace.ptc","master":"10|eosc________::b3ca18e8884bfe2422d3723313fef79c"}
|
||||
{"datasource":"eosc________::prace::prace.seasonal_schools_and_international_summer_school","master":"10|eosc________::590c71318d9d94c32981e3195567d546"}
|
||||
{"datasource":"eosc________::prace::prace.shape","master":"10|eosc________::38b5a26f74e4808270a2d4f305d2f3a5"}
|
||||
{"datasource":"eosc________::prace::prace.training_portal","master":"10|eosc________::25966a269ab2343ac9c4d982c341d87f"}
|
||||
{"datasource":"eosc________::predictia::predictia.climadjust","master":"10|eosc________::14743eb22da3524893784faf409aac70"}
|
||||
{"datasource":"eosc________::psi::psi.psi_public_data_repository","master":"10|re3data_____::1e55174ff77ed2d804871281201dbb50"}
|
||||
{"datasource":"eosc________::psi::psi.remote_desktop_service","master":"10|eosc________::c82e26eb6e65d008de03b349dffc11fc"}
|
||||
{"datasource":"eosc________::psnc::psnc.rohub","master":"10|eosc________::c87f08707b5235172e85b374e39a82dc"}
|
||||
{"datasource":"eosc________::psnc::psnc.symbiote","master":"10|eosc________::ef0cd965a0d0a3df80ecfae4b3b08aad"}
|
||||
{"datasource":"eosc________::rasdaman::rasdaman.datacube","master":"10|eosc________::bb1678f7b15d8c15fde6e240a4f95f93"}
|
||||
{"datasource":"eosc________::rbi::rbi.dariah_science_gateway","master":"10|eosc________::b51b448421d926293b3781f4ac90f4f4"}
|
||||
{"datasource":"eosc________::readcoop::readcoop.transkribus","master":"10|eosc________::a80411026809e6eaa896439e1b9764f4"}
|
||||
{"datasource":"eosc________::rli::rli.open_energy_platform","master":"10|fairsharing_::0cbed40c0d920b94126eaf5e707be1f5"}
|
||||
{"datasource":"eosc________::ror-org::ror-org.identifier","master":"10|eosc________::6fe92c2346db22322ddf6b677d449b0e"}
|
||||
{"datasource":"eosc________::sciences_po::sciences_po.ethnic_and_migrant_minority_survey_registry","master":"10|eosc________::0cde986dc2bf015912e407f0f83ee402"}
|
||||
{"datasource":"eosc________::sciences_po::sciences_po.wpss_for_ess","master":"10|eosc________::9a5bb11c495443aad944b04f5fcb5c07"}
|
||||
{"datasource":"eosc________::scigne::scigne.cloud_compute","master":"10|eosc________::7c63e3284c36b5977c553192dce506b3"}
|
||||
{"datasource":"eosc________::scipedia::scipedia.scipedia","master":"10|eosc________::850abcddc76069f2c3c1cf77ad4beec9"}
|
||||
{"datasource":"eosc________::scipedia::scipedia.topos_for_individuals","master":"10|eosc________::e6214b58f39a25b53eecda340f95ee7b"}
|
||||
{"datasource":"eosc________::seadatanet::seadatanet.doi_minting_service","master":"10|eosc________::f87f72147a3c82c4f77684e40101e90e"}
|
||||
{"datasource":"eosc________::seadatanet::seadatanet.european_directory_of_marine_environmental_data_edmed","master":"10|eosc________::d79706389f0b864306feb47aac1f5766"}
|
||||
{"datasource":"eosc________::seadatanet::seadatanet.european_directory_of_marine_environmental_research_projects","master":"10|eosc________::baa9d2d6cdd8507fcbf76242e4c25d76"}
|
||||
{"datasource":"eosc________::seadatanet::seadatanet.european_directory_of_marine_organisations_edmo","master":"10|eosc________::5d23c66c26e0df209fc415c1e9ad0316"}
|
||||
{"datasource":"eosc________::seadatanet::seadatanet.european_directory_of_the_cruise_summary_reports_csr","master":"10|eosc________::fd70912c66037dc11f710587e281eeaf"}
|
||||
{"datasource":"eosc________::seadatanet::seadatanet.european_directory_of_the_initial_ocean-observing_systems_edios","master":"10|eosc________::846016e987d1feaf2a36083f88dba1f2"}
|
||||
{"datasource":"eosc________::seadatanet::seadatanet.seadatanet_cdi","master":"10|eosc________::36cd158d6b1bbdbfb443c68b8da00335"}
|
||||
{"datasource":"eosc________::seadatanet::seadatanet.vocabulary_services_-_underpinned_by_the_nerc_vocabulary_server_nvs","master":"10|eosc________::4416d18ec7a57e553979fbfa4d862483"}
|
||||
{"datasource":"eosc________::sinergise::sinergise.sentinel_hub","master":"10|eosc________::d36ae944fa207461bcb7b2b3a6c94de8"}
|
||||
{"datasource":"eosc________::sixsq::sixsq.nuvla_multi-cloud_application_management_platform","master":"10|eosc________::38438cc3190a3815359efb53b9dd98eb"}
|
||||
{"datasource":"eosc________::sks::sks.digital_production_for_conferences_workshops_roundtables_and_other_academic_and_professional_events","master":"10|eosc________::f6b51bef4a5f1478e980673339f2b2f3"}
|
||||
{"datasource":"eosc________::smartsmear::smartsmear.smartsmear","master":"10|eosc________::d17a9325ca64ffad59e04659ed5404f7"}
|
||||
{"datasource":"eosc________::sobigdata::sobigdata.tagme","master":"10|eosc________::0c3b8b80d9d6d38effd28bfa6a140a12"}
|
||||
{"datasource":"eosc________::suite5::suite5.furniture_enterprise_analytics","master":"10|eosc________::29ed60070bd91bdc19c9f278b104465c"}
|
||||
{"datasource":"eosc________::switch::switch.switchengines","master":"10|eosc________::d4143918a810115206640cfeb11e0ba6"}
|
||||
{"datasource":"eosc________::t-systems::t-systems.open_telekom_cloud","master":"10|eosc________::c489ef6564a47922359f7b833919d642"}
|
||||
{"datasource":"eosc________::terradue::terradue.eo_services_for_earthquake_response_and_landslides_analysis","master":"10|eosc________::ab3140d145deb5fdb02eeefbc5ebc471"}
|
||||
{"datasource":"eosc________::tib::tib.open_research_knowledge_graph_orkg","master":"10|eosc________::ed6bd695c7a99297f360bc2fc915be90"}
|
||||
{"datasource":"eosc________::ubora::ubora.ubora","master":"10|eosc________::bacf05aff1c6dcf3133a0352d5eb14c4"}
|
||||
{"datasource":"eosc________::ubora::ubora.ubora_e-platform","master":"10|eosc________::947fde33605ba61216a07135ee1551f2"}
|
||||
{"datasource":"eosc________::ugr-es::ugr-es.glacier_lagoons_of_sierra_nevada","master":"10|eosc________::8a966c0efca298ad5ec130d323c29935"}
|
||||
{"datasource":"eosc________::uit::uit.dataverseno","master":"10|eosc________::92b76aa81a5b8443fcf17d3ae3c34211"}
|
||||
{"datasource":"eosc________::uit::uit.the_troms_repository_of_language_and_linguistics_trolling","master":"10|fairsharing_::a36b0dcd1e6384abc0e1867860ad3ee3"}
|
||||
{"datasource":"eosc________::ukaea::ukaea.prominence","master":"10|eosc________::06ce999c7cf77ea5a65f87bb563cd625"}
|
||||
{"datasource":"eosc________::ukri_-_stfc::ukri_-_stfc.cvmfs_test","master":"10|eosc________::53aaa0a24d0edc47c23e722135c29dde"}
|
||||
{"datasource":"eosc________::ukri_-_stfc::ukri_-_stfc.rucio","master":"10|eosc________::c19a8251c6bf563365c555572ace903e"}
|
||||
{"datasource":"eosc________::uni-freiburg::uni-freiburg.european_galaxy_server","master":"10|eosc________::cc00fc2385475b80accec001dfb85efb"}
|
||||
{"datasource":"eosc________::unibo::unibo.opencitations","master":"10|eosc________::573c29ecaf76ab961743bfc8a7d911ec"}
|
||||
{"datasource":"eosc________::unifl::unifl.snap4city","master":"10|eosc________::9a55c40c3c082b7a8352ecbc56a87996"}
|
||||
{"datasource":"eosc________::unige::unige.astronomical_online_data_analysis_astrooda","master":"10|eosc________::63f6119d3170cccf979daada3c5b524e"}
|
||||
{"datasource":"eosc________::unitartu::unitartu.ut.rocket","master":"10|eosc________::da3450589a9d56212963b20cf729974c"}
|
||||
{"datasource":"eosc________::upv-es::upv-es.lemonade","master":"10|eosc________::afdd227beada491f77d7944d7a0eafc9"}
|
||||
{"datasource":"eosc________::vamdc::vamdc.portal","master":"10|eosc________::4dab2bb6e9a9ad223cd63c62c2ea804e"}
|
||||
{"datasource":"eosc________::vamdc::vamdc.query_store","master":"10|eosc________::33f18bfe544c3c84ac28be6a3292d166"}
|
||||
{"datasource":"eosc________::vamdc::vamdc.species_database","master":"10|eosc________::ae3587682dec5663a1b3b625036d15d0"}
|
||||
{"datasource":"eosc________::vilnius-university::vilnius-university.the_national_open_access_research_data_archive_midas","master":"10|eosc________::4987ee0d071f68cf88f6b1a834b6733f"}
|
||||
{"datasource":"eosc________::wenmr::wenmr.amber-based_portal_server_for_nmr_structures_amps-nmr","master":"10|eosc________::c6cca9747ef3ce296bd626bcbc4e480a"}
|
||||
{"datasource":"eosc________::wenmr::wenmr.disvis_web_portal","master":"10|eosc________::2539ec693b683284c4e243b969ae3fc0"}
|
||||
{"datasource":"eosc________::wenmr::wenmr.fanten_finding_anisotropy_tensor","master":"10|eosc________::99c793e3f3b856c48eaaa36682038b28"}
|
||||
{"datasource":"eosc________::wenmr::wenmr.haddock24_web_portal","master":"10|eosc________::0f198f6a0885105809f420be23614be3"}
|
||||
{"datasource":"eosc________::wenmr::wenmr.metalpdb","master":"10|eosc________::84676bc3d2ce17de70309dc58f428296"}
|
||||
{"datasource":"eosc________::wenmr::wenmr.pdb-tools_web","master":"10|eosc________::b37eed45624ac30f3476f71640e59a61"}
|
||||
{"datasource":"eosc________::wenmr::wenmr.powerfit_web_portal","master":"10|eosc________::93d4d621ed1da378c0e7dc891cefc007"}
|
||||
{"datasource":"eosc________::wenmr::wenmr.spoton","master":"10|eosc________::76e7e0552f9c6b89db94b31ddc366b9f"}
|
|
@ -275,10 +275,13 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
|
||||
res
|
||||
.add(
|
||||
getRelation(
|
||||
docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, entity, validationdDate));
|
||||
OafMapperUtils
|
||||
.getRelation(
|
||||
docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, entity, validationdDate));
|
||||
res
|
||||
.add(getRelation(projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, entity, validationdDate));
|
||||
.add(
|
||||
OafMapperUtils
|
||||
.getRelation(projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, entity, validationdDate));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -311,13 +314,16 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
final String targetId = createOpenaireId(targetType, target, true);
|
||||
rels
|
||||
.add(
|
||||
getRelation(
|
||||
entity.getId(), targetId, relType, subRelType, relClass, entity, validationdDate));
|
||||
OafMapperUtils
|
||||
.getRelation(
|
||||
entity.getId(), targetId, relType, subRelType, relClass, entity,
|
||||
validationdDate));
|
||||
rels
|
||||
.add(
|
||||
getRelation(
|
||||
targetId, entity.getId(), relType, subRelType, relClassInverse, entity,
|
||||
validationdDate));
|
||||
OafMapperUtils
|
||||
.getRelation(
|
||||
targetId, entity.getId(), relType, subRelType, relClassInverse, entity,
|
||||
validationdDate));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -325,36 +331,6 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
return rels;
|
||||
}
|
||||
|
||||
protected Relation getRelation(final String source,
|
||||
final String target,
|
||||
final String relType,
|
||||
final String subRelType,
|
||||
final String relClass,
|
||||
final OafEntity entity) {
|
||||
return getRelation(source, target, relType, subRelType, relClass, entity, null);
|
||||
}
|
||||
|
||||
protected Relation getRelation(final String source,
|
||||
final String target,
|
||||
final String relType,
|
||||
final String subRelType,
|
||||
final String relClass,
|
||||
final OafEntity entity,
|
||||
final String validationDate) {
|
||||
final Relation rel = new Relation();
|
||||
rel.setRelType(relType);
|
||||
rel.setSubRelType(subRelType);
|
||||
rel.setRelClass(relClass);
|
||||
rel.setSource(source);
|
||||
rel.setTarget(target);
|
||||
rel.setCollectedfrom(entity.getCollectedfrom());
|
||||
rel.setDataInfo(entity.getDataInfo());
|
||||
rel.setLastupdatetimestamp(entity.getLastupdatetimestamp());
|
||||
rel.setValidated(StringUtils.isNotBlank(validationDate));
|
||||
rel.setValidationDate(StringUtils.isNotBlank(validationDate) ? validationDate : null);
|
||||
return rel;
|
||||
}
|
||||
|
||||
protected abstract List<Oaf> addOtherResultRels(
|
||||
final Document doc,
|
||||
final OafEntity entity);
|
||||
|
|
|
@ -1,32 +1,7 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.raw;
|
||||
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASOURCE_ORGANIZATION;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTICIPANT;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_MERGED_IN;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PARTICIPANT;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PROVIDED_BY;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.MERGES;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORG_ORG_RELTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PARTICIPATION;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROJECT_ORGANIZATION;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVIDES;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVISION;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DATASET;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RELATIONSHIP;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
|
||||
|
||||
import java.io.Closeable;
|
||||
|
@ -45,6 +20,8 @@ import org.apache.commons.lang3.StringUtils;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.DbClient;
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||
|
@ -68,6 +45,7 @@ import eu.dnetlib.dhp.schema.oaf.Relation;
|
|||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||
|
||||
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable {
|
||||
|
@ -437,25 +415,14 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
final List<KeyValue> collectedFrom = listKeyValues(
|
||||
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
|
||||
|
||||
final Relation r1 = new Relation();
|
||||
r1.setRelType(DATASOURCE_ORGANIZATION);
|
||||
r1.setSubRelType(PROVISION);
|
||||
r1.setRelClass(IS_PROVIDED_BY);
|
||||
r1.setSource(dsId);
|
||||
r1.setTarget(orgId);
|
||||
r1.setCollectedfrom(collectedFrom);
|
||||
r1.setDataInfo(info);
|
||||
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
final Relation r1 = OafMapperUtils
|
||||
.getRelation(
|
||||
// subRelType of a datasource-organization relation is PROVISION; PRODUCES is a relClass, not a subRelType
dsId, orgId, DATASOURCE_ORGANIZATION, PROVISION, IS_PROVIDED_BY, collectedFrom, info,
|
||||
lastUpdateTimestamp);
|
||||
|
||||
final Relation r2 = new Relation();
|
||||
r2.setRelType(DATASOURCE_ORGANIZATION);
|
||||
r2.setSubRelType(PROVISION);
|
||||
r2.setRelClass(PROVIDES);
|
||||
r2.setSource(orgId);
|
||||
r2.setTarget(dsId);
|
||||
r2.setCollectedfrom(collectedFrom);
|
||||
r2.setDataInfo(info);
|
||||
r2.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
final Relation r2 = OafMapperUtils
|
||||
.getRelation(
|
||||
// subRelType of a datasource-organization relation is PROVISION; PRODUCES is a relClass, not a subRelType
orgId, dsId, DATASOURCE_ORGANIZATION, PROVISION, PROVIDES, collectedFrom, info, lastUpdateTimestamp);
|
||||
|
||||
return Arrays.asList(r1, r2);
|
||||
} catch (final Exception e) {
|
||||
|
@ -471,25 +438,20 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
final List<KeyValue> collectedFrom = listKeyValues(
|
||||
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
|
||||
|
||||
final Relation r1 = new Relation();
|
||||
r1.setRelType(PROJECT_ORGANIZATION);
|
||||
r1.setSubRelType(PARTICIPATION);
|
||||
r1.setRelClass(HAS_PARTICIPANT);
|
||||
r1.setSource(projectId);
|
||||
r1.setTarget(orgId);
|
||||
r1.setCollectedfrom(collectedFrom);
|
||||
r1.setDataInfo(info);
|
||||
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
final List<KeyValue> properties = Lists
|
||||
.newArrayList(
|
||||
keyValue("contribution", String.valueOf(rs.getDouble("contribution"))),
|
||||
keyValue("currency", rs.getString("currency")));
|
||||
|
||||
final Relation r2 = new Relation();
|
||||
r2.setRelType(PROJECT_ORGANIZATION);
|
||||
r2.setSubRelType(PARTICIPATION);
|
||||
r2.setRelClass(IS_PARTICIPANT);
|
||||
r2.setSource(orgId);
|
||||
r2.setTarget(projectId);
|
||||
r2.setCollectedfrom(collectedFrom);
|
||||
r2.setDataInfo(info);
|
||||
r2.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
final Relation r1 = OafMapperUtils
|
||||
.getRelation(
|
||||
projectId, orgId, PROJECT_ORGANIZATION, PARTICIPATION, HAS_PARTICIPANT, collectedFrom, info,
|
||||
lastUpdateTimestamp, null, properties);
|
||||
|
||||
final Relation r2 = OafMapperUtils
|
||||
.getRelation(
|
||||
orgId, projectId, PROJECT_ORGANIZATION, PARTICIPATION, IS_PARTICIPANT, collectedFrom, info,
|
||||
lastUpdateTimestamp, null, properties);
|
||||
|
||||
return Arrays.asList(r1, r2);
|
||||
} catch (final Exception e) {
|
||||
|
@ -703,25 +665,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
final List<KeyValue> collectedFrom = listKeyValues(
|
||||
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
|
||||
|
||||
final Relation r1 = new Relation();
|
||||
r1.setRelType(ORG_ORG_RELTYPE);
|
||||
r1.setSubRelType(ModelConstants.DEDUP);
|
||||
r1.setRelClass(MERGES);
|
||||
r1.setSource(orgId1);
|
||||
r1.setTarget(orgId2);
|
||||
r1.setCollectedfrom(collectedFrom);
|
||||
r1.setDataInfo(info);
|
||||
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
final Relation r1 = OafMapperUtils
|
||||
.getRelation(orgId1, orgId2, ORG_ORG_RELTYPE, DEDUP, MERGES, collectedFrom, info, lastUpdateTimestamp);
|
||||
|
||||
final Relation r2 = new Relation();
|
||||
r2.setRelType(ORG_ORG_RELTYPE);
|
||||
r2.setSubRelType(ModelConstants.DEDUP);
|
||||
r2.setRelClass(IS_MERGED_IN);
|
||||
r2.setSource(orgId2);
|
||||
r2.setTarget(orgId1);
|
||||
r2.setCollectedfrom(collectedFrom);
|
||||
r2.setDataInfo(info);
|
||||
r2.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
final Relation r2 = OafMapperUtils
|
||||
.getRelation(
|
||||
orgId2, orgId1, ORG_ORG_RELTYPE, DEDUP, IS_MERGED_IN, collectedFrom, info, lastUpdateTimestamp);
|
||||
return Arrays.asList(r1, r2);
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
|
@ -738,17 +687,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
final List<KeyValue> collectedFrom = listKeyValues(
|
||||
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
|
||||
|
||||
final Relation r = new Relation();
|
||||
r.setRelType(ORG_ORG_RELTYPE);
|
||||
r.setSubRelType(ModelConstants.RELATIONSHIP);
|
||||
r.setRelClass(rs.getString("type"));
|
||||
r.setSource(orgId1);
|
||||
r.setTarget(orgId2);
|
||||
r.setCollectedfrom(collectedFrom);
|
||||
r.setDataInfo(info);
|
||||
r.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
|
||||
return Arrays.asList(r);
|
||||
return Arrays
|
||||
.asList(
|
||||
OafMapperUtils
|
||||
.getRelation(
|
||||
orgId1, orgId2, ORG_ORG_RELTYPE, RELATIONSHIP, rs.getString("type"), collectedFrom, info,
|
||||
lastUpdateTimestamp));
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
@ -765,29 +709,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
final List<KeyValue> collectedFrom = listKeyValues(
|
||||
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
|
||||
|
||||
final Relation r1 = new Relation();
|
||||
r1.setRelType(ORG_ORG_RELTYPE);
|
||||
r1.setSubRelType(ModelConstants.DEDUP);
|
||||
r1.setRelClass(relClass);
|
||||
r1.setSource(orgId1);
|
||||
r1.setTarget(orgId2);
|
||||
r1.setCollectedfrom(collectedFrom);
|
||||
r1.setDataInfo(info);
|
||||
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
|
||||
// removed because there's no difference between two sides //TODO
|
||||
// final Relation r2 = new Relation();
|
||||
// r2.setRelType(ORG_ORG_RELTYPE);
|
||||
// r2.setSubRelType(ORG_ORG_SUBRELTYPE);
|
||||
// r2.setRelClass(relClass);
|
||||
// r2.setSource(orgId2);
|
||||
// r2.setTarget(orgId1);
|
||||
// r2.setCollectedfrom(collectedFrom);
|
||||
// r2.setDataInfo(info);
|
||||
// r2.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
// return Arrays.asList(r1, r2);
|
||||
|
||||
return Arrays.asList(r1);
|
||||
return Arrays
|
||||
.asList(
|
||||
OafMapperUtils
|
||||
.getRelation(
|
||||
orgId1, orgId2, ORG_ORG_RELTYPE, DEDUP, relClass, collectedFrom, info,
|
||||
lastUpdateTimestamp));
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ SELECT
|
|||
po.resporganization AS resporganization,
|
||||
po.participantnumber AS participantnumber,
|
||||
po.contribution AS contribution,
|
||||
po.currency AS currency,
|
||||
NULL AS startdate,
|
||||
NULL AS enddate,
|
||||
false AS inferred,
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
[
|
||||
{"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
|
||||
{"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source Path", "paramRequired": true},
|
||||
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the raw graph", "paramRequired": true}
|
||||
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the raw graph", "paramRequired": true},
|
||||
{"paramName":"r", "paramLongName":"filterRelation", "paramDescription": "the relation to filter", "paramRequired": false}
|
||||
]
|
|
@ -3,5 +3,7 @@
|
|||
{"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source Path", "paramRequired": true},
|
||||
{"paramName":"su", "paramLongName":"scholixUpdatePath", "paramDescription": "the scholix updated Path", "paramRequired": false},
|
||||
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the raw graph", "paramRequired": true},
|
||||
{"paramName":"o", "paramLongName":"objectType", "paramDescription": "should be scholix or Summary", "paramRequired": true}
|
||||
{"paramName":"o", "paramLongName":"objectType", "paramDescription": "should be scholix or Summary", "paramRequired": true},
|
||||
{"paramName":"mp", "paramLongName":"maxPidNumberFilter", "paramDescription": "filter max number of pids in source/target", "paramRequired": false}
|
||||
|
||||
]
|
|
@ -0,0 +1,10 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,145 @@
|
|||
<workflow-app name="Create Scholix Dump" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<description>the working dir base path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>targetPath</name>
|
||||
<description>the final graph path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>relationFilter</name>
|
||||
<description>Filter relation semantic</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>maxNumberOfPid</name>
|
||||
<description>filter relation with at least #maxNumberOfPid</description>
|
||||
</property>
|
||||
|
||||
</parameters>
|
||||
|
||||
<start to="ImportDatasetEntities"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="ImportDatasetEntities">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Import JSONRDD to Dataset kryo</name>
|
||||
<class>eu.dnetlib.dhp.sx.graph.SparkConvertRDDtoDataset</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.shuffle.partitions=3000
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--master</arg><arg>yarn</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}</arg>
|
||||
<arg>--filterRelation</arg><arg>${relationFilter}</arg>
|
||||
</spark>
|
||||
<ok to="CreateSummaries"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
<action name="CreateSummaries">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Convert Entities to summaries</name>
|
||||
<class>eu.dnetlib.dhp.sx.graph.SparkCreateSummaryObject</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.shuffle.partitions=20000
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--master</arg><arg>yarn</arg>
|
||||
<arg>--sourcePath</arg><arg>${targetPath}/entities</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}/provision/summaries</arg>
|
||||
</spark>
|
||||
<ok to="CreateScholix"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="CreateScholix">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Generate Scholix Dataset</name>
|
||||
<class>eu.dnetlib.dhp.sx.graph.SparkCreateScholix</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.shuffle.partitions=30000
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--master</arg><arg>yarn</arg>
|
||||
<arg>--summaryPath</arg><arg>${targetPath}/provision/summaries</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}/provision/scholix</arg>
|
||||
<arg>--relationPath</arg><arg>${targetPath}/relation</arg>
|
||||
</spark>
|
||||
<ok to="DropJSONPath"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="DropJSONPath">
|
||||
<fs>
|
||||
<delete path='${targetPath}/json'/>
|
||||
<mkdir path='${targetPath}/json/'/>
|
||||
</fs>
|
||||
<ok to="SerializeScholix"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="SerializeScholix">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Serialize scholix to JSON</name>
|
||||
<class>eu.dnetlib.dhp.sx.graph.SparkConvertObjectToJson</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.shuffle.partitions=6000
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--master</arg><arg>yarn</arg>
|
||||
<arg>--sourcePath</arg><arg>${targetPath}/provision/scholix/scholix</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}/json/scholix_json</arg>
|
||||
<arg>--objectType</arg><arg>scholix</arg>
|
||||
<!-- pass the value of the maxNumberOfPid workflow parameter, not its literal name -->
<arg>--maxPidNumberFilter</arg><arg>${maxNumberOfPid}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -4,6 +4,7 @@ import com.fasterxml.jackson.databind.ObjectMapper
|
|||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.dhp.schema.sx.scholix.Scholix
|
||||
import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary
|
||||
import eu.dnetlib.dhp.sx.graph.SparkConvertObjectToJson.toInt
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.apache.hadoop.io.compress.GzipCodec
|
||||
import org.apache.spark.SparkConf
|
||||
|
@ -12,6 +13,14 @@ import org.slf4j.{Logger, LoggerFactory}
|
|||
|
||||
object SparkConvertObjectToJson {
|
||||
|
||||
/** Attempts to read `s` as an `Int`.
  *
  * @param s the text to parse (a `null` or non-numeric value is tolerated)
  * @return `Some(n)` when `s.toInt` succeeds, `None` when the conversion throws
  */
def toInt(s: String): Option[Int] =
  scala.util.Try(s.toInt).toOption
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
val log: Logger = LoggerFactory.getLogger(getClass)
|
||||
val conf: SparkConf = new SparkConf()
|
||||
|
@ -37,6 +46,8 @@ object SparkConvertObjectToJson {
|
|||
log.info(s"objectType -> $objectType")
|
||||
val scholixUpdatePath = parser.get("scholixUpdatePath")
|
||||
log.info(s"scholixUpdatePath -> $scholixUpdatePath")
|
||||
val maxPidNumberFilter = parser.get("maxPidNumberFilter")
|
||||
log.info(s"maxPidNumberFilter -> $maxPidNumberFilter")
|
||||
|
||||
implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix]
|
||||
implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary]
|
||||
|
@ -47,12 +58,22 @@ object SparkConvertObjectToJson {
|
|||
case "scholix" =>
|
||||
log.info("Serialize Scholix")
|
||||
val d: Dataset[Scholix] = spark.read.load(sourcePath).as[Scholix]
|
||||
val u: Dataset[Scholix] = spark.read.load(s"$scholixUpdatePath/scholix").as[Scholix]
|
||||
d.union(u)
|
||||
.repartition(8000)
|
||||
.map(s => mapper.writeValueAsString(s))(Encoders.STRING)
|
||||
.rdd
|
||||
.saveAsTextFile(targetPath, classOf[GzipCodec])
|
||||
// val u: Dataset[Scholix] = spark.read.load(s"$scholixUpdatePath/scholix").as[Scholix]
|
||||
if (maxPidNumberFilter != null && toInt(maxPidNumberFilter).isDefined) {
|
||||
val mp = toInt(maxPidNumberFilter).get
|
||||
d
|
||||
.filter(s => (s.getSource.getIdentifier.size() <= mp) && (s.getTarget.getIdentifier.size() <= mp))
|
||||
.map(s => mapper.writeValueAsString(s))(Encoders.STRING)
|
||||
.rdd
|
||||
.saveAsTextFile(targetPath, classOf[GzipCodec])
|
||||
} else {
|
||||
d
|
||||
.repartition(8000)
|
||||
.map(s => mapper.writeValueAsString(s))(Encoders.STRING)
|
||||
.rdd
|
||||
.saveAsTextFile(targetPath, classOf[GzipCodec])
|
||||
}
|
||||
|
||||
case "summary" =>
|
||||
log.info("Serialize Summary")
|
||||
val d: Dataset[ScholixSummary] = spark.read.load(sourcePath).as[ScholixSummary]
|
||||
|
|
|
@ -4,9 +4,11 @@ import com.fasterxml.jackson.databind.ObjectMapper
|
|||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset}
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.apache.commons.lang3.StringUtils
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
object SparkConvertRDDtoDataset {
|
||||
|
@ -34,6 +36,9 @@ object SparkConvertRDDtoDataset {
|
|||
val t = parser.get("targetPath")
|
||||
log.info(s"targetPath -> $t")
|
||||
|
||||
val filterRelation = parser.get("filterRelation")
|
||||
log.info(s"filterRelation -> $filterRelation")
|
||||
|
||||
val entityPath = s"$t/entities"
|
||||
val relPath = s"$t/relation"
|
||||
val mapper = new ObjectMapper()
|
||||
|
@ -94,28 +99,44 @@ object SparkConvertRDDtoDataset {
|
|||
|
||||
log.info("Converting Relation")
|
||||
|
||||
val relationSemanticFilter = List(
|
||||
// "cites",
|
||||
// "iscitedby",
|
||||
"merges",
|
||||
"ismergedin",
|
||||
"HasAmongTopNSimilarDocuments",
|
||||
"IsAmongTopNSimilarDocuments"
|
||||
)
|
||||
if (filterRelation != null && StringUtils.isNoneBlank(filterRelation)) {
|
||||
|
||||
val rddRelation = spark.sparkContext
|
||||
.textFile(s"$sourcePath/relation")
|
||||
.map(s => mapper.readValue(s, classOf[Relation]))
|
||||
.filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false)
|
||||
.filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
|
||||
//filter OpenCitations relations
|
||||
.filter(r =>
|
||||
r.getCollectedfrom != null && r.getCollectedfrom.size() > 0 && !r.getCollectedfrom.asScala.exists(k =>
|
||||
"opencitations".equalsIgnoreCase(k.getValue)
|
||||
val rddRelation = spark.sparkContext
|
||||
.textFile(s"$sourcePath/relation")
|
||||
.map(s => mapper.readValue(s, classOf[Relation]))
|
||||
.filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false)
|
||||
.filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
|
||||
//filter OpenCitations relations
|
||||
.filter(r =>
|
||||
r.getCollectedfrom != null && r.getCollectedfrom.size() > 0 && !r.getCollectedfrom.asScala.exists(k =>
|
||||
"opencitations".equalsIgnoreCase(k.getValue)
|
||||
)
|
||||
)
|
||||
.filter(r => r.getSubRelType != null && r.getSubRelType.equalsIgnoreCase(filterRelation))
|
||||
spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath")
|
||||
} else {
|
||||
|
||||
val relationSemanticFilter = List(
|
||||
"merges",
|
||||
"ismergedin",
|
||||
"HasAmongTopNSimilarDocuments",
|
||||
"IsAmongTopNSimilarDocuments"
|
||||
)
|
||||
.filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)))
|
||||
spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath")
|
||||
|
||||
val rddRelation = spark.sparkContext
|
||||
.textFile(s"$sourcePath/relation")
|
||||
.map(s => mapper.readValue(s, classOf[Relation]))
|
||||
.filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false)
|
||||
.filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
|
||||
//filter OpenCitations relations
|
||||
.filter(r =>
|
||||
r.getCollectedfrom != null && r.getCollectedfrom.size() > 0 && !r.getCollectedfrom.asScala.exists(k =>
|
||||
"opencitations".equalsIgnoreCase(k.getValue)
|
||||
)
|
||||
)
|
||||
.filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)))
|
||||
spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath")
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,9 +1,7 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.raw;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import static org.mockito.ArgumentMatchers.anyString;
|
||||
import static org.mockito.Mockito.lenient;
|
||||
|
||||
|
@ -32,11 +30,12 @@ import com.fasterxml.jackson.core.type.TypeReference;
|
|||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
public class MigrateDbEntitiesApplicationTest {
|
||||
class MigrateDbEntitiesApplicationTest {
|
||||
|
||||
private MigrateDbEntitiesApplication app;
|
||||
|
||||
|
@ -62,7 +61,7 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testProcessService() throws Exception {
|
||||
void testProcessService() throws Exception {
|
||||
final List<TypedField> fields = prepareMocks("services_resultset_entry.json");
|
||||
|
||||
final List<Oaf> list = app.processService(rs);
|
||||
|
@ -75,7 +74,7 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
.getCollectedfrom()
|
||||
.stream()
|
||||
.map(KeyValue::getKey)
|
||||
.forEach(dsId -> assertValidId(dsId));
|
||||
.forEach(this::assertValidId);
|
||||
|
||||
assertEquals(1, ds.getPid().size());
|
||||
assertEquals("r3d100010218", ds.getPid().get(0).getValue());
|
||||
|
@ -164,14 +163,14 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
.stream()
|
||||
.map(Qualifier::getSchemeid)
|
||||
.collect(Collectors.toCollection(HashSet::new));
|
||||
assertTrue(cpSchemeId.size() == 1);
|
||||
assertEquals(1, cpSchemeId.size());
|
||||
assertTrue(cpSchemeId.contains("eosc:contentpolicies"));
|
||||
HashSet<String> cpSchemeName = ds
|
||||
.getContentpolicies()
|
||||
.stream()
|
||||
.map(Qualifier::getSchemename)
|
||||
.collect(Collectors.toCollection(HashSet::new));
|
||||
assertTrue(cpSchemeName.size() == 1);
|
||||
assertEquals(1, cpSchemeName.size());
|
||||
assertTrue(cpSchemeName.contains("eosc:contentpolicies"));
|
||||
assertEquals(2, ds.getContentpolicies().size());
|
||||
assertEquals("Taxonomic classification", ds.getContentpolicies().get(0).getClassid());
|
||||
|
@ -194,7 +193,7 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testProcessProject() throws Exception {
|
||||
void testProcessProject() throws Exception {
|
||||
final List<TypedField> fields = prepareMocks("projects_resultset_entry.json");
|
||||
|
||||
final List<Oaf> list = app.processProject(rs);
|
||||
|
@ -212,7 +211,7 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testProcessOrganization() throws Exception {
|
||||
void testProcessOrganization() throws Exception {
|
||||
final List<TypedField> fields = prepareMocks("organizations_resultset_entry.json");
|
||||
|
||||
final List<Oaf> list = app.processOrganization(rs);
|
||||
|
@ -239,7 +238,7 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testProcessDatasourceOrganization() throws Exception {
|
||||
void testProcessDatasourceOrganization() throws Exception {
|
||||
final List<TypedField> fields = prepareMocks("datasourceorganization_resultset_entry.json");
|
||||
|
||||
final List<Oaf> list = app.processServiceOrganization(rs);
|
||||
|
@ -268,7 +267,7 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testProcessProjectOrganization() throws Exception {
|
||||
void testProcessProjectOrganization() throws Exception {
|
||||
final List<TypedField> fields = prepareMocks("projectorganization_resultset_entry.json");
|
||||
|
||||
final List<Oaf> list = app.processProjectOrganization(rs);
|
||||
|
@ -284,6 +283,38 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
assertEquals(r2.getSource(), r1.getTarget());
|
||||
assertValidId(r1.getCollectedfrom().get(0).getKey());
|
||||
assertValidId(r2.getCollectedfrom().get(0).getKey());
|
||||
|
||||
assertEquals(ModelConstants.PROJECT_ORGANIZATION, r1.getRelType());
|
||||
assertEquals(ModelConstants.PROJECT_ORGANIZATION, r2.getRelType());
|
||||
|
||||
assertEquals(ModelConstants.PARTICIPATION, r1.getSubRelType());
|
||||
assertEquals(ModelConstants.PARTICIPATION, r2.getSubRelType());
|
||||
|
||||
if (r1.getSource().startsWith("40")) {
|
||||
assertEquals(ModelConstants.HAS_PARTICIPANT, r1.getRelClass());
|
||||
assertEquals(ModelConstants.IS_PARTICIPANT, r2.getRelClass());
|
||||
} else if (r1.getSource().startsWith("20")) {
|
||||
assertEquals(ModelConstants.IS_PARTICIPANT, r1.getRelClass());
|
||||
assertEquals(ModelConstants.HAS_PARTICIPANT, r2.getRelClass());
|
||||
}
|
||||
|
||||
assertNotNull(r1.getProperties());
|
||||
checkProperty(r1, "contribution", "436754.0");
|
||||
checkProperty(r2, "contribution", "436754.0");
|
||||
|
||||
checkProperty(r1, "currency", "EUR");
|
||||
checkProperty(r2, "currency", "EUR");
|
||||
}
|
||||
|
||||
private void checkProperty(Relation r, String property, String value) {
|
||||
final List<KeyValue> p = r
|
||||
.getProperties()
|
||||
.stream()
|
||||
.filter(kv -> kv.getKey().equals(property))
|
||||
.collect(Collectors.toList());
|
||||
assertFalse(p.isEmpty());
|
||||
assertEquals(1, p.size());
|
||||
assertEquals(value, p.get(0).getValue());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -302,7 +333,7 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testProcessClaims_rels() throws Exception {
|
||||
void testProcessClaims_rels() throws Exception {
|
||||
final List<TypedField> fields = prepareMocks("claimsrel_resultset_entry.json");
|
||||
|
||||
final List<Oaf> list = app.processClaims(rs);
|
||||
|
@ -333,9 +364,6 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
|
||||
assertValidId(r1.getCollectedfrom().get(0).getKey());
|
||||
assertValidId(r2.getCollectedfrom().get(0).getKey());
|
||||
|
||||
// System.out.println(new ObjectMapper().writeValueAsString(r1));
|
||||
// System.out.println(new ObjectMapper().writeValueAsString(r2));
|
||||
}
|
||||
|
||||
private List<TypedField> prepareMocks(final String jsonFile) throws IOException, SQLException {
|
||||
|
@ -398,7 +426,7 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
final String[] values = ((List<?>) tf.getValue())
|
||||
.stream()
|
||||
.filter(Objects::nonNull)
|
||||
.map(o -> o.toString())
|
||||
.map(Object::toString)
|
||||
.toArray(String[]::new);
|
||||
|
||||
Mockito.when(arr.getArray()).thenReturn(values);
|
||||
|
|
|
@ -2,12 +2,12 @@
|
|||
{
|
||||
"field": "project",
|
||||
"type": "string",
|
||||
"value": "nsf_________::1700003"
|
||||
"value": "corda__h2020::824273"
|
||||
},
|
||||
{
|
||||
"field": "resporganization",
|
||||
"type": "string",
|
||||
"value": "nsf_________::University_of_Notre_Dame"
|
||||
"value": "corda__h2020::999945647"
|
||||
},
|
||||
{
|
||||
"field": "participantnumber",
|
||||
|
@ -16,8 +16,13 @@
|
|||
},
|
||||
{
|
||||
"field": "contribution",
|
||||
"type": "not_used",
|
||||
"value": null
|
||||
"type": "double",
|
||||
"value": 436754
|
||||
},
|
||||
{
|
||||
"field": "currency",
|
||||
"type": "string",
|
||||
"value": "EUR"
|
||||
},
|
||||
{
|
||||
"field": "startdate",
|
||||
|
@ -52,12 +57,12 @@
|
|||
{
|
||||
"field": "collectedfromid",
|
||||
"type": "string",
|
||||
"value": "openaire____::nsf"
|
||||
"value": "openaire____::corda_h2020"
|
||||
},
|
||||
{
|
||||
"field": "collectedfromname",
|
||||
"type": "string",
|
||||
"value": "NSF - National Science Foundation"
|
||||
"value": "CORDA - COmmon Research DAta Warehouse - Horizon 2020"
|
||||
},
|
||||
{
|
||||
"field": "semantics",
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -38,7 +38,14 @@ create table TARGET.result stored as parquet as
|
|||
'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute
|
||||
'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
|
||||
'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
|
||||
'openorgs____::6445d7758d3a40c4d997953b6632a368' --National Institute of Informatics (NII)
|
||||
'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII)
|
||||
|
||||
'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr
|
||||
'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- University of Warsaw
|
||||
'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
|
||||
'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete
|
||||
'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus
|
||||
'openorgs____::4ac562f0376fce3539504567649cb373' -- University of Patras
|
||||
) )) foo;
|
||||
compute stats TARGET.result;
|
||||
|
||||
|
@ -107,6 +114,9 @@ compute stats TARGET.result_sources;
|
|||
create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.result_topics;
|
||||
|
||||
create table TARGET.result_fos stored as parquet as select * from SOURCE.result_fos orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.result_fos;
|
||||
|
||||
create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result);
|
||||
create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result);
|
||||
create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou;
|
||||
|
|
|
@ -11,13 +11,13 @@ where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false;
|
|||
CREATE TABLE ${stats_db_name}.project_organizations STORED AS PARQUET AS
|
||||
SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization
|
||||
from ${openaire_db_name}.relation r
|
||||
WHERE r.reltype = 'projectOrganization'
|
||||
WHERE r.reltype = 'projectOrganization' and r.source like '40|%'
|
||||
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS
|
||||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance
|
||||
FROM ${openaire_db_name}.relation r
|
||||
WHERE r.reltype = 'resultProject'
|
||||
WHERE r.reltype = 'resultProject' and r.target like '40|%'
|
||||
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
|
||||
|
||||
create table ${stats_db_name}.project_classification STORED AS PARQUET as
|
||||
|
|
|
@ -123,6 +123,16 @@ UNION ALL
|
|||
SELECT *
|
||||
FROM ${stats_db_name}.otherresearchproduct_topics;
|
||||
|
||||
-- Builds result_fos from result_topics rows typed as Fields of Science and Technology classification.
-- Each CTE selects one level by LIKE pattern: exactly 2 (lvl1), 4 (lvl2) or 6 (lvl3) characters followed by a space.
-- lvl2 is attached to lvl1 on the same result id and the same first 2 characters of the topic,
-- lvl3 is attached to lvl2 on the same result id and the same first 4 characters of the topic.
-- All joins are inner joins, so a result appears only when it has a matching topic at all three levels.
create table ${stats_db_name}.result_fos stored as parquet as
|
||||
with
|
||||
lvl1 as (select id, topic from ${stats_db_name}.result_topics where topic like '__ %' and type='Fields of Science and Technology classification'),
|
||||
lvl2 as (select id, topic from ${stats_db_name}.result_topics where topic like '____ %' and type='Fields of Science and Technology classification'),
|
||||
lvl3 as (select id, topic from ${stats_db_name}.result_topics where topic like '______ %' and type='Fields of Science and Technology classification')
|
||||
select lvl1.id, lvl1.topic as lvl1, lvl2.topic as lvl2, lvl3.topic as lvl3
|
||||
from lvl1
|
||||
join lvl2 on lvl1.id=lvl2.id and substr(lvl2.topic, 1, 2)=substr(lvl1.topic, 1, 2)
|
||||
join lvl3 on lvl3.id=lvl1.id and substr(lvl3.topic, 1, 4)=substr(lvl2.topic, 1, 4);
|
||||
|
||||
CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS
|
||||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
|
||||
FROM ${openaire_db_name}.relation r
|
||||
|
@ -134,4 +144,5 @@ CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS
|
|||
select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance
|
||||
FROM ${stats_db_name}.result r
|
||||
JOIN ${stats_db_name}.project_results pr ON r.id = pr.result
|
||||
JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id;
|
||||
JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id;
|
||||
|
||||
|
|
2
pom.xml
2
pom.xml
|
@ -801,7 +801,7 @@
|
|||
<mockito-core.version>3.3.3</mockito-core.version>
|
||||
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
||||
<vtd.version>[2.12,3.0)</vtd.version>
|
||||
<dhp-schemas.version>[2.12.0]</dhp-schemas.version>
|
||||
<dhp-schemas.version>[2.12.2-SNAPSHOT]</dhp-schemas.version>
|
||||
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
||||
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
||||
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
||||
|
|
Loading…
Reference in New Issue