diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/OafUtils.scala b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/OafUtils.scala
index 27eec77fa2..526d657824 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/OafUtils.scala
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/OafUtils.scala
@@ -15,11 +15,11 @@ object OafUtils {
}
- def generateDataInfo(trust: String = "0.9", invisibile: Boolean = false): DataInfo = {
+ def generateDataInfo(trust: String = "0.9", invisible: Boolean = false): DataInfo = {
val di = new DataInfo
di.setDeletedbyinference(false)
di.setInferred(false)
- di.setInvisible(false)
+ di.setInvisible(invisible)
di.setTrust(trust)
di.setProvenanceaction(createQualifier("sysimport:actionset", "dnet:provenanceActions"))
di
diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml
index cf0fa0efea..0445e0e1b7 100644
--- a/dhp-workflows/dhp-aggregation/pom.xml
+++ b/dhp-workflows/dhp-aggregation/pom.xml
@@ -7,10 +7,44 @@
1.2.4-SNAPSHOT
dhp-aggregation
-
+
+
+
+ net.alchim31.maven
+ scala-maven-plugin
+ ${net.alchim31.maven.version}
+
+
+ scala-compile-first
+ initialize
+
+ add-source
+ compile
+
+
+
+ scala-test-compile
+ process-test-resources
+
+ testCompile
+
+
+
+
+ ${scala.version}
+
+
+
+
+
+
+ org.apache.httpcomponents
+ httpclient
+
+
org.apache.spark
spark-core_2.11
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala
new file mode 100644
index 0000000000..852147ccdf
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala
@@ -0,0 +1,73 @@
+package eu.dnetlib.dhp.actionmanager.datacite
+
+import org.apache.commons.io.IOUtils
+import org.apache.http.client.methods.{HttpGet, HttpPost, HttpRequestBase, HttpUriRequest}
+import org.apache.http.entity.StringEntity
+import org.apache.http.impl.client.HttpClients
+
+import java.io.IOException
+
+// Minimal base class for cursor-paginated REST APIs, exposed as an
+// Iterator[String] over the raw JSON items of successive result pages.
+// Subclasses implement extractInfo (parse one HTTP response into `buffer`,
+// `scroll_value` and `complete`) and getBufferData (fetch the next page).
+abstract class AbstractRestClient extends Iterator[String]{
+
+ // items of the current page, consumed sequentially by next()
+ var buffer: List[String] = List()
+ // index into `buffer` of the next item to be returned
+ var current_index:Int = 0
+
+ // cursor / next-page URL supplied by the API, None when exhausted
+ var scroll_value: Option[String] = None
+
+ // true once the API reports there are no further pages
+ var complete:Boolean = false
+
+
+ // parse one raw HTTP response and refill the pagination state above
+ def extractInfo(input: String): Unit
+
+ // fetch the next page of results (expected to be a no-op once `complete`)
+ protected def getBufferData(): Unit
+
+
+ // executes a GET request and returns the response body as a string
+ def doHTTPGETRequest(url:String): String = {
+ val httpGet = new HttpGet(url)
+ doHTTPRequest(httpGet)
+
+ }
+
+ // executes a POST request (JSON body optional) and returns the response body
+ def doHTTPPOSTRequest(url:String, json:String): String = {
+ val httpPost = new HttpPost(url)
+ if (json != null) {
+ val entity = new StringEntity(json)
+ httpPost.setEntity(entity)
+ httpPost.setHeader("Accept", "application/json")
+ httpPost.setHeader("Content-type", "application/json")
+ }
+ doHTTPRequest(httpPost)
+ }
+
+ // true while the current buffer still holds unread items; note the buffer
+ // is refilled eagerly by next(), not lazily here
+ def hasNext: Boolean = {
+ buffer.nonEmpty && current_index < buffer.size
+ }
+
+
+ override def next(): String = {
+ val next_item:String = buffer(current_index)
+ current_index = current_index + 1
+ // once the page is fully consumed, eagerly fetch the following one
+ if (current_index == buffer.size)
+ getBufferData()
+ next_item
+ }
+
+
+ // runs the request with a fresh default client; wraps any failure into a
+ // RuntimeException and always closes the client in the finally block
+ private def doHTTPRequest[A <: HttpUriRequest](r: A) :String ={
+ val client = HttpClients.createDefault
+ try {
+ val response = client.execute(r)
+ IOUtils.toString(response.getEntity.getContent)
+ } catch {
+ case e: Throwable =>
+ throw new RuntimeException("Error on executing request ", e)
+ } finally try client.close()
+ catch {
+ case e: IOException =>
+ throw new RuntimeException("Unable to close client ", e)
+ }
+ }
+
+ // NOTE(review): invoked at construction time, i.e. before subclass
+ // constructor bodies have run — subclasses must rely only on constructor
+ // parameters (not fields) inside getBufferData()
+ getBufferData()
+
+}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteAPIImporter.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteAPIImporter.scala
new file mode 100644
index 0000000000..c2ad6855cb
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteAPIImporter.scala
@@ -0,0 +1,25 @@
+package eu.dnetlib.dhp.actionmanager.datacite
+
+import org.json4s.{DefaultFormats, JValue}
+import org.json4s.jackson.JsonMethods.{compact, parse, render}
+
/**
 * Iterates over Datacite DOI records via the public REST API, following the
 * cursor link returned in `links.next` until the API stops providing one.
 *
 * @param timestamp lower bound (epoch millis) for the `updated` filter
 * @param blocks    page size requested from the API
 */
class DataciteAPIImporter(timestamp: Long = 0, blocks: Long = 10) extends AbstractRestClient {

  override def extractInfo(input: String): Unit = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val parsed: org.json4s.JValue = parse(input)
    // each element of `data` is re-serialised back to a compact JSON string
    buffer = (parsed \ "data").extract[List[JValue]].map(item => compact(render(item)))
    val nextLink = (parsed \ "links" \ "next").extractOrElse[String](null)
    scroll_value =
      if (nextLink != null && nextLink.nonEmpty) Some(nextLink)
      else None
    // no next link means this was the last page
    if (scroll_value.isEmpty)
      complete = true
    current_index = 0
  }

  override def getBufferData(): Unit = {
    if (!complete) {
      val response = scroll_value match {
        // follow the cursor provided by the previous response
        case Some(cursor) => doHTTPGETRequest(cursor)
        // first call: start a fresh cursor-paginated query
        case None => doHTTPGETRequest(s"https://api.datacite.org/dois?page[cursor]=1&page[size]=$blocks&query=updated:[$timestamp%20TO%20*]")
      }
      extractInfo(response)
    }
  }
}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala
new file mode 100644
index 0000000000..9418e71da0
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala
@@ -0,0 +1,475 @@
+package eu.dnetlib.dhp.actionmanager.datacite
+
+import com.fasterxml.jackson.databind.ObjectMapper
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
+import eu.dnetlib.dhp.schema.action.AtomicAction
+import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Instance, KeyValue, Oaf, OafMapperUtils, OtherResearchProduct, Publication, Qualifier, Relation, Result, Software, StructuredProperty, Dataset => OafDataset}
+import eu.dnetlib.dhp.utils.DHPUtils
+import org.apache.commons.lang3.StringUtils
+import org.json4s.DefaultFormats
+import org.json4s.JsonAST.{JField, JObject, JString}
+import org.json4s.jackson.JsonMethods.parse
+
+import java.nio.charset.CodingErrorAction
+import java.time.LocalDate
+import java.time.format.DateTimeFormatter
+import java.util.Locale
+import java.util.regex.Pattern
+import scala.collection.JavaConverters._
+import scala.io.{Codec, Source}
+
+
+
+// --- Typed views over the Datacite JSON payload (json4s extraction targets) ---
+
+// One API record plus import bookkeeping: doi, update timestamp
+// (epoch seconds), active flag, and the raw JSON kept for later re-parsing.
+case class DataciteType(doi:String,timestamp:Long,isActive:Boolean, json:String ){}
+
+// A single entry of a creator's nameIdentifiers array (e.g. ORCID).
+case class NameIdentifiersType(nameIdentifierScheme: Option[String], schemeUri: Option[String], nameIdentifier: Option[String]) {}
+
+// A creator/author as exposed by Datacite, all fields optional.
+case class CreatorType(nameType: Option[String], nameIdentifiers: Option[List[NameIdentifiersType]], name: Option[String], familyName: Option[String], givenName: Option[String], affiliation: Option[List[String]]) {}
+
+// A title with its optional type (e.g. subtitle) and language.
+case class TitleType(title: Option[String], titleType: Option[String], lang: Option[String]) {}
+
+// A subject keyword and the classification scheme it belongs to.
+case class SubjectType(subject: Option[String], subjectScheme: Option[String]) {}
+
+// An abstract/description entry with its Datacite descriptionType.
+case class DescriptionType(descriptionType: Option[String], description: Option[String]) {}
+
+// A funding reference; awardUri is used to derive project relations.
+case class FundingReferenceType(funderIdentifierType: Option[String], awardTitle: Option[String], awardUri: Option[String], funderName: Option[String], funderIdentifier: Option[String], awardNumber: Option[String]) {}
+
+// A dated event with its Datacite dateType (issued, available, ...).
+case class DateType(date: Option[String], dateType: Option[String]) {}
+
+// One row of the hostedBy_map.json resource mapping Datacite client ids
+// to OpenAIRE datasource ids/names.
+case class HostedByMapType(openaire_id: String, datacite_name: String, official_name: String, similarity: Option[Float]) {}
+
+// Maps raw Datacite JSON records into OpenAIRE Oaf entities (results,
+// relations) and serialises them for action sets.
+object DataciteToOAFTransformation {
+
+ // tolerant UTF-8 codec for reading the bundled classpath resources
+ implicit val codec: Codec = Codec("UTF-8")
+ codec.onMalformedInput(CodingErrorAction.REPLACE)
+ codec.onUnmappableCharacter(CodingErrorAction.REPLACE)
+
+ // vocabulary identifiers used when resolving qualifiers
+ private val PID_VOCABULARY = "dnet:pid_types"
+ val COBJ_VOCABULARY = "dnet:publication_resource"
+ val RESULT_VOCABULARY = "dnet:result_typologies"
+ val ACCESS_MODE_VOCABULARY = "dnet:access_modes"
+ val DOI_CLASS = "doi"
+
+ val TITLE_SCHEME = "dnet:dataCite_title"
+ val SUBJ_CLASS = "keywords"
+ val SUBJ_SCHEME = "dnet:subject_classification_typologies"
+
+ // blacklist terms loaded from the `datacite_filter` classpath resource;
+ // records containing any of them are dropped (see filter_json)
+ val j_filter:List[String] = {
+ val s = Source.fromInputStream(getClass.getResourceAsStream("datacite_filter")).mkString
+ s.lines.toList
+ }
+
+ val mapper = new ObjectMapper()
+ // fallback datasource when a Datacite client id is not in hostedByMap
+ val unknown_repository: HostedByMapType = HostedByMapType("openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18", "Unknown Repository", "Unknown Repository", Some(1.0F))
+
+ val dataInfo: DataInfo = generateDataInfo("0.9")
+ val DATACITE_COLLECTED_FROM: KeyValue = OafMapperUtils.keyValue("openaire____::datacite", "Datacite")
+
+ // Datacite client id (upper-cased) -> OpenAIRE datasource, loaded from
+ // the hostedBy_map.json classpath resource
+ val hostedByMap: Map[String, HostedByMapType] = {
+ val s = Source.fromInputStream(getClass.getResourceAsStream("hostedBy_map.json")).mkString
+ implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
+ lazy val json: org.json4s.JValue = parse(s)
+ json.extract[Map[String, HostedByMapType]]
+ }
+
+ // lenient multi-pattern date parsers (English and Italian day/month order)
+ val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern("[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]", Locale.ENGLISH)
+ val df_it: DateTimeFormatter = DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN)
+
+ // grant-agreement URI patterns -> OpenAIRE project id prefix; group 2 of
+ // each pattern is the grant number (see get_projectRelation)
+ val funder_regex:List[(Pattern, String)] = List(
+ (Pattern.compile("(info:eu-repo/grantagreement/ec/h2020/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE),"40|corda__h2020::"),
+ (Pattern.compile("(info:eu-repo/grantagreement/ec/fp7/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE),"40|corda_______::")
+
+ )
+
+ // date-extraction patterns tried in order by extract_date
+ val Date_regex: List[Pattern] = List(
+ //Y-M-D
+ Pattern.compile("(18|19|20)\\d\\d([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])", Pattern.MULTILINE),
+ //M-D-Y
+ Pattern.compile("((0[1-9]|1[012])|([1-9]))([- /.])(0[1-9]|[12][0-9]|3[01])([- /.])(18|19|20)?\\d\\d", Pattern.MULTILINE),
+ //D-M-Y
+ Pattern.compile("(?:(?:31(/|-|\\.)(?:0?[13578]|1[02]|(?:Jan|Mar|May|Jul|Aug|Oct|Dec)))\\1|(?:(?:29|30)(/|-|\\.)(?:0?[1,3-9]|1[0-2]|(?:Jan|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))\\2))(?:(?:1[6-9]|[2-9]\\d)?\\d{2})|(?:29(/|-|\\.)(?:0?2|(?:Feb))\\3(?:(?:(?:1[6-9]|[2-9]\\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))|(?:0?[1-9]|1\\d|2[0-8])(/|-|\\.)(?:(?:0?[1-9]|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep))|(?:1[0-2]|(?:Oct|Nov|Dec)))\\4(?:(?:1[6-9]|[2-9]\\d)?\\d{2})", Pattern.MULTILINE),
+ //Y
+ Pattern.compile("(19|20)\\d\\d", Pattern.MULTILINE)
+ )
+
+
+ def filter_json(json:String):Boolean = {
+ j_filter.exists(f => json.contains(f))
+ }
+
+ def toActionSet(item:Oaf) :(String, String) = {
+ val mapper = new ObjectMapper()
+
+ item match {
+ case dataset: OafDataset =>
+ val a: AtomicAction[OafDataset] = new AtomicAction[OafDataset]
+ a.setClazz(classOf[OafDataset])
+ a.setPayload(dataset)
+ (dataset.getClass.getCanonicalName, mapper.writeValueAsString(a))
+ case publication: Publication =>
+ val a: AtomicAction[Publication] = new AtomicAction[Publication]
+ a.setClazz(classOf[Publication])
+ a.setPayload(publication)
+ (publication.getClass.getCanonicalName, mapper.writeValueAsString(a))
+ case software: Software =>
+ val a: AtomicAction[Software] = new AtomicAction[Software]
+ a.setClazz(classOf[Software])
+ a.setPayload(software)
+ (software.getClass.getCanonicalName, mapper.writeValueAsString(a))
+ case orp: OtherResearchProduct =>
+ val a: AtomicAction[OtherResearchProduct] = new AtomicAction[OtherResearchProduct]
+ a.setClazz(classOf[OtherResearchProduct])
+ a.setPayload(orp)
+ (orp.getClass.getCanonicalName, mapper.writeValueAsString(a))
+
+ case relation: Relation =>
+ val a: AtomicAction[Relation] = new AtomicAction[Relation]
+ a.setClazz(classOf[Relation])
+ a.setPayload(relation)
+ (relation.getClass.getCanonicalName, mapper.writeValueAsString(a))
+ case _ =>
+ null
+ }
+
+ }
+
+
+
+
+ def embargo_end(embargo_end_date: String): Boolean = {
+ val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]"))
+ val td = LocalDate.now()
+ td.isAfter(dt)
+ }
+
+
+ def extract_date(input: String): Option[String] = {
+ val d = Date_regex.map(pattern => {
+ val matcher = pattern.matcher(input)
+ if (matcher.find())
+ matcher.group(0)
+ else
+ null
+ }
+ ).find(s => s != null)
+
+ if (d.isDefined) {
+ val a_date = if (d.get.length == 4) s"01-01-${d.get}" else d.get
+ try {
+ return Some(LocalDate.parse(a_date, df_en).toString)
+ } catch {
+ case _: Throwable => try {
+ return Some(LocalDate.parse(a_date, df_it).toString)
+ } catch {
+ case _: Throwable => try {
+ return None
+ }
+ }
+ }
+ }
+ d
+ }
+
+ // Resolves the (instance-type, result-typology) qualifier pair for a record,
+ // trying resourceType first, then schemaOrg, then resourceTypeGeneral
+ // against dnet:publication_resource; the matched class id is then mapped
+ // through dnet:result_typologies. Returns null when nothing matches.
+ // NOTE(review): the second element may be null if the typology lookup
+ // fails — callers dereference it without checking; verify vocabularies.
+ def getTypeQualifier(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies:VocabularyGroup): (Qualifier, Qualifier) = {
+ if (resourceType != null && resourceType.nonEmpty) {
+ val typeQualifier = vocabularies.getSynonymAsQualifier(COBJ_VOCABULARY, resourceType)
+ if (typeQualifier != null)
+ return (typeQualifier, vocabularies.getSynonymAsQualifier(RESULT_VOCABULARY, typeQualifier.getClassid))
+ }
+ if (schemaOrg != null && schemaOrg.nonEmpty) {
+ val typeQualifier = vocabularies.getSynonymAsQualifier(COBJ_VOCABULARY, schemaOrg)
+ if (typeQualifier != null)
+ return (typeQualifier, vocabularies.getSynonymAsQualifier(RESULT_VOCABULARY, typeQualifier.getClassid))
+
+ }
+ if (resourceTypeGeneral != null && resourceTypeGeneral.nonEmpty) {
+ val typeQualifier = vocabularies.getSynonymAsQualifier(COBJ_VOCABULARY, resourceTypeGeneral)
+ if (typeQualifier != null)
+ return (typeQualifier, vocabularies.getSynonymAsQualifier(RESULT_VOCABULARY, typeQualifier.getClassid))
+
+ }
+ null
+ }
+
+
+ def getResult(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies:VocabularyGroup): Result = {
+ val typeQualifiers: (Qualifier, Qualifier) = getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
+ if (typeQualifiers == null)
+ return null
+ val i = new Instance
+ i.setInstancetype(typeQualifiers._1)
+ typeQualifiers._2.getClassname match {
+ case "dataset" =>
+ val r = new OafDataset
+ r.setInstance(List(i).asJava)
+ return r
+ case "publication" =>
+ val r = new Publication
+ r.setInstance(List(i).asJava)
+ return r
+ case "software" =>
+ val r = new Software
+ r.setInstance(List(i).asJava)
+ return r
+ case "other" =>
+ val r = new OtherResearchProduct
+ r.setInstance(List(i).asJava)
+ return r
+ }
+ null
+ }
+
+
+ def available_date(input: String): Boolean = {
+
+ implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
+ lazy val json: org.json4s.JValue = parse(input)
+ val l: List[String] = for {
+ JObject(dates) <- json \\ "dates"
+ JField("dateType", JString(dateTypes)) <- dates
+ } yield dateTypes
+
+ l.exists(p => p.equalsIgnoreCase("available"))
+
+ }
+
+
+ def generateOAFDate(dt: String, q: Qualifier): StructuredProperty = {
+ OafMapperUtils.structuredProperty(dt, q, null)
+ }
+
+ def generateRelation(sourceId:String, targetId:String, relClass:String, cf:KeyValue, di:DataInfo) :Relation = {
+
+ val r = new Relation
+ r.setSource(sourceId)
+ r.setTarget(targetId)
+ r.setRelType("resultProject")
+ r.setRelClass(relClass)
+ r.setSubRelType("outcome")
+ r.setCollectedfrom(List(cf).asJava)
+ r.setDataInfo(di)
+ r
+
+
+ }
+
+ // Derives the bidirectional result<->project relations from an
+ // info:eu-repo grant-agreement awardUri. The first funder_regex pattern
+ // that matches supplies the project id prefix; the pattern's capture
+ // group 2 (the 6-digit grant number) is md5-hashed into the target id.
+ // Returns an empty list when no funder pattern matches.
+ def get_projectRelation(awardUri:String, sourceId:String):List[Relation] = {
+ val match_pattern = funder_regex.find(s =>s._1.matcher(awardUri).find())
+
+ if (match_pattern.isDefined) {
+ val m =match_pattern.get._1
+ val p = match_pattern.get._2
+ // replaceAll("$2") rewrites the whole match to capture group 2 only
+ val grantId = m.matcher(awardUri).replaceAll("$2")
+ val targetId = s"$p${DHPUtils.md5(grantId)}"
+ List(
+ generateRelation(sourceId, targetId,"isProducedBy", DATACITE_COLLECTED_FROM, dataInfo),
+ generateRelation(targetId, sourceId,"produces", DATACITE_COLLECTED_FROM, dataInfo)
+ )
+ }
+ else
+ List()
+
+ }
+
+
+ // Maps one raw Datacite JSON record into a list of Oaf entities: the
+ // Result itself plus any project relations derived from funding award
+ // URIs. Returns an empty list when the record is blacklisted, has no
+ // usable DOI or type, or has no valid author.
+ def generateOAF(input:String,ts:Long, dateOfCollection:Long, vocabularies: VocabularyGroup):List[Oaf] = {
+ // drop spam records matching the datacite_filter blacklist
+ if (filter_json(input))
+ return List()
+
+ implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
+ lazy val json = parse(input)
+
+ val resourceType = (json \ "attributes" \ "types" \ "resourceType").extractOrElse[String](null)
+ val resourceTypeGeneral = (json \ "attributes" \ "types" \ "resourceTypeGeneral").extractOrElse[String](null)
+ val schemaOrg = (json \ "attributes" \ "types" \ "schemaOrg").extractOrElse[String](null)
+
+ val doi = (json \ "attributes" \ "doi").extract[String]
+ if (doi.isEmpty)
+ return List()
+
+ //Mapping type based on vocabularies dnet:publication_resource and dnet:result_typologies
+ val result = getResult(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
+ if (result == null)
+ return List()
+
+
+ // identity: DOI pid, deterministic OpenAIRE id, collection timestamps
+ val doi_q = vocabularies.getSynonymAsQualifier(PID_VOCABULARY, "doi")
+ val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo)
+ result.setPid(List(pid).asJava)
+ result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true))
+ result.setOriginalId(List(doi).asJava)
+ result.setDateofcollection(s"${dateOfCollection}")
+ result.setDateoftransformation(s"$ts")
+ result.setDataInfo(dataInfo)
+
+ val creators = (json \\ "creators").extractOrElse[List[CreatorType]](List())
+
+
+ // authors: name parts, optional pids (e.g. ORCID) and affiliations;
+ // rank follows the creators' order in the record (1-based)
+ val authors = creators.zipWithIndex.map { case (c, idx) =>
+ val a = new Author
+ a.setFullname(c.name.orNull)
+ a.setName(c.givenName.orNull)
+ a.setSurname(c.familyName.orNull)
+ if (c.nameIdentifiers!= null&& c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null) {
+ a.setPid(c.nameIdentifiers.get.map(ni => {
+ val q = if (ni.nameIdentifierScheme.isDefined) vocabularies.getTermAsQualifier(PID_VOCABULARY, ni.nameIdentifierScheme.get.toLowerCase()) else null
+ if (ni.nameIdentifier!= null && ni.nameIdentifier.isDefined) {
+ OafMapperUtils.structuredProperty(ni.nameIdentifier.get, q, dataInfo)
+ }
+ else
+ null
+
+ }
+ )
+ .asJava)
+ }
+ if (c.affiliation.isDefined)
+ a.setAffiliation(c.affiliation.get.filter(af => af.nonEmpty).map(af => OafMapperUtils.field(af, dataInfo)).asJava)
+ a.setRank(idx + 1)
+ a
+ }
+
+
+
+
+ // titles: entries without an explicit titleType default to "main title"
+ val titles:List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List())
+
+ result.setTitle(titles.filter(t => t.title.nonEmpty).map(t => {
+ if (t.titleType.isEmpty) {
+ OafMapperUtils.structuredProperty(t.title.get, "main title", "main title", TITLE_SCHEME, TITLE_SCHEME, null)
+ } else {
+ OafMapperUtils.structuredProperty(t.title.get, t.titleType.get, t.titleType.get, TITLE_SCHEME, TITLE_SCHEME, null)
+ }
+ }).asJava)
+
+ // records without at least one non-null author are discarded
+ if(authors==null || authors.isEmpty || !authors.exists(a => a !=null))
+ return List()
+ result.setAuthor(authors.asJava)
+
+ val dates = (json \\ "dates").extract[List[DateType]]
+ val publication_year = (json \\ "publicationYear").extractOrElse[String](null)
+
+ // issued date -> dateofacceptance; available date -> embargo end
+ val i_date = dates
+ .filter(d => d.date.isDefined && d.dateType.isDefined)
+ .find(d => d.dateType.get.equalsIgnoreCase("issued"))
+ .map(d => extract_date(d.date.get))
+ val a_date: Option[String] = dates
+ .filter(d => d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available"))
+ .map(d => extract_date(d.date.get))
+ .find(d => d != null && d.isDefined)
+ .map(d => d.get)
+
+ if (a_date.isDefined) {
+ result.setEmbargoenddate(OafMapperUtils.field(a_date.get, null))
+ }
+ if (i_date.isDefined && i_date.get.isDefined) {
+ result.setDateofacceptance(OafMapperUtils.field(i_date.get.get, null))
+ result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(i_date.get.get, null))
+ }
+ else if (publication_year != null) {
+ // fallback: 1st of January of the publication year
+ // NOTE(review): written as dd-MM-yyyy while extract_date emits ISO — confirm downstream expectation
+ result.setDateofacceptance(OafMapperUtils.field(s"01-01-$publication_year", null))
+ result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(s"01-01-$publication_year", null))
+ }
+
+
+ // all parseable dates whose type maps into dnet:dataCite_date
+ result.setRelevantdate(dates.filter(d => d.date.isDefined && d.dateType.isDefined)
+ .map(d => (extract_date(d.date.get), d.dateType.get))
+ .filter(d => d._1.isDefined)
+ .map(d => (d._1.get, vocabularies.getTermAsQualifier("dnet:dataCite_date", d._2.toLowerCase())))
+ .filter(d => d._2 != null)
+ .map(d => generateOAFDate(d._1, d._2)).asJava)
+
+ val subjects = (json \\ "subjects").extract[List[SubjectType]]
+
+ result.setSubject(subjects.filter(s => s.subject.nonEmpty)
+ .map(s =>
+ OafMapperUtils.structuredProperty(s.subject.get, SUBJ_CLASS, SUBJ_CLASS, SUBJ_SCHEME, SUBJ_SCHEME, null)
+ ).asJava)
+
+
+ result.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
+
+ val descriptions = (json \\ "descriptions").extract[List[DescriptionType]]
+
+ result.setDescription(
+ descriptions
+ .filter(d => d.description.isDefined).
+ map(d =>
+ OafMapperUtils.field(d.description.get, null)
+ ).filter(s => s!=null).asJava)
+
+
+ val publisher = (json \\ "publisher").extractOrElse[String](null)
+ if (publisher != null)
+ result.setPublisher(OafMapperUtils.field(publisher, null))
+
+
+ val language: String = (json \\ "language").extractOrElse[String](null)
+
+ if (language != null)
+ result.setLanguage(vocabularies.getSynonymAsQualifier("dnet:languages", language))
+
+
+ // instance-level metadata: hosting datasource, url, access rights, license
+ val instance = result.getInstance().get(0)
+
+ val client = (json \ "relationships" \ "client" \\ "id").extractOpt[String]
+
+ val accessRights:List[String] = for {
+ JObject(rightsList) <- json \\ "rightsList"
+ JField("rightsUri", JString(rightsUri)) <- rightsList
+ } yield rightsUri
+
+ // first rights URI that resolves in dnet:access_modes wins
+ val aRights: Option[Qualifier] = accessRights.map(r => {
+ vocabularies.getSynonymAsQualifier(ACCESS_MODE_VOCABULARY, r)
+ }).find(q => q != null)
+
+
+ val access_rights_qualifier = if (aRights.isDefined) aRights.get else OafMapperUtils.qualifier("UNKNOWN", "not available", ACCESS_MODE_VOCABULARY, ACCESS_MODE_VOCABULARY)
+
+ if (client.isDefined) {
+ val hb = hostedByMap.getOrElse(client.get.toUpperCase(), unknown_repository)
+ instance.setHostedby(OafMapperUtils.keyValue(generateDSId(hb.openaire_id), hb.official_name))
+ instance.setCollectedfrom(DATACITE_COLLECTED_FROM)
+ instance.setUrl(List(s"https://dx.doi.org/$doi").asJava)
+ instance.setAccessright(access_rights_qualifier)
+
+ //'http') and matches(., '.*(/licenses|/publicdomain|unlicense.org/|/legal-and-data-protection-notices|/download/license|/open-government-licence).*')]">
+ val license = accessRights
+ .find(r => r.startsWith("http") && r.matches(".*(/licenses|/publicdomain|unlicense\\.org/|/legal-and-data-protection-notices|/download/license|/open-government-licence).*"))
+ if (license.isDefined)
+ instance.setLicense(OafMapperUtils.field(license.get, null))
+ }
+
+
+ // project relations derived from funding award URIs
+ val awardUris:List[String] = for {
+ JObject(fundingReferences) <- json \\ "fundingReferences"
+ JField("awardUri", JString(awardUri)) <- fundingReferences
+ } yield awardUri
+
+ val relations:List[Relation] =awardUris.flatMap(a=> get_projectRelation(a, result.getId)).filter(r => r!= null)
+
+ if (relations!= null && relations.nonEmpty) {
+ List(result):::relations
+ }
+ else
+ List(result)
+ }
+
+ def generateDataInfo(trust: String): DataInfo = {
+ val di = new DataInfo
+ di.setDeletedbyinference(false)
+ di.setInferred(false)
+ di.setInvisible(false)
+ di.setTrust(trust)
+ di.setProvenanceaction(OafMapperUtils.qualifier("sysimport:actionset", "sysimport:actionset", "dnet:provenanceActions", "dnet:provenanceActions"))
+ di
+ }
+
+ def generateDSId(input: String): String = {
+ val b = StringUtils.substringBefore(input, "::")
+ val a = StringUtils.substringAfter(input, "::")
+ s"10|$b::${DHPUtils.md5(a)}"
+ }
+
+
+}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ExportActionSetJobNode.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ExportActionSetJobNode.scala
new file mode 100644
index 0000000000..9f0d257359
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ExportActionSetJobNode.scala
@@ -0,0 +1,41 @@
+package eu.dnetlib.dhp.actionmanager.datacite
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import eu.dnetlib.dhp.schema.oaf.Oaf
+import org.apache.hadoop.io.Text
+import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.hadoop.mapred.SequenceFileOutputFormat
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
+import org.slf4j.{Logger, LoggerFactory}
+
+import scala.io.Source
+
+// Spark job: reads a kryo-serialised Dataset[Oaf] from sourcePath, wraps
+// each entity into an AtomicAction pair via toActionSet, and writes the
+// result as a gzip-compressed SequenceFile[Text, Text] at targetPath
+// (the OpenAIRE action-set layout).
+object ExportActionSetJobNode {
+
+ val log: Logger = LoggerFactory.getLogger(ExportActionSetJobNode.getClass)
+
+ def main(args: Array[String]): Unit = {
+ val conf = new SparkConf
+ // CLI arguments are declared in the bundled exportDataset_parameters.json
+ val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/exportDataset_parameters.json")).mkString)
+ parser.parseArgument(args)
+ val master = parser.get("master")
+ val sourcePath = parser.get("sourcePath")
+ val targetPath = parser.get("targetPath")
+
+ val spark: SparkSession = SparkSession.builder().config(conf)
+ .appName(ExportActionSetJobNode.getClass.getSimpleName)
+ .master(master)
+ .getOrCreate()
+ implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
+ implicit val tEncoder:Encoder[(String,String)] = Encoders.tuple(Encoders.STRING,Encoders.STRING)
+
+ // unsupported Oaf subtypes map to null in toActionSet and are dropped
+ spark.read.load(sourcePath).as[Oaf]
+ .map(o =>DataciteToOAFTransformation.toActionSet(o))
+ .filter(o => o!= null)
+ .rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$targetPath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text,Text]], classOf[GzipCodec])
+
+
+ }
+
+}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala
new file mode 100644
index 0000000000..6837e94b21
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala
@@ -0,0 +1,48 @@
+package eu.dnetlib.dhp.actionmanager.datacite
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
+import eu.dnetlib.dhp.model.mdstore.MetadataRecord
+import eu.dnetlib.dhp.schema.oaf.Oaf
+import eu.dnetlib.dhp.utils.ISLookupClientFactory
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
+import org.slf4j.{Logger, LoggerFactory}
+
+import scala.io.Source
+
+// Spark job: transforms the imported Datacite dump (Dataset[DataciteType])
+// into a kryo-serialised Dataset[Oaf] at targetPath, resolving qualifiers
+// through the vocabularies fetched from the IS lookup service.
+object GenerateDataciteDatasetSpark {
+
+ val log: Logger = LoggerFactory.getLogger(GenerateDataciteDatasetSpark.getClass)
+
+ def main(args: Array[String]): Unit = {
+ val conf = new SparkConf
+ // CLI arguments are declared in the bundled generate_dataset_params.json
+ val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json")).mkString)
+ parser.parseArgument(args)
+ val master = parser.get("master")
+ val sourcePath = parser.get("sourcePath")
+ val targetPath = parser.get("targetPath")
+ val isLookupUrl: String = parser.get("isLookupUrl")
+ log.info("isLookupUrl: {}", isLookupUrl)
+
+ // vocabularies are loaded once on the driver and broadcast by closure
+ val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl)
+ val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService)
+
+ val spark: SparkSession = SparkSession.builder().config(conf)
+ .appName(GenerateDataciteDatasetSpark.getClass.getSimpleName)
+ .master(master)
+ .getOrCreate()
+
+ implicit val mrEncoder: Encoder[MetadataRecord] = Encoders.kryo[MetadataRecord]
+
+ implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
+
+ import spark.implicits._
+
+ // inactive records are skipped; generateOAF may emit several Oaf
+ // entities (result + relations) per input record
+ spark.read.load(sourcePath).as[DataciteType]
+ .filter(d => d.isActive)
+ .flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies))
+ .filter(d => d != null)
+ .write.mode(SaveMode.Overwrite).save(targetPath)
+ }
+}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala
new file mode 100644
index 0000000000..06fcbb5186
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala
@@ -0,0 +1,168 @@
+package eu.dnetlib.dhp.actionmanager.datacite
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.{FileSystem, LocalFileSystem, Path}
+import org.apache.hadoop.hdfs.DistributedFileSystem
+import org.apache.hadoop.io.{IntWritable, SequenceFile, Text}
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.expressions.Aggregator
+import org.apache.spark.sql.{Dataset, Encoder, SaveMode, SparkSession}
+import org.json4s.DefaultFormats
+import org.json4s.jackson.JsonMethods.parse
+import org.apache.spark.sql.functions.max
+import org.slf4j.{Logger, LoggerFactory}
+
+import java.time.format.DateTimeFormatter._
+import java.time.{LocalDateTime, ZoneOffset}
+import scala.io.Source
+
+object ImportDatacite {
+
+ val log: Logger = LoggerFactory.getLogger(ImportDatacite.getClass)
+
+
+ def convertAPIStringToDataciteItem(input:String): DataciteType = {
+ implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
+ lazy val json: org.json4s.JValue = parse(input)
+ val doi = (json \ "attributes" \ "doi").extract[String].toLowerCase
+
+ val isActive = (json \ "attributes" \ "isActive").extract[Boolean]
+
+ val timestamp_string = (json \ "attributes" \ "updated").extract[String]
+ val dt = LocalDateTime.parse(timestamp_string, ISO_DATE_TIME)
+ DataciteType(doi = doi, timestamp = dt.toInstant(ZoneOffset.UTC).toEpochMilli/1000, isActive = isActive, json = input)
+
+ }
+
+
+
+ // Incremental import driver: reads the last timestamp from the existing
+ // dump, pulls newer records from the Datacite API into a SequenceFile,
+ // then merges them into the dump keeping the latest version per DOI.
+ // The old dump is deleted and replaced by the merged one.
+ def main(args: Array[String]): Unit = {
+
+ val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/import_from_api.json")).mkString)
+ parser.parseArgument(args)
+ val master = parser.get("master")
+
+ val hdfsuri = parser.get("namenode")
+ log.info(s"namenode is $hdfsuri")
+
+ val targetPath = parser.get("targetPath")
+ log.info(s"targetPath is $targetPath")
+
+ val dataciteDump = parser.get("dataciteDumpPath")
+ log.info(s"dataciteDump is $dataciteDump")
+
+ val hdfsTargetPath =new Path(targetPath)
+ log.info(s"hdfsTargetPath is $hdfsTargetPath")
+
+ val spark: SparkSession = SparkSession.builder()
+ .appName(ImportDatacite.getClass.getSimpleName)
+ .master(master)
+ .getOrCreate()
+
+ // ====== Init HDFS File System Object
+ val conf = new Configuration
+ // Set FileSystem URI
+ conf.set("fs.defaultFS", hdfsuri)
+
+ // Because of Maven
+ conf.set("fs.hdfs.impl", classOf[DistributedFileSystem].getName)
+ conf.set("fs.file.impl", classOf[LocalFileSystem].getName)
+ val sc:SparkContext = spark.sparkContext
+ sc.setLogLevel("ERROR")
+
+ import spark.implicits._
+
+
+ // keep-latest-by-timestamp aggregator used to deduplicate per DOI
+ val dataciteAggregator: Aggregator[DataciteType, DataciteType, DataciteType] = new Aggregator[DataciteType, DataciteType, DataciteType] with Serializable {
+
+ override def zero: DataciteType = null
+
+ override def reduce(a: DataciteType, b: DataciteType): DataciteType = {
+ if (b == null)
+ return a
+ if (a == null)
+ return b
+ if(a.timestamp >b.timestamp) {
+ return a
+ }
+ b
+ }
+
+ override def merge(a: DataciteType, b: DataciteType): DataciteType = {
+ reduce(a,b)
+ }
+
+ override def bufferEncoder: Encoder[DataciteType] = implicitly[Encoder[DataciteType]]
+
+ override def outputEncoder: Encoder[DataciteType] = implicitly[Encoder[DataciteType]]
+
+ override def finish(reduction: DataciteType): DataciteType = reduction
+ }
+
+ // resume point: newest update timestamp already present in the dump
+ val dump:Dataset[DataciteType] = spark.read.load(dataciteDump).as[DataciteType]
+ val ts = dump.select(max("timestamp")).first().getLong(0)
+
+ log.info(s"last Timestamp is $ts")
+
+ val cnt = writeSequenceFile(hdfsTargetPath, ts, conf)
+
+ log.info(s"Imported from Datacite API $cnt documents")
+
+ if (cnt > 0) {
+
+ // NOTE(review): keys were written as IntWritable but read here with
+ // classOf[Int] — confirm this matches Spark's Writable handling
+ val inputRdd:RDD[DataciteType] = sc.sequenceFile(targetPath, classOf[Int], classOf[Text])
+ .map(s => s._2.toString)
+ .map(s => convertAPIStringToDataciteItem(s))
+ spark.createDataset(inputRdd).write.mode(SaveMode.Overwrite).save(s"${targetPath}_dataset")
+
+ val ds:Dataset[DataciteType] = spark.read.load(s"${targetPath}_dataset").as[DataciteType]
+
+ // merge old dump with new records, keeping the latest row per DOI
+ dump
+ .union(ds)
+ .groupByKey(_.doi)
+ .agg(dataciteAggregator.toColumn)
+ .map(s=>s._2)
+ .repartition(4000)
+ .write.mode(SaveMode.Overwrite).save(s"${dataciteDump}_updated")
+
+ // atomically (at the path level) swap the merged dump into place
+ val fs = FileSystem.get(sc.hadoopConfiguration)
+ fs.delete(new Path(s"$dataciteDump"), true)
+ fs.rename(new Path(s"${dataciteDump}_updated"),new Path(s"$dataciteDump"))
+ }
+ }
+
+ // Streams records newer than `timestamp` (epoch seconds, converted to the
+ // millis expected by the API) from the Datacite API into a SequenceFile
+ // of (IntWritable sequence number, Text raw JSON). Returns the number of
+ // records written.
+ private def writeSequenceFile(hdfsTargetPath: Path, timestamp: Long, conf: Configuration):Long = {
+ val client = new DataciteAPIImporter(timestamp*1000, 1000)
+ var i = 0
+ // NOTE(review): outer `try` has no catch/finally — the inner block's
+ // finally already closes the writer; the outer try is redundant
+ try {
+ val writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(hdfsTargetPath), SequenceFile.Writer.keyClass(classOf[IntWritable]), SequenceFile.Writer.valueClass(classOf[Text]))
+ try {
+
+ var start: Long = System.currentTimeMillis
+ var end: Long = 0
+ val key: IntWritable = new IntWritable(i)
+ val value: Text = new Text
+ while ( {
+ client.hasNext
+ }) {
+ // key is the 0-based sequence number (post-increment of i)
+ key.set({
+ i += 1;
+ i - 1
+ })
+ value.set(client.next())
+ writer.append(key, value)
+ writer.hflush()
+ // progress log every 1000 records
+ if (i % 1000 == 0) {
+ end = System.currentTimeMillis
+ val time = (end - start) / 1000.0F
+ println(s"Imported $i in $time seconds")
+ start = System.currentTimeMillis
+ }
+ }
+ } finally if (writer != null) writer.close()
+ }
+ i
+ }
+}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/datacite_filter b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/datacite_filter
new file mode 100644
index 0000000000..ad80d69980
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/datacite_filter
@@ -0,0 +1,28 @@
+TUBYDI - Assistir Filmes e Series Online Grátis
+123Movies
+WATCH FULL MOVIE
+Movierulz
+Full Movie Online
+MOVIé WatcH
+The King of Staten Island 2020 Online For Free
+Watch Train to Busan 2 2020 online for free
+Sixth Sense Movie Novelization
+Film Complet streaming vf gratuit en ligne
+watch now free
+LIVE stream watch
+LIVE stream UFC
+RBC Heritage live stream
+MLBStreams Free
+NFL Live Stream
+Live Stream Free
+Royal Ascot 2020 Live Stream
+TV Shows Full Episodes Official
+FuboTV
+Gomovies
+Online Free Trial Access
+123watch
+DÜŞÜK HAPI
+Bebek Düşürme Yöntemleri
+WHATSAP İLETİŞİM
+Cytotec
+düşük hapı
\ No newline at end of file
diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/exportDataset_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/exportDataset_parameters.json
new file mode 100644
index 0000000000..63e0803372
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/exportDataset_parameters.json
@@ -0,0 +1,21 @@
+[
+ {
+ "paramName": "s",
+ "paramLongName": "sourcePath",
+ "paramDescription": "the source mdstore path",
+ "paramRequired": true
+ },
+
+ {
+ "paramName": "t",
+ "paramLongName": "targetPath",
+ "paramDescription": "the target mdstore path",
+ "paramRequired": true
+ },
+ {
+ "paramName": "m",
+ "paramLongName": "master",
+ "paramDescription": "the master name",
+ "paramRequired": true
+ }
+]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json
new file mode 100644
index 0000000000..34fa3ed99d
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json
@@ -0,0 +1,33 @@
+[
+ {
+ "paramName": "s",
+ "paramLongName": "sourcePath",
+ "paramDescription": "the source mdstore path",
+ "paramRequired": true
+ },
+
+ {
+ "paramName": "t",
+ "paramLongName": "targetPath",
+ "paramDescription": "the target mdstore path",
+ "paramRequired": true
+ },
+ {
+ "paramName": "tr",
+ "paramLongName": "transformationRule",
+ "paramDescription": "the transformation Rule",
+ "paramRequired": true
+ },
+ {
+ "paramName": "m",
+ "paramLongName": "master",
+ "paramDescription": "the master name",
+ "paramRequired": true
+ },
+ {
+ "paramName": "i",
+ "paramLongName": "isLookupUrl",
+ "paramDescription": "the isLookup URL",
+ "paramRequired": true
+ }
+]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/hostedBy_map.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/hostedBy_map.json
new file mode 100644
index 0000000000..d014dab5aa
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/hostedBy_map.json
@@ -0,0 +1,1032 @@
+{
+ "SND.QOG": {
+ "openaire_id": "re3data_____::r3d100012231",
+ "datacite_name": "Quality of Government Institute",
+ "official_name": "Quality of Government Institute's Data",
+ "similarity": 0.8985507246376812
+ },
+ "GESIS.CESSDA": {
+ "openaire_id": "re3data_____::r3d100010202",
+ "datacite_name": "CESSDA ERIC",
+ "official_name": "CESSDA ERIC"
+ },
+ "BL.CRAN": {
+ "openaire_id": "re3data_____::r3d100012068",
+ "datacite_name": "Cranfield University",
+ "official_name": "Cranfield Online Research Data"
+ },
+ "SUL.OPENNEURO": {
+ "openaire_id": "re3data_____::r3d100010924",
+ "datacite_name": "OpenNeuro",
+ "official_name": "OpenNeuro"
+ },
+ "UNAVCO.UNAVCO": {
+ "openaire_id": "re3data_____::r3d100010872",
+ "datacite_name": "UNAVCO",
+ "official_name": "UNAVCO"
+ },
+ "SUL.SDR": {
+ "openaire_id": "re3data_____::r3d100010710",
+ "datacite_name": "Stanford Digital Repository",
+ "official_name": "Stanford Digital Repository"
+ },
+ "DK.ICES": {
+ "openaire_id": "re3data_____::r3d100011288",
+ "datacite_name": "International Council for the Exploration of the Sea (ICES)",
+ "official_name": "International Council for the Exploration of the Sea datasets",
+ "similarity": 0.8833333333333333
+ },
+ "CISTI.DFOSCIMR": {
+ "openaire_id": "re3data_____::r3d100012039",
+ "datacite_name": "Bedford Institute of Oceanography - Fisheries and Oceans Canada - Ocean Data and Information Section",
+ "official_name": "Bedford Institute of Oceanography - Oceanographic Databases"
+ },
+ "CSIC.DIGITAL": {
+ "openaire_id": "re3data_____::r3d100011076",
+ "datacite_name": "Digital CSIC",
+ "official_name": "DIGITAL.CSIC"
+ },
+ "TIB.PANGAEA": {
+ "openaire_id": "re3data_____::r3d100010134",
+ "datacite_name": "PANGAEA",
+ "official_name": "PANGAEA"
+ },
+ "PSU.DATACOM": {
+ "openaire_id": "re3data_____::r3d100010477",
+ "datacite_name": "Data Commons",
+ "official_name": "ANU Data Commons",
+ "similarity": 0.8571428571428571
+ },
+ "ANDS.CENTRE72": {
+ "openaire_id": "re3data_____::r3d100010451",
+ "datacite_name": "PARADISEC",
+ "official_name": "Pacific and Regional Archive for Digital Sources in Endangered Cultures"
+ },
+ "BL.OXDB": {
+ "openaire_id": "re3data_____::r3d100011653",
+ "datacite_name": "Oxford University Library Service Databank",
+ "official_name": "DataBank, Bodleian Libraries, University of Oxford"
+ },
+ "BL.STANDREW": {
+ "openaire_id": "re3data_____::r3d100012411",
+ "datacite_name": "University of St Andrews",
+ "official_name": "St Andrews Research portal - Research Data"
+ },
+ "TIB.BAFG": {
+ "openaire_id": "re3data_____::r3d100011664",
+ "datacite_name": "Bundesanstalt f\u00fcr Gew\u00e4sserkunde",
+ "official_name": "Geoportal der BFG"
+ },
+ "CRUI.UNIBO": {
+ "openaire_id": "re3data_____::r3d100012604",
+ "datacite_name": "Universit\u00e0 degli Studi di Bologna",
+ "official_name": "AMS Acta"
+ },
+ "GDCC.ODUM-LIBRARY": {
+ "openaire_id": "re3data_____::r3d100000005",
+ "datacite_name": "UNC Libraries",
+ "official_name": "UNC Dataverse"
+ },
+ "RG.RG": {
+ "openaire_id": "re3data_____::r3d100012227",
+ "datacite_name": "ResearchGate",
+ "official_name": "ResearchGate"
+ },
+ "TIB.EUMETSAT": {
+ "openaire_id": "re3data_____::r3d100010232",
+ "datacite_name": "EUMETSAT",
+ "official_name": "Eumetsat"
+ },
+ "SND.SMHI": {
+ "openaire_id": "re3data_____::r3d100011776",
+ "datacite_name": "Swedish Meteorological and Hydrological Institute open data",
+ "official_name": "Swedish Meteorological and Hydrological Institute open data"
+ },
+ "NOAA.NCEI": {
+ "openaire_id": "re3data_____::r3d100011801",
+ "datacite_name": "National Oceanic and Atmospheric Administration (NOAA) National Centers for Environmental Information (NCEI)",
+ "official_name": "NCEI"
+ },
+ "TIB.WDCC": {
+ "openaire_id": "re3data_____::r3d100010299",
+ "datacite_name": "World Data Center for Climate",
+ "official_name": "World Data Center for Climate"
+ },
+ "CNGB.GIGADB": {
+ "openaire_id": "re3data_____::r3d100010478",
+ "datacite_name": "GigaDB",
+ "official_name": "GigaDB"
+ },
+ "DELFT.VLIZ": {
+ "openaire_id": "re3data_____::r3d100010661",
+ "datacite_name": "Vlaams Instituut voor de Zee",
+ "official_name": "Flanders Marine Institute"
+ },
+ "NUS.SB": {
+ "openaire_id": "re3data_____::r3d100012564",
+ "datacite_name": "National University of Singapore",
+ "official_name": "ScholarBank@NUS"
+ },
+ "EDI.EDI": {
+ "openaire_id": "re3data_____::r3d100010272",
+ "datacite_name": "Environmental Data Initiative",
+ "official_name": "Environmental Data Initiative Repository"
+ },
+ "INIST.ADISP": {
+ "openaire_id": "re3data_____::r3d100010494",
+ "datacite_name": "Quetelet PROGEDO Diffusion",
+ "official_name": "Quetelet PROGEDO Diffusion"
+ },
+ "GESIS.SHARE": {
+ "openaire_id": "re3data_____::r3d100010430",
+ "datacite_name": "SHARE - ERIC",
+ "official_name": "Survey of Health, Ageing and Retirement in Europe"
+ },
+ "ANDS.CENTRE-1": {
+ "openaire_id": "re3data_____::r3d100010864",
+ "datacite_name": "Griffith University",
+ "official_name": "Griffith University Research Data Repository"
+ },
+ "BL.READING": {
+ "openaire_id": "re3data_____::r3d100012064",
+ "datacite_name": "University of Reading",
+ "official_name": "University of Reading Research Data Archive"
+ },
+ "CORNELL.CISER": {
+ "openaire_id": "re3data_____::r3d100011056",
+ "datacite_name": "CISER Data Archive",
+ "official_name": "CISER Data Archive"
+ },
+ "DRYAD.DRYAD": {
+ "openaire_id": "re3data_____::r3d100000044",
+ "datacite_name": "DRYAD",
+ "official_name": "DRYAD"
+ },
+ "CDL.PISCO": {
+ "openaire_id": "re3data_____::r3d100010947",
+ "datacite_name": "Partnership for Interdisciplinary Studies of Coastal Oceans (PISCO)",
+ "official_name": "Partnership for Interdisciplinary Studies of Coastal Oceans"
+ },
+ "IEEE.DATAPORT": {
+ "openaire_id": "re3data_____::r3d100012569",
+ "datacite_name": "IEEE DataPort",
+ "official_name": "IEEE DataPort"
+ },
+ "DELFT.MAASTRO": {
+ "openaire_id": "re3data_____::r3d100011086",
+ "datacite_name": "MAASTRO Clinic",
+ "official_name": "CancerData.org"
+ },
+ "USGS.PROD": {
+ "openaire_id": "re3data_____::r3d100010054",
+ "datacite_name": "USGS DOI Tool Production Environment",
+ "official_name": "U.S. Geological Survey"
+ },
+ "GDCC.ODUM-DV": {
+ "openaire_id": "re3data_____::r3d100000005",
+ "datacite_name": "Odum Institute Dataverse",
+ "official_name": "UNC Dataverse"
+ },
+ "CDL.SDSCSG": {
+ "openaire_id": "re3data_____::r3d100011690",
+ "datacite_name": "UCSD Signaling Gateway",
+ "official_name": "UCSD Signaling gateway"
+ },
+ "ORBIS.NKN": {
+ "openaire_id": "re3data_____::r3d100011587",
+ "datacite_name": "Northwest Knowledge Network",
+ "official_name": "Northwest Knowledge Network"
+ },
+ "ANDS.CENTRE63": {
+ "openaire_id": "re3data_____::r3d100010918",
+ "datacite_name": "Test: Atlas of Living Australia",
+ "official_name": "Atlas of Living Australia",
+ "similarity": 0.8928571428571429
+ },
+ "SML.TALKBANK": {
+ "openaire_id": "re3data_____::r3d100010887",
+ "datacite_name": "TalkBank",
+ "official_name": "TalkBank"
+ },
+ "CORNELL.LIBRARY": {
+ "openaire_id": "re3data_____::r3d100012322",
+ "datacite_name": "Cornell University Library",
+ "official_name": "eCommons - Cornell's digital repository"
+ },
+ "BL.SOTON": {
+ "openaire_id": "re3data_____::r3d100011245",
+ "datacite_name": "University of Southampton",
+ "official_name": "University of Southampton Institutional Research Repository"
+ },
+ "GESIS.DB-BANK": {
+ "openaire_id": "re3data_____::r3d100012252",
+ "datacite_name": "Forschungsdaten- und Servicezentrum der Bundesbank",
+ "official_name": "Forschungsdaten- und Servicezentrum der Bundesbank"
+ },
+ "ANDS.CENTRE68": {
+ "openaire_id": "re3data_____::r3d100010918",
+ "datacite_name": "Atlas of Living Australia",
+ "official_name": "Atlas of Living Australia"
+ },
+ "ANDS.CENTRE69": {
+ "openaire_id": "re3data_____::r3d100010914",
+ "datacite_name": "Australian Ocean Data Network",
+ "official_name": "Australian Ocean Data Network Portal"
+ },
+ "INIST.CDS": {
+ "openaire_id": "re3data_____::r3d100010584",
+ "datacite_name": "Strasbourg Astronomical Data Center",
+ "official_name": "Strasbourg Astronomical Data Center"
+ },
+ "BL.NHM": {
+ "openaire_id": "re3data_____::r3d100011675",
+ "datacite_name": "Natural History Museum, London",
+ "official_name": "Natural History Museum, Data Portal"
+ },
+ "BL.ADS": {
+ "openaire_id": "re3data_____::r3d100000006",
+ "datacite_name": "Archaeology Data Service",
+ "official_name": "Archaeology Data Service"
+ },
+ "GDCC.JHU": {
+ "openaire_id": "re3data_____::r3d100011836",
+ "datacite_name": "Johns Hopkins University Library",
+ "official_name": "Johns Hopkins Data Archive Dataverse Network"
+ },
+ "BL.ED": {
+ "openaire_id": "re3data_____::r3d100000047",
+ "datacite_name": "University of Edinburgh",
+ "official_name": "Edinburgh DataShare"
+ },
+ "BL.EXETER": {
+ "openaire_id": "re3data_____::r3d100011202",
+ "datacite_name": "University of Exeter",
+ "official_name": "Open Research Exeter"
+ },
+ "BL.NCL": {
+ "openaire_id": "re3data_____::r3d100012408",
+ "datacite_name": "Newcastle University",
+ "official_name": "NCL Data"
+ },
+ "BROWN.BDR": {
+ "openaire_id": "re3data_____::r3d100011654",
+ "datacite_name": "Brown Digital Repository",
+ "official_name": "Brown Digital Repository"
+ },
+ "GDCC.SYR-QDR": {
+ "openaire_id": "re3data_____::r3d100011038",
+ "datacite_name": "Syracuse University Qualitative Data Repository",
+ "official_name": "Qualitative Data Repository"
+ },
+ "BL.BRISTOL": {
+ "openaire_id": "re3data_____::r3d100011099",
+ "datacite_name": "University of Bristol",
+ "official_name": "data.bris Research Data Repository"
+ },
+ "DATACITE.DATACITE": {
+ "openaire_id": "openaire____::datacite",
+ "datacite_name": "DataCite",
+ "official_name": "Datacite"
+ },
+ "ESTDOI.KEEL": {
+ "openaire_id": "re3data_____::r3d100011941",
+ "datacite_name": "Keeleressursid. The Center of Estonian Language Resources",
+ "official_name": "Center of Estonian Language Resources"
+ },
+ "BL.ESSEX": {
+ "openaire_id": "re3data_____::r3d100012405",
+ "datacite_name": "University of Essex",
+ "official_name": "Research Data at Essex"
+ },
+ "PURDUE.MDF": {
+ "openaire_id": "re3data_____::r3d100012080",
+ "datacite_name": "Univ Chicago Materials Data Facility",
+ "official_name": "Materials Data Facility"
+ },
+ "DELFT.KNMI": {
+ "openaire_id": "re3data_____::r3d100011879",
+ "datacite_name": "KNMI Data Centre",
+ "official_name": "KNMI Data Centre"
+ },
+ "CUL.CIESIN": {
+ "openaire_id": "re3data_____::r3d100010207",
+ "datacite_name": "Center for International Earth Science Information Network",
+ "official_name": "Center for International Earth Science Information Network"
+ },
+ "WISC.NEOTOMA": {
+ "openaire_id": "re3data_____::r3d100011761",
+ "datacite_name": "Neotoma Paleoecological Database",
+ "official_name": "Neotoma Paleoecology Database",
+ "similarity": 0.9180327868852459
+ },
+ "IRIS.IRIS": {
+ "openaire_id": "re3data_____::r3d100010268",
+ "datacite_name": "Incorporated Research Institutions for Seismology",
+ "official_name": "Incorporated Research Institutions for Seismology"
+ },
+ "ANDS.CENTRE50": {
+ "openaire_id": "re3data_____::r3d100012378",
+ "datacite_name": "Analysis and Policy Observatory",
+ "official_name": "Analysis and Policy Observatory"
+ },
+ "FAO.RING": {
+ "openaire_id": "re3data_____::r3d100012571",
+ "datacite_name": "CIARD RING",
+ "official_name": "CIARD Ring"
+ },
+ "CUL.R2R": {
+ "openaire_id": "re3data_____::r3d100010735",
+ "datacite_name": "Rolling Deck to Repository",
+ "official_name": "Rolling Deck to Repository"
+ },
+ "DEMO.GRIIDC": {
+ "openaire_id": "re3data_____::r3d100011571",
+ "datacite_name": "Gulf of Mexico Research Initiative Information and Data Cooperative",
+ "official_name": "Gulf of Mexico Research Initiative Information and Data Cooperative"
+ },
+ "ANDS.CENTRE-6": {
+ "openaire_id": "re3data_____::r3d100012268",
+ "datacite_name": "Curtin University",
+ "official_name": "Curtin University Research Data Collection"
+ },
+ "ANDS.CENTRE-5": {
+ "openaire_id": "re3data_____::r3d100012013",
+ "datacite_name": "TERN Central Portal",
+ "official_name": "TERN Data Discovery portal"
+ },
+ "FIGSHARE.UCT": {
+ "openaire_id": "re3data_____::r3d100012633",
+ "datacite_name": "University of Cape Town (UCT)",
+ "official_name": "ZivaHub"
+ },
+ "BIBSYS.UIT-ORD": {
+ "openaire_id": "re3data_____::r3d100012538",
+ "datacite_name": "DataverseNO",
+ "official_name": "DataverseNO"
+ },
+ "CISTI.CADC": {
+ "openaire_id": "re3data_____::r3d100000016",
+ "datacite_name": "Canadian Astronomy Data Centre",
+ "official_name": "The Canadian Astronomy Data Centre",
+ "similarity": 0.9375
+ },
+ "BL.CCDC": {
+ "openaire_id": "re3data_____::r3d100010197",
+ "datacite_name": "The Cambridge Crystallographic Data Centre",
+ "official_name": "The Cambridge Structural Database"
+ },
+ "BL.UCLD": {
+ "openaire_id": "re3data_____::r3d100012417",
+ "datacite_name": "University College London",
+ "official_name": "UCL Discovery"
+ },
+ "GESIS.RKI": {
+ "openaire_id": "re3data_____::r3d100010436",
+ "datacite_name": "'Health Monitoring' Research Data Centre at the Robert Koch Institute",
+ "official_name": "'Health Monitoring' Research Data Centre at the Robert Koch Institute"
+ },
+ "BL.DRI": {
+ "openaire_id": "re3data_____::r3d100011805",
+ "datacite_name": "Digital Repository of Ireland",
+ "official_name": "Digital Repository of Ireland"
+ },
+ "TIB.KIT-IMK": {
+ "openaire_id": "re3data_____::r3d100011956",
+ "datacite_name": "Institute for Meteorology and Climate Research - Atmospheric Trace Gases and Remote Sensing",
+ "official_name": "CARIBIC"
+ },
+ "DOINZ.LANDCARE": {
+ "openaire_id": "re3data_____::r3d100011662",
+ "datacite_name": "Landcare Research New Zealand Ltd",
+ "official_name": "Landcare Research Data Repository"
+ },
+ "DEMO.EMORY": {
+ "openaire_id": "re3data_____::r3d100011559",
+ "datacite_name": "The Cancer Imaging Archive",
+ "official_name": "The Cancer Imaging Archive"
+ },
+ "UMN.DRUM": {
+ "openaire_id": "re3data_____::r3d100011393",
+ "datacite_name": "Data Repository for the University of Minnesota",
+ "official_name": "Data Repository for the University of Minnesota"
+ },
+ "CISTI.SFU": {
+ "openaire_id": "re3data_____::r3d100012512",
+ "datacite_name": "Simon Fraser University",
+ "official_name": "SFU Radar"
+ },
+ "GESIS.ICPSR": {
+ "openaire_id": "re3data_____::r3d100010255",
+ "datacite_name": "ICPSR",
+ "official_name": "Inter-university Consortium for Political and Social Research"
+ },
+ "ANDS.CENTRE49": {
+ "openaire_id": "re3data_____::r3d100012145",
+ "datacite_name": "The University of Melbourne",
+ "official_name": "melbourne.figshare.com"
+ },
+ "ZBW.IFO": {
+ "openaire_id": "re3data_____::r3d100010201",
+ "datacite_name": "LMU-ifo Economics & Business Data Center",
+ "official_name": "LMU-ifo Economics & Business Data Center"
+ },
+ "TIB.BEILST": {
+ "openaire_id": "re3data_____::r3d100012329",
+ "datacite_name": "Beilstein-Institut zur F\u00f6rderung der Chemischen Wissenschaften",
+ "official_name": "STRENDA DB"
+ },
+ "ZBW.ZBW-JDA": {
+ "openaire_id": "re3data_____::r3d100012190",
+ "datacite_name": "ZBW Journal Data Archive",
+ "official_name": "ZBW Journal Data Archive"
+ },
+ "BL.UKDA": {
+ "openaire_id": "re3data_____::r3d100010215",
+ "datacite_name": "UK Data Archive",
+ "official_name": "UK Data Archive"
+ },
+ "CERN.INSPIRE": {
+ "openaire_id": "re3data_____::r3d100011077",
+ "datacite_name": "inspirehep.net",
+ "official_name": "Inspire-HEP"
+ },
+ "CISTI.OTNDC": {
+ "openaire_id": "re3data_____::r3d100012083",
+ "datacite_name": "Ocean Tracking Network",
+ "official_name": "Ocean Tracking Network"
+ },
+ "CISTI.CC": {
+ "openaire_id": "re3data_____::r3d100012646",
+ "datacite_name": "Compute Canada",
+ "official_name": "Federated Research Data Repository"
+ },
+ "SND.ICOS": {
+ "openaire_id": "re3data_____::r3d100012203",
+ "datacite_name": "ICOS Carbon Portal",
+ "official_name": "ICOS Carbon Portal"
+ },
+ "BL.MENDELEY": {
+ "openaire_id": "re3data_____::r3d100011868",
+ "datacite_name": "Mendeley",
+ "official_name": "Mendeley Data"
+ },
+ "DELFT.UU": {
+ "openaire_id": "re3data_____::r3d100011201",
+ "datacite_name": "Universiteit Utrecht",
+ "official_name": "DataverseNL"
+ },
+ "GESIS.DSZ-BO": {
+ "openaire_id": "re3data_____::r3d100010439",
+ "datacite_name": "Data Service Center for Business and Organizational Data",
+ "official_name": "Data Service Center for Business and Organizational Data"
+ },
+ "TIB.IPK": {
+ "openaire_id": "re3data_____::r3d100011647",
+ "datacite_name": "IPK Gatersleben",
+ "official_name": "IPK Gatersleben"
+ },
+ "GDCC.HARVARD-DV": {
+ "openaire_id": "re3data_____::r3d100010051",
+ "datacite_name": "Harvard IQSS Dataverse",
+ "official_name": "Harvard Dataverse"
+ },
+ "BL.LEEDS": {
+ "openaire_id": "re3data_____::r3d100011945",
+ "datacite_name": "University of Leeds",
+ "official_name": "Research Data Leeds Repository"
+ },
+ "BL.BRUNEL": {
+ "openaire_id": "re3data_____::r3d100012140",
+ "datacite_name": "Brunel University London",
+ "official_name": "Brunel figshare"
+ },
+ "DEMO.ENVIDAT": {
+ "openaire_id": "re3data_____::r3d100012587",
+ "datacite_name": "EnviDat",
+ "official_name": "EnviDat"
+ },
+ "GDCC.NTU": {
+ "openaire_id": "re3data_____::r3d100012440",
+ "datacite_name": "Nanyang Technological University",
+ "official_name": "DR-NTU (Data)"
+ },
+ "UNM.DATAONE": {
+ "openaire_id": "re3data_____::r3d100000045",
+ "datacite_name": "DataONE",
+ "official_name": "DataONE"
+ },
+ "CSC.NRD": {
+ "openaire_id": "re3data_____::r3d100012157",
+ "datacite_name": "Ministry of Culture and Education",
+ "official_name": "IDA Research Data Storage Service"
+ },
+ "GESIS.DIPF": {
+ "openaire_id": "re3data_____::r3d100010390",
+ "datacite_name": "Research Data Centre for Education",
+ "official_name": "Research Data Centre for Education"
+ },
+ "BL.HALLAM": {
+ "openaire_id": "re3data_____::r3d100011909",
+ "datacite_name": "Sheffield Hallam University",
+ "official_name": "Sheffield Hallam University Research Data Archive"
+ },
+ "BL.LSHTM": {
+ "openaire_id": "re3data_____::r3d100011800",
+ "datacite_name": "London School of Hygiene and Tropical Medicine",
+ "official_name": "LSHTM Data Compass"
+ },
+ "SUBGOE.DARIAH": {
+ "openaire_id": "re3data_____::r3d100011345",
+ "datacite_name": "Digital Research Infrastructure for the Arts and Humanities",
+ "official_name": "DARIAH-DE Repository"
+ },
+ "SND.SU": {
+ "openaire_id": "re3data_____::r3d100012147",
+ "datacite_name": "Stockholm University",
+ "official_name": "Stockholm University repository for data"
+ },
+ "GESIS.INDEPTH": {
+ "openaire_id": "re3data_____::r3d100011392",
+ "datacite_name": "INDEPTH Network",
+ "official_name": "INDEPTH Data Repository"
+ },
+ "TIB.FLOSS": {
+ "openaire_id": "re3data_____::r3d100010863",
+ "datacite_name": "FLOSS Project, Syracuse University",
+ "official_name": "FLOSSmole"
+ },
+ "ETHZ.WGMS": {
+ "openaire_id": "re3data_____::r3d100010627",
+ "datacite_name": "World Glacier Monitoring Service",
+ "official_name": "World Glacier Monitoring Service"
+ },
+ "BL.UEL": {
+ "openaire_id": "re3data_____::r3d100012414",
+ "datacite_name": "University of East London",
+ "official_name": "Data.uel"
+ },
+ "DELFT.DATA4TU": {
+ "openaire_id": "re3data_____::r3d100010216",
+ "datacite_name": "4TU.Centre for Research Data",
+ "official_name": "4TU.Centre for Research Data"
+ },
+ "GESIS.IANUS": {
+ "openaire_id": "re3data_____::r3d100012361",
+ "datacite_name": "IANUS - FDZ Arch\u00e4ologie & Altertumswissenschaften",
+ "official_name": "IANUS Datenportal"
+ },
+ "CDL.UCSDCCA": {
+ "openaire_id": "re3data_____::r3d100011655",
+ "datacite_name": "California Coastal Atlas",
+ "official_name": "California Coastal Atlas"
+ },
+ "VIVA.VT": {
+ "openaire_id": "re3data_____::r3d100012601",
+ "datacite_name": "Virginia Tech",
+ "official_name": "VTechData"
+ },
+ "ANDS.CENTRE39": {
+ "openaire_id": "re3data_____::r3d100011640",
+ "datacite_name": "University of the Sunshine Coast",
+ "official_name": "USC Research Bank research data"
+ },
+ "DEMO.OPENKIM": {
+ "openaire_id": "re3data_____::r3d100011864",
+ "datacite_name": "OpenKIM",
+ "official_name": "OpenKIM"
+ },
+ "INIST.OTELO": {
+ "openaire_id": "re3data_____::r3d100012505",
+ "datacite_name": "Observatoire Terre Environnement de Lorraine",
+ "official_name": "ORDaR"
+ },
+ "INIST.ILL": {
+ "openaire_id": "re3data_____::r3d100012072",
+ "datacite_name": "Institut Laue-Langevin",
+ "official_name": "ILL Data Portal"
+ },
+ "ANDS.CENTRE31": {
+ "openaire_id": "re3data_____::r3d100012378",
+ "datacite_name": "Test: Analysis and Policy Observatory",
+ "official_name": "Analysis and Policy Observatory",
+ "similarity": 0.9117647058823529
+ },
+ "ANDS.CENTRE30": {
+ "openaire_id": "re3data_____::r3d100010917",
+ "datacite_name": "Test: Geoscience Australia",
+ "official_name": "Geoscience Australia",
+ "similarity": 0.8695652173913043
+ },
+ "BL.SALFORD": {
+ "openaire_id": "re3data_____::r3d100012144",
+ "datacite_name": "University of Salford",
+ "official_name": "University of Salford Data Repository"
+ },
+ "CERN.HEPDATA": {
+ "openaire_id": "re3data_____::r3d100010081",
+ "datacite_name": "HEPData.net",
+ "official_name": "HEPData"
+ },
+ "ETHZ.E-COLL": {
+ "openaire_id": "re3data_____::r3d100012557",
+ "datacite_name": "ETH Z\u00fcrich Research Collection",
+ "official_name": "ETH Z\u00fcrich Research Collection"
+ },
+ "GBIF.GBIF": {
+ "openaire_id": "re3data_____::r3d100000039",
+ "datacite_name": "Global Biodiversity Information Facility",
+ "official_name": "Global Biodiversity Information Facility"
+ },
+ "ORNLDAAC.DAAC": {
+ "openaire_id": "re3data_____::r3d100000037",
+ "datacite_name": "Oak Ridge National Laboratory Distributed Active Archive Center",
+ "official_name": "Oak Ridge National Laboratory Distributed Active Archive Center for Biogeochemical Dynamics"
+ },
+ "KAUST.KAUSTREPO": {
+ "openaire_id": "re3data_____::r3d100011898",
+ "datacite_name": "KAUST Research Repository",
+ "official_name": "UWA Research Repository",
+ "similarity": 0.875
+ },
+ "ZBW.ZEW": {
+ "openaire_id": "re3data_____::r3d100010399",
+ "datacite_name": "Zentrum f\u00fcr Europ\u00e4ische Wirtschaftsforschung GmbH (ZEW)",
+ "official_name": "ZEW Forschungsdatenzentrum"
+ },
+ "SML.TDAR": {
+ "openaire_id": "re3data_____::r3d100010347",
+ "datacite_name": "Digital Antiquity (TDAR)",
+ "official_name": "tDAR"
+ },
+ "GESIS.CSDA": {
+ "openaire_id": "re3data_____::r3d100010484",
+ "datacite_name": "Czech Social Science Data Archive",
+ "official_name": "Czech Social Science Data Archive"
+ },
+ "SND.BOLIN": {
+ "openaire_id": "re3data_____::r3d100011699",
+ "datacite_name": "Bolin Centre Database",
+ "official_name": "Bolin Centre Database"
+ },
+ "MLA.HC": {
+ "openaire_id": "re3data_____::r3d100012309",
+ "datacite_name": "Humanities Commons",
+ "official_name": "Humanities Commons"
+ },
+ "CDL.IDASHREP": {
+ "openaire_id": "re3data_____::r3d100010382",
+ "datacite_name": "iDASH Repository",
+ "official_name": "IDS Repository",
+ "similarity": 0.8666666666666667
+ },
+ "ZBMED.SNSB": {
+ "openaire_id": "re3data_____::r3d100011873",
+ "datacite_name": "Staatliche Naturwissenschaftliche Sammlungen Bayerns",
+ "official_name": "Staatliche Naturwissenschaftliche Sammlungen Bayerns - datasets",
+ "similarity": 0.9043478260869565
+ },
+ "ORBIS.OHSU": {
+ "openaire_id": "re3data_____::r3d100012244",
+ "datacite_name": "Oregon Health Sciences University",
+ "official_name": "OHSU Digital Commons"
+ },
+ "DARTLIB.CRAWDAD": {
+ "openaire_id": "re3data_____::r3d100010716",
+ "datacite_name": "CRAWDAD",
+ "official_name": "CRAWDAD"
+ },
+ "CDL.CCHDO": {
+ "openaire_id": "re3data_____::r3d100010831",
+ "datacite_name": "CLIVAR and Carbon Hydrographic Data Office",
+ "official_name": "Climate Variability and Predictability and Carbon Hydrographic Data Office"
+ },
+ "GESIS.AUSSDA": {
+ "openaire_id": "re3data_____::r3d100010483",
+ "datacite_name": "Austrian Social Science Data Archive",
+ "official_name": "AUSSDA"
+ },
+ "NSIDC.DATACTR": {
+ "openaire_id": "re3data_____::r3d100010110",
+ "datacite_name": "National Snow and Ice Data Center",
+ "official_name": "National Snow and Ice Data Center"
+ },
+ "TIB.RADAR": {
+ "openaire_id": "re3data_____::r3d100012330",
+ "datacite_name": "FIZ Karlsruhe \u2013 Leibniz-Institut f\u00fcr Informationsinfrastruktur",
+ "official_name": "RADAR"
+ },
+ "KIM.OPENKIM": {
+ "openaire_id": "re3data_____::r3d100011864",
+ "datacite_name": "Open Knowledgebase of Interatomic Models (OpenKIM)",
+ "official_name": "OpenKIM"
+ },
+ "BL.LBORO": {
+ "openaire_id": "re3data_____::r3d100012143",
+ "datacite_name": "Loughborough University",
+ "official_name": "Loughborough Data Repository"
+ },
+ "GESIS.ZPID": {
+ "openaire_id": "re3data_____::r3d100010328",
+ "datacite_name": "GESIS.ZPID",
+ "official_name": "PsychData"
+ },
+ "SML.TCIA": {
+ "openaire_id": "re3data_____::r3d100011559",
+ "datacite_name": "The Cancer Imaging Archive",
+ "official_name": "The Cancer Imaging Archive"
+ },
+ "CDL.IRIS": {
+ "openaire_id": "re3data_____::r3d100010268",
+ "datacite_name": "Incorporated Research Institutions for Seismology",
+ "official_name": "Incorporated Research Institutions for Seismology"
+ },
+ "BIBSYS.NMDC": {
+ "openaire_id": "re3data_____::r3d100012291",
+ "datacite_name": "Norwegian Marine Data Centre",
+ "official_name": "Norwegian Polar Data Centre",
+ "similarity": 0.8727272727272727
+ },
+ "ANDS.CENTRE25": {
+ "openaire_id": "re3data_____::r3d100010917",
+ "datacite_name": "Geoscience Australia",
+ "official_name": "Geoscience Australia"
+ },
+ "BL.UCLAN": {
+ "openaire_id": "re3data_____::r3d100012019",
+ "datacite_name": "University of Central Lancashire",
+ "official_name": "UCLanData"
+ },
+ "ANDS.CENTRE23": {
+ "openaire_id": "re3data_____::r3d100011898",
+ "datacite_name": "The University of Western Australia",
+ "official_name": "UWA Research Repository"
+ },
+ "CISTI.WOUDC": {
+ "openaire_id": "re3data_____::r3d100010367",
+ "datacite_name": "World Ozone and Ultraviolet Radiation Data Centre",
+ "official_name": "World Ozone and Ultraviolet Radiation Data Centre"
+ },
+ "FIGSHARE.ARS": {
+ "openaire_id": "re3data_____::r3d100010066",
+ "datacite_name": "figshare Academic Research System",
+ "official_name": "figshare"
+ },
+ "ILLINOIS.DATABANK": {
+ "openaire_id": "re3data_____::r3d100012001",
+ "datacite_name": "Illinois Data Bank",
+ "official_name": "Illinois Data Bank"
+ },
+ "BL.ECMWF": {
+ "openaire_id": "re3data_____::r3d100011726",
+ "datacite_name": "European Centre for Medium-Range Weather Forecasts",
+ "official_name": "European Centre for Medium-Range Weather Forecasts"
+ },
+ "CDL.ISSDA": {
+ "openaire_id": "re3data_____::r3d100010497",
+ "datacite_name": "Irish Social Science Data Archive (ISSDA)",
+ "official_name": "Irish Social Science Data Archive"
+ },
+ "CDL.PQR": {
+ "openaire_id": "re3data_____::r3d100012225",
+ "datacite_name": "Pitt Quantum Repository",
+ "official_name": "Pitt Quantum Repository"
+ },
+ "ANDS.CENTRE82": {
+ "openaire_id": "re3data_____::r3d100010138",
+ "datacite_name": "Test: Australian Data Archive",
+ "official_name": "Australian Data Archive",
+ "similarity": 0.8846153846153846
+ },
+ "GDCC.HARVARD-SLP": {
+ "openaire_id": "re3data_____::r3d100011861",
+ "datacite_name": "National Sleep Research Resource",
+ "official_name": "National Sleep Research Resource"
+ },
+ "CDL.IMMPORT": {
+ "openaire_id": "re3data_____::r3d100012529",
+ "datacite_name": "UCSF ImmPort",
+ "official_name": "ImmPort"
+ },
+ "GESIS.FID": {
+ "openaire_id": "re3data_____::r3d100012347",
+ "datacite_name": "FID f\u00fcr internationale und interdisziplin\u00e4re Rechtsforschung",
+ "official_name": "\u00b2Dok[\u00a7]"
+ },
+ "OCEAN.OCEAN": {
+ "openaire_id": "re3data_____::r3d100012369",
+ "datacite_name": "Code Ocean",
+ "official_name": "Code Ocean"
+ },
+ "CERN.ZENODO": {
+ "openaire_id": "re3data_____::r3d100010468",
+ "datacite_name": "Zenodo",
+ "official_name": "Zenodo"
+ },
+ "ETHZ.DA-RD": {
+ "openaire_id": "re3data_____::r3d100011626",
+ "datacite_name": "ETHZ Data Archive - Research Data",
+ "official_name": "ETH Data Archive"
+ },
+ "SND.ECDS": {
+ "openaire_id": "re3data_____::r3d100011000",
+ "datacite_name": "Environment Climate Data Sweden",
+ "official_name": "Environment Climate Data Sweden"
+ },
+ "BL.BATH": {
+ "openaire_id": "re3data_____::r3d100011947",
+ "datacite_name": "University of Bath",
+ "official_name": "University of Bath Research Data Archive"
+ },
+ "TIB.LDEO": {
+ "openaire_id": "re3data_____::r3d100012547",
+ "datacite_name": "LDEO - Lamont-Doherty Earth Observatory, Columbia University",
+ "official_name": "Lamont-Doherty Core Repository"
+ },
+ "COS.OSF": {
+ "openaire_id": "re3data_____::r3d100011137",
+ "datacite_name": "Open Science Framework",
+ "official_name": "Open Science Framework"
+ },
+ "ESTDOI.REPO": {
+ "openaire_id": "re3data_____::r3d100012333",
+ "datacite_name": "DataDOI",
+ "official_name": "DataDOI"
+ },
+ "CDL.NSFADC": {
+ "openaire_id": "re3data_____::r3d100011973",
+ "datacite_name": "NSF Arctic Data Center",
+ "official_name": "NSF Arctic Data Center"
+ },
+ "ANDS.CENTRE13": {
+ "openaire_id": "re3data_____::r3d100010477",
+ "datacite_name": "The Australian National University",
+ "official_name": "ANU Data Commons"
+ },
+ "BL.NERC": {
+ "openaire_id": "re3data_____::r3d100010199",
+ "datacite_name": "Natural Environment Research Council",
+ "official_name": "Environmental Information Data Centre"
+ },
+ "SAGEBIO.SYNAPSE": {
+ "openaire_id": "re3data_____::r3d100011894",
+ "datacite_name": "Synapse",
+ "official_name": "Synapse"
+ },
+ "ANDS.CENTRE15": {
+ "openaire_id": "re3data_____::r3d100000038",
+ "datacite_name": "Australian Antarctic Division",
+ "official_name": "Australian Antarctic Data Centre"
+ },
+ "WISC.BMRB": {
+ "openaire_id": "re3data_____::r3d100010191",
+ "datacite_name": "Biological Magnetic Resonance Bank",
+ "official_name": "Biological Magnetic Resonance Data Bank",
+ "similarity": 0.9315068493150684
+ },
+ "STSCI.MAST": {
+ "openaire_id": "re3data_____::r3d100010403",
+ "datacite_name": "Barbara A. Mikulski Archive for Space Telescopes",
+ "official_name": "Barbara A. Mikulski Archive for Space Telescopes"
+ },
+ "CDL.NSIDC": {
+ "openaire_id": "re3data_____::r3d100010110",
+ "datacite_name": "National Snow and Ice Data Center",
+ "official_name": "National Snow and Ice Data Center"
+ },
+ "BL.STRATH": {
+ "openaire_id": "re3data_____::r3d100012412",
+ "datacite_name": "University of Strathclyde",
+ "official_name": "University of Strathclyde KnowledgeBase Datasets"
+ },
+ "DEMO.TDAR": {
+ "openaire_id": "re3data_____::r3d100010347",
+ "datacite_name": "The Digital Archaeological Record (tDAR)",
+ "official_name": "tDAR"
+ },
+ "TIND.CALTECH": {
+ "openaire_id": "re3data_____::r3d100012384",
+ "datacite_name": "CaltechDATA",
+ "official_name": "CaltechDATA"
+ },
+ "GESIS.BIBB-FDZ": {
+ "openaire_id": "re3data_____::r3d100010190",
+ "datacite_name": "Forschungsdatenzentrum im Bundesinstitut f\u00fcr Berufsbildung",
+ "official_name": "Forschungsdatenzentrum im Bundesinstitut f\u00fcr Berufsbildung"
+ },
+ "ANDS.CENTRE87": {
+ "openaire_id": "re3data_____::r3d100010138",
+ "datacite_name": "Australian Data Archive",
+ "official_name": "Australian Data Archive"
+ },
+ "GESIS.NEPS": {
+ "openaire_id": "re3data_____::r3d100010736",
+ "datacite_name": "Nationales Bildungspanel (National Educational Panel Study, NEPS)",
+ "official_name": "Nationales Bildungspanel"
+ },
+ "CDL.UCBCRCNS": {
+ "openaire_id": "re3data_____::r3d100011269",
+ "datacite_name": "Collaborative Research in Computational Neuroscience (CRCNS)",
+ "official_name": "Collaborative Research in Computational Neuroscience"
+ },
+ "TIB.UKON": {
+ "openaire_id": "re3data_____::r3d100010469",
+ "datacite_name": "Movebank",
+ "official_name": "Movebank"
+ },
+ "UMN.IPUMS": {
+ "openaire_id": "re3data_____::r3d100010794",
+ "datacite_name": "Minnesota Population Center",
+ "official_name": "Minnesota Population Center"
+ },
+ "TIB.BIKF": {
+ "openaire_id": "re3data_____::r3d100012379",
+ "datacite_name": "Senckenberg Data & Metadata Repository",
+ "official_name": "Senckenberg Data & Metadata Repository"
+ },
+ "TDL.GRIIDC": {
+ "openaire_id": "re3data_____::r3d100011571",
+ "datacite_name": "Gulf of Mexico Research Initiative Information and Data Cooperative",
+ "official_name": "Gulf of Mexico Research Initiative Information and Data Cooperative"
+ },
+ "DELFT.NIBG": {
+ "openaire_id": "re3data_____::r3d100012167",
+ "datacite_name": "Sound and Vision",
+ "official_name": "Sound and Vision"
+ },
+ "BL.SURREY": {
+ "openaire_id": "re3data_____::r3d100012232",
+ "datacite_name": "University of Surrey",
+ "official_name": "Surrey Research Insight"
+ },
+ "OSTI.ORNLNGEE": {
+ "openaire_id": "re3data_____::r3d100011676",
+ "datacite_name": "NGEE-Arctic (Next Generation Ecosystems Experiement)",
+ "official_name": "NGEE Arctic"
+ },
+ "TIB.WDCRSAT": {
+ "openaire_id": "re3data_____::r3d100010156",
+ "datacite_name": "World Data Center for Remote Sensing of the Atmosphere",
+ "official_name": "The World Data Center for Remote Sensing of the Atmosphere",
+ "similarity": 0.9642857142857143
+ },
+ "ZBMED.DSMZ": {
+ "openaire_id": "re3data_____::r3d100010219",
+ "datacite_name": "DSMZ",
+ "official_name": "DSMZ"
+ },
+ "DOINZ.NZAU": {
+ "openaire_id": "re3data_____::r3d100012110",
+ "datacite_name": "University of Auckland Data Publishing and Discovery Service",
+ "official_name": "University of Auckland Data Repository"
+ },
+ "INIST.RESIF": {
+ "openaire_id": "re3data_____::r3d100012222",
+ "datacite_name": "R\u00e9seau sismologique et g\u00e9od\u00e9sique fran\u00e7ais",
+ "official_name": "RESIF Seismic Data Portal"
+ },
+ "CDL.NCEAS": {
+ "openaire_id": "re3data_____::r3d100010093",
+ "datacite_name": "National Center for Ecological Analysis and Synthesis (NCEAS)",
+ "official_name": "National Center for Ecological Analysis and Synthesis Data Repository"
+ },
+ "ZBMED.EMP": {
+ "openaire_id": "re3data_____::r3d100010234",
+ "datacite_name": "eyeMoviePedia",
+ "official_name": "eyeMoviePedia"
+ },
+ "ZBMED.BIOFRESH": {
+ "openaire_id": "re3data_____::r3d100011651",
+ "datacite_name": "Project BioFresh, Leibniz-Institute of Freshwater Ecology and Inland Fisheries",
+ "official_name": "Freshwater Biodiversity Data Portal"
+ },
+ "INIST.IFREMER": {
+ "openaire_id": "re3data_____::r3d100011867",
+ "datacite_name": "Institut Fran\u00e7ais de Recherche pour l'Exploitation de la Mer",
+ "official_name": "SEANOE"
+ },
+ "ETHZ.SICAS": {
+ "openaire_id": "re3data_____::r3d100011560",
+ "datacite_name": "SICAS",
+ "official_name": "Sicas Medical Image Repository"
+ },
+ "SND.SND": {
+ "openaire_id": "re3data_____::r3d100010146",
+ "datacite_name": "Swedish National Data Service",
+ "official_name": "Swedish National Data Service"
+ },
+ "DELFT.EASY": {
+ "openaire_id": "re3data_____::r3d100011201",
+ "datacite_name": "DANS",
+ "official_name": "DataverseNL"
+ },
+ "WH.WHOAS": {
+ "openaire_id": "re3data_____::r3d100010423",
+ "datacite_name": "Woods Hole Open Access Server",
+ "official_name": "Woods Hole Open Access Server"
+ },
+ "DATACITE.UCSC": {
+ "openaire_id": "re3data_____::r3d100010243",
+ "datacite_name": "UCSC Genome Browser",
+ "official_name": "UCSC Genome Browser"
+ }
+}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/import_from_api.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/import_from_api.json
new file mode 100644
index 0000000000..967e4445a6
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/import_from_api.json
@@ -0,0 +1,27 @@
+[
+ {
+ "paramName": "t",
+ "paramLongName": "targetPath",
+    "paramDescription": "the path of the sequential file to write",
+ "paramRequired": true
+ },
+
+ {
+ "paramName": "d",
+ "paramLongName": "dataciteDumpPath",
+ "paramDescription": "the path of the Datacite dump",
+ "paramRequired": true
+ },
+ {
+ "paramName": "n",
+ "paramLongName": "namenode",
+    "paramDescription": "the namenode URI",
+ "paramRequired": true
+ },
+ {
+ "paramName": "m",
+ "paramLongName": "master",
+    "paramDescription": "the Spark master (e.g. yarn-cluster)",
+ "paramRequired": true
+ }
+]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/config-default.xml
new file mode 100644
index 0000000000..2e0ed9aeea
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/config-default.xml
@@ -0,0 +1,18 @@
+
+
+ jobTracker
+ yarnRM
+
+
+ nameNode
+ hdfs://nameservice1
+
+
+ oozie.use.system.libpath
+ true
+
+
+ oozie.action.sharelib.for.spark
+ spark2
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml
new file mode 100644
index 0000000000..a3caa5e23d
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml
@@ -0,0 +1,103 @@
+
+
+
+ mdstoreInputPath
+ the path of the input MDStore
+
+
+
+ mdstoreOutputPath
+ the path of the cleaned mdstore
+
+
+ nativeInputPath
+ the path of the input MDStore
+
+
+
+
+
+ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+
+
+
+
+ yarn-cluster
+ cluster
+ ImportDatacite
+ eu.dnetlib.dhp.actionmanager.datacite.ImportDatacite
+ dhp-aggregation-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+
+ -t${nativeInputPath}
+ -d${mdstoreInputPath}
+ -n${nameNode}
+ --masteryarn-cluster
+
+
+
+
+
+
+
+
+ yarn-cluster
+ cluster
+ TransformJob
+ eu.dnetlib.dhp.actionmanager.datacite.GenerateDataciteDatasetSpark
+ dhp-aggregation-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.sql.shuffle.partitions=3840
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+
+ --sourcePath${mdstoreInputPath}
+ --targetPath${mdstoreOutputPath}
+ --isLookupUrl${isLookupUrl}
+ -tr${isLookupUrl}
+ --masteryarn-cluster
+
+
+
+
+
+
+
+
+ yarn-cluster
+ cluster
+ ExportDataset
+ eu.dnetlib.dhp.actionmanager.datacite.ExportActionSetJobNode
+ dhp-aggregation-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.sql.shuffle.partitions=3840
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+
+ --sourcePath${mdstoreOutputPath}
+ --targetPath${mdstoreOutputPath}_raw_AS
+ --masteryarn-cluster
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala
index 683986de24..170dc0dc80 100644
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala
@@ -111,12 +111,12 @@ object DoiBoostMappingUtil {
result.getInstance().asScala.foreach(i => i.setInstancetype(instanceType.get.getInstancetype))
}
result.getInstance().asScala.foreach(i => {
- i.setHostedby(getUbknownHostedBy())
+ i.setHostedby(getUnknownHostedBy())
})
result
}
- def getUbknownHostedBy():KeyValue = {
+ def getUnknownHostedBy():KeyValue = {
val hb = new KeyValue
hb.setValue("Unknown Repository")
hb.setKey(s"10|$OPENAIRE_PREFIX::55045bd2a65019fd8e6741a755395c8c")
diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/DLIToOAF.scala b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/DLIToOAF.scala
index 8043236e01..3ec3913133 100644
--- a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/DLIToOAF.scala
+++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/DLIToOAF.scala
@@ -224,7 +224,7 @@ object DLIToOAF {
if (cleanedPids.isEmpty)
return null
result.setId(generateId(inputPublication.getId))
- result.setDataInfo(generateDataInfo(invisibile = true))
+ result.setDataInfo(generateDataInfo(invisible = true))
if (inputPublication.getCollectedfrom == null || inputPublication.getCollectedfrom.size() == 0 || (inputPublication.getCollectedfrom.size() == 1 && inputPublication.getCollectedfrom.get(0) == null))
return null
result.setCollectedfrom(inputPublication.getCollectedfrom.asScala.map(c => collectedFromMap.getOrElse(c.getKey, null)).filter(p => p != null).asJava)