Merge branch 'beta' into affiliationPropagation
commit
1790fa2d44
@ -0,0 +1,21 @@
|
||||
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||
<project xmlns="http://maven.apache.org/DECORATION/1.8.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/DECORATION/1.8.0 https://maven.apache.org/xsd/decoration-1.8.0.xsd"
|
||||
name="DHP-Aggregation">
|
||||
<skin>
|
||||
<groupId>org.apache.maven.skins</groupId>
|
||||
<artifactId>maven-fluido-skin</artifactId>
|
||||
<version>1.8</version>
|
||||
</skin>
|
||||
<poweredBy>
|
||||
<logo name="OpenAIRE Research Graph" href="https://graph.openaire.eu/"
|
||||
img="https://graph.openaire.eu/assets/common-assets/logo-large-graph.png"/>
|
||||
</poweredBy>
|
||||
<body>
|
||||
<links>
|
||||
<item name="Code" href="https://code-repo.d4science.org/" />
|
||||
</links>
|
||||
<menu ref="modules" />
|
||||
<menu ref="reports"/>
|
||||
</body>
|
||||
</project>
|
@ -0,0 +1,22 @@
|
||||
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||
<project xmlns="http://maven.apache.org/DECORATION/1.8.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/DECORATION/1.8.0 https://maven.apache.org/xsd/decoration-1.8.0.xsd"
|
||||
name="DHP-Aggregation">
|
||||
<skin>
|
||||
<groupId>org.apache.maven.skins</groupId>
|
||||
<artifactId>maven-fluido-skin</artifactId>
|
||||
<version>1.8</version>
|
||||
</skin>
|
||||
<poweredBy>
|
||||
<logo name="OpenAIRE Research Graph" href="https://graph.openaire.eu/"
|
||||
img="https://graph.openaire.eu/assets/common-assets/logo-large-graph.png"/>
|
||||
</poweredBy>
|
||||
<body>
|
||||
<links>
|
||||
<item name="Code" href="https://code-repo.d4science.org/" />
|
||||
</links>
|
||||
|
||||
<menu ref="modules" />
|
||||
<menu ref="reports"/>
|
||||
</body>
|
||||
</project>
|
@ -0,0 +1,72 @@
|
||||
package eu.dnetlib.dhp.application
|
||||
|
||||
import scala.io.Source
|
||||
|
||||
/**
|
||||
* This is the main Interface SparkApplication
|
||||
* where all the Spark Scala class should inherit
|
||||
*
|
||||
*/
|
||||
trait SparkScalaApplication {
|
||||
/**
|
||||
* This is the path in the classpath of the json
|
||||
* describes all the argument needed to run
|
||||
*/
|
||||
val propertyPath: String
|
||||
|
||||
/**
|
||||
* Utility to parse the arguments using the
|
||||
* property json in the classpath identified from
|
||||
* the variable propertyPath
|
||||
*
|
||||
* @param args the list of arguments
|
||||
*/
|
||||
def parseArguments(args: Array[String]): ArgumentApplicationParser = {
|
||||
val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream(propertyPath)).mkString)
|
||||
parser.parseArgument(args)
|
||||
parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Here all the spark applications runs this method
|
||||
* where the whole logic of the spark node is defined
|
||||
*/
|
||||
def run(): Unit
|
||||
}
|
||||
|
||||
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.sql.SparkSession
|
||||
import org.slf4j.Logger
|
||||
|
||||
abstract class AbstractScalaApplication (val propertyPath:String, val args:Array[String], log:Logger) extends SparkScalaApplication {
|
||||
|
||||
var parser: ArgumentApplicationParser = null
|
||||
|
||||
var spark:SparkSession = null
|
||||
|
||||
|
||||
def initialize():SparkScalaApplication = {
|
||||
parser = parseArguments(args)
|
||||
spark = createSparkSession()
|
||||
this
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility for creating a spark session starting from parser
|
||||
*
|
||||
* @return a spark Session
|
||||
*/
|
||||
private def createSparkSession():SparkSession = {
|
||||
require(parser!= null)
|
||||
|
||||
val conf:SparkConf = new SparkConf()
|
||||
val master = parser.get("master")
|
||||
log.info(s"Creating Spark session: Master: $master")
|
||||
SparkSession.builder().config(conf)
|
||||
.appName(getClass.getSimpleName)
|
||||
.master(master)
|
||||
.getOrCreate()
|
||||
}
|
||||
|
||||
}
|
@ -1,69 +0,0 @@
|
||||
package eu.dnetlib.dhp.actionmanager.scholix
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result}
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.sql._
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
import scala.io.Source
|
||||
|
||||
object SparkCreateActionset {
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
val log: Logger = LoggerFactory.getLogger(getClass)
|
||||
val conf: SparkConf = new SparkConf()
|
||||
val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/actionset/generate_actionset.json")).mkString)
|
||||
parser.parseArgument(args)
|
||||
|
||||
|
||||
val spark: SparkSession =
|
||||
SparkSession
|
||||
.builder()
|
||||
.config(conf)
|
||||
.appName(getClass.getSimpleName)
|
||||
.master(parser.get("master")).getOrCreate()
|
||||
|
||||
|
||||
val sourcePath = parser.get("sourcePath")
|
||||
log.info(s"sourcePath -> $sourcePath")
|
||||
|
||||
val targetPath = parser.get("targetPath")
|
||||
log.info(s"targetPath -> $targetPath")
|
||||
|
||||
val workingDirFolder = parser.get("workingDirFolder")
|
||||
log.info(s"workingDirFolder -> $workingDirFolder")
|
||||
|
||||
implicit val oafEncoders: Encoder[Oaf] = Encoders.kryo[Oaf]
|
||||
implicit val resultEncoders: Encoder[Result] = Encoders.kryo[Result]
|
||||
implicit val relationEncoders: Encoder[Relation] = Encoders.kryo[Relation]
|
||||
|
||||
import spark.implicits._
|
||||
|
||||
val relation = spark.read.load(s"$sourcePath/relation").as[Relation]
|
||||
|
||||
relation.filter(r => (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge"))
|
||||
.flatMap(r => List(r.getSource, r.getTarget)).distinct().write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/id_relation")
|
||||
|
||||
|
||||
val idRelation = spark.read.load(s"$workingDirFolder/id_relation").as[String]
|
||||
|
||||
log.info("extract source and target Identifier involved in relations")
|
||||
|
||||
|
||||
log.info("save relation filtered")
|
||||
|
||||
relation.filter(r => (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge"))
|
||||
.write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/actionSetOaf")
|
||||
|
||||
log.info("saving entities")
|
||||
|
||||
val entities: Dataset[(String, Result)] = spark.read.load(s"$sourcePath/entities/*").as[Result].map(p => (p.getId, p))(Encoders.tuple(Encoders.STRING, resultEncoders))
|
||||
|
||||
entities
|
||||
.joinWith(idRelation, entities("_1").equalTo(idRelation("value")))
|
||||
.map(p => p._1._2)
|
||||
.write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf")
|
||||
}
|
||||
|
||||
}
|
@ -1,86 +0,0 @@
|
||||
package eu.dnetlib.dhp.actionmanager.scholix
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction
|
||||
import eu.dnetlib.dhp.schema.oaf.{Oaf, Dataset => OafDataset,Publication, Software, OtherResearchProduct, Relation}
|
||||
import org.apache.hadoop.io.Text
|
||||
import org.apache.hadoop.io.compress.GzipCodec
|
||||
import org.apache.hadoop.mapred.SequenceFileOutputFormat
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
import scala.io.Source
|
||||
|
||||
object SparkSaveActionSet {
|
||||
|
||||
|
||||
def toActionSet(item: Oaf): (String, String) = {
|
||||
val mapper = new ObjectMapper()
|
||||
|
||||
item match {
|
||||
case dataset: OafDataset =>
|
||||
val a: AtomicAction[OafDataset] = new AtomicAction[OafDataset]
|
||||
a.setClazz(classOf[OafDataset])
|
||||
a.setPayload(dataset)
|
||||
(dataset.getClass.getCanonicalName, mapper.writeValueAsString(a))
|
||||
case publication: Publication =>
|
||||
val a: AtomicAction[Publication] = new AtomicAction[Publication]
|
||||
a.setClazz(classOf[Publication])
|
||||
a.setPayload(publication)
|
||||
(publication.getClass.getCanonicalName, mapper.writeValueAsString(a))
|
||||
case software: Software =>
|
||||
val a: AtomicAction[Software] = new AtomicAction[Software]
|
||||
a.setClazz(classOf[Software])
|
||||
a.setPayload(software)
|
||||
(software.getClass.getCanonicalName, mapper.writeValueAsString(a))
|
||||
case orp: OtherResearchProduct =>
|
||||
val a: AtomicAction[OtherResearchProduct] = new AtomicAction[OtherResearchProduct]
|
||||
a.setClazz(classOf[OtherResearchProduct])
|
||||
a.setPayload(orp)
|
||||
(orp.getClass.getCanonicalName, mapper.writeValueAsString(a))
|
||||
|
||||
case relation: Relation =>
|
||||
val a: AtomicAction[Relation] = new AtomicAction[Relation]
|
||||
a.setClazz(classOf[Relation])
|
||||
a.setPayload(relation)
|
||||
(relation.getClass.getCanonicalName, mapper.writeValueAsString(a))
|
||||
case _ =>
|
||||
null
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
val log: Logger = LoggerFactory.getLogger(getClass)
|
||||
val conf: SparkConf = new SparkConf()
|
||||
val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/actionset/save_actionset.json")).mkString)
|
||||
parser.parseArgument(args)
|
||||
|
||||
|
||||
val spark: SparkSession =
|
||||
SparkSession
|
||||
.builder()
|
||||
.config(conf)
|
||||
.appName(getClass.getSimpleName)
|
||||
.master(parser.get("master")).getOrCreate()
|
||||
|
||||
|
||||
val sourcePath = parser.get("sourcePath")
|
||||
log.info(s"sourcePath -> $sourcePath")
|
||||
|
||||
val targetPath = parser.get("targetPath")
|
||||
log.info(s"targetPath -> $targetPath")
|
||||
|
||||
implicit val oafEncoders: Encoder[Oaf] = Encoders.kryo[Oaf]
|
||||
implicit val tEncoder: Encoder[(String, String)] = Encoders.tuple(Encoders.STRING, Encoders.STRING)
|
||||
|
||||
spark.read.load(sourcePath).as[Oaf]
|
||||
.map(o => toActionSet(o))
|
||||
.filter(o => o != null)
|
||||
.rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$targetPath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text, Text]], classOf[GzipCodec])
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,134 @@
|
||||
package eu.dnetlib.dhp.datacite
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
|
||||
import eu.dnetlib.dhp.schema.oaf.{DataInfo, KeyValue}
|
||||
|
||||
import java.io.InputStream
|
||||
import java.time.format.DateTimeFormatter
|
||||
import java.util.Locale
|
||||
import java.util.regex.Pattern
|
||||
import scala.io.Source
|
||||
|
||||
/**
|
||||
* This class represent the dataModel of the input Dataset of Datacite
|
||||
* @param doi THE DOI
|
||||
* @param timestamp timestamp of last update date
|
||||
* @param isActive the record is active or deleted
|
||||
* @param json the json native records
|
||||
*/
|
||||
case class DataciteType(doi: String, timestamp: Long, isActive: Boolean, json: String) {}
|
||||
|
||||
/*
|
||||
The following class are utility class used for the mapping from
|
||||
json datacite to OAF Shema
|
||||
*/
|
||||
case class RelatedIdentifierType(relationType: String, relatedIdentifier: String, relatedIdentifierType: String) {}
|
||||
|
||||
case class NameIdentifiersType(nameIdentifierScheme: Option[String], schemeUri: Option[String], nameIdentifier: Option[String]) {}
|
||||
|
||||
case class CreatorType(nameType: Option[String], nameIdentifiers: Option[List[NameIdentifiersType]], name: Option[String], familyName: Option[String], givenName: Option[String], affiliation: Option[List[String]]) {}
|
||||
|
||||
case class TitleType(title: Option[String], titleType: Option[String], lang: Option[String]) {}
|
||||
|
||||
case class SubjectType(subject: Option[String], subjectScheme: Option[String]) {}
|
||||
|
||||
case class DescriptionType(descriptionType: Option[String], description: Option[String]) {}
|
||||
|
||||
case class FundingReferenceType(funderIdentifierType: Option[String], awardTitle: Option[String], awardUri: Option[String], funderName: Option[String], funderIdentifier: Option[String], awardNumber: Option[String]) {}
|
||||
|
||||
case class DateType(date: Option[String], dateType: Option[String]) {}
|
||||
|
||||
case class OAFRelations(relation:String, inverse:String, relType:String)
|
||||
|
||||
|
||||
class DataciteModelConstants extends Serializable {
|
||||
|
||||
}
|
||||
|
||||
object DataciteModelConstants {
|
||||
|
||||
val REL_TYPE_VALUE:String = "resultResult"
|
||||
val DATE_RELATION_KEY = "RelationDate"
|
||||
val DATACITE_FILTER_PATH = "/eu/dnetlib/dhp/datacite/datacite_filter"
|
||||
val DOI_CLASS = "doi"
|
||||
val SUBJ_CLASS = "keywords"
|
||||
val DATACITE_NAME = "Datacite"
|
||||
val dataInfo: DataInfo = dataciteDataInfo("0.9")
|
||||
val DATACITE_COLLECTED_FROM: KeyValue = OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, DATACITE_NAME)
|
||||
|
||||
val subRelTypeMapping: Map[String,OAFRelations] = Map(
|
||||
ModelConstants.REFERENCES -> OAFRelations(ModelConstants.REFERENCES, ModelConstants.IS_REFERENCED_BY, ModelConstants.RELATIONSHIP),
|
||||
ModelConstants.IS_REFERENCED_BY -> OAFRelations(ModelConstants.IS_REFERENCED_BY,ModelConstants.REFERENCES, ModelConstants.RELATIONSHIP),
|
||||
|
||||
ModelConstants.IS_SUPPLEMENTED_BY -> OAFRelations(ModelConstants.IS_SUPPLEMENTED_BY,ModelConstants.IS_SUPPLEMENT_TO,ModelConstants.SUPPLEMENT),
|
||||
ModelConstants.IS_SUPPLEMENT_TO -> OAFRelations(ModelConstants.IS_SUPPLEMENT_TO,ModelConstants.IS_SUPPLEMENTED_BY,ModelConstants.SUPPLEMENT),
|
||||
|
||||
ModelConstants.HAS_PART -> OAFRelations(ModelConstants.HAS_PART,ModelConstants.IS_PART_OF, ModelConstants.PART),
|
||||
ModelConstants.IS_PART_OF -> OAFRelations(ModelConstants.IS_PART_OF,ModelConstants.HAS_PART, ModelConstants.PART),
|
||||
|
||||
ModelConstants.IS_VERSION_OF-> OAFRelations(ModelConstants.IS_VERSION_OF,ModelConstants.HAS_VERSION,ModelConstants.VERSION),
|
||||
ModelConstants.HAS_VERSION-> OAFRelations(ModelConstants.HAS_VERSION,ModelConstants.IS_VERSION_OF,ModelConstants.VERSION),
|
||||
|
||||
ModelConstants.IS_IDENTICAL_TO -> OAFRelations(ModelConstants.IS_IDENTICAL_TO,ModelConstants.IS_IDENTICAL_TO, ModelConstants.RELATIONSHIP),
|
||||
|
||||
ModelConstants.IS_CONTINUED_BY -> OAFRelations(ModelConstants.IS_CONTINUED_BY,ModelConstants.CONTINUES, ModelConstants.RELATIONSHIP),
|
||||
ModelConstants.CONTINUES -> OAFRelations(ModelConstants.CONTINUES,ModelConstants.IS_CONTINUED_BY, ModelConstants.RELATIONSHIP),
|
||||
|
||||
ModelConstants.IS_NEW_VERSION_OF-> OAFRelations(ModelConstants.IS_NEW_VERSION_OF,ModelConstants.IS_PREVIOUS_VERSION_OF, ModelConstants.VERSION),
|
||||
ModelConstants.IS_PREVIOUS_VERSION_OF ->OAFRelations(ModelConstants.IS_PREVIOUS_VERSION_OF,ModelConstants.IS_NEW_VERSION_OF, ModelConstants.VERSION),
|
||||
|
||||
ModelConstants.IS_DOCUMENTED_BY -> OAFRelations(ModelConstants.IS_DOCUMENTED_BY,ModelConstants.DOCUMENTS, ModelConstants.RELATIONSHIP),
|
||||
ModelConstants.DOCUMENTS -> OAFRelations(ModelConstants.DOCUMENTS,ModelConstants.IS_DOCUMENTED_BY, ModelConstants.RELATIONSHIP),
|
||||
|
||||
ModelConstants.IS_SOURCE_OF -> OAFRelations(ModelConstants.IS_SOURCE_OF,ModelConstants.IS_DERIVED_FROM, ModelConstants.VERSION),
|
||||
ModelConstants.IS_DERIVED_FROM -> OAFRelations(ModelConstants.IS_DERIVED_FROM,ModelConstants.IS_SOURCE_OF, ModelConstants.VERSION),
|
||||
|
||||
ModelConstants.CITES -> OAFRelations(ModelConstants.CITES,ModelConstants.IS_CITED_BY, ModelConstants.CITATION),
|
||||
ModelConstants.IS_CITED_BY -> OAFRelations(ModelConstants.IS_CITED_BY,ModelConstants.CITES, ModelConstants.CITATION),
|
||||
|
||||
ModelConstants.IS_VARIANT_FORM_OF -> OAFRelations(ModelConstants.IS_VARIANT_FORM_OF,ModelConstants.IS_DERIVED_FROM, ModelConstants.VERSION),
|
||||
ModelConstants.IS_OBSOLETED_BY -> OAFRelations(ModelConstants.IS_OBSOLETED_BY,ModelConstants.IS_NEW_VERSION_OF, ModelConstants.VERSION),
|
||||
|
||||
ModelConstants.REVIEWS -> OAFRelations(ModelConstants.REVIEWS,ModelConstants.IS_REVIEWED_BY, ModelConstants.REVIEW),
|
||||
ModelConstants.IS_REVIEWED_BY -> OAFRelations(ModelConstants.IS_REVIEWED_BY,ModelConstants.REVIEWS, ModelConstants.REVIEW),
|
||||
|
||||
ModelConstants.DOCUMENTS -> OAFRelations(ModelConstants.DOCUMENTS,ModelConstants.IS_DOCUMENTED_BY, ModelConstants.RELATIONSHIP),
|
||||
ModelConstants.IS_DOCUMENTED_BY -> OAFRelations(ModelConstants.IS_DOCUMENTED_BY,ModelConstants.DOCUMENTS, ModelConstants.RELATIONSHIP),
|
||||
|
||||
ModelConstants.COMPILES -> OAFRelations(ModelConstants.COMPILES,ModelConstants.IS_COMPILED_BY, ModelConstants.RELATIONSHIP),
|
||||
ModelConstants.IS_COMPILED_BY -> OAFRelations(ModelConstants.IS_COMPILED_BY,ModelConstants.COMPILES, ModelConstants.RELATIONSHIP)
|
||||
)
|
||||
|
||||
|
||||
val datacite_filter: List[String] = {
|
||||
val stream: InputStream = getClass.getResourceAsStream(DATACITE_FILTER_PATH)
|
||||
require(stream!= null)
|
||||
Source.fromInputStream(stream).getLines().toList
|
||||
}
|
||||
|
||||
|
||||
def dataciteDataInfo(trust: String): DataInfo = OafMapperUtils.dataInfo(false,null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, trust)
|
||||
|
||||
val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern("[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]", Locale.ENGLISH)
|
||||
val df_it: DateTimeFormatter = DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN)
|
||||
|
||||
val funder_regex: List[(Pattern, String)] = List(
|
||||
(Pattern.compile("(info:eu-repo/grantagreement/ec/h2020/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda__h2020::"),
|
||||
(Pattern.compile("(info:eu-repo/grantagreement/ec/fp7/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda_______::")
|
||||
|
||||
)
|
||||
|
||||
val Date_regex: List[Pattern] = List(
|
||||
//Y-M-D
|
||||
Pattern.compile("(18|19|20)\\d\\d([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])", Pattern.MULTILINE),
|
||||
//M-D-Y
|
||||
Pattern.compile("((0[1-9]|1[012])|([1-9]))([- /.])(0[1-9]|[12][0-9]|3[01])([- /.])(18|19|20)?\\d\\d", Pattern.MULTILINE),
|
||||
//D-M-Y
|
||||
Pattern.compile("(?:(?:31(/|-|\\.)(?:0?[13578]|1[02]|(?:Jan|Mar|May|Jul|Aug|Oct|Dec)))\\1|(?:(?:29|30)(/|-|\\.)(?:0?[1,3-9]|1[0-2]|(?:Jan|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))\\2))(?:(?:1[6-9]|[2-9]\\d)?\\d{2})|(?:29(/|-|\\.)(?:0?2|(?:Feb))\\3(?:(?:(?:1[6-9]|[2-9]\\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))|(?:0?[1-9]|1\\d|2[0-8])(/|-|\\.)(?:(?:0?[1-9]|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep))|(?:1[0-2]|(?:Oct|Nov|Dec)))\\4(?:(?:1[6-9]|[2-9]\\d)?\\d{2})", Pattern.MULTILINE),
|
||||
//Y
|
||||
Pattern.compile("(19|20)\\d\\d", Pattern.MULTILINE)
|
||||
)
|
||||
|
||||
|
||||
}
|
@ -1,64 +1,94 @@
|
||||
package eu.dnetlib.dhp.datacite
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.dhp.application.AbstractScalaApplication
|
||||
import eu.dnetlib.dhp.collection.CollectionUtils.fixRelations
|
||||
import eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH
|
||||
import eu.dnetlib.dhp.common.Constants.MDSTORE_SIZE_PATH
|
||||
import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH}
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||
import eu.dnetlib.dhp.schema.mdstore.{MDStoreVersion, MetadataRecord}
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf
|
||||
import eu.dnetlib.dhp.utils.DHPUtils.writeHdfsFile
|
||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
import scala.io.Source
|
||||
|
||||
object GenerateDataciteDatasetSpark {
|
||||
|
||||
val log: Logger = LoggerFactory.getLogger(GenerateDataciteDatasetSpark.getClass)
|
||||
class GenerateDataciteDatasetSpark (propertyPath:String, args:Array[String], log:Logger) extends AbstractScalaApplication(propertyPath, args, log:Logger) {
|
||||
/**
|
||||
* Here all the spark applications runs this method
|
||||
* where the whole logic of the spark node is defined
|
||||
*/
|
||||
override def run(): Unit = {
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
val conf = new SparkConf
|
||||
val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/datacite/generate_dataset_params.json")).mkString)
|
||||
parser.parseArgument(args)
|
||||
val master = parser.get("master")
|
||||
val sourcePath = parser.get("sourcePath")
|
||||
log.info(s"SourcePath is '$sourcePath'")
|
||||
val exportLinks = "true".equalsIgnoreCase(parser.get("exportLinks"))
|
||||
log.info(s"exportLinks is '$exportLinks'")
|
||||
val isLookupUrl: String = parser.get("isLookupUrl")
|
||||
log.info("isLookupUrl: {}", isLookupUrl)
|
||||
|
||||
val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl)
|
||||
val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService)
|
||||
val spark: SparkSession = SparkSession.builder().config(conf)
|
||||
.appName(GenerateDataciteDatasetSpark.getClass.getSimpleName)
|
||||
.master(master)
|
||||
.getOrCreate()
|
||||
|
||||
import spark.implicits._
|
||||
|
||||
implicit val mrEncoder: Encoder[MetadataRecord] = Encoders.kryo[MetadataRecord]
|
||||
|
||||
implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
|
||||
require(vocabularies != null)
|
||||
|
||||
val mdstoreOutputVersion = parser.get("mdstoreOutputVersion")
|
||||
log.info(s"mdstoreOutputVersion is '$mdstoreOutputVersion'")
|
||||
|
||||
val mapper = new ObjectMapper()
|
||||
val cleanedMdStoreVersion = mapper.readValue(mdstoreOutputVersion, classOf[MDStoreVersion])
|
||||
val outputBasePath = cleanedMdStoreVersion.getHdfsPath
|
||||
|
||||
log.info("outputBasePath: {}", outputBasePath)
|
||||
log.info(s"outputBasePath is '$outputBasePath'")
|
||||
val targetPath = s"$outputBasePath/$MDSTORE_DATA_PATH"
|
||||
log.info(s"targetPath is '$targetPath'")
|
||||
|
||||
generateDataciteDataset(sourcePath, exportLinks, vocabularies, targetPath, spark)
|
||||
|
||||
reportTotalSize(targetPath, outputBasePath)
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* For working with MDStore we need to store in a file on hdfs the size of
|
||||
* the current dataset
|
||||
* @param targetPath
|
||||
* @param outputBasePath
|
||||
*/
|
||||
def reportTotalSize( targetPath: String, outputBasePath: String ):Unit = {
|
||||
val total_items = spark.read.load(targetPath).count()
|
||||
writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$total_items", outputBasePath + MDSTORE_SIZE_PATH)
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate the transformed and cleaned OAF Dataset from the native one
|
||||
|
||||
* @param sourcePath sourcePath of the native Dataset in format JSON/Datacite
|
||||
* @param exportLinks If true it generates unresolved links
|
||||
* @param vocabularies vocabularies for cleaning
|
||||
* @param targetPath the targetPath of the result Dataset
|
||||
*/
|
||||
def generateDataciteDataset(sourcePath: String, exportLinks: Boolean, vocabularies: VocabularyGroup, targetPath: String, spark:SparkSession):Unit = {
|
||||
require(spark!= null)
|
||||
import spark.implicits._
|
||||
|
||||
implicit val mrEncoder: Encoder[MetadataRecord] = Encoders.kryo[MetadataRecord]
|
||||
|
||||
implicit val resEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
|
||||
spark.read.load(sourcePath).as[DataciteType]
|
||||
.filter(d => d.isActive)
|
||||
.flatMap(d => DataciteToOAFTransformation.generateOAF(d.json, d.timestamp, d.timestamp, vocabularies, exportLinks))
|
||||
.filter(d => d != null)
|
||||
.flatMap(i => fixRelations(i)).filter(i => i != null)
|
||||
.write.mode(SaveMode.Overwrite).save(targetPath)
|
||||
}
|
||||
|
||||
val total_items = spark.read.load(targetPath).as[Oaf].count()
|
||||
writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$total_items", outputBasePath + MDSTORE_SIZE_PATH)
|
||||
}
|
||||
|
||||
|
||||
object GenerateDataciteDatasetSpark {
|
||||
|
||||
val log: Logger = LoggerFactory.getLogger(GenerateDataciteDatasetSpark.getClass)
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
new GenerateDataciteDatasetSpark("/eu/dnetlib/dhp/datacite/generate_dataset_params.json", args, log).initialize().run()
|
||||
}
|
||||
}
|
@ -1,9 +1,8 @@
|
||||
package eu.dnetlib.dhp.sx.bio.ebi
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal}
|
||||
import eu.dnetlib.dhp.sx.bio.BioDBToOAF.EBILinkItem
|
||||
import eu.dnetlib.dhp.sx.bio.pubmed.PMJournal
|
||||
import eu.dnetlib.dhp.sx.bio.pubmed.{PMArticle, PMAuthor, PMJournal}
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.apache.http.client.config.RequestConfig
|
||||
import org.apache.http.client.methods.HttpGet
|
@ -1,9 +1,20 @@
|
||||
##DHP-Aggregation
|
||||
|
||||
This module defines a set of oozie workflows for the **collection** and **transformation** of metadata records.
|
||||
This module defines a set of oozie workflows for
|
||||
|
||||
Both workflows interact with the Metadata Store Manager (MdSM) to handle the logical transactions required to ensure
|
||||
1. the **collection** and **transformation** of metadata records.
|
||||
2. the **integration** of new external information in the result
|
||||
|
||||
|
||||
### Collection and Transformation
|
||||
|
||||
The workflows interact with the Metadata Store Manager (MdSM) to handle the logical transactions required to ensure
|
||||
the consistency of the read/write operations on the data as the MdSM in fact keeps track of the logical-physical mapping
|
||||
of each MDStore.
|
||||
|
||||
It defines [mappings](mappings.md) for transformation of different datasource (See mapping section).
|
||||
It defines [mappings](mappings.md) for transformation of different datasource (See mapping section).
|
||||
|
||||
### Integration of external information in the result
|
||||
|
||||
The workflows create new entity in the OpenAIRE format (OAF) which aim is to enrich the result already contained in the graph.
|
||||
See integration section for more insight
|
||||
|
@ -0,0 +1,36 @@
|
||||
DHP Aggregation - Integration method
|
||||
=====================================
|
||||
|
||||
The integration method can be applied every time new information, which is not aggregated from the repositories
|
||||
nor computed directly by OpenAIRE, should be added to the results of the graph.
|
||||
|
||||
The information integrated so far is:
|
||||
|
||||
1. Article impact measures
|
||||
1. [Bip!Finder](https://dl.acm.org/doi/10.1145/3357384.3357850) scores
|
||||
2. Result Subjects
|
||||
1. Integration of Fields od Science and Techonology ([FOS](https://www.qnrf.org/en-us/FOS)) classification in
|
||||
results subjects.
|
||||
|
||||
|
||||
The method always consists in the creation of a new entity in the OpenAIRE format (OAF entity) containing only the id
|
||||
and the element in the OAF model that should be used to map the information we want to integrate.
|
||||
|
||||
The id is set by using a particular encoding of the given PID
|
||||
|
||||
*unresolved:[pid]:[pidtype]*
|
||||
|
||||
where
|
||||
|
||||
1. *unresolved* is a constant value
|
||||
2. *pid* is the persistent id value, e.g. 10.5281/zenodo.4707307
|
||||
3. *pidtype* is the persistent id type, e.g. doi
|
||||
|
||||
Such entities are matched against those available in the graph using the result.instance.pid values.
|
||||
|
||||
This mechanism can be used to integrate enrichments produced as associated by a given PID.
|
||||
If a match will be found with one of the results already in the graph that said result will be enriched with the information
|
||||
present in the new OAF.
|
||||
All the objects for which a match is not found are discarded.
|
||||
|
||||
|
@ -1,56 +0,0 @@
|
||||
package eu.dnetlib.dhp.datacite
|
||||
|
||||
|
||||
import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}
|
||||
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf
|
||||
import org.junit.jupiter.api.extension.ExtendWith
|
||||
import org.junit.jupiter.api.{BeforeEach, Test}
|
||||
import org.mockito.junit.jupiter.MockitoExtension
|
||||
|
||||
import java.text.SimpleDateFormat
|
||||
import java.util.Locale
|
||||
import scala.io.Source
|
||||
|
||||
@ExtendWith(Array(classOf[MockitoExtension]))
|
||||
class DataciteToOAFTest extends AbstractVocabularyTest{
|
||||
|
||||
|
||||
@BeforeEach
|
||||
def setUp() :Unit = {
|
||||
|
||||
super.setUpVocabulary()
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
def testDateMapping:Unit = {
|
||||
val inputDate = "2021-07-14T11:52:54+0000"
|
||||
val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US)
|
||||
val dt = ISO8601FORMAT.parse(inputDate)
|
||||
println(dt.getTime)
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
def testMapping() :Unit = {
|
||||
val record =Source.fromInputStream(getClass.getResourceAsStream("record.json")).mkString
|
||||
|
||||
|
||||
|
||||
val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
|
||||
val res:List[Oaf] =DataciteToOAFTransformation.generateOAF(record, 0L,0L, vocabularies, true )
|
||||
|
||||
res.foreach(r => {
|
||||
println (mapper.writeValueAsString(r))
|
||||
println("----------------------------")
|
||||
|
||||
})
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
Binary file not shown.
@ -0,0 +1,114 @@
|
||||
package eu.dnetlib.dhp.datacite
|
||||
|
||||
|
||||
import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}
|
||||
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf
|
||||
import org.apache.commons.io.FileUtils
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.sql.functions.{col, count}
|
||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
|
||||
import org.junit.jupiter.api.Assertions._
|
||||
import org.junit.jupiter.api.extension.ExtendWith
|
||||
import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
|
||||
import org.mockito.junit.jupiter.MockitoExtension
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
import java.nio.file.{Files, Path}
|
||||
import java.text.SimpleDateFormat
|
||||
import java.util.Locale
|
||||
import scala.io.Source
|
||||
|
||||
@ExtendWith(Array(classOf[MockitoExtension]))
|
||||
class DataciteToOAFTest extends AbstractVocabularyTest{
|
||||
|
||||
private var workingDir:Path = null
|
||||
val log: Logger = LoggerFactory.getLogger(getClass)
|
||||
|
||||
@BeforeEach
|
||||
def setUp() :Unit = {
|
||||
|
||||
workingDir= Files.createTempDirectory(getClass.getSimpleName)
|
||||
super.setUpVocabulary()
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
def tearDown() :Unit = {
|
||||
FileUtils.deleteDirectory(workingDir.toFile)
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
def testDateMapping:Unit = {
|
||||
val inputDate = "2021-07-14T11:52:54+0000"
|
||||
val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US)
|
||||
val dt = ISO8601FORMAT.parse(inputDate)
|
||||
println(dt.getTime)
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
def testConvert(): Unit = {
|
||||
|
||||
|
||||
val path = getClass.getResource("/eu/dnetlib/dhp/actionmanager/datacite/dataset").getPath
|
||||
|
||||
val conf = new SparkConf()
|
||||
val spark:SparkSession = SparkSession.builder().config(conf)
|
||||
.appName(getClass.getSimpleName)
|
||||
.master("local[*]")
|
||||
.getOrCreate()
|
||||
|
||||
|
||||
|
||||
implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf]
|
||||
val instance = new GenerateDataciteDatasetSpark(null, null, log)
|
||||
val targetPath = s"$workingDir/result"
|
||||
|
||||
instance.generateDataciteDataset(path, exportLinks = true, vocabularies,targetPath, spark)
|
||||
|
||||
import spark.implicits._
|
||||
|
||||
val nativeSize =spark.read.load(path).count()
|
||||
|
||||
|
||||
assertEquals(100, nativeSize)
|
||||
|
||||
val result:Dataset[Oaf] = spark.read.load(targetPath).as[Oaf]
|
||||
|
||||
|
||||
result.map(s => s.getClass.getSimpleName).groupBy(col("value").alias("class")).agg(count("value").alias("Total")).show(false)
|
||||
|
||||
val t = spark.read.load(targetPath).count()
|
||||
|
||||
assertTrue(t >0)
|
||||
|
||||
|
||||
spark.stop()
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
def testMapping() :Unit = {
|
||||
val record =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/record.json")).mkString
|
||||
|
||||
val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
|
||||
val res:List[Oaf] =DataciteToOAFTransformation.generateOAF(record, 0L,0L, vocabularies, true )
|
||||
|
||||
res.foreach(r => {
|
||||
println (mapper.writeValueAsString(r))
|
||||
println("----------------------------")
|
||||
|
||||
})
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,38 @@
|
||||
|
||||
package eu.dnetlib.dhp.blacklist;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.common.RelationInverse;
|
||||
|
||||
public class BlacklistRelationTest {
|
||||
|
||||
@Test
|
||||
public void testRelationInverseLookup() {
|
||||
|
||||
final List<String> rels = Arrays
|
||||
.asList(
|
||||
"resultResult_relationship_IsRelatedTo",
|
||||
"resultOrganization_affiliation_isAuthorInstitutionOf",
|
||||
"resultOrganization_affiliation_hasAuthorInstitution",
|
||||
"datasourceOrganization_provision_isProvidedBy",
|
||||
"projectOrganization_participation_hasParticipant",
|
||||
"resultProject_outcome_produces",
|
||||
"resultProject_outcome_isProducedBy");
|
||||
|
||||
rels.forEach(r -> {
|
||||
RelationInverse inverse = ModelSupport.relationInverseMap.get(r);
|
||||
Assertions.assertNotNull(inverse);
|
||||
Assertions.assertNotNull(inverse.getRelType());
|
||||
Assertions.assertNotNull(inverse.getSubReltype());
|
||||
Assertions.assertNotNull(inverse.getRelClass());
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -1,21 +1,19 @@
|
||||
package eu.dnetlib.doiboost
|
||||
|
||||
import java.time.LocalDate
|
||||
import java.time.format.DateTimeFormatter
|
||||
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction
|
||||
import eu.dnetlib.dhp.schema.oaf.{AccessRight, DataInfo, Dataset, Field, Instance, KeyValue, Oaf, OpenAccessRoute, Organization, Publication, Qualifier, Relation, Result, StructuredProperty}
|
||||
import eu.dnetlib.dhp.utils.DHPUtils
|
||||
import org.apache.commons.lang3.StringUtils
|
||||
import com.fasterxml.jackson.databind.ObjectMapper
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||
import eu.dnetlib.dhp.schema.oaf._
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil.{getClosedAccessQualifier, getEmbargoedAccessQualifier, getUnknownQualifier}
|
||||
import eu.dnetlib.dhp.utils.DHPUtils
|
||||
import org.apache.commons.lang3.StringUtils
|
||||
import org.json4s
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.jackson.JsonMethods.parse
|
||||
import org.slf4j.{Logger, LoggerFactory}
|
||||
|
||||
import java.time.LocalDate
|
||||
import java.time.format.DateTimeFormatter
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
|
@ -1,70 +0,0 @@
|
||||
package eu.dnetlib.dhp.doiboost
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.{Publication, Dataset => OafDataset}
|
||||
import eu.dnetlib.doiboost.{DoiBoostMappingUtil, HostedByItemType}
|
||||
import eu.dnetlib.doiboost.SparkGenerateDoiBoost.getClass
|
||||
import eu.dnetlib.doiboost.mag.ConversionUtil
|
||||
import eu.dnetlib.doiboost.orcid.ORCIDElement
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
|
||||
import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
import scala.io.Source
|
||||
|
||||
class DoiBoostHostedByMapTest {
|
||||
|
||||
|
||||
|
||||
// @Test
|
||||
// def testMerge():Unit = {
|
||||
// val conf: SparkConf = new SparkConf()
|
||||
// val spark: SparkSession =
|
||||
// SparkSession
|
||||
// .builder()
|
||||
// .config(conf)
|
||||
// .appName(getClass.getSimpleName)
|
||||
// .master("local[*]").getOrCreate()
|
||||
//
|
||||
//
|
||||
//
|
||||
// implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication]
|
||||
// implicit val mapEncoderDataset: Encoder[OafDataset] = Encoders.kryo[OafDataset]
|
||||
// implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPub)
|
||||
//
|
||||
//
|
||||
// import spark.implicits._
|
||||
// val dataset:RDD[String]= spark.sparkContext.textFile("/home/sandro/Downloads/hbMap.gz")
|
||||
//
|
||||
//
|
||||
// val hbMap:Dataset[(String, HostedByItemType)] =spark.createDataset(dataset.map(DoiBoostMappingUtil.toHostedByItem))
|
||||
//
|
||||
//
|
||||
// hbMap.show()
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
// }
|
||||
|
||||
|
||||
@Test
|
||||
def idDSGeneration():Unit = {
|
||||
val s ="doajarticles::0066-782X"
|
||||
|
||||
|
||||
|
||||
println(DoiBoostMappingUtil.generateDSId(s))
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
package eu.dnetlib.dhp.doiboost
|
||||
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
class DoiBoostHostedByMapTest {
|
||||
|
||||
@Test
|
||||
def idDSGeneration():Unit = {
|
||||
val s ="doajarticles::0066-782X"
|
||||
|
||||
|
||||
|
||||
println(DoiBoostMappingUtil.generateDSId(s))
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -0,0 +1 @@
|
||||
#DHP Enrichment
|
@ -0,0 +1,26 @@
|
||||
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||
<project xmlns="http://maven.apache.org/DECORATION/1.8.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/DECORATION/1.8.0 https://maven.apache.org/xsd/decoration-1.8.0.xsd"
|
||||
name="DHP-Aggregation">
|
||||
<skin>
|
||||
<groupId>org.apache.maven.skins</groupId>
|
||||
<artifactId>maven-fluido-skin</artifactId>
|
||||
<version>1.8</version>
|
||||
</skin>
|
||||
<poweredBy>
|
||||
<logo name="OpenAIRE Research Graph" href="https://graph.openaire.eu/"
|
||||
img="https://graph.openaire.eu/assets/common-assets/logo-large-graph.png"/>
|
||||
</poweredBy>
|
||||
<body>
|
||||
<links>
|
||||
<item name="Code" href="https://code-repo.d4science.org/" />
|
||||
</links>
|
||||
<menu name="Documentation">
|
||||
<item name="Link1 Collapsable" href="about.html" collapse="true">
|
||||
<item name="item1" href="pubmed.html"/>
|
||||
<item name="item2" href="datacite.html"/>
|
||||
</item>
|
||||
</menu>
|
||||
<menu ref="reports"/>
|
||||
</body>
|
||||
</project>
|
@ -1,5 +1,6 @@
|
||||
[
|
||||
{"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
|
||||
{"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the source Path", "paramRequired": true},
|
||||
{"paramName":"g", "paramLongName":"graphBasePath", "paramDescription": "the path of the raw graph", "paramRequired": true}
|
||||
{"paramName":"g", "paramLongName":"graphBasePath", "paramDescription": "the path of the raw graph", "paramRequired": true},
|
||||
{"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the target path", "paramRequired": true}
|
||||
]
|
@ -1,8 +1,8 @@
|
||||
package eu.dnetlib.dhp.oa.graph.hostedbymap
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo
|
||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn}
|
||||
import org.apache.spark.sql.expressions.Aggregator
|
||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn}
|
||||
|
||||
|
||||
case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue