WIP: Graph footprint optimisation #287
|
@ -0,0 +1,84 @@
|
||||||
|
package eu.dnetlib.dhp.common.vocabulary;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
|
import org.mockito.Mock;
|
||||||
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import static org.mockito.Mockito.lenient;
|
||||||
|
|
||||||
|
|
||||||
|
@ExtendWith(MockitoExtension.class)
|
||||||
|
public class VocabularyTest {
|
||||||
|
|
||||||
|
|
||||||
|
@Mock
|
||||||
|
protected ISLookUpService isLookUpService;
|
||||||
|
|
||||||
|
protected VocabularyGroup vocabularies;
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
public void setUpVocabulary() throws ISLookUpException, IOException {
|
||||||
|
|
||||||
|
lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
|
||||||
|
|
||||||
|
lenient()
|
||||||
|
.when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
|
||||||
|
.thenReturn(synonyms());
|
||||||
|
vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<String> vocs() throws IOException {
|
||||||
|
return IOUtils
|
||||||
|
.readLines(
|
||||||
|
Objects
|
||||||
|
.requireNonNull(
|
||||||
|
VocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/transform/terms.txt")));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<String> synonyms() throws IOException {
|
||||||
|
return IOUtils
|
||||||
|
.readLines(
|
||||||
|
Objects
|
||||||
|
.requireNonNull(
|
||||||
|
VocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/transform/synonyms.txt")));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testVocabularyMatch () throws Exception{
|
||||||
|
final String s= IOUtils.toString(this.getClass().getResourceAsStream("terms"));
|
||||||
|
|
||||||
|
for (String s1 : s.split("\n")) {
|
||||||
|
|
||||||
|
final Qualifier t1 = vocabularies.getSynonymAsQualifier("dnet:publication_resource", s1);
|
||||||
|
|
||||||
|
if (t1 == null) {
|
||||||
|
System.err.println(s1+ " Missing");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
System.out.println("syn=" + s1 + " term = " + t1.getClassid());
|
||||||
|
|
||||||
|
|
||||||
|
System.out.println(vocabularies.getSynonymAsQualifier("dnet:result_typologies", t1.getClassid()).getClassname());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,34 @@
|
||||||
|
grant
|
||||||
|
book
|
||||||
|
report-series
|
||||||
|
report-component
|
||||||
|
book-series
|
||||||
|
peer-review
|
||||||
|
component
|
||||||
|
report
|
||||||
|
book-track
|
||||||
|
database
|
||||||
|
standard
|
||||||
|
journal-volume
|
||||||
|
proceedings-series
|
||||||
|
preprint
|
||||||
|
book-section
|
||||||
|
letter
|
||||||
|
reference-book
|
||||||
|
edited-book
|
||||||
|
journal-issue
|
||||||
|
dataset
|
||||||
|
reference-entry
|
||||||
|
dissertation
|
||||||
|
book-chapter
|
||||||
|
book-part
|
||||||
|
journal
|
||||||
|
book-set
|
||||||
|
working_paper
|
||||||
|
dissertation
|
||||||
|
other
|
||||||
|
proceedings-article
|
||||||
|
journal-article
|
||||||
|
other
|
||||||
|
proceedings
|
||||||
|
monograph
|
|
@ -0,0 +1,357 @@
|
||||||
|
package eu.dnetlib.dhp.crossref
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType}
|
||||||
|
import eu.dnetlib.dhp.schema.oaf._
|
||||||
|
import org.apache.commons.lang.StringUtils
|
||||||
|
import org.json4s
|
||||||
|
import org.json4s.DefaultFormats
|
||||||
|
import org.json4s.JsonAST.{JField, JObject, JString, JValue}
|
||||||
|
import org.json4s.jackson.JsonMethods.parse
|
||||||
|
|
||||||
|
import scala.collection.JavaConverters._
|
||||||
|
|
||||||
|
|
||||||
|
/** A raw Crossref record: its DOI, the JSON payload and the harvesting timestamp. */
case class CrossrefDT(doi: String, json: String, timestamp: Long)
|
||||||
|
/**
 * Utilities mapping raw Crossref JSON records onto the OAF data model.
 *
 * NOTE(review): keyValue/structuredProperty/dataInfo come from the OafMapperUtils._
 * wildcard import; mappingFunder, mappingAuthor, mappingFunderToRelations,
 * createCrossrefCollectedFrom, convertPublication, convertDataset, generateIdentifier,
 * generateAuhtor, asField, decideAccessRight and createSubject are not defined in this
 * file — presumably they live in DoiBoostMappingUtil or a sibling source; verify.
 */
object CrossrefUtility {

  /** Matches a leading "10." or an embedded "/10." — used to locate the DOI prefix. */
  val DOI_PREFIX_REGEX = "(^10\\.|\\/10.)"

  val DOI_PREFIX = "10."

  /** KeyValue identifying Crossref as the collectedfrom datasource. */
  val CROSSREF_COLLECTED_FROM = keyValue(ModelConstants.CROSSREF_ID, ModelConstants.CROSSREF_NAME)

  /**
   * Normalizes a DOI: strips all whitespace, lower-cases it and keeps only the part
   * starting at the first "10." prefix.
   *
   * @param input the raw DOI string (may be null)
   * @return the normalized DOI, or null when input is null/blank or has no "10." prefix
   */
  def normalizeDoi(input: String): String = {
    if (input == null)
      return null
    val replaced = input
      .replaceAll("(?:\\n|\\r|\\t|\\s)", "")
      .toLowerCase
      .replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX)
    if (replaced == null || replaced.trim.isEmpty)
      return null
    if (replaced.indexOf("10.") < 0)
      return null
    // drop any garbage before the DOI prefix (e.g. a resolver URL)
    val ret = replaced.substring(replaced.indexOf("10."))
    if (!ret.startsWith(DOI_PREFIX))
      return null
    ret
  }

  /**
   * Extracts a cleaned date, preferring the textual date-time `dt` and falling back
   * to the Crossref "date-parts" structure (a single inner list of 1-3 components:
   * [year], [year, month] or [year, month, day]).
   *
   * @param dt       a date-time string; used directly when not blank
   * @param datePart the Crossref date-parts fallback (may be null)
   * @return the cleaned date, or null when neither source yields one
   */
  def extractDate(dt: String, datePart: List[List[Int]]): String = {
    if (StringUtils.isNotBlank(dt))
      return GraphCleaningFunctions.cleanDate(dt)
    if (datePart != null && datePart.size == 1) {
      val res = datePart.head
      if (res.size == 3) {
        // full year-month-day; the length check guards against out-of-range components
        val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d"
        if (dp.length == 10) {
          return GraphCleaningFunctions.cleanDate(dp)
        }
      } else if (res.size == 2) {
        // year-month only: default the day to 01
        val dp = f"${res.head}-${res(1)}%02d-01"
        return GraphCleaningFunctions.cleanDate(dp)
      } else if (res.size == 1) {
        // year only: default to January 1st
        return GraphCleaningFunctions.cleanDate(s"${res.head}-01-01")
      }
    }
    null
  }

  /**
   * Builds a StructuredProperty date with the given class/scheme ids, or null when
   * no date can be extracted from either source.
   */
  private def generateDate(
    dt: String,
    datePart: List[List[Int]],
    classId: String,
    schemeId: String
  ): StructuredProperty = {
    val dp = extractDate(dt, datePart)
    if (StringUtils.isNotBlank(dp))
      structuredProperty(dp, classId, classId,schemeId)
    else
      null
  }

  /**
   * Resolves the Crossref "type" field, via the vocabularies, into a new empty Result
   * subclass plus the resolved publication-resource category.
   *
   * NOTE(review): returns null (not None) from a tuple-typed method when the term cannot
   * be resolved; the inner match has no default case, so an unexpected resource typology
   * raises a MatchError; the second getSynonymAsQualifier may also return null and NPE
   * on .getClassname — confirm the vocabularies always cover resolved terms.
   */
  private def generateItemFromType(objectType: String, vocabularies:VocabularyGroup): (Result, String) = {
    val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, objectType)
    if (term != null) {
      // second-level lookup: map the publication-resource term onto the result typology
      val resourceType = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname

      resourceType match {
        case "publication" =>(new Publication, resourceType)
        case "dataset" =>(new Dataset, resourceType)
        case "software" => (new Software, resourceType)
        case "otherresearchproduct" =>(new OtherResearchProduct, resourceType)
      }
    } else
      null
  }

  /**
   * Converts one raw Crossref JSON record into a list of OAF entities: the mapped
   * Result plus any funder relations.
   *
   * @param input        the Crossref JSON record
   * @param vocabularies vocabulary group used to resolve the record type
   * @return the mapped entities; empty when the record has no type or cannot be mapped
   */
  def convert(input: String, vocabularies:VocabularyGroup): List[Oaf] = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    lazy val json: json4s.JValue = parse(input)

    var resultList: List[Oaf] = List()

    val objectType = (json \ "type").extractOrElse[String](null)
    if (objectType == null)
      return resultList

    val resultWithType = generateItemFromType(objectType, vocabularies)
    if (resultWithType == null)
      return List()

    val result = resultWithType._1
    val cOBJCategory = resultWithType._2
    // mutates result in place; the returned reference is not used here
    mappingResult(result, json, cOBJCategory)
    // NOTE(review): result cannot be null at this point (it came from a non-null tuple);
    // only the getId check is effective
    if (result == null || result.getId == null)
      return List()

    val funderList: List[mappingFunder] =
      (json \ "funder").extractOrElse[List[mappingFunder]](List())

    if (funderList.nonEmpty) {
      resultList = resultList ::: mappingFunderToRelations(
        funderList,
        result.getId,
        createCrossrefCollectedFrom(),
        result.getDataInfo,
        result.getLastupdatetimestamp
      )
    }

    // NOTE(review): non-exhaustive — Software and OtherResearchProduct (both producible
    // by generateItemFromType) have no case here and would raise a MatchError
    result match {
      case publication: Publication => convertPublication(publication, json, cOBJCategory)
      case dataset: Dataset => convertDataset(dataset)
    }

    resultList = resultList ::: List(result)
    resultList
  }

  /**
   * Populates `result` (in place) with the fields extracted from the Crossref JSON:
   * pid, originalId, dataInfo, timestamps, publisher, titles, description, source,
   * relevant dates, subjects, authors, the single Instance, and finally the identifier.
   *
   * @param result       the (empty) Result produced by generateItemFromType; mutated
   * @param json         the parsed Crossref record
   * @param cobjCategory the resolved category, formatted "NNNN label" (split at index 4/5)
   * @return the same result instance, or null when no id could be assigned
   */
  def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats

    //MAPPING Crossref DOI into PID
    // NOTE(review): extract[String] throws when "DOI" is missing — confirm every record has one
    val doi: String = normalizeDoi((json \ "DOI").extract[String])

    result.setPid(
      List(
        structuredProperty(doi, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES)
      ).asJava)

    //MAPPING Crossref DOI into OriginalId
    //and Other Original Identifier of dataset like clinical-trial-number
    val clinicalTrialNumbers: List[String] = for (JString(ctr) <- json \ "clinical-trial-number") yield ctr
    val alternativeIds: List[String] = for (JString(ids) <- json \ "alternative-id") yield ids
    val tmp = clinicalTrialNumbers ::: alternativeIds ::: List(doi)

    result.setOriginalId(tmp.filter(id => id != null).asJava)

    // Add DataInfo
    result.setDataInfo(dataInfo(false, false,0.9F,null, false,ModelConstants.REPOSITORY_PROVENANCE_ACTIONS))

    result.setLastupdatetimestamp((json \ "indexed" \ "timestamp").extract[Long])
    result.setDateofcollection((json \ "indexed" \ "date-time").extract[String])

    result.setCollectedfrom(List(CROSSREF_COLLECTED_FROM).asJava)

    // Publisher ( Name of work's publisher mapped into Result/Publisher)
    val publisher = (json \ "publisher").extractOrElse[String](null)
    if (publisher != null && publisher.nonEmpty)
      result.setPublisher(new Publisher(publisher))

    // TITLE: main title plus original/short titles (as alternatives) and subtitles
    val mainTitles =
      for {JString(title) <- json \ "title" if title.nonEmpty}
        yield
          structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER)
    val originalTitles = for {
      JString(title) <- json \ "original-title" if title.nonEmpty
    } yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER)
    val shortTitles = for {
      JString(title) <- json \ "short-title" if title.nonEmpty
    } yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER)
    val subtitles =
      for {JString(title) <- json \ "subtitle" if title.nonEmpty}
        yield structuredProperty(title, ModelConstants.SUBTITLE_QUALIFIER)
    result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava)

    // DESCRIPTION
    val descriptionList =
      for {JString(description) <- json \ "abstract"} yield description
    result.setDescription(descriptionList.asJava)

    // Source
    val sourceList = for {
      JString(source) <- json \ "source" if source != null && source.nonEmpty
    } yield source
    result.setSource(sourceList.asJava)

    //RELEVANT DATE Mapping
    val createdDate = generateDate(
      (json \ "created" \ "date-time").extract[String],
      (json \ "created" \ "date-parts").extract[List[List[Int]]],
      "created",
      ModelConstants.DNET_DATACITE_DATE
    )
    val postedDate = generateDate(
      (json \ "posted" \ "date-time").extractOrElse[String](null),
      (json \ "posted" \ "date-parts").extract[List[List[Int]]],
      "available",
      ModelConstants.DNET_DATACITE_DATE
    )
    val acceptedDate = generateDate(
      (json \ "accepted" \ "date-time").extractOrElse[String](null),
      (json \ "accepted" \ "date-parts").extract[List[List[Int]]],
      "accepted",
      ModelConstants.DNET_DATACITE_DATE
    )
    val publishedPrintDate = generateDate(
      (json \ "published-print" \ "date-time").extractOrElse[String](null),
      (json \ "published-print" \ "date-parts").extract[List[List[Int]]],
      "published-print",
      ModelConstants.DNET_DATACITE_DATE
    )
    val publishedOnlineDate = generateDate(
      (json \ "published-online" \ "date-time").extractOrElse[String](null),
      (json \ "published-online" \ "date-parts").extract[List[List[Int]]],
      "published-online",
      ModelConstants.DNET_DATACITE_DATE
    )

    val issuedDate = extractDate(
      (json \ "issued" \ "date-time").extractOrElse[String](null),
      (json \ "issued" \ "date-parts").extract[List[List[Int]]]
    )
    // NOTE(review): createdDate may be null (generateDate returns null on failure);
    // the else branch below would then NPE on .getValue — confirm "created" is always present
    if (StringUtils.isNotBlank(issuedDate)) {
      result.setDateofacceptance(issuedDate)
    } else {
      result.setDateofacceptance(createdDate.getValue)
    }
    result.setRelevantdate(
      List(createdDate, postedDate, acceptedDate, publishedOnlineDate, publishedPrintDate)
        .filter(p => p != null)
        .asJava
    )

    //Mapping Subject
    val subjectList: List[String] = (json \ "subject").extractOrElse[List[String]](List())

    if (subjectList.nonEmpty) {
      result.setSubject(
        subjectList.map(s => createSubject(s, "keyword", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava
      )
    }

    //Mapping Author
    val authorList: List[mappingAuthor] =
      (json \ "author").extractOrElse[List[mappingAuthor]](List())

    // NOTE(review): the comparator ignores its second argument, so this only pushes
    // authors flagged "first" ahead; it is not a total ordering — verify the intent
    val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) =>
      a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first")
    )

    result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) =>
      generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index)
    }.asJava)

    // Mapping instance
    val instance = new Instance()
    // collect (license URL, content-version) pairs from the "license" array
    val license = for {
      JObject(license) <- json \ "license"
      JField("URL", JString(lic)) <- license
      JField("content-version", JString(content_version)) <- license
    } yield (asField(lic), content_version)
    val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue))
    if (l.nonEmpty) {
      // prefer the "vor" (version of record) license when one exists
      if (l exists (d => d._2.equals("vor"))) {
        for (d <- l) {
          if (d._2.equals("vor")) {
            instance.setLicense(d._1)
          }
        }
      } else {
        instance.setLicense(l.head._1)
      }
    }

    // Ticket #6281 added pid to Instance
    instance.setPid(result.getPid)

    // a "has-review" relation marks the work as peer reviewed
    // NOTE(review): JNothing is not among the visible json4s imports of this file — verify it compiles
    val has_review = json \ "relation" \ "has-review" \ "id"

    if (has_review != JNothing) {
      instance.setRefereed(
        OafMapperUtils.qualifier(
          "0001",
          "peerReviewed",
          ModelConstants.DNET_REVIEW_LEVELS,
          ModelConstants.DNET_REVIEW_LEVELS
        )
      )
    }

    instance.setAccessright(
      decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)
    )
    // cobjCategory is assumed to be "NNNN label": code = chars [0,4), name = chars [5..)
    instance.setInstancetype(
      OafMapperUtils.qualifier(
        cobjCategory.substring(0, 4),
        cobjCategory.substring(5),
        ModelConstants.DNET_PUBLICATION_RESOURCE,
        ModelConstants.DNET_PUBLICATION_RESOURCE
      )
    )
    result.setResourcetype(
      OafMapperUtils.qualifier(
        cobjCategory.substring(0, 4),
        cobjCategory.substring(5),
        ModelConstants.DNET_PUBLICATION_RESOURCE,
        ModelConstants.DNET_PUBLICATION_RESOURCE
      )
    )

    instance.setCollectedfrom(createCrossrefCollectedFrom())
    if (StringUtils.isNotBlank(issuedDate)) {
      instance.setDateofacceptance(asField(issuedDate))
    } else {
      instance.setDateofacceptance(asField(createdDate.getValue))
    }
    // the instance URL is always the DOI resolver link
    val s: List[String] = List("https://doi.org/" + doi)
    // val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct
    // if (links.nonEmpty) {
    //   instance.setUrl(links.asJava)
    // }
    if (s.nonEmpty) {
      instance.setUrl(s.asJava)
    }

    result.setInstance(List(instance).asJava)

    //IMPORTANT
    //The old method result.setId(generateIdentifier(result, doi))
    //is replaced using IdentifierFactory, but the old identifier
    //is preserved among the originalId(s)
    val oldId = generateIdentifier(result, doi)
    result.setId(oldId)

    val newId = IdentifierFactory.createDOIBoostIdentifier(result)
    if (!oldId.equalsIgnoreCase(newId)) {
      result.getOriginalId.add(oldId)
    }
    result.setId(newId)

    if (result.getId == null)
      null
    else
      result
  }

}
|
|
@ -0,0 +1,22 @@
|
||||||
|
package eu.dnetlib.dhp.crossref
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.AbstractScalaApplication
|
||||||
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
|
||||||
|
/**
 * Spark application generating the Crossref dataset.
 *
 * NOTE(review): still a WIP stub — run() is unimplemented (???) and will throw
 * NotImplementedError when invoked via the companion object's main.
 */
class GenerateCrossrefDataset (propertyPath: String, args: Array[String], log: Logger)
    extends AbstractScalaApplication(propertyPath, args, log: Logger) {

  /** Entry point invoked by the application framework after initialize();
    * the whole logic of this Spark node belongs here.
    */
  override def run(): Unit = ???
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Companion launcher: wires the logger and parameter descriptor, then runs the app. */
object GenerateCrossrefDataset {

  val log: Logger = LoggerFactory.getLogger(getClass)

  // classpath location of the application's parameter descriptor
  val propertyPath = "/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json"

  def main(args: Array[String]): Unit = {
    val application = new GenerateCrossrefDataset(propertyPath, args, log)
    application.initialize().run()
  }
}
|
|
@ -138,12 +138,11 @@ object DoiBoostMappingUtil {
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
def decideAccessRight(lic: Field[String], date: String): AccessRight = {
|
def decideAccessRight(license: String, date: String): AccessRight = {
|
||||||
if (lic == null) {
|
if (license == null || license.isEmpty) {
|
||||||
//Default value Unknown
|
//Default value Unknown
|
||||||
return getUnknownQualifier()
|
return getUnknownQualifier()
|
||||||
}
|
}
|
||||||
val license: String = lic.getValue
|
|
||||||
//CC licenses
|
//CC licenses
|
||||||
if (
|
if (
|
||||||
license.startsWith("cc") ||
|
license.startsWith("cc") ||
|
||||||
|
@ -305,7 +304,7 @@ object DoiBoostMappingUtil {
|
||||||
}
|
}
|
||||||
|
|
||||||
def generateDataInfo(): DataInfo = {
|
def generateDataInfo(): DataInfo = {
|
||||||
generateDataInfo("0.9")
|
generateDataInfo(0.9F)
|
||||||
}
|
}
|
||||||
|
|
||||||
def filterPublication(publication: Publication): Boolean = {
|
def filterPublication(publication: Publication): Boolean = {
|
||||||
|
@ -330,7 +329,7 @@ object DoiBoostMappingUtil {
|
||||||
|
|
||||||
// fixes #4360 (test publisher)
|
// fixes #4360 (test publisher)
|
||||||
val publisher =
|
val publisher =
|
||||||
if (publication.getPublisher != null) publication.getPublisher.getValue else null
|
if (publication.getPublisher != null) publication.getPublisher.getName else null
|
||||||
|
|
||||||
if (
|
if (
|
||||||
publisher != null && (publisher.equalsIgnoreCase("Test accounts") || publisher
|
publisher != null && (publisher.equalsIgnoreCase("Test accounts") || publisher
|
||||||
|
@ -358,7 +357,7 @@ object DoiBoostMappingUtil {
|
||||||
// fixes #4368
|
// fixes #4368
|
||||||
if (
|
if (
|
||||||
authors.count(s => s.equalsIgnoreCase("Addie Jackson")) > 0 && "Elsevier BV".equalsIgnoreCase(
|
authors.count(s => s.equalsIgnoreCase("Addie Jackson")) > 0 && "Elsevier BV".equalsIgnoreCase(
|
||||||
publication.getPublisher.getValue
|
publication.getPublisher.getName
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
return false
|
return false
|
||||||
|
@ -374,8 +373,8 @@ object DoiBoostMappingUtil {
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
def generateDataInfo(trust: String): DataInfo = {
|
def generateDataInfo(trust: Float): DataInfo = {
|
||||||
val di = new DataInfo
|
val di = new EntityDataInfo
|
||||||
di.setDeletedbyinference(false)
|
di.setDeletedbyinference(false)
|
||||||
di.setInferred(false)
|
di.setInferred(false)
|
||||||
di.setInvisible(false)
|
di.setInvisible(false)
|
||||||
|
@ -384,8 +383,8 @@ object DoiBoostMappingUtil {
|
||||||
OafMapperUtils.qualifier(
|
OafMapperUtils.qualifier(
|
||||||
ModelConstants.SYSIMPORT_ACTIONSET,
|
ModelConstants.SYSIMPORT_ACTIONSET,
|
||||||
ModelConstants.SYSIMPORT_ACTIONSET,
|
ModelConstants.SYSIMPORT_ACTIONSET,
|
||||||
ModelConstants.DNET_PROVENANCE_ACTIONS,
|
|
||||||
ModelConstants.DNET_PROVENANCE_ACTIONS
|
ModelConstants.DNET_PROVENANCE_ACTIONS
|
||||||
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
di
|
di
|
||||||
|
@ -393,7 +392,7 @@ object DoiBoostMappingUtil {
|
||||||
|
|
||||||
def createSubject(value: String, classId: String, schemeId: String): Subject = {
|
def createSubject(value: String, classId: String, schemeId: String): Subject = {
|
||||||
val s = new Subject
|
val s = new Subject
|
||||||
s.setQualifier(OafMapperUtils.qualifier(classId, classId, schemeId, schemeId))
|
s.setQualifier(OafMapperUtils.qualifier(classId, classId, schemeId))
|
||||||
s.setValue(value)
|
s.setValue(value)
|
||||||
s
|
s
|
||||||
|
|
||||||
|
@ -403,67 +402,37 @@ object DoiBoostMappingUtil {
|
||||||
value: String,
|
value: String,
|
||||||
classId: String,
|
classId: String,
|
||||||
className: String,
|
className: String,
|
||||||
schemeId: String,
|
schemeId: String
|
||||||
schemeName: String
|
|
||||||
): Subject = {
|
): Subject = {
|
||||||
val s = new Subject
|
val s = new Subject
|
||||||
s.setQualifier(OafMapperUtils.qualifier(classId, className, schemeId, schemeName))
|
s.setQualifier(OafMapperUtils.qualifier(classId, className, schemeId))
|
||||||
s.setValue(value)
|
s.setValue(value)
|
||||||
s
|
s
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def createSP(
|
|
||||||
value: String,
|
|
||||||
classId: String,
|
|
||||||
className: String,
|
|
||||||
schemeId: String,
|
|
||||||
schemeName: String
|
|
||||||
): StructuredProperty = {
|
|
||||||
val sp = new StructuredProperty
|
|
||||||
sp.setQualifier(OafMapperUtils.qualifier(classId, className, schemeId, schemeName))
|
|
||||||
sp.setValue(value)
|
|
||||||
sp
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
def createSP(
|
def createSP(
|
||||||
value: String,
|
value: String,
|
||||||
classId: String,
|
classId: String,
|
||||||
className: String,
|
className: String,
|
||||||
schemeId: String,
|
schemeId: String
|
||||||
schemeName: String,
|
|
||||||
dataInfo: DataInfo
|
|
||||||
): StructuredProperty = {
|
): StructuredProperty = {
|
||||||
val sp = new StructuredProperty
|
val sp = new StructuredProperty
|
||||||
sp.setQualifier(OafMapperUtils.qualifier(classId, className, schemeId, schemeName))
|
sp.setQualifier(OafMapperUtils.qualifier(classId, className, schemeId))
|
||||||
sp.setValue(value)
|
sp.setValue(value)
|
||||||
sp.setDataInfo(dataInfo)
|
|
||||||
sp
|
sp
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def createSP(value: String, classId: String, schemeId: String): StructuredProperty = {
|
def createSP(value: String, classId: String, schemeId: String): StructuredProperty = {
|
||||||
val sp = new StructuredProperty
|
val sp = new StructuredProperty
|
||||||
sp.setQualifier(OafMapperUtils.qualifier(classId, classId, schemeId, schemeId))
|
sp.setQualifier(OafMapperUtils.qualifier(classId, classId, schemeId))
|
||||||
sp.setValue(value)
|
sp.setValue(value)
|
||||||
sp
|
sp
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def createSP(
|
|
||||||
value: String,
|
|
||||||
classId: String,
|
|
||||||
schemeId: String,
|
|
||||||
dataInfo: DataInfo
|
|
||||||
): StructuredProperty = {
|
|
||||||
val sp = new StructuredProperty
|
|
||||||
sp.setQualifier(OafMapperUtils.qualifier(classId, classId, schemeId, schemeId))
|
|
||||||
sp.setValue(value)
|
|
||||||
sp.setDataInfo(dataInfo)
|
|
||||||
sp
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
def createCrossrefCollectedFrom(): KeyValue = {
|
def createCrossrefCollectedFrom(): KeyValue = {
|
||||||
|
|
||||||
|
@ -506,13 +475,6 @@ object DoiBoostMappingUtil {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def asField[T](value: T): Field[T] = {
|
|
||||||
val tmp = new Field[T]
|
|
||||||
tmp.setValue(value)
|
|
||||||
tmp
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
def isEmpty(x: String) = x == null || x.trim.isEmpty
|
def isEmpty(x: String) = x == null || x.trim.isEmpty
|
||||||
|
|
||||||
def normalizeDoi(input: String): String = {
|
def normalizeDoi(input: String): String = {
|
||||||
|
|
Loading…
Reference in New Issue