Merge branch 'ticket_8369' of https://code-repo.d4science.org/D-Net/dnet-hadoop into ticket_8369

This commit is contained in:
Claudio Atzori 2023-02-15 11:39:36 +01:00
commit 56495cc727
8 changed files with 447 additions and 146 deletions

View File

@ -4,12 +4,14 @@ package eu.dnetlib.dhp.schema.common;
import eu.dnetlib.dhp.schema.oaf.AccessRight;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
public class ModelConstants {
private ModelConstants() {
}
public static final String DOI = "doi";
public static final String ORCID = "orcid";
public static final String ORCID_PENDING = "orcid_pending";
public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID";
@ -219,7 +221,7 @@ public class ModelConstants {
public static final Qualifier ALTERNATIVE_TITLE_QUALIFIER = qualifier(
"alternative title", "alternative title", DNET_DATACITE_TITLE);
private static final Qualifier SUBTITLE_QUALIFIER = qualifier("subtitle", "subtitle", DNET_DATACITE_TITLE);
public static final Qualifier SUBTITLE_QUALIFIER = qualifier("subtitle", "subtitle", DNET_DATACITE_TITLE);
public static final AccessRight OPEN_ACCESS_RIGHT() {
@ -230,6 +232,38 @@ public class ModelConstants {
return result;
}
public static final AccessRight RESTRICTED_ACCESS_RIGHT() {
final AccessRight result = new AccessRight();
result.setClassid("RESTRICTED");
result.setClassname("Restricted");
result.setSchemeid(ModelConstants.DNET_ACCESS_MODES);
return result;
}
public static final AccessRight UNKNOWN_ACCESS_RIGHT() {
return OafMapperUtils.accessRight(
ModelConstants.UNKNOWN,
ModelConstants.NOT_AVAILABLE,
ModelConstants.DNET_ACCESS_MODES
);
}
public static final AccessRight EMBARGOED_ACCESS_RIGHT() {
return OafMapperUtils.accessRight(
ACCESS_RIGHT_EMBARGO,
ACCESS_RIGHT_EMBARGO,
DNET_ACCESS_MODES
);
}
public static final AccessRight CLOSED_ACCESS_RIGHT() {
return OafMapperUtils.accessRight(
ACCESS_RIGHT_CLOSED,
"Closed Access",
ModelConstants.DNET_ACCESS_MODES
);
}
private static Qualifier qualifier(
final String classid,
final String classname,

View File

@ -6,13 +6,17 @@ import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import lombok.val;
public class CleaningFunctions {
public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10\\.)";
private static final String ALL_SPACES_REGEX = "(?:\\n|\\r|\\t|\\s)";
public static final String DOI_PREFIX = "10.";
public static final Set<String> PID_BLACKLIST = new HashSet<>();
@ -58,6 +62,27 @@ public class CleaningFunctions {
return pid;
}
/**
* This utility was moved from DOIBoost,
* it implements a better cleaning of DOI.
* In case of wrong DOI it raises an illegalArgumentException
* @param input DOI
* @return normalized DOI
*/
private static String normalizeDOI(final String input) {
if (input == null)
throw new IllegalArgumentException("PID value cannot be empty");
final String replaced = input
.replaceAll(ALL_SPACES_REGEX, "")
.toLowerCase()
.replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX);
if (StringUtils.isEmpty(replaced.trim()))
throw new IllegalArgumentException("PID value normalized return empty string");
if (!replaced.contains("10."))
throw new IllegalArgumentException("DOI Must starts with 10.");
return replaced.substring(replaced.indexOf("10."));
}
public static String normalizePidValue(String pidType, String pidValue) {
String value = Optional
.ofNullable(pidValue)
@ -67,8 +92,8 @@ public class CleaningFunctions {
switch (pidType) {
// TODO add cleaning for more PID types as needed
case "doi":
return value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX);
case ModelConstants.DOI:
return normalizeDOI(value.toLowerCase());
}
return value;
}

View File

@ -495,7 +495,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
private static AccessRight accessRight(String classid, String classname, String scheme) {
return OafMapperUtils
.accessRight(
classid, classname, scheme, scheme);
classid, classname, scheme);
}
private static Qualifier qualifier(String classid, String classname, String scheme) {

View File

@ -55,19 +55,19 @@ public class OafMapperUtils {
return qualifier(UNKNOWN, "Unknown", schemeid);
}
public static AccessRight accessRight(
final String classid,
final String classname,
final String schemeid,
final String schemename) {
return accessRight(classid, classname, schemeid, schemename, null);
final String schemeid) {
return accessRight(classid, classname, schemeid, null);
}
public static AccessRight accessRight(
final String classid,
final String classname,
final String schemeid,
final String schemename,
final OpenAccessRoute openAccessRoute) {
final AccessRight accessRight = new AccessRight();
accessRight.setClassid(classid);

View File

@ -67,7 +67,7 @@ public class VocabularyTest {
if (t1 == null) {
System.err.println(s1 + " Missing");
} else {
System.out.println("syn=" + s1 + " term = " + t1.getClassid());
System.out.println("syn=" + s1 + " term = " + t1.getClassid()+" "+t1.getClassname());
System.out
.println(

View File

@ -1,12 +1,12 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.DeserializationFeature;
@ -81,4 +81,23 @@ class IdentifierFactoryTest {
assertEquals(expectedID, IdentifierFactory.createIdentifier(pub, md5));
}
@Test
void testNormalizeDOI() throws Exception {
final String doi = "10.1042/BCJ20160876";
assertEquals(CleaningFunctions.normalizePidValue("doi", doi), doi.toLowerCase());
final String doi2 = "0.1042/BCJ20160876";
assertThrows(IllegalArgumentException.class, () -> CleaningFunctions.normalizePidValue("doi", doi2));
final String doi3 = "https://doi.org/0.1042/BCJ20160876";
assertThrows(IllegalArgumentException.class, () -> CleaningFunctions.normalizePidValue("doi", doi3));
final String doi4 = "https://doi.org/10.1042/BCJ20160876";
assertEquals(CleaningFunctions.normalizePidValue("doi", doi4), "10.1042/BCJ20160876".toLowerCase());
final String doi5 = "https://doi.org/10.1042/ BCJ20160876";
assertEquals(CleaningFunctions.normalizePidValue("doi", doi5), "10.1042/BCJ20160876".toLowerCase());
}
}

View File

@ -2,90 +2,34 @@ package eu.dnetlib.dhp.crossref
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType}
import eu.dnetlib.dhp.schema.common.ModelConstants.OPEN_ACCESS_RIGHT
import eu.dnetlib.dhp.schema.oaf._
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._
import eu.dnetlib.dhp.schema.oaf.utils._
import org.apache.commons.lang.StringUtils
import org.json4s
import org.json4s.DefaultFormats
import org.json4s.JsonAST.{JField, JObject, JString, JValue}
import org.json4s.JsonAST._
import org.json4s.jackson.JsonMethods.parse
import org.slf4j.{Logger, LoggerFactory}
import java.time.LocalDate
import java.time.format.DateTimeFormatter
import scala.collection.JavaConverters._
import scala.collection.mutable.ListBuffer
import scala.util.matching.Regex
case class CrossrefDT(doi: String, json: String, timestamp: Long) {}
case class CrossrefAuthor(givenName:String, familyName:String,ORCID:String, sequence:String, rank:Int ){}
case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}
object CrossrefUtility {
val DOI_PREFIX_REGEX = "(^10\\.|\\/10.)"
val DOI_PREFIX = "10."
val CROSSREF_COLLECTED_FROM = keyValue(ModelConstants.CROSSREF_ID, ModelConstants.CROSSREF_NAME)
def normalizeDoi(input: String): String = {
if (input == null)
return null
val replaced = input
.replaceAll("(?:\\n|\\r|\\t|\\s)", "")
.toLowerCase
.replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX)
if (replaced == null || replaced.trim.isEmpty)
return null
if (replaced.indexOf("10.") < 0)
return null
val ret = replaced.substring(replaced.indexOf("10."))
if (!ret.startsWith(DOI_PREFIX))
return null
ret
}
val logger: Logger = LoggerFactory.getLogger(getClass)
def extractDate(dt: String, datePart: List[List[Int]]): String = {
if (StringUtils.isNotBlank(dt))
return GraphCleaningFunctions.cleanDate(dt)
if (datePart != null && datePart.size == 1) {
val res = datePart.head
if (res.size == 3) {
val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d"
if (dp.length == 10) {
return GraphCleaningFunctions.cleanDate(dp)
}
} else if (res.size == 2) {
val dp = f"${res.head}-${res(1)}%02d-01"
return GraphCleaningFunctions.cleanDate(dp)
} else if (res.size == 1) {
return GraphCleaningFunctions.cleanDate(s"${res.head}-01-01")
}
}
null
}
private def generateDate(
dt: String,
datePart: List[List[Int]],
classId: String,
schemeId: String
): StructuredProperty = {
val dp = extractDate(dt, datePart)
if (StringUtils.isNotBlank(dp))
structuredProperty(dp, classId, classId, schemeId)
else
null
}
private def generateItemFromType(objectType: String, vocabularies: VocabularyGroup): (Result, String) = {
val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, objectType)
if (term != null) {
val resourceType =
vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname
resourceType match {
case "publication" => (new Publication, resourceType)
case "dataset" => (new Dataset, resourceType)
case "software" => (new Software, resourceType)
case "otherresearchproduct" => (new OtherResearchProduct, resourceType)
}
} else
null
}
def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
@ -103,7 +47,8 @@ object CrossrefUtility {
val result = resultWithType._1
val cOBJCategory = resultWithType._2
mappingResult(result, json, cOBJCategory)
val className = resultWithType._3
mappingResult(result, json, cOBJCategory, className)
if (result == null || result.getId == null)
return List()
@ -111,29 +56,144 @@ object CrossrefUtility {
(json \ "funder").extractOrElse[List[mappingFunder]](List())
if (funderList.nonEmpty) {
resultList = resultList ::: mappingFunderToRelations(
funderList,
result.getId,
createCrossrefCollectedFrom(),
result.getDataInfo,
result.getLastupdatetimestamp
)
resultList = resultList ::: mappingFunderToRelations(funderList, result )
}
result match {
case publication: Publication => convertPublication(publication, json, cOBJCategory)
case dataset: Dataset => convertDataset(dataset)
}
resultList = resultList ::: List(result)
resultList
}
def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = {
private def createRelation(sourceId: String, targetId: String, relClass: String): Relation = {
val r = new Relation
r.setSource(sourceId)
r.setTarget(targetId)
r.setRelType(ModelConstants.RESULT_PROJECT)
r.setRelClass(relClass)
r.setSubRelType(ModelConstants.OUTCOME)
r.setProvenance(List(OafMapperUtils.getProvenance(CROSSREF_COLLECTED_FROM, null)).asJava)
r
}
private def generateSimpleRelationFromAward(
funder: mappingFunder,
nsPrefix: String,
extractField: String => String,
source:Result
): List[Relation] = {
if (funder.award.isDefined && funder.award.get.nonEmpty)
funder.award.get
.map(extractField)
.filter(a => a != null && a.nonEmpty)
.map(award => {
val targetId = IdentifierFactory.createOpenaireId("project",s"$nsPrefix::$award", true)
createRelation(targetId, source.getId, ModelConstants.PRODUCES)
})
else List()
}
private def extractECAward(award: String): String = {
val awardECRegex: Regex = "[0-9]{4,9}".r
if (awardECRegex.findAllIn(award).hasNext)
return awardECRegex.findAllIn(award).max
null
}
private def snsfRule(award: String): String = {
val tmp1 = StringUtils.substringAfter(award, "_")
val tmp2 = StringUtils.substringBefore(tmp1, "/")
tmp2
}
private def mappingFunderToRelations(funders: List[mappingFunder], result: Result): List[Relation] = {
var relList:List[Relation] = List()
if (funders != null)
funders.foreach(funder => {
if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) {
funder.DOI.get match {
case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | "10.13039/501100000780" |
"10.13039/100010665" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | "10.13039/501100000780" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
case "10.13039/501100000781" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
case "10.13039/100000001" => relList =relList ::: generateSimpleRelationFromAward(funder, "nsf_________", a => a, result)
case "10.13039/501100001665" => relList =relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result)
case "10.13039/501100002341" => relList =relList ::: generateSimpleRelationFromAward(funder, "aka_________", a => a, result)
case "10.13039/501100001602" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "sfi_________", a => a.replace("SFI", ""), result)
case "10.13039/501100000923" => relList =relList ::: generateSimpleRelationFromAward(funder, "arc_________", a => a, result)
case "10.13039/501100000038" =>
val targetId = IdentifierFactory.createOpenaireId("project", "nserc_______::1e5e62235d094afd01cd56e65112fc63", false)
relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
case "10.13039/501100000155" =>
val targetId = IdentifierFactory.createOpenaireId("project", "sshrc_______::1e5e62235d094afd01cd56e65112fc63", false)
relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
case "10.13039/501100000024" =>
val targetId = IdentifierFactory.createOpenaireId("project", "cihr________::1e5e62235d094afd01cd56e65112fc63", false)
relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
case "10.13039/501100002848" => relList =relList ::: generateSimpleRelationFromAward(funder, "conicytf____", a => a, result)
case "10.13039/501100003448" => relList =relList ::: generateSimpleRelationFromAward(funder, "gsrt________", extractECAward, result)
case "10.13039/501100010198" => relList =relList ::: generateSimpleRelationFromAward(funder, "sgov________", a => a, result)
case "10.13039/501100004564" => relList =relList ::: generateSimpleRelationFromAward(funder, "mestd_______", extractECAward, result)
case "10.13039/501100003407" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "miur________", a => a, result)
val targetId = IdentifierFactory.createOpenaireId("project", "miur________::1e5e62235d094afd01cd56e65112fc63", false)
relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
case "10.13039/501100006588" | "10.13039/501100004488" =>
relList =relList ::: generateSimpleRelationFromAward(
funder,
"irb_hr______",
a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", ""), result
)
case "10.13039/501100006769" => relList =relList ::: generateSimpleRelationFromAward(funder, "rsf_________", a => a, result)
case "10.13039/501100001711" => relList =relList ::: generateSimpleRelationFromAward(funder, "snsf________", snsfRule, result)
case "10.13039/501100004410" => relList =relList ::: generateSimpleRelationFromAward(funder, "tubitakf____", a => a, result)
case "10.13039/100004440" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result)
val targetId = IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false)
relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
case _ => logger.debug("no match for " + funder.DOI.get)
}
} else {
funder.name match {
case "European Unions Horizon 2020 research and innovation program" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
case "European Union's" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
case "The French National Research Agency (ANR)" | "The French National Research Agency" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result)
case "CONICYT, Programa de Formación de Capital Humano Avanzado" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "conicytf____", extractECAward, result)
case "Wellcome Trust Masters Fellowship" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result)
val targetId = IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false)
relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
case _ => logger.debug("no match for " + funder.name)
}
}
})
relList
}
private def mappingResult(result: Result, json: JValue, cobjCategory: String, className:String): Result = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
//MAPPING Crossref DOI into PID
val doi: String = normalizeDoi((json \ "DOI").extract[String])
val doi: String = CleaningFunctions.normalizePidValue(ModelConstants.DOI, (json \ "DOI").extract[String])
result.setPid(
List(
@ -176,9 +236,7 @@ object CrossrefUtility {
} yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER)
val subtitles =
for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield structuredProperty(
title,
ModelConstants.SUBTITLE_QUALIFIER
)
title, ModelConstants.SUBTITLE_QUALIFIER)
result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava)
// DESCRIPTION
@ -244,21 +302,28 @@ object CrossrefUtility {
if (subjectList.nonEmpty) {
result.setSubject(
subjectList.map(s => createSubject(s, "keyword", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava
)
subjectList.map(s =>
OafMapperUtils.subject(s, OafMapperUtils.qualifier(ModelConstants.DNET_SUBJECT_KEYWORD,ModelConstants.DNET_SUBJECT_KEYWORD,ModelConstants.DNET_SUBJECT_TYPOLOGIES), null)
).asJava)
}
//Mapping Author
val authorList: List[mappingAuthor] =
(json \ "author").extractOrElse[List[mappingAuthor]](List())
val authorList:List[CrossrefAuthor] =
for {
JObject(author) <- json \ "author"
JField("ORCID", JString(orcid)) <- author
JField("given", JString(givenName)) <- author
JField("family", JString(familyName)) <- author
JField("sequence", JString(sequence)) <- author
} yield CrossrefAuthor(givenName = givenName, familyName = familyName, ORCID = orcid, sequence = sequence, rank = 0)
val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) =>
a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first")
)
result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) =>
generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index)
}.asJava)
result.setAuthor(authorList.sortWith((a,b) =>{
if (a.sequence.equalsIgnoreCase("first"))
true
else if (b.sequence.equalsIgnoreCase("first"))
false
else a.familyName< b.familyName
}).zipWithIndex.map(k=> k._1.copy(rank = k._2)).map(k => generateAuthor(k)).asJava)
// Mapping instance
val instance = new Instance()
@ -266,8 +331,8 @@ object CrossrefUtility {
JObject(license) <- json \ "license"
JField("URL", JString(lic)) <- license
JField("content-version", JString(content_version)) <- license
} yield (asField(lic), content_version)
val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue))
} yield (new License(lic), content_version)
val l = license.filter(d => StringUtils.isNotBlank(d._1.getUrl))
if (l.nonEmpty) {
if (l exists (d => d._2.equals("vor"))) {
for (d <- l) {
@ -290,66 +355,225 @@ object CrossrefUtility {
OafMapperUtils.qualifier(
"0001",
"peerReviewed",
ModelConstants.DNET_REVIEW_LEVELS,
ModelConstants.DNET_REVIEW_LEVELS
)
)
}
instance.setAccessright(
decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)
)
if (instance.getLicense!= null)
instance.setAccessright(
decideAccessRight(instance.getLicense.getUrl, result.getDateofacceptance)
)
instance.setInstancetype(
OafMapperUtils.qualifier(
cobjCategory.substring(0, 4),
cobjCategory.substring(5),
ModelConstants.DNET_PUBLICATION_RESOURCE,
cobjCategory,
className,
ModelConstants.DNET_PUBLICATION_RESOURCE
)
)
result.setResourcetype(
OafMapperUtils.qualifier(
cobjCategory.substring(0, 4),
cobjCategory.substring(5),
ModelConstants.DNET_PUBLICATION_RESOURCE,
cobjCategory,
className,
ModelConstants.DNET_PUBLICATION_RESOURCE
)
)
instance.setCollectedfrom(createCrossrefCollectedFrom())
instance.setCollectedfrom(CROSSREF_COLLECTED_FROM)
if (StringUtils.isNotBlank(issuedDate)) {
instance.setDateofacceptance(asField(issuedDate))
instance.setDateofacceptance(issuedDate)
} else {
instance.setDateofacceptance(asField(createdDate.getValue))
instance.setDateofacceptance(createdDate.getValue)
}
val s: List[String] = List("https://doi.org/" + doi)
// val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct
// if (links.nonEmpty) {
// instance.setUrl(links.asJava)
// }
if (s.nonEmpty) {
instance.setUrl(s.asJava)
}
val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct
//Mapping book
if (className.toLowerCase.contains("book")) {
val ISBN = for {JString(isbn) <- json \ "ISBN"} yield isbn
if (ISBN.nonEmpty && containerTitles.nonEmpty) {
val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
if (result.getSource != null) {
val l: List[String] = result.getSource.asScala.toList ::: List(source)
result.setSource(l.asJava)
} else
result.setSource(List(source).asJava)
}
} else {
// Mapping Journal
val issnInfos = for {
JObject(issn_type) <- json \ "issn-type"
JField("type", JString(tp)) <- issn_type
JField("value", JString(vl)) <- issn_type
} yield Tuple2(tp, vl)
val volume = (json \ "volume").extractOrElse[String](null)
if (containerTitles.nonEmpty) {
val journal = new Journal
journal.setName(containerTitles.head)
if (issnInfos.nonEmpty) {
issnInfos.foreach(tp => {
tp._1 match {
case "electronic" => journal.setIssnOnline(tp._2)
case "print" => journal.setIssnPrinted(tp._2)
}
})
}
journal.setVol(volume)
val page = (json \ "page").extractOrElse[String](null)
if (page != null) {
val pp = page.split("-")
if (pp.nonEmpty)
journal.setSp(pp.head)
if (pp.size > 1)
journal.setEp(pp(1))
}
result.setJournal(journal)
}
}
result.setInstance(List(instance).asJava)
//IMPORTANT
//The old method result.setId(generateIdentifier(result, doi))
//is replaced using IdentifierFactory, but the old identifier
//is preserved among the originalId(s)
val oldId = generateIdentifier(result, doi)
result.setId(oldId)
val newId = IdentifierFactory.createDOIBoostIdentifier(result)
if (!oldId.equalsIgnoreCase(newId)) {
result.getOriginalId.add(oldId)
}
result.setId(newId)
if (result.getId == null)
result.setId("ID")
result.setId(IdentifierFactory.createIdentifier(result, true))
if (result.getId == null || "ID".equalsIgnoreCase(result.getId))
null
else
result
}
def decideAccessRight(license: String, date: String): AccessRight = {
if (license == null || license.isEmpty) {
//Default value Unknown
return ModelConstants.UNKNOWN_ACCESS_RIGHT();
}
//CC licenses
if (
license.startsWith("cc") ||
license.startsWith("http://creativecommons.org/licenses") ||
license.startsWith("https://creativecommons.org/licenses") ||
//ACS Publications Author choice licenses (considered OPEN also by Unpaywall)
license.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html") ||
license.equals("http://pubs.acs.org/page/policy/authorchoice_termsofuse.html") ||
license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") ||
//APA (considered OPEN also by Unpaywall)
license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx")
) {
val oaq: AccessRight = ModelConstants.OPEN_ACCESS_RIGHT()
oaq.setOpenAccessRoute(OpenAccessRoute.hybrid)
return oaq
}
//OUP (BUT ONLY AFTER 12 MONTHS FROM THE PUBLICATION DATE, OTHERWISE THEY ARE EMBARGOED)
if (
license.equals(
"https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"
)
) {
val now = java.time.LocalDate.now
try {
val pub_date = LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd"))
if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) {
val oaq: AccessRight = ModelConstants.OPEN_ACCESS_RIGHT()
oaq.setOpenAccessRoute(OpenAccessRoute.hybrid)
return oaq
} else {
return ModelConstants.EMBARGOED_ACCESS_RIGHT()
}
} catch {
case _: Exception => {
try {
val pub_date =
LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'"))
if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) {
val oaq: AccessRight = OPEN_ACCESS_RIGHT()
oaq.setOpenAccessRoute(OpenAccessRoute.hybrid)
return oaq
} else {
return ModelConstants.EMBARGOED_ACCESS_RIGHT()
}
} catch {
case _: Exception => return ModelConstants.CLOSED_ACCESS_RIGHT()
}
}
}
}
ModelConstants.CLOSED_ACCESS_RIGHT()
}
private def extractDate(dt: String, datePart: List[List[Int]]): String = {
if (StringUtils.isNotBlank(dt))
return GraphCleaningFunctions.cleanDate(dt)
if (datePart != null && datePart.size == 1) {
val res = datePart.head
if (res.size == 3) {
val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d"
if (dp.length == 10) {
return GraphCleaningFunctions.cleanDate(dp)
}
} else if (res.size == 2) {
val dp = f"${res.head}-${res(1)}%02d-01"
return GraphCleaningFunctions.cleanDate(dp)
} else if (res.size == 1) {
return GraphCleaningFunctions.cleanDate(s"${res.head}-01-01")
}
}
null
}
private def generateDate(
dt: String,
datePart: List[List[Int]],
classId: String,
schemeId: String
): StructuredProperty = {
val dp = extractDate(dt, datePart)
if (StringUtils.isNotBlank(dp))
structuredProperty(dp, classId, classId, schemeId)
else
null
}
private def generateItemFromType(objectType: String, vocabularies: VocabularyGroup): (Result, String, String) = {
val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, objectType)
if (term != null) {
val resourceType =
vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname
resourceType match {
case "publication" => (new Publication, resourceType, term.getClassname)
case "dataset" => (new Dataset, resourceType, term.getClassname)
case "software" => (new Software, resourceType, term.getClassname)
case "otherresearchproduct" => (new OtherResearchProduct, resourceType, term.getClassname)
}
} else
null
}
private def generateAuthor(ca: CrossrefAuthor): Author = {
val a = new Author
a.setName(ca.givenName)
a.setSurname(ca.familyName)
a.setFullname(s"${ca.familyName}, ${ca.givenName}")
a.setRank(ca.rank + 1)
if (StringUtils.isNotBlank(ca.ORCID))
a.setPid(
List(
OafMapperUtils.authorPid(ca.ORCID, OafMapperUtils.qualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_PENDING, ModelConstants.DNET_PID_TYPES), null)
).asJava
)
a
}
}

View File

@ -553,7 +553,6 @@ object DataciteToOAFTransformation {
OafMapperUtils.accessRight(
ModelConstants.UNKNOWN,
ModelConstants.NOT_AVAILABLE,
ModelConstants.DNET_ACCESS_MODES,
ModelConstants.DNET_ACCESS_MODES
)