Now Crossref mapping and dhp-aggregation compile

This commit is contained in:
Sandro La Bruzzo 2023-02-14 17:20:13 +01:00
parent 990e3e2f60
commit 8af8b2ea27
8 changed files with 447 additions and 146 deletions

View File

@ -4,12 +4,14 @@ package eu.dnetlib.dhp.schema.common;
import eu.dnetlib.dhp.schema.oaf.AccessRight; import eu.dnetlib.dhp.schema.oaf.AccessRight;
import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
public class ModelConstants { public class ModelConstants {
private ModelConstants() { private ModelConstants() {
} }
public static final String DOI = "doi";
public static final String ORCID = "orcid"; public static final String ORCID = "orcid";
public static final String ORCID_PENDING = "orcid_pending"; public static final String ORCID_PENDING = "orcid_pending";
public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID"; public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID";
@ -219,7 +221,7 @@ public class ModelConstants {
public static final Qualifier ALTERNATIVE_TITLE_QUALIFIER = qualifier( public static final Qualifier ALTERNATIVE_TITLE_QUALIFIER = qualifier(
"alternative title", "alternative title", DNET_DATACITE_TITLE); "alternative title", "alternative title", DNET_DATACITE_TITLE);
private static final Qualifier SUBTITLE_QUALIFIER = qualifier("subtitle", "subtitle", DNET_DATACITE_TITLE); public static final Qualifier SUBTITLE_QUALIFIER = qualifier("subtitle", "subtitle", DNET_DATACITE_TITLE);
public static final AccessRight OPEN_ACCESS_RIGHT() { public static final AccessRight OPEN_ACCESS_RIGHT() {
@ -230,6 +232,38 @@ public class ModelConstants {
return result; return result;
} }
public static final AccessRight RESTRICTED_ACCESS_RIGHT() {
final AccessRight result = new AccessRight();
result.setClassid("RESTRICTED");
result.setClassname("Restricted");
result.setSchemeid(ModelConstants.DNET_ACCESS_MODES);
return result;
}
public static final AccessRight UNKNOWN_ACCESS_RIGHT() {
return OafMapperUtils.accessRight(
ModelConstants.UNKNOWN,
ModelConstants.NOT_AVAILABLE,
ModelConstants.DNET_ACCESS_MODES
);
}
public static final AccessRight EMBARGOED_ACCESS_RIGHT() {
return OafMapperUtils.accessRight(
ACCESS_RIGHT_EMBARGO,
ACCESS_RIGHT_EMBARGO,
DNET_ACCESS_MODES
);
}
public static final AccessRight CLOSED_ACCESS_RIGHT() {
return OafMapperUtils.accessRight(
ACCESS_RIGHT_CLOSED,
"Closed Access",
ModelConstants.DNET_ACCESS_MODES
);
}
private static Qualifier qualifier( private static Qualifier qualifier(
final String classid, final String classid,
final String classname, final String classname,

View File

@ -6,13 +6,17 @@ import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import java.util.Set; import java.util.Set;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import lombok.val;
public class CleaningFunctions { public class CleaningFunctions {
public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10\\.)"; public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10\\.)";
private static final String ALL_SPACES_REGEX = "(?:\\n|\\r|\\t|\\s)";
public static final String DOI_PREFIX = "10."; public static final String DOI_PREFIX = "10.";
public static final Set<String> PID_BLACKLIST = new HashSet<>(); public static final Set<String> PID_BLACKLIST = new HashSet<>();
@ -58,6 +62,27 @@ public class CleaningFunctions {
return pid; return pid;
} }
/**
* This utility was moved from DOIBoost,
* it implements a better cleaning of DOI.
* In case of wrong DOI it raises an illegalArgumentException
* @param input DOI
* @return normalized DOI
*/
private static String normalizeDOI(final String input) {
if (input == null)
throw new IllegalArgumentException("PID value cannot be empty");
final String replaced = input
.replaceAll(ALL_SPACES_REGEX, "")
.toLowerCase()
.replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX);
if (StringUtils.isEmpty(replaced.trim()))
throw new IllegalArgumentException("PID value normalized return empty string");
if (!replaced.contains("10."))
throw new IllegalArgumentException("DOI Must starts with 10.");
return replaced.substring(replaced.indexOf("10."));
}
public static String normalizePidValue(String pidType, String pidValue) { public static String normalizePidValue(String pidType, String pidValue) {
String value = Optional String value = Optional
.ofNullable(pidValue) .ofNullable(pidValue)
@ -67,8 +92,8 @@ public class CleaningFunctions {
switch (pidType) { switch (pidType) {
// TODO add cleaning for more PID types as needed // TODO add cleaning for more PID types as needed
case "doi": case ModelConstants.DOI:
return value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX); return normalizeDOI(value.toLowerCase());
} }
return value; return value;
} }

View File

@ -495,7 +495,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
private static AccessRight accessRight(String classid, String classname, String scheme) { private static AccessRight accessRight(String classid, String classname, String scheme) {
return OafMapperUtils return OafMapperUtils
.accessRight( .accessRight(
classid, classname, scheme, scheme); classid, classname, scheme);
} }
private static Qualifier qualifier(String classid, String classname, String scheme) { private static Qualifier qualifier(String classid, String classname, String scheme) {

View File

@ -55,19 +55,19 @@ public class OafMapperUtils {
return qualifier(UNKNOWN, "Unknown", schemeid); return qualifier(UNKNOWN, "Unknown", schemeid);
} }
public static AccessRight accessRight( public static AccessRight accessRight(
final String classid, final String classid,
final String classname, final String classname,
final String schemeid, final String schemeid) {
final String schemename) { return accessRight(classid, classname, schemeid, null);
return accessRight(classid, classname, schemeid, schemename, null);
} }
public static AccessRight accessRight( public static AccessRight accessRight(
final String classid, final String classid,
final String classname, final String classname,
final String schemeid, final String schemeid,
final String schemename,
final OpenAccessRoute openAccessRoute) { final OpenAccessRoute openAccessRoute) {
final AccessRight accessRight = new AccessRight(); final AccessRight accessRight = new AccessRight();
accessRight.setClassid(classid); accessRight.setClassid(classid);

View File

@ -67,7 +67,7 @@ public class VocabularyTest {
if (t1 == null) { if (t1 == null) {
System.err.println(s1 + " Missing"); System.err.println(s1 + " Missing");
} else { } else {
System.out.println("syn=" + s1 + " term = " + t1.getClassid()); System.out.println("syn=" + s1 + " term = " + t1.getClassid()+" "+t1.getClassname());
System.out System.out
.println( .println(

View File

@ -1,12 +1,12 @@
package eu.dnetlib.dhp.schema.oaf.utils; package eu.dnetlib.dhp.schema.oaf.utils;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import java.io.IOException; import java.io.IOException;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.DeserializationFeature;
@ -81,4 +81,23 @@ class IdentifierFactoryTest {
assertEquals(expectedID, IdentifierFactory.createIdentifier(pub, md5)); assertEquals(expectedID, IdentifierFactory.createIdentifier(pub, md5));
} }
@Test
void testNormalizeDOI() throws Exception {
final String doi = "10.1042/BCJ20160876";
assertEquals(CleaningFunctions.normalizePidValue("doi", doi), doi.toLowerCase());
final String doi2 = "0.1042/BCJ20160876";
assertThrows(IllegalArgumentException.class, () -> CleaningFunctions.normalizePidValue("doi", doi2));
final String doi3 = "https://doi.org/0.1042/BCJ20160876";
assertThrows(IllegalArgumentException.class, () -> CleaningFunctions.normalizePidValue("doi", doi3));
final String doi4 = "https://doi.org/10.1042/BCJ20160876";
assertEquals(CleaningFunctions.normalizePidValue("doi", doi4), "10.1042/BCJ20160876".toLowerCase());
final String doi5 = "https://doi.org/10.1042/ BCJ20160876";
assertEquals(CleaningFunctions.normalizePidValue("doi", doi5), "10.1042/BCJ20160876".toLowerCase());
}
} }

View File

@ -2,90 +2,34 @@ package eu.dnetlib.dhp.crossref
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._ import eu.dnetlib.dhp.schema.common.ModelConstants.OPEN_ACCESS_RIGHT
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType}
import eu.dnetlib.dhp.schema.oaf._ import eu.dnetlib.dhp.schema.oaf._
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._
import eu.dnetlib.dhp.schema.oaf.utils._
import org.apache.commons.lang.StringUtils import org.apache.commons.lang.StringUtils
import org.json4s import org.json4s
import org.json4s.DefaultFormats import org.json4s.DefaultFormats
import org.json4s.JsonAST.{JField, JObject, JString, JValue} import org.json4s.JsonAST._
import org.json4s.jackson.JsonMethods.parse import org.json4s.jackson.JsonMethods.parse
import org.slf4j.{Logger, LoggerFactory}
import java.time.LocalDate
import java.time.format.DateTimeFormatter
import scala.collection.JavaConverters._ import scala.collection.JavaConverters._
import scala.collection.mutable.ListBuffer
import scala.util.matching.Regex
case class CrossrefDT(doi: String, json: String, timestamp: Long) {} case class CrossrefDT(doi: String, json: String, timestamp: Long) {}
case class CrossrefAuthor(givenName:String, familyName:String,ORCID:String, sequence:String, rank:Int ){}
case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}
object CrossrefUtility { object CrossrefUtility {
val DOI_PREFIX_REGEX = "(^10\\.|\\/10.)"
val DOI_PREFIX = "10."
val CROSSREF_COLLECTED_FROM = keyValue(ModelConstants.CROSSREF_ID, ModelConstants.CROSSREF_NAME) val CROSSREF_COLLECTED_FROM = keyValue(ModelConstants.CROSSREF_ID, ModelConstants.CROSSREF_NAME)
def normalizeDoi(input: String): String = { val logger: Logger = LoggerFactory.getLogger(getClass)
if (input == null)
return null
val replaced = input
.replaceAll("(?:\\n|\\r|\\t|\\s)", "")
.toLowerCase
.replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX)
if (replaced == null || replaced.trim.isEmpty)
return null
if (replaced.indexOf("10.") < 0)
return null
val ret = replaced.substring(replaced.indexOf("10."))
if (!ret.startsWith(DOI_PREFIX))
return null
ret
}
def extractDate(dt: String, datePart: List[List[Int]]): String = {
if (StringUtils.isNotBlank(dt))
return GraphCleaningFunctions.cleanDate(dt)
if (datePart != null && datePart.size == 1) {
val res = datePart.head
if (res.size == 3) {
val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d"
if (dp.length == 10) {
return GraphCleaningFunctions.cleanDate(dp)
}
} else if (res.size == 2) {
val dp = f"${res.head}-${res(1)}%02d-01"
return GraphCleaningFunctions.cleanDate(dp)
} else if (res.size == 1) {
return GraphCleaningFunctions.cleanDate(s"${res.head}-01-01")
}
}
null
}
private def generateDate(
dt: String,
datePart: List[List[Int]],
classId: String,
schemeId: String
): StructuredProperty = {
val dp = extractDate(dt, datePart)
if (StringUtils.isNotBlank(dp))
structuredProperty(dp, classId, classId, schemeId)
else
null
}
private def generateItemFromType(objectType: String, vocabularies: VocabularyGroup): (Result, String) = {
val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, objectType)
if (term != null) {
val resourceType =
vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname
resourceType match {
case "publication" => (new Publication, resourceType)
case "dataset" => (new Dataset, resourceType)
case "software" => (new Software, resourceType)
case "otherresearchproduct" => (new OtherResearchProduct, resourceType)
}
} else
null
}
def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = { def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
@ -103,7 +47,8 @@ object CrossrefUtility {
val result = resultWithType._1 val result = resultWithType._1
val cOBJCategory = resultWithType._2 val cOBJCategory = resultWithType._2
mappingResult(result, json, cOBJCategory) val className = resultWithType._3
mappingResult(result, json, cOBJCategory, className)
if (result == null || result.getId == null) if (result == null || result.getId == null)
return List() return List()
@ -111,29 +56,144 @@ object CrossrefUtility {
(json \ "funder").extractOrElse[List[mappingFunder]](List()) (json \ "funder").extractOrElse[List[mappingFunder]](List())
if (funderList.nonEmpty) { if (funderList.nonEmpty) {
resultList = resultList ::: mappingFunderToRelations( resultList = resultList ::: mappingFunderToRelations(funderList, result )
funderList,
result.getId,
createCrossrefCollectedFrom(),
result.getDataInfo,
result.getLastupdatetimestamp
)
} }
result match {
case publication: Publication => convertPublication(publication, json, cOBJCategory)
case dataset: Dataset => convertDataset(dataset)
}
resultList = resultList ::: List(result) resultList = resultList ::: List(result)
resultList resultList
} }
def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = { private def createRelation(sourceId: String, targetId: String, relClass: String): Relation = {
val r = new Relation
r.setSource(sourceId)
r.setTarget(targetId)
r.setRelType(ModelConstants.RESULT_PROJECT)
r.setRelClass(relClass)
r.setSubRelType(ModelConstants.OUTCOME)
r.setProvenance(List(OafMapperUtils.getProvenance(CROSSREF_COLLECTED_FROM, null)).asJava)
r
}
private def generateSimpleRelationFromAward(
funder: mappingFunder,
nsPrefix: String,
extractField: String => String,
source:Result
): List[Relation] = {
if (funder.award.isDefined && funder.award.get.nonEmpty)
funder.award.get
.map(extractField)
.filter(a => a != null && a.nonEmpty)
.map(award => {
val targetId = IdentifierFactory.createOpenaireId("project",s"$nsPrefix::$award", true)
createRelation(targetId, source.getId, ModelConstants.PRODUCES)
})
else List()
}
private def extractECAward(award: String): String = {
val awardECRegex: Regex = "[0-9]{4,9}".r
if (awardECRegex.findAllIn(award).hasNext)
return awardECRegex.findAllIn(award).max
null
}
private def snsfRule(award: String): String = {
val tmp1 = StringUtils.substringAfter(award, "_")
val tmp2 = StringUtils.substringBefore(tmp1, "/")
tmp2
}
private def mappingFunderToRelations(funders: List[mappingFunder], result: Result): List[Relation] = {
var relList:List[Relation] = List()
if (funders != null)
funders.foreach(funder => {
if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) {
funder.DOI.get match {
case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | "10.13039/501100000780" |
"10.13039/100010665" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | "10.13039/501100000780" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
case "10.13039/501100000781" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
case "10.13039/100000001" => relList =relList ::: generateSimpleRelationFromAward(funder, "nsf_________", a => a, result)
case "10.13039/501100001665" => relList =relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result)
case "10.13039/501100002341" => relList =relList ::: generateSimpleRelationFromAward(funder, "aka_________", a => a, result)
case "10.13039/501100001602" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "sfi_________", a => a.replace("SFI", ""), result)
case "10.13039/501100000923" => relList =relList ::: generateSimpleRelationFromAward(funder, "arc_________", a => a, result)
case "10.13039/501100000038" =>
val targetId = IdentifierFactory.createOpenaireId("project", "nserc_______::1e5e62235d094afd01cd56e65112fc63", false)
relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
case "10.13039/501100000155" =>
val targetId = IdentifierFactory.createOpenaireId("project", "sshrc_______::1e5e62235d094afd01cd56e65112fc63", false)
relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
case "10.13039/501100000024" =>
val targetId = IdentifierFactory.createOpenaireId("project", "cihr________::1e5e62235d094afd01cd56e65112fc63", false)
relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
case "10.13039/501100002848" => relList =relList ::: generateSimpleRelationFromAward(funder, "conicytf____", a => a, result)
case "10.13039/501100003448" => relList =relList ::: generateSimpleRelationFromAward(funder, "gsrt________", extractECAward, result)
case "10.13039/501100010198" => relList =relList ::: generateSimpleRelationFromAward(funder, "sgov________", a => a, result)
case "10.13039/501100004564" => relList =relList ::: generateSimpleRelationFromAward(funder, "mestd_______", extractECAward, result)
case "10.13039/501100003407" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "miur________", a => a, result)
val targetId = IdentifierFactory.createOpenaireId("project", "miur________::1e5e62235d094afd01cd56e65112fc63", false)
relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
case "10.13039/501100006588" | "10.13039/501100004488" =>
relList =relList ::: generateSimpleRelationFromAward(
funder,
"irb_hr______",
a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", ""), result
)
case "10.13039/501100006769" => relList =relList ::: generateSimpleRelationFromAward(funder, "rsf_________", a => a, result)
case "10.13039/501100001711" => relList =relList ::: generateSimpleRelationFromAward(funder, "snsf________", snsfRule, result)
case "10.13039/501100004410" => relList =relList ::: generateSimpleRelationFromAward(funder, "tubitakf____", a => a, result)
case "10.13039/100004440" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result)
val targetId = IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false)
relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
case _ => logger.debug("no match for " + funder.DOI.get)
}
} else {
funder.name match {
case "European Unions Horizon 2020 research and innovation program" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
case "European Union's" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
case "The French National Research Agency (ANR)" | "The French National Research Agency" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result)
case "CONICYT, Programa de Formación de Capital Humano Avanzado" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "conicytf____", extractECAward, result)
case "Wellcome Trust Masters Fellowship" =>
relList =relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result)
val targetId = IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false)
relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
case _ => logger.debug("no match for " + funder.name)
}
}
})
relList
}
private def mappingResult(result: Result, json: JValue, cobjCategory: String, className:String): Result = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
//MAPPING Crossref DOI into PID //MAPPING Crossref DOI into PID
val doi: String = normalizeDoi((json \ "DOI").extract[String]) val doi: String = CleaningFunctions.normalizePidValue(ModelConstants.DOI, (json \ "DOI").extract[String])
result.setPid( result.setPid(
List( List(
@ -176,9 +236,7 @@ object CrossrefUtility {
} yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER) } yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER)
val subtitles = val subtitles =
for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield structuredProperty( for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield structuredProperty(
title, title, ModelConstants.SUBTITLE_QUALIFIER)
ModelConstants.SUBTITLE_QUALIFIER
)
result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava) result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava)
// DESCRIPTION // DESCRIPTION
@ -244,21 +302,28 @@ object CrossrefUtility {
if (subjectList.nonEmpty) { if (subjectList.nonEmpty) {
result.setSubject( result.setSubject(
subjectList.map(s => createSubject(s, "keyword", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava subjectList.map(s =>
) OafMapperUtils.subject(s, OafMapperUtils.qualifier(ModelConstants.DNET_SUBJECT_KEYWORD,ModelConstants.DNET_SUBJECT_KEYWORD,ModelConstants.DNET_SUBJECT_TYPOLOGIES), null)
).asJava)
} }
//Mapping Author //Mapping Author
val authorList: List[mappingAuthor] = val authorList:List[CrossrefAuthor] =
(json \ "author").extractOrElse[List[mappingAuthor]](List()) for {
JObject(author) <- json \ "author"
JField("ORCID", JString(orcid)) <- author
JField("given", JString(givenName)) <- author
JField("family", JString(familyName)) <- author
JField("sequence", JString(sequence)) <- author
} yield CrossrefAuthor(givenName = givenName, familyName = familyName, ORCID = orcid, sequence = sequence, rank = 0)
val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) => result.setAuthor(authorList.sortWith((a,b) =>{
a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first") if (a.sequence.equalsIgnoreCase("first"))
) true
else if (b.sequence.equalsIgnoreCase("first"))
result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) => false
generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index) else a.familyName< b.familyName
}.asJava) }).zipWithIndex.map(k=> k._1.copy(rank = k._2)).map(k => generateAuthor(k)).asJava)
// Mapping instance // Mapping instance
val instance = new Instance() val instance = new Instance()
@ -266,8 +331,8 @@ object CrossrefUtility {
JObject(license) <- json \ "license" JObject(license) <- json \ "license"
JField("URL", JString(lic)) <- license JField("URL", JString(lic)) <- license
JField("content-version", JString(content_version)) <- license JField("content-version", JString(content_version)) <- license
} yield (asField(lic), content_version) } yield (new License(lic), content_version)
val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue)) val l = license.filter(d => StringUtils.isNotBlank(d._1.getUrl))
if (l.nonEmpty) { if (l.nonEmpty) {
if (l exists (d => d._2.equals("vor"))) { if (l exists (d => d._2.equals("vor"))) {
for (d <- l) { for (d <- l) {
@ -290,66 +355,225 @@ object CrossrefUtility {
OafMapperUtils.qualifier( OafMapperUtils.qualifier(
"0001", "0001",
"peerReviewed", "peerReviewed",
ModelConstants.DNET_REVIEW_LEVELS,
ModelConstants.DNET_REVIEW_LEVELS ModelConstants.DNET_REVIEW_LEVELS
) )
) )
} }
instance.setAccessright( if (instance.getLicense!= null)
decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue) instance.setAccessright(
) decideAccessRight(instance.getLicense.getUrl, result.getDateofacceptance)
)
instance.setInstancetype( instance.setInstancetype(
OafMapperUtils.qualifier( OafMapperUtils.qualifier(
cobjCategory.substring(0, 4), cobjCategory,
cobjCategory.substring(5), className,
ModelConstants.DNET_PUBLICATION_RESOURCE,
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
result.setResourcetype( result.setResourcetype(
OafMapperUtils.qualifier( OafMapperUtils.qualifier(
cobjCategory.substring(0, 4), cobjCategory,
cobjCategory.substring(5), className,
ModelConstants.DNET_PUBLICATION_RESOURCE,
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
instance.setCollectedfrom(createCrossrefCollectedFrom()) instance.setCollectedfrom(CROSSREF_COLLECTED_FROM)
if (StringUtils.isNotBlank(issuedDate)) { if (StringUtils.isNotBlank(issuedDate)) {
instance.setDateofacceptance(asField(issuedDate)) instance.setDateofacceptance(issuedDate)
} else { } else {
instance.setDateofacceptance(asField(createdDate.getValue)) instance.setDateofacceptance(createdDate.getValue)
} }
val s: List[String] = List("https://doi.org/" + doi) val s: List[String] = List("https://doi.org/" + doi)
// val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct
// if (links.nonEmpty) {
// instance.setUrl(links.asJava)
// }
if (s.nonEmpty) { if (s.nonEmpty) {
instance.setUrl(s.asJava) instance.setUrl(s.asJava)
} }
val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct
//Mapping book
if (className.toLowerCase.contains("book")) {
val ISBN = for {JString(isbn) <- json \ "ISBN"} yield isbn
if (ISBN.nonEmpty && containerTitles.nonEmpty) {
val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
if (result.getSource != null) {
val l: List[String] = result.getSource.asScala.toList ::: List(source)
result.setSource(l.asJava)
} else
result.setSource(List(source).asJava)
}
} else {
// Mapping Journal
val issnInfos = for {
JObject(issn_type) <- json \ "issn-type"
JField("type", JString(tp)) <- issn_type
JField("value", JString(vl)) <- issn_type
} yield Tuple2(tp, vl)
val volume = (json \ "volume").extractOrElse[String](null)
if (containerTitles.nonEmpty) {
val journal = new Journal
journal.setName(containerTitles.head)
if (issnInfos.nonEmpty) {
issnInfos.foreach(tp => {
tp._1 match {
case "electronic" => journal.setIssnOnline(tp._2)
case "print" => journal.setIssnPrinted(tp._2)
}
})
}
journal.setVol(volume)
val page = (json \ "page").extractOrElse[String](null)
if (page != null) {
val pp = page.split("-")
if (pp.nonEmpty)
journal.setSp(pp.head)
if (pp.size > 1)
journal.setEp(pp(1))
}
result.setJournal(journal)
}
}
result.setInstance(List(instance).asJava) result.setInstance(List(instance).asJava)
result.setId("ID")
//IMPORTANT result.setId(IdentifierFactory.createIdentifier(result, true))
//The old method result.setId(generateIdentifier(result, doi)) if (result.getId == null || "ID".equalsIgnoreCase(result.getId))
//is replaced using IdentifierFactory, but the old identifier
//is preserved among the originalId(s)
val oldId = generateIdentifier(result, doi)
result.setId(oldId)
val newId = IdentifierFactory.createDOIBoostIdentifier(result)
if (!oldId.equalsIgnoreCase(newId)) {
result.getOriginalId.add(oldId)
}
result.setId(newId)
if (result.getId == null)
null null
else else
result result
} }
def decideAccessRight(license: String, date: String): AccessRight = {
if (license == null || license.isEmpty) {
//Default value Unknown
return ModelConstants.UNKNOWN_ACCESS_RIGHT();
}
//CC licenses
if (
license.startsWith("cc") ||
license.startsWith("http://creativecommons.org/licenses") ||
license.startsWith("https://creativecommons.org/licenses") ||
//ACS Publications Author choice licenses (considered OPEN also by Unpaywall)
license.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html") ||
license.equals("http://pubs.acs.org/page/policy/authorchoice_termsofuse.html") ||
license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") ||
//APA (considered OPEN also by Unpaywall)
license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx")
) {
val oaq: AccessRight = ModelConstants.OPEN_ACCESS_RIGHT()
oaq.setOpenAccessRoute(OpenAccessRoute.hybrid)
return oaq
}
//OUP (BUT ONLY AFTER 12 MONTHS FROM THE PUBLICATION DATE, OTHERWISE THEY ARE EMBARGOED)
if (
license.equals(
"https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"
)
) {
val now = java.time.LocalDate.now
try {
val pub_date = LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd"))
if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) {
val oaq: AccessRight = ModelConstants.OPEN_ACCESS_RIGHT()
oaq.setOpenAccessRoute(OpenAccessRoute.hybrid)
return oaq
} else {
return ModelConstants.EMBARGOED_ACCESS_RIGHT()
}
} catch {
case _: Exception => {
try {
val pub_date =
LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'"))
if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) {
val oaq: AccessRight = OPEN_ACCESS_RIGHT()
oaq.setOpenAccessRoute(OpenAccessRoute.hybrid)
return oaq
} else {
return ModelConstants.EMBARGOED_ACCESS_RIGHT()
}
} catch {
case _: Exception => return ModelConstants.CLOSED_ACCESS_RIGHT()
}
}
}
}
ModelConstants.CLOSED_ACCESS_RIGHT()
}
private def extractDate(dt: String, datePart: List[List[Int]]): String = {
if (StringUtils.isNotBlank(dt))
return GraphCleaningFunctions.cleanDate(dt)
if (datePart != null && datePart.size == 1) {
val res = datePart.head
if (res.size == 3) {
val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d"
if (dp.length == 10) {
return GraphCleaningFunctions.cleanDate(dp)
}
} else if (res.size == 2) {
val dp = f"${res.head}-${res(1)}%02d-01"
return GraphCleaningFunctions.cleanDate(dp)
} else if (res.size == 1) {
return GraphCleaningFunctions.cleanDate(s"${res.head}-01-01")
}
}
null
}
private def generateDate(
dt: String,
datePart: List[List[Int]],
classId: String,
schemeId: String
): StructuredProperty = {
val dp = extractDate(dt, datePart)
if (StringUtils.isNotBlank(dp))
structuredProperty(dp, classId, classId, schemeId)
else
null
}
private def generateItemFromType(objectType: String, vocabularies: VocabularyGroup): (Result, String, String) = {
val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, objectType)
if (term != null) {
val resourceType =
vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname
resourceType match {
case "publication" => (new Publication, resourceType, term.getClassname)
case "dataset" => (new Dataset, resourceType, term.getClassname)
case "software" => (new Software, resourceType, term.getClassname)
case "otherresearchproduct" => (new OtherResearchProduct, resourceType, term.getClassname)
}
} else
null
}
private def generateAuthor(ca: CrossrefAuthor): Author = {
val a = new Author
a.setName(ca.givenName)
a.setSurname(ca.familyName)
a.setFullname(s"${ca.familyName}, ${ca.givenName}")
a.setRank(ca.rank + 1)
if (StringUtils.isNotBlank(ca.ORCID))
a.setPid(
List(
OafMapperUtils.authorPid(ca.ORCID, OafMapperUtils.qualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_PENDING, ModelConstants.DNET_PID_TYPES), null)
).asJava
)
a
}
} }

View File

@ -553,7 +553,6 @@ object DataciteToOAFTransformation {
OafMapperUtils.accessRight( OafMapperUtils.accessRight(
ModelConstants.UNKNOWN, ModelConstants.UNKNOWN,
ModelConstants.NOT_AVAILABLE, ModelConstants.NOT_AVAILABLE,
ModelConstants.DNET_ACCESS_MODES,
ModelConstants.DNET_ACCESS_MODES ModelConstants.DNET_ACCESS_MODES
) )