diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml
index c0e12a63f..dfc3c75d9 100644
--- a/dhp-build/dhp-code-style/pom.xml
+++ b/dhp-build/dhp-code-style/pom.xml
@@ -5,7 +5,6 @@
eu.dnetlib.dhp
dhp-code-style
-
2.0.0-SNAPSHOT
jar
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
index dc38f218f..f10fda99d 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
@@ -4,12 +4,14 @@ package eu.dnetlib.dhp.schema.common;
import eu.dnetlib.dhp.schema.oaf.AccessRight;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
public class ModelConstants {
private ModelConstants() {
}
+ public static final String DOI = "doi";
public static final String ORCID = "orcid";
public static final String ORCID_PENDING = "orcid_pending";
public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID";
@@ -219,7 +221,7 @@ public class ModelConstants {
public static final Qualifier ALTERNATIVE_TITLE_QUALIFIER = qualifier(
"alternative title", "alternative title", DNET_DATACITE_TITLE);
- private static final Qualifier SUBTITLE_QUALIFIER = qualifier("subtitle", "subtitle", DNET_DATACITE_TITLE);
+ public static final Qualifier SUBTITLE_QUALIFIER = qualifier("subtitle", "subtitle", DNET_DATACITE_TITLE);
public static final AccessRight OPEN_ACCESS_RIGHT() {
@@ -230,6 +232,38 @@ public class ModelConstants {
return result;
}
+ public static final AccessRight RESTRICTED_ACCESS_RIGHT() {
+ final AccessRight result = new AccessRight();
+ result.setClassid("RESTRICTED");
+ result.setClassname("Restricted");
+ result.setSchemeid(ModelConstants.DNET_ACCESS_MODES);
+ return result;
+ }
+
+ public static final AccessRight UNKNOWN_ACCESS_RIGHT() {
+ return OafMapperUtils.accessRight(
+ ModelConstants.UNKNOWN,
+ ModelConstants.NOT_AVAILABLE,
+ ModelConstants.DNET_ACCESS_MODES
+ );
+ }
+
+ public static final AccessRight EMBARGOED_ACCESS_RIGHT() {
+ return OafMapperUtils.accessRight(
+ ACCESS_RIGHT_EMBARGO,
+ ACCESS_RIGHT_EMBARGO,
+ DNET_ACCESS_MODES
+ );
+ }
+
+ public static final AccessRight CLOSED_ACCESS_RIGHT() {
+ return OafMapperUtils.accessRight(
+ ACCESS_RIGHT_CLOSED,
+ "Closed Access",
+ ModelConstants.DNET_ACCESS_MODES
+ );
+ }
+
private static Qualifier qualifier(
final String classid,
final String classname,
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java
index c0ef339bd..aaae0fe0a 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java
@@ -6,13 +6,17 @@ import java.util.Objects;
import java.util.Optional;
import java.util.Set;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+import lombok.val;
public class CleaningFunctions {
public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10\\.)";
+
+ private static final String ALL_SPACES_REGEX = "(?:\\n|\\r|\\t|\\s)";
public static final String DOI_PREFIX = "10.";
public static final Set PID_BLACKLIST = new HashSet<>();
@@ -58,6 +62,27 @@ public class CleaningFunctions {
return pid;
}
+ /**
+ * This utility was moved from DOIBoost,
+ * it implements a better cleaning of DOI.
+ * In case of wrong DOI it raises an illegalArgumentException
+ * @param input DOI
+ * @return normalized DOI
+ */
+ private static String normalizeDOI(final String input) {
+ if (input == null)
+ throw new IllegalArgumentException("PID value cannot be empty");
+ final String replaced = input
+ .replaceAll(ALL_SPACES_REGEX, "")
+ .toLowerCase()
+ .replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX);
+ if (StringUtils.isEmpty(replaced.trim()))
+ throw new IllegalArgumentException("PID value normalized return empty string");
+ if (!replaced.contains("10."))
+ throw new IllegalArgumentException("DOI Must starts with 10.");
+ return replaced.substring(replaced.indexOf("10."));
+ }
+
public static String normalizePidValue(String pidType, String pidValue) {
String value = Optional
.ofNullable(pidValue)
@@ -67,8 +92,8 @@ public class CleaningFunctions {
switch (pidType) {
// TODO add cleaning for more PID types as needed
- case "doi":
- return value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX);
+ case ModelConstants.DOI:
+ return normalizeDOI(value.toLowerCase());
}
return value;
}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
index fff9ac885..b70250f26 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
@@ -495,7 +495,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
private static AccessRight accessRight(String classid, String classname, String scheme) {
return OafMapperUtils
.accessRight(
- classid, classname, scheme, scheme);
+ classid, classname, scheme);
}
private static Qualifier qualifier(String classid, String classname, String scheme) {
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
index 723254bab..5c7e237fb 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
@@ -55,19 +55,19 @@ public class OafMapperUtils {
return qualifier(UNKNOWN, "Unknown", schemeid);
}
+
+
public static AccessRight accessRight(
final String classid,
final String classname,
- final String schemeid,
- final String schemename) {
- return accessRight(classid, classname, schemeid, schemename, null);
+ final String schemeid) {
+ return accessRight(classid, classname, schemeid, null);
}
public static AccessRight accessRight(
final String classid,
final String classname,
final String schemeid,
- final String schemename,
final OpenAccessRoute openAccessRoute) {
final AccessRight accessRight = new AccessRight();
accessRight.setClassid(classid);
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java
index 0650dc53b..eb4a092cf 100644
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/vocabulary/VocabularyTest.java
@@ -67,7 +67,7 @@ public class VocabularyTest {
if (t1 == null) {
System.err.println(s1 + " Missing");
} else {
- System.out.println("syn=" + s1 + " term = " + t1.getClassid());
+ System.out.println("syn=" + s1 + " term = " + t1.getClassid()+" "+t1.getClassname());
System.out
.println(
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java
index 08339c3a1..a26e1c83d 100644
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactoryTest.java
@@ -1,12 +1,12 @@
package eu.dnetlib.dhp.schema.oaf.utils;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException;
import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.DeserializationFeature;
@@ -81,4 +81,23 @@ class IdentifierFactoryTest {
assertEquals(expectedID, IdentifierFactory.createIdentifier(pub, md5));
}
+ @Test
+ void testNormalizeDOI() throws Exception {
+
+ final String doi = "10.1042/BCJ20160876";
+
+ assertEquals(CleaningFunctions.normalizePidValue("doi", doi), doi.toLowerCase());
+ final String doi2 = "0.1042/BCJ20160876";
+ assertThrows(IllegalArgumentException.class, () -> CleaningFunctions.normalizePidValue("doi", doi2));
+
+ final String doi3 = "https://doi.org/0.1042/BCJ20160876";
+ assertThrows(IllegalArgumentException.class, () -> CleaningFunctions.normalizePidValue("doi", doi3));
+
+ final String doi4 = "https://doi.org/10.1042/BCJ20160876";
+ assertEquals(CleaningFunctions.normalizePidValue("doi", doi4), "10.1042/BCJ20160876".toLowerCase());
+
+ final String doi5 = "https://doi.org/10.1042/ BCJ20160876";
+ assertEquals(CleaningFunctions.normalizePidValue("doi", doi5), "10.1042/BCJ20160876".toLowerCase());
+ }
+
}
diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala
index 4d81b4858..18299cb87 100644
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/crossref/CrossrefUtility.scala
@@ -2,90 +2,34 @@ package eu.dnetlib.dhp.crossref
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
import eu.dnetlib.dhp.schema.common.ModelConstants
-import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._
-import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType}
+import eu.dnetlib.dhp.schema.common.ModelConstants.OPEN_ACCESS_RIGHT
import eu.dnetlib.dhp.schema.oaf._
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._
+import eu.dnetlib.dhp.schema.oaf.utils._
import org.apache.commons.lang.StringUtils
import org.json4s
import org.json4s.DefaultFormats
-import org.json4s.JsonAST.{JField, JObject, JString, JValue}
+import org.json4s.JsonAST._
import org.json4s.jackson.JsonMethods.parse
+import org.slf4j.{Logger, LoggerFactory}
+import java.time.LocalDate
+import java.time.format.DateTimeFormatter
import scala.collection.JavaConverters._
+import scala.collection.mutable.ListBuffer
+import scala.util.matching.Regex
case class CrossrefDT(doi: String, json: String, timestamp: Long) {}
+case class CrossrefAuthor(givenName:String, familyName:String,ORCID:String, sequence:String, rank:Int ){}
+
+case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}
+
object CrossrefUtility {
- val DOI_PREFIX_REGEX = "(^10\\.|\\/10.)"
- val DOI_PREFIX = "10."
val CROSSREF_COLLECTED_FROM = keyValue(ModelConstants.CROSSREF_ID, ModelConstants.CROSSREF_NAME)
- def normalizeDoi(input: String): String = {
- if (input == null)
- return null
- val replaced = input
- .replaceAll("(?:\\n|\\r|\\t|\\s)", "")
- .toLowerCase
- .replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX)
- if (replaced == null || replaced.trim.isEmpty)
- return null
- if (replaced.indexOf("10.") < 0)
- return null
- val ret = replaced.substring(replaced.indexOf("10."))
- if (!ret.startsWith(DOI_PREFIX))
- return null
- ret
- }
+ val logger: Logger = LoggerFactory.getLogger(getClass)
- def extractDate(dt: String, datePart: List[List[Int]]): String = {
- if (StringUtils.isNotBlank(dt))
- return GraphCleaningFunctions.cleanDate(dt)
- if (datePart != null && datePart.size == 1) {
- val res = datePart.head
- if (res.size == 3) {
- val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d"
- if (dp.length == 10) {
- return GraphCleaningFunctions.cleanDate(dp)
- }
- } else if (res.size == 2) {
- val dp = f"${res.head}-${res(1)}%02d-01"
- return GraphCleaningFunctions.cleanDate(dp)
- } else if (res.size == 1) {
- return GraphCleaningFunctions.cleanDate(s"${res.head}-01-01")
- }
- }
- null
-
- }
-
- private def generateDate(
- dt: String,
- datePart: List[List[Int]],
- classId: String,
- schemeId: String
- ): StructuredProperty = {
- val dp = extractDate(dt, datePart)
- if (StringUtils.isNotBlank(dp))
- structuredProperty(dp, classId, classId, schemeId)
- else
- null
- }
-
- private def generateItemFromType(objectType: String, vocabularies: VocabularyGroup): (Result, String) = {
- val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, objectType)
- if (term != null) {
- val resourceType =
- vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname
-
- resourceType match {
- case "publication" => (new Publication, resourceType)
- case "dataset" => (new Dataset, resourceType)
- case "software" => (new Software, resourceType)
- case "otherresearchproduct" => (new OtherResearchProduct, resourceType)
- }
- } else
- null
- }
def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
@@ -103,7 +47,8 @@ object CrossrefUtility {
val result = resultWithType._1
val cOBJCategory = resultWithType._2
- mappingResult(result, json, cOBJCategory)
+ val className = resultWithType._3
+ mappingResult(result, json, cOBJCategory, className)
if (result == null || result.getId == null)
return List()
@@ -111,29 +56,144 @@ object CrossrefUtility {
(json \ "funder").extractOrElse[List[mappingFunder]](List())
if (funderList.nonEmpty) {
- resultList = resultList ::: mappingFunderToRelations(
- funderList,
- result.getId,
- createCrossrefCollectedFrom(),
- result.getDataInfo,
- result.getLastupdatetimestamp
- )
+ resultList = resultList ::: mappingFunderToRelations(funderList, result )
}
-
- result match {
- case publication: Publication => convertPublication(publication, json, cOBJCategory)
- case dataset: Dataset => convertDataset(dataset)
- }
-
resultList = resultList ::: List(result)
resultList
}
- def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = {
+ private def createRelation(sourceId: String, targetId: String, relClass: String): Relation = {
+ val r = new Relation
+ r.setSource(sourceId)
+ r.setTarget(targetId)
+ r.setRelType(ModelConstants.RESULT_PROJECT)
+ r.setRelClass(relClass)
+ r.setSubRelType(ModelConstants.OUTCOME)
+ r.setProvenance(List(OafMapperUtils.getProvenance(CROSSREF_COLLECTED_FROM, null)).asJava)
+ r
+ }
+
+
+ private def generateSimpleRelationFromAward(
+ funder: mappingFunder,
+ nsPrefix: String,
+ extractField: String => String,
+ source:Result
+ ): List[Relation] = {
+ if (funder.award.isDefined && funder.award.get.nonEmpty)
+ funder.award.get
+ .map(extractField)
+ .filter(a => a != null && a.nonEmpty)
+ .map(award => {
+ val targetId = IdentifierFactory.createOpenaireId("project",s"$nsPrefix::$award", true)
+ createRelation(targetId, source.getId, ModelConstants.PRODUCES)
+ })
+ else List()
+ }
+
+ private def extractECAward(award: String): String = {
+ val awardECRegex: Regex = "[0-9]{4,9}".r
+ if (awardECRegex.findAllIn(award).hasNext)
+ return awardECRegex.findAllIn(award).max
+ null
+ }
+
+ private def snsfRule(award: String): String = {
+ val tmp1 = StringUtils.substringAfter(award, "_")
+ val tmp2 = StringUtils.substringBefore(tmp1, "/")
+ tmp2
+
+ }
+
+ private def mappingFunderToRelations(funders: List[mappingFunder], result: Result): List[Relation] = {
+ var relList:List[Relation] = List()
+
+ if (funders != null)
+ funders.foreach(funder => {
+ if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) {
+ funder.DOI.get match {
+ case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | "10.13039/501100000780" |
+ "10.13039/100010665" =>
+ relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
+ case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | "10.13039/501100000780" =>
+ relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
+ case "10.13039/501100000781" =>
+ relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
+ relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
+ case "10.13039/100000001" => relList =relList ::: generateSimpleRelationFromAward(funder, "nsf_________", a => a, result)
+ case "10.13039/501100001665" => relList =relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result)
+ case "10.13039/501100002341" => relList =relList ::: generateSimpleRelationFromAward(funder, "aka_________", a => a, result)
+ case "10.13039/501100001602" =>
+ relList =relList ::: generateSimpleRelationFromAward(funder, "sfi_________", a => a.replace("SFI", ""), result)
+ case "10.13039/501100000923" => relList =relList ::: generateSimpleRelationFromAward(funder, "arc_________", a => a, result)
+ case "10.13039/501100000038" =>
+ val targetId = IdentifierFactory.createOpenaireId("project", "nserc_______::1e5e62235d094afd01cd56e65112fc63", false)
+ relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
+ case "10.13039/501100000155" =>
+ val targetId = IdentifierFactory.createOpenaireId("project", "sshrc_______::1e5e62235d094afd01cd56e65112fc63", false)
+ relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
+ case "10.13039/501100000024" =>
+ val targetId = IdentifierFactory.createOpenaireId("project", "cihr________::1e5e62235d094afd01cd56e65112fc63", false)
+ relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
+ case "10.13039/501100002848" => relList =relList ::: generateSimpleRelationFromAward(funder, "conicytf____", a => a, result)
+ case "10.13039/501100003448" => relList =relList ::: generateSimpleRelationFromAward(funder, "gsrt________", extractECAward, result)
+ case "10.13039/501100010198" => relList =relList ::: generateSimpleRelationFromAward(funder, "sgov________", a => a, result)
+ case "10.13039/501100004564" => relList =relList ::: generateSimpleRelationFromAward(funder, "mestd_______", extractECAward, result)
+ case "10.13039/501100003407" =>
+ relList =relList ::: generateSimpleRelationFromAward(funder, "miur________", a => a, result)
+ val targetId = IdentifierFactory.createOpenaireId("project", "miur________::1e5e62235d094afd01cd56e65112fc63", false)
+ relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
+ case "10.13039/501100006588" | "10.13039/501100004488" =>
+ relList =relList ::: generateSimpleRelationFromAward(
+ funder,
+ "irb_hr______",
+ a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", ""), result
+ )
+ case "10.13039/501100006769" => relList =relList ::: generateSimpleRelationFromAward(funder, "rsf_________", a => a, result)
+ case "10.13039/501100001711" => relList =relList ::: generateSimpleRelationFromAward(funder, "snsf________", snsfRule, result)
+ case "10.13039/501100004410" => relList =relList ::: generateSimpleRelationFromAward(funder, "tubitakf____", a => a, result)
+ case "10.13039/100004440" =>
+ relList =relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result)
+ val targetId = IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false)
+ relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
+ case _ => logger.debug("no match for " + funder.DOI.get)
+
+ }
+
+ } else {
+ funder.name match {
+ case "European Union’s Horizon 2020 research and innovation program" =>
+ relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
+ case "European Union's" =>
+ relList =relList ::: generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward, result)
+ relList =relList ::: generateSimpleRelationFromAward(funder, "corda_______", extractECAward, result)
+ case "The French National Research Agency (ANR)" | "The French National Research Agency" =>
+ relList =relList ::: generateSimpleRelationFromAward(funder, "anr_________", a => a, result)
+ case "CONICYT, Programa de Formación de Capital Humano Avanzado" =>
+ relList =relList ::: generateSimpleRelationFromAward(funder, "conicytf____", extractECAward, result)
+ case "Wellcome Trust Masters Fellowship" =>
+ relList =relList ::: generateSimpleRelationFromAward(funder, "wt__________", a => a, result)
+ val targetId = IdentifierFactory.createOpenaireId("project", "wt__________::1e5e62235d094afd01cd56e65112fc63", false)
+ relList =relList ::: List(createRelation(targetId, result.getId, ModelConstants.PRODUCES))
+ case _ => logger.debug("no match for " + funder.name)
+
+ }
+ }
+
+ })
+ relList
+
+ }
+
+
+
+
+
+ private def mappingResult(result: Result, json: JValue, cobjCategory: String, className:String): Result = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
//MAPPING Crossref DOI into PID
- val doi: String = normalizeDoi((json \ "DOI").extract[String])
+ val doi: String = CleaningFunctions.normalizePidValue(ModelConstants.DOI, (json \ "DOI").extract[String])
result.setPid(
List(
@@ -176,9 +236,7 @@ object CrossrefUtility {
} yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER)
val subtitles =
for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield structuredProperty(
- title,
- ModelConstants.SUBTITLE_QUALIFIER
- )
+ title, ModelConstants.SUBTITLE_QUALIFIER)
result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava)
// DESCRIPTION
@@ -244,21 +302,28 @@ object CrossrefUtility {
if (subjectList.nonEmpty) {
result.setSubject(
- subjectList.map(s => createSubject(s, "keyword", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava
- )
+ subjectList.map(s =>
+ OafMapperUtils.subject(s, OafMapperUtils.qualifier(ModelConstants.DNET_SUBJECT_KEYWORD,ModelConstants.DNET_SUBJECT_KEYWORD,ModelConstants.DNET_SUBJECT_TYPOLOGIES), null)
+ ).asJava)
}
//Mapping Author
- val authorList: List[mappingAuthor] =
- (json \ "author").extractOrElse[List[mappingAuthor]](List())
+ val authorList:List[CrossrefAuthor] =
+ for {
+ JObject(author) <- json \ "author"
+ JField("ORCID", JString(orcid)) <- author
+ JField("given", JString(givenName)) <- author
+ JField("family", JString(familyName)) <- author
+ JField("sequence", JString(sequence)) <- author
+ } yield CrossrefAuthor(givenName = givenName, familyName = familyName, ORCID = orcid, sequence = sequence, rank = 0)
- val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) =>
- a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first")
- )
-
- result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) =>
- generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index)
- }.asJava)
+ result.setAuthor(authorList.sortWith((a,b) =>{
+ if (a.sequence.equalsIgnoreCase("first"))
+ true
+ else if (b.sequence.equalsIgnoreCase("first"))
+ false
+ else a.familyName< b.familyName
+ }).zipWithIndex.map(k=> k._1.copy(rank = k._2)).map(k => generateAuthor(k)).asJava)
// Mapping instance
val instance = new Instance()
@@ -266,8 +331,8 @@ object CrossrefUtility {
JObject(license) <- json \ "license"
JField("URL", JString(lic)) <- license
JField("content-version", JString(content_version)) <- license
- } yield (asField(lic), content_version)
- val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue))
+ } yield (new License(lic), content_version)
+ val l = license.filter(d => StringUtils.isNotBlank(d._1.getUrl))
if (l.nonEmpty) {
if (l exists (d => d._2.equals("vor"))) {
for (d <- l) {
@@ -290,66 +355,225 @@ object CrossrefUtility {
OafMapperUtils.qualifier(
"0001",
"peerReviewed",
- ModelConstants.DNET_REVIEW_LEVELS,
ModelConstants.DNET_REVIEW_LEVELS
)
)
}
- instance.setAccessright(
- decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)
- )
+ if (instance.getLicense!= null)
+ instance.setAccessright(
+ decideAccessRight(instance.getLicense.getUrl, result.getDateofacceptance)
+ )
instance.setInstancetype(
OafMapperUtils.qualifier(
- cobjCategory.substring(0, 4),
- cobjCategory.substring(5),
- ModelConstants.DNET_PUBLICATION_RESOURCE,
+ cobjCategory,
+ className,
ModelConstants.DNET_PUBLICATION_RESOURCE
)
)
result.setResourcetype(
OafMapperUtils.qualifier(
- cobjCategory.substring(0, 4),
- cobjCategory.substring(5),
- ModelConstants.DNET_PUBLICATION_RESOURCE,
+ cobjCategory,
+ className,
ModelConstants.DNET_PUBLICATION_RESOURCE
)
)
- instance.setCollectedfrom(createCrossrefCollectedFrom())
+ instance.setCollectedfrom(CROSSREF_COLLECTED_FROM)
if (StringUtils.isNotBlank(issuedDate)) {
- instance.setDateofacceptance(asField(issuedDate))
+ instance.setDateofacceptance(issuedDate)
} else {
- instance.setDateofacceptance(asField(createdDate.getValue))
+ instance.setDateofacceptance(createdDate.getValue)
}
val s: List[String] = List("https://doi.org/" + doi)
- // val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct
- // if (links.nonEmpty) {
- // instance.setUrl(links.asJava)
- // }
if (s.nonEmpty) {
instance.setUrl(s.asJava)
}
+ val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct
+ //Mapping book
+ if (className.toLowerCase.contains("book")) {
+ val ISBN = for {JString(isbn) <- json \ "ISBN"} yield isbn
+ if (ISBN.nonEmpty && containerTitles.nonEmpty) {
+ val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
+ if (result.getSource != null) {
+ val l: List[String] = result.getSource.asScala.toList ::: List(source)
+ result.setSource(l.asJava)
+ } else
+ result.setSource(List(source).asJava)
+ }
+ } else {
+ // Mapping Journal
+ val issnInfos = for {
+ JObject(issn_type) <- json \ "issn-type"
+ JField("type", JString(tp)) <- issn_type
+ JField("value", JString(vl)) <- issn_type
+ } yield Tuple2(tp, vl)
+
+ val volume = (json \ "volume").extractOrElse[String](null)
+ if (containerTitles.nonEmpty) {
+ val journal = new Journal
+ journal.setName(containerTitles.head)
+ if (issnInfos.nonEmpty) {
+
+ issnInfos.foreach(tp => {
+ tp._1 match {
+ case "electronic" => journal.setIssnOnline(tp._2)
+ case "print" => journal.setIssnPrinted(tp._2)
+ }
+ })
+ }
+ journal.setVol(volume)
+ val page = (json \ "page").extractOrElse[String](null)
+ if (page != null) {
+ val pp = page.split("-")
+ if (pp.nonEmpty)
+ journal.setSp(pp.head)
+ if (pp.size > 1)
+ journal.setEp(pp(1))
+ }
+ result.setJournal(journal)
+ }
+ }
+
result.setInstance(List(instance).asJava)
-
- //IMPORTANT
- //The old method result.setId(generateIdentifier(result, doi))
- //is replaced using IdentifierFactory, but the old identifier
- //is preserved among the originalId(s)
- val oldId = generateIdentifier(result, doi)
- result.setId(oldId)
-
- val newId = IdentifierFactory.createDOIBoostIdentifier(result)
- if (!oldId.equalsIgnoreCase(newId)) {
- result.getOriginalId.add(oldId)
- }
- result.setId(newId)
-
- if (result.getId == null)
+ result.setId("ID")
+ result.setId(IdentifierFactory.createIdentifier(result, true))
+ if (result.getId == null || "ID".equalsIgnoreCase(result.getId))
null
else
result
}
+ def decideAccessRight(license: String, date: String): AccessRight = {
+ if (license == null || license.isEmpty) {
+ //Default value Unknown
+ return ModelConstants.UNKNOWN_ACCESS_RIGHT();
+ }
+ //CC licenses
+ if (
+ license.startsWith("cc") ||
+ license.startsWith("http://creativecommons.org/licenses") ||
+ license.startsWith("https://creativecommons.org/licenses") ||
+
+ //ACS Publications Author choice licenses (considered OPEN also by Unpaywall)
+ license.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html") ||
+ license.equals("http://pubs.acs.org/page/policy/authorchoice_termsofuse.html") ||
+ license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") ||
+
+ //APA (considered OPEN also by Unpaywall)
+ license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx")
+ ) {
+
+ val oaq: AccessRight = ModelConstants.OPEN_ACCESS_RIGHT()
+ oaq.setOpenAccessRoute(OpenAccessRoute.hybrid)
+ return oaq
+ }
+
+ //OUP (BUT ONLY AFTER 12 MONTHS FROM THE PUBLICATION DATE, OTHERWISE THEY ARE EMBARGOED)
+ if (
+ license.equals(
+ "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"
+ )
+ ) {
+ val now = java.time.LocalDate.now
+
+ try {
+ val pub_date = LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd"))
+ if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) {
+ val oaq: AccessRight = ModelConstants.OPEN_ACCESS_RIGHT()
+ oaq.setOpenAccessRoute(OpenAccessRoute.hybrid)
+ return oaq
+ } else {
+ return ModelConstants.EMBARGOED_ACCESS_RIGHT()
+ }
+ } catch {
+ case _: Exception => {
+ try {
+ val pub_date =
+ LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'"))
+ if (((now.toEpochDay - pub_date.toEpochDay) / 365.0) > 1) {
+ val oaq: AccessRight = OPEN_ACCESS_RIGHT()
+ oaq.setOpenAccessRoute(OpenAccessRoute.hybrid)
+ return oaq
+ } else {
+ return ModelConstants.EMBARGOED_ACCESS_RIGHT()
+ }
+ } catch {
+ case _: Exception => return ModelConstants.CLOSED_ACCESS_RIGHT()
+ }
+ }
+
+ }
+
+ }
+
+ ModelConstants.CLOSED_ACCESS_RIGHT()
+ }
+
+
+ private def extractDate(dt: String, datePart: List[List[Int]]): String = {
+ if (StringUtils.isNotBlank(dt))
+ return GraphCleaningFunctions.cleanDate(dt)
+ if (datePart != null && datePart.size == 1) {
+ val res = datePart.head
+ if (res.size == 3) {
+ val dp = f"${res.head}-${res(1)}%02d-${res(2)}%02d"
+ if (dp.length == 10) {
+ return GraphCleaningFunctions.cleanDate(dp)
+ }
+ } else if (res.size == 2) {
+ val dp = f"${res.head}-${res(1)}%02d-01"
+ return GraphCleaningFunctions.cleanDate(dp)
+ } else if (res.size == 1) {
+ return GraphCleaningFunctions.cleanDate(s"${res.head}-01-01")
+ }
+ }
+ null
+ }
+
+ private def generateDate(
+ dt: String,
+ datePart: List[List[Int]],
+ classId: String,
+ schemeId: String
+ ): StructuredProperty = {
+ val dp = extractDate(dt, datePart)
+ if (StringUtils.isNotBlank(dp))
+ structuredProperty(dp, classId, classId, schemeId)
+ else
+ null
+ }
+
+ private def generateItemFromType(objectType: String, vocabularies: VocabularyGroup): (Result, String, String) = {
+ val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, objectType)
+ if (term != null) {
+ val resourceType =
+ vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid).getClassname
+
+ resourceType match {
+ case "publication" => (new Publication, resourceType, term.getClassname)
+ case "dataset" => (new Dataset, resourceType, term.getClassname)
+ case "software" => (new Software, resourceType, term.getClassname)
+ case "otherresearchproduct" => (new OtherResearchProduct, resourceType, term.getClassname)
+ }
+ } else
+ null
+ }
+
+ private def generateAuthor(ca: CrossrefAuthor): Author = {
+ val a = new Author
+ a.setName(ca.givenName)
+ a.setSurname(ca.familyName)
+ a.setFullname(s"${ca.familyName}, ${ca.givenName}")
+ a.setRank(ca.rank + 1)
+ if (StringUtils.isNotBlank(ca.ORCID))
+ a.setPid(
+ List(
+ OafMapperUtils.authorPid(ca.ORCID, OafMapperUtils.qualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_PENDING, ModelConstants.DNET_PID_TYPES), null)
+ ).asJava
+ )
+ a
+ }
+
}
diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala
index afb687b37..2696b5252 100644
--- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala
@@ -553,7 +553,6 @@ object DataciteToOAFTransformation {
OafMapperUtils.accessRight(
ModelConstants.UNKNOWN,
ModelConstants.NOT_AVAILABLE,
- ModelConstants.DNET_ACCESS_MODES,
ModelConstants.DNET_ACCESS_MODES
)