forked from D-Net/dnet-hadoop
resolved ticket #6377
This commit is contained in:
parent
524e5f3092
commit
dbe0d0378e
|
@ -1,9 +1,7 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.common;
|
package eu.dnetlib.dhp.schema.common;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
|
||||||
|
|
||||||
public class ModelConstants {
|
public class ModelConstants {
|
||||||
|
|
||||||
|
@ -144,6 +142,17 @@ public class ModelConstants {
|
||||||
public static final Qualifier MAIN_TITLE_QUALIFIER = qualifier(
|
public static final Qualifier MAIN_TITLE_QUALIFIER = qualifier(
|
||||||
"main title", "main title", DNET_DATACITE_TITLE, DNET_DATACITE_TITLE);
|
"main title", "main title", DNET_DATACITE_TITLE, DNET_DATACITE_TITLE);
|
||||||
|
|
||||||
|
|
||||||
|
public static final AccessRight OPEN_ACCESS_RIGHT () {
|
||||||
|
|
||||||
|
final AccessRight result = new AccessRight();
|
||||||
|
result.setClassid(ACCESS_RIGHT_OPEN);
|
||||||
|
result.setClassid(ACCESS_RIGHT_OPEN);
|
||||||
|
result.setSchemeid(ModelConstants.DNET_ACCESS_MODES);
|
||||||
|
result.setSchemename(ModelConstants.DNET_ACCESS_MODES);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
private static Qualifier qualifier(
|
private static Qualifier qualifier(
|
||||||
final String classid,
|
final String classid,
|
||||||
final String classname,
|
final String classname,
|
||||||
|
|
|
@ -23,7 +23,7 @@ import java.util.regex.Pattern
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
import scala.io.{Codec, Source}
|
import scala.io.{Codec, Source}
|
||||||
|
|
||||||
case class DataciteType(doi:String,timestamp:Long,isActive:Boolean, json:String ){}
|
case class DataciteType(doi: String, timestamp: Long, isActive: Boolean, json: String) {}
|
||||||
|
|
||||||
case class NameIdentifiersType(nameIdentifierScheme: Option[String], schemeUri: Option[String], nameIdentifier: Option[String]) {}
|
case class NameIdentifiersType(nameIdentifierScheme: Option[String], schemeUri: Option[String], nameIdentifier: Option[String]) {}
|
||||||
|
|
||||||
|
@ -51,7 +51,7 @@ object DataciteToOAFTransformation {
|
||||||
val SUBJ_CLASS = "keywords"
|
val SUBJ_CLASS = "keywords"
|
||||||
|
|
||||||
|
|
||||||
val j_filter:List[String] = {
|
val j_filter: List[String] = {
|
||||||
val s = Source.fromInputStream(getClass.getResourceAsStream("datacite_filter")).mkString
|
val s = Source.fromInputStream(getClass.getResourceAsStream("datacite_filter")).mkString
|
||||||
s.lines.toList
|
s.lines.toList
|
||||||
}
|
}
|
||||||
|
@ -72,9 +72,9 @@ object DataciteToOAFTransformation {
|
||||||
val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern("[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]", Locale.ENGLISH)
|
val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern("[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]", Locale.ENGLISH)
|
||||||
val df_it: DateTimeFormatter = DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN)
|
val df_it: DateTimeFormatter = DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN)
|
||||||
|
|
||||||
val funder_regex:List[(Pattern, String)] = List(
|
val funder_regex: List[(Pattern, String)] = List(
|
||||||
(Pattern.compile("(info:eu-repo/grantagreement/ec/h2020/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE),"40|corda__h2020::"),
|
(Pattern.compile("(info:eu-repo/grantagreement/ec/h2020/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda__h2020::"),
|
||||||
(Pattern.compile("(info:eu-repo/grantagreement/ec/fp7/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE),"40|corda_______::")
|
(Pattern.compile("(info:eu-repo/grantagreement/ec/fp7/)(\\d\\d\\d\\d\\d\\d)(.*)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE), "40|corda_______::")
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -90,11 +90,11 @@ object DataciteToOAFTransformation {
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def filter_json(json:String):Boolean = {
|
def filter_json(json: String): Boolean = {
|
||||||
j_filter.exists(f => json.contains(f))
|
j_filter.exists(f => json.contains(f))
|
||||||
}
|
}
|
||||||
|
|
||||||
def toActionSet(item:Oaf) :(String, String) = {
|
def toActionSet(item: Oaf): (String, String) = {
|
||||||
val mapper = new ObjectMapper()
|
val mapper = new ObjectMapper()
|
||||||
|
|
||||||
item match {
|
item match {
|
||||||
|
@ -131,8 +131,6 @@ object DataciteToOAFTransformation {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def embargo_end(embargo_end_date: String): Boolean = {
|
def embargo_end(embargo_end_date: String): Boolean = {
|
||||||
val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]"))
|
val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]"))
|
||||||
val td = LocalDate.now()
|
val td = LocalDate.now()
|
||||||
|
@ -166,7 +164,7 @@ object DataciteToOAFTransformation {
|
||||||
d
|
d
|
||||||
}
|
}
|
||||||
|
|
||||||
def getTypeQualifier(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies:VocabularyGroup): (Qualifier, Qualifier) = {
|
def getTypeQualifier(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies: VocabularyGroup): (Qualifier, Qualifier) = {
|
||||||
if (resourceType != null && resourceType.nonEmpty) {
|
if (resourceType != null && resourceType.nonEmpty) {
|
||||||
val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
|
val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
|
||||||
if (typeQualifier != null)
|
if (typeQualifier != null)
|
||||||
|
@ -188,7 +186,7 @@ object DataciteToOAFTransformation {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def getResult(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies:VocabularyGroup): Result = {
|
def getResult(resourceType: String, resourceTypeGeneral: String, schemaOrg: String, vocabularies: VocabularyGroup): Result = {
|
||||||
val typeQualifiers: (Qualifier, Qualifier) = getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
|
val typeQualifiers: (Qualifier, Qualifier) = getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
|
||||||
if (typeQualifiers == null)
|
if (typeQualifiers == null)
|
||||||
return null
|
return null
|
||||||
|
@ -230,11 +228,31 @@ object DataciteToOAFTransformation {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Applies the figshare workaround described in ticket #6377.
 *
 * When at least one instance of the result is hosted by "figshare"
 * (compared case-insensitively against the hosted-by value), every instance
 * of the result gets an OPEN access right and the result's subject list is
 * replaced with an empty list. Results without instances, or without a
 * figshare-hosted instance, are left untouched.
 *
 * @param r the result to patch in place; its instance list may be null
 */
def fix_figshare(r: Result): Unit = {
  val instances = r.getInstance()
  if (instances != null) {
    val figshareHosted = instances.asScala.exists { inst =>
      val hostedBy = inst.getHostedby
      hostedBy != null && "figshare".equalsIgnoreCase(hostedBy.getValue)
    }
    if (figshareHosted) {
      // Each instance receives its own freshly built access right.
      instances.asScala.foreach(_.setAccessright(ModelConstants.OPEN_ACCESS_RIGHT()))
      // Subjects are dropped by installing an empty list rather than null.
      r.setSubject(List.empty[StructuredProperty].asJava)
    }
  }
}
|
||||||
|
|
||||||
def generateOAFDate(dt: String, q: Qualifier): StructuredProperty = {
|
def generateOAFDate(dt: String, q: Qualifier): StructuredProperty = {
|
||||||
OafMapperUtils.structuredProperty(dt, q, null)
|
OafMapperUtils.structuredProperty(dt, q, null)
|
||||||
}
|
}
|
||||||
|
|
||||||
def generateRelation(sourceId:String, targetId:String, relClass:String, cf:KeyValue, di:DataInfo) :Relation = {
|
def generateRelation(sourceId: String, targetId: String, relClass: String, cf: KeyValue, di: DataInfo): Relation = {
|
||||||
|
|
||||||
val r = new Relation
|
val r = new Relation
|
||||||
r.setSource(sourceId)
|
r.setSource(sourceId)
|
||||||
|
@ -249,17 +267,17 @@ object DataciteToOAFTransformation {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_projectRelation(awardUri:String, sourceId:String):List[Relation] = {
|
def get_projectRelation(awardUri: String, sourceId: String): List[Relation] = {
|
||||||
val match_pattern = funder_regex.find(s =>s._1.matcher(awardUri).find())
|
val match_pattern = funder_regex.find(s => s._1.matcher(awardUri).find())
|
||||||
|
|
||||||
if (match_pattern.isDefined) {
|
if (match_pattern.isDefined) {
|
||||||
val m =match_pattern.get._1
|
val m = match_pattern.get._1
|
||||||
val p = match_pattern.get._2
|
val p = match_pattern.get._2
|
||||||
val grantId = m.matcher(awardUri).replaceAll("$2")
|
val grantId = m.matcher(awardUri).replaceAll("$2")
|
||||||
val targetId = s"$p${DHPUtils.md5(grantId)}"
|
val targetId = s"$p${DHPUtils.md5(grantId)}"
|
||||||
List(
|
List(
|
||||||
generateRelation(sourceId, targetId,"isProducedBy", DATACITE_COLLECTED_FROM, dataInfo),
|
generateRelation(sourceId, targetId, "isProducedBy", DATACITE_COLLECTED_FROM, dataInfo),
|
||||||
generateRelation(targetId, sourceId,"produces", DATACITE_COLLECTED_FROM, dataInfo)
|
generateRelation(targetId, sourceId, "produces", DATACITE_COLLECTED_FROM, dataInfo)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -268,9 +286,9 @@ object DataciteToOAFTransformation {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def generateOAF(input:String,ts:Long, dateOfCollection:Long, vocabularies: VocabularyGroup):List[Oaf] = {
|
def generateOAF(input: String, ts: Long, dateOfCollection: Long, vocabularies: VocabularyGroup): List[Oaf] = {
|
||||||
if (filter_json(input))
|
if (filter_json(input))
|
||||||
return List()
|
return List()
|
||||||
|
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
lazy val json = parse(input)
|
lazy val json = parse(input)
|
||||||
|
@ -289,13 +307,13 @@ object DataciteToOAFTransformation {
|
||||||
return List()
|
return List()
|
||||||
|
|
||||||
|
|
||||||
val doi_q = OafMapperUtils.qualifier("doi", "doi", ModelConstants.DNET_PID_TYPES,ModelConstants.DNET_PID_TYPES)
|
val doi_q = OafMapperUtils.qualifier("doi", "doi", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES)
|
||||||
val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo)
|
val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo)
|
||||||
result.setPid(List(pid).asJava)
|
result.setPid(List(pid).asJava)
|
||||||
result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true))
|
result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true))
|
||||||
result.setOriginalId(List(doi).asJava)
|
result.setOriginalId(List(doi).asJava)
|
||||||
|
|
||||||
val d = new Date(dateOfCollection*1000)
|
val d = new Date(dateOfCollection * 1000)
|
||||||
val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US)
|
val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US)
|
||||||
|
|
||||||
|
|
||||||
|
@ -311,10 +329,10 @@ object DataciteToOAFTransformation {
|
||||||
a.setFullname(c.name.orNull)
|
a.setFullname(c.name.orNull)
|
||||||
a.setName(c.givenName.orNull)
|
a.setName(c.givenName.orNull)
|
||||||
a.setSurname(c.familyName.orNull)
|
a.setSurname(c.familyName.orNull)
|
||||||
if (c.nameIdentifiers!= null&& c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null) {
|
if (c.nameIdentifiers != null && c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null) {
|
||||||
a.setPid(c.nameIdentifiers.get.map(ni => {
|
a.setPid(c.nameIdentifiers.get.map(ni => {
|
||||||
val q = if (ni.nameIdentifierScheme.isDefined) vocabularies.getTermAsQualifier(ModelConstants.DNET_PID_TYPES, ni.nameIdentifierScheme.get.toLowerCase()) else null
|
val q = if (ni.nameIdentifierScheme.isDefined) vocabularies.getTermAsQualifier(ModelConstants.DNET_PID_TYPES, ni.nameIdentifierScheme.get.toLowerCase()) else null
|
||||||
if (ni.nameIdentifier!= null && ni.nameIdentifier.isDefined) {
|
if (ni.nameIdentifier != null && ni.nameIdentifier.isDefined) {
|
||||||
OafMapperUtils.structuredProperty(ni.nameIdentifier.get, q, dataInfo)
|
OafMapperUtils.structuredProperty(ni.nameIdentifier.get, q, dataInfo)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -331,9 +349,7 @@ object DataciteToOAFTransformation {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List())
|
||||||
|
|
||||||
val titles:List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List())
|
|
||||||
|
|
||||||
result.setTitle(titles.filter(t => t.title.nonEmpty).map(t => {
|
result.setTitle(titles.filter(t => t.title.nonEmpty).map(t => {
|
||||||
if (t.titleType.isEmpty) {
|
if (t.titleType.isEmpty) {
|
||||||
|
@ -343,7 +359,7 @@ object DataciteToOAFTransformation {
|
||||||
}
|
}
|
||||||
}).asJava)
|
}).asJava)
|
||||||
|
|
||||||
if(authors==null || authors.isEmpty || !authors.exists(a => a !=null))
|
if (authors == null || authors.isEmpty || !authors.exists(a => a != null))
|
||||||
return List()
|
return List()
|
||||||
result.setAuthor(authors.asJava)
|
result.setAuthor(authors.asJava)
|
||||||
|
|
||||||
|
@ -357,7 +373,7 @@ object DataciteToOAFTransformation {
|
||||||
val a_date: Option[String] = dates
|
val a_date: Option[String] = dates
|
||||||
.filter(d => d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available"))
|
.filter(d => d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available"))
|
||||||
.map(d => extract_date(d.date.get))
|
.map(d => extract_date(d.date.get))
|
||||||
.find(d => d != null && d.isDefined)
|
.find(d => d != null && d.isDefined)
|
||||||
.map(d => d.get)
|
.map(d => d.get)
|
||||||
|
|
||||||
if (a_date.isDefined) {
|
if (a_date.isDefined) {
|
||||||
|
@ -397,7 +413,7 @@ object DataciteToOAFTransformation {
|
||||||
.filter(d => d.description.isDefined).
|
.filter(d => d.description.isDefined).
|
||||||
map(d =>
|
map(d =>
|
||||||
OafMapperUtils.field(d.description.get, null)
|
OafMapperUtils.field(d.description.get, null)
|
||||||
).filter(s => s!=null).asJava)
|
).filter(s => s != null).asJava)
|
||||||
|
|
||||||
|
|
||||||
val publisher = (json \\ "publisher").extractOrElse[String](null)
|
val publisher = (json \\ "publisher").extractOrElse[String](null)
|
||||||
|
@ -415,7 +431,7 @@ object DataciteToOAFTransformation {
|
||||||
|
|
||||||
val client = (json \ "relationships" \ "client" \\ "id").extractOpt[String]
|
val client = (json \ "relationships" \ "client" \\ "id").extractOpt[String]
|
||||||
|
|
||||||
val accessRights:List[String] = for {
|
val accessRights: List[String] = for {
|
||||||
JObject(rightsList) <- json \\ "rightsList"
|
JObject(rightsList) <- json \\ "rightsList"
|
||||||
JField("rightsUri", JString(rightsUri)) <- rightsList
|
JField("rightsUri", JString(rightsUri)) <- rightsList
|
||||||
} yield rightsUri
|
} yield rightsUri
|
||||||
|
@ -447,18 +463,18 @@ object DataciteToOAFTransformation {
|
||||||
instance.setLicense(OafMapperUtils.field(license.get, null))
|
instance.setLicense(OafMapperUtils.field(license.get, null))
|
||||||
}
|
}
|
||||||
|
|
||||||
val awardUris:List[String] = for {
|
val awardUris: List[String] = for {
|
||||||
JObject(fundingReferences) <- json \\ "fundingReferences"
|
JObject(fundingReferences) <- json \\ "fundingReferences"
|
||||||
JField("awardUri", JString(awardUri)) <- fundingReferences
|
JField("awardUri", JString(awardUri)) <- fundingReferences
|
||||||
} yield awardUri
|
} yield awardUri
|
||||||
|
|
||||||
val relations:List[Relation] =awardUris.flatMap(a=> get_projectRelation(a, result.getId)).filter(r => r!= null)
|
val relations: List[Relation] = awardUris.flatMap(a => get_projectRelation(a, result.getId)).filter(r => r != null)
|
||||||
|
fix_figshare(result)
|
||||||
result.setId(IdentifierFactory.createIdentifier(result))
|
result.setId(IdentifierFactory.createIdentifier(result))
|
||||||
if(result.getId == null)
|
if (result.getId == null)
|
||||||
return List()
|
return List()
|
||||||
if (relations!= null && relations.nonEmpty) {
|
if (relations != null && relations.nonEmpty) {
|
||||||
List(result):::relations
|
List(result) ::: relations
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
List(result)
|
List(result)
|
||||||
|
|
|
@ -1,12 +1,12 @@
|
||||||
package eu.dnetlib.dhp.actionmanager.datacite
|
package eu.dnetlib.dhp.actionmanager.datacite
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper
|
|
||||||
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
|
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf
|
import eu.dnetlib.dhp.schema.oaf.Oaf
|
||||||
import org.junit.jupiter.api.extension.ExtendWith
|
import org.junit.jupiter.api.extension.ExtendWith
|
||||||
import org.junit.jupiter.api.{BeforeEach, Test}
|
import org.junit.jupiter.api.{BeforeEach, Test}
|
||||||
import org.mockito.junit.jupiter.MockitoExtension
|
import org.mockito.junit.jupiter.MockitoExtension
|
||||||
|
import org.codehaus.jackson.map.ObjectMapper
|
||||||
import scala.io.Source
|
import scala.io.Source
|
||||||
|
|
||||||
@ExtendWith(Array(classOf[MockitoExtension]))
|
@ExtendWith(Array(classOf[MockitoExtension]))
|
||||||
|
@ -15,7 +15,7 @@ class DataciteToOAFTest extends AbstractVocabularyTest{
|
||||||
|
|
||||||
@BeforeEach
|
@BeforeEach
|
||||||
def setUp() :Unit = {
|
def setUp() :Unit = {
|
||||||
println("Called Method")
|
|
||||||
super.setUpVocabulary()
|
super.setUpVocabulary()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ class DataciteToOAFTest extends AbstractVocabularyTest{
|
||||||
|
|
||||||
val mapper = new ObjectMapper()
|
val mapper = new ObjectMapper()
|
||||||
val res:List[Oaf] =DataciteToOAFTransformation.generateOAF(record, 0L,0L, vocabularies )
|
val res:List[Oaf] =DataciteToOAFTransformation.generateOAF(record, 0L,0L, vocabularies )
|
||||||
println (mapper.writeValueAsString(res.head))
|
println (mapper.defaultPrettyPrintingWriter().writeValueAsString(res.head))
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue