2020-05-19 09:24:45 +02:00
|
|
|
package eu.dnetlib.doiboost
|
|
|
|
|
2021-07-14 11:43:00 +02:00
|
|
|
import java.time.LocalDate
|
|
|
|
import java.time.format.DateTimeFormatter
|
|
|
|
|
2020-05-26 09:15:33 +02:00
|
|
|
import eu.dnetlib.dhp.schema.action.AtomicAction
|
2021-07-14 11:43:00 +02:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.{AccessRight, DataInfo, Dataset, Field, Instance, KeyValue, Oaf, OpenAccessRoute, Organization, Publication, Qualifier, Relation, Result, StructuredProperty}
|
2020-05-19 09:24:45 +02:00
|
|
|
import eu.dnetlib.dhp.utils.DHPUtils
|
2020-05-26 09:15:33 +02:00
|
|
|
import org.apache.commons.lang3.StringUtils
|
2020-12-23 16:59:52 +01:00
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper
|
2021-03-23 09:39:56 +01:00
|
|
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
2021-07-01 22:13:45 +02:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
|
2021-07-14 11:43:00 +02:00
|
|
|
import eu.dnetlib.doiboost.DoiBoostMappingUtil.{getClosedAccessQualifier, getEmbargoedAccessQualifier, getUnknownQualifier}
|
2020-05-23 08:46:49 +02:00
|
|
|
import org.json4s
|
|
|
|
import org.json4s.DefaultFormats
|
|
|
|
import org.json4s.jackson.JsonMethods.parse
|
2020-05-26 09:15:33 +02:00
|
|
|
import org.slf4j.{Logger, LoggerFactory}
|
2020-05-23 08:46:49 +02:00
|
|
|
|
|
|
|
import scala.collection.JavaConverters._
|
|
|
|
|
|
|
|
|
2020-06-04 14:39:20 +02:00
|
|
|
/** Description of a hosting data source, as extracted from JSON by `toHostedByItem`.
  *
  * @param id           identifier of the data source (hashed by `generateDSId` to build the hostedby key)
  * @param officialname name stored as the hostedby value of an instance
  * @param issn         presumably the print ISSN -- TODO confirm against the producer of this record
  * @param eissn        presumably the electronic ISSN -- TODO confirm
  * @param lissn        presumably the linking ISSN -- TODO confirm
  * @param openAccess   when true, `fixPublication` marks the instances as gold Open Access
  */
case class HostedByItemType(
  id: String,
  officialname: String,
  issn: String,
  eissn: String,
  lissn: String,
  openAccess: Boolean
)
|
2020-05-19 09:24:45 +02:00
|
|
|
|
2020-06-04 14:39:20 +02:00
|
|
|
/** Affiliation row linking a paper to an organization.
  *
  * Field names keep their capitalised form; presumably they mirror the column names
  * of the upstream (MAG) dataset -- TODO confirm against the reader of this record.
  *
  * @param PaperId       numeric identifier of the paper
  * @param AffiliationId numeric identifier of the affiliation
  * @param GridId        optional GRID identifier of the organization
  * @param OfficialPage  optional official web page of the organization
  * @param DisplayName   optional display name of the organization
  */
case class DoiBoostAffiliation(
  PaperId: Long,
  AffiliationId: Long,
  GridId: Option[String],
  OfficialPage: Option[String],
  DisplayName: Option[String]
)
|
2020-05-26 09:15:33 +02:00
|
|
|
|
2020-05-19 09:24:45 +02:00
|
|
|
object DoiBoostMappingUtil {
|
2020-06-04 14:39:20 +02:00
|
|
|
|
|
|
|
/** Builds the identifier of a MAG affiliation.
  *
  * The result is the fixed prefix `20|microsoft___`, followed by [[SEPARATOR]] and the
  * `DHPUtils.md5` digest of `affId`.
  *
  * @param affId the affiliation identifier to hash
  * @return the generated identifier
  */
def generateMAGAffiliationId(affId: String): String = {
  val hashedId = DHPUtils.md5(affId)
  "20|microsoft___" + SEPARATOR + hashedId
}
|
|
|
|
|
2020-05-26 09:15:33 +02:00
|
|
|
/** Logger bound to the runtime class of this object. */
val logger: Logger = LoggerFactory.getLogger(this.getClass)
|
|
|
|
|
2020-05-19 09:24:45 +02:00
|
|
|
// Static string constants shared by the mapping helpers.

// Value hashed to build the MAG "collected from" key.
val MAG: String = "microsoft"
// Human readable name used as the MAG "collected from" value.
val MAG_NAME: String = "Microsoft Academic Graph"
val CROSSREF: String = "Crossref"
val UNPAYWALL: String = "UnpayWall"
val GRID_AC: String = "grid.ac"
// NOTE(review): name and value look like a misspelling of "wikipedia"; kept as-is
// because both are visible to callers.
val WIKPEDIA: String = "wikpedia"
// Namespace prefix of the result identifiers built by generateIdentifier.
val doiBoostNSPREFIX: String = "doiboost____"
// Namespace prefix of the "collected from" keys.
val OPENAIRE_PREFIX: String = "openaire____"
// Separator placed between a namespace prefix and the hashed local identifier.
val SEPARATOR: String = "::"
|
|
|
|
|
2021-06-29 18:35:28 +02:00
|
|
|
// Matches the DOI prefix either at the very beginning of the string ("10.") or right
// after a slash ("/10."). The dot is escaped in BOTH alternatives: an unescaped dot
// would also match "/10" followed by any character (e.g. "/10x").
val DOI_PREFIX_REGEX = "(^10\\.|\\/10\\.)"
// Literal prefix every normalized DOI starts with.
val DOI_PREFIX = "10."
|
|
|
|
|
2020-05-23 08:46:49 +02:00
|
|
|
// Lower-cased placeholder names that isValidAuthorName rejects.
val invalidName: List[String] = List(
  ",",
  "none none",
  "none, none",
  "none &na;",
  "(:null)",
  "test test test",
  "test test",
  "test",
  "&na; &na;"
)
|
2020-05-19 09:24:45 +02:00
|
|
|
|
2020-05-26 09:15:33 +02:00
|
|
|
/** JSON serializer reused by every [[toActionSet]] call instead of allocating one per record. */
private val actionMapper: ObjectMapper = new ObjectMapper()

/** Wraps `payload` into an [[AtomicAction]] declared with `clazz` and serializes it to JSON.
  *
  * @return the canonical name of the payload's runtime class paired with the JSON of the action
  */
private def serializeAction[T <: Oaf](clazz: Class[T], payload: T): (String, String) = {
  val action = new AtomicAction[T]
  action.setClazz(clazz)
  action.setPayload(payload)
  (payload.getClass.getCanonicalName, actionMapper.writeValueAsString(action))
}

/** Converts an OAF entity into the (class name, serialized AtomicAction) pair of an action set.
  *
  * Supported payloads are Dataset, Publication, Organization and Relation.
  *
  * @param item the entity to wrap
  * @return the canonical class name of `item` and the JSON of the action wrapping it,
  *         or `null` when `item` is none of the supported types (callers must handle it)
  */
def toActionSet(item: Oaf): (String, String) =
  item match {
    case dataset: Dataset           => serializeAction(classOf[Dataset], dataset)
    case publication: Publication   => serializeAction(classOf[Publication], publication)
    case organization: Organization => serializeAction(classOf[Organization], organization)
    case relation: Relation         => serializeAction(classOf[Relation], relation)
    // Unsupported payloads keep yielding null, as before.
    case _                          => null
  }
|
|
|
|
|
2020-05-19 09:24:45 +02:00
|
|
|
|
2020-06-04 14:39:20 +02:00
|
|
|
/** Parses a JSON object whose fields map a key to a [[HostedByItemType]].
  *
  * @param input JSON text of the form `{"<key>": {...HostedByItemType fields...}}`
  * @return the first key of the parsed map together with the first value
  * @throws NoSuchElementException when the parsed map is empty
  */
def toHostedByItem(input: String): (String, HostedByItemType) = {
  implicit val formats: DefaultFormats.type = DefaultFormats

  val parsed: json4s.JValue = parse(input)
  val itemsByKey = parsed.extract[Map[String, HostedByItemType]]
  val firstKey = itemsByKey.keys.head
  val firstItem = itemsByKey.values.head
  (firstKey, firstItem)
}
|
|
|
|
|
2020-05-19 09:24:45 +02:00
|
|
|
|
2020-06-04 14:39:20 +02:00
|
|
|
/** Pairs a publication with the key used to look up its hosting journal.
  *
  * The key is the first non-null, non-empty value among the journal's printed, online
  * and linking ISSN (in that order); when there is no journal or no usable ISSN the
  * publication id is used instead.
  *
  * @param publication the publication to key
  * @return `(key, publication)`
  */
def toISSNPair(publication: Publication): (String, Publication) = {
  val journal = publication.getJournal
  val issnCandidates: List[String] =
    if (journal == null) Nil
    else List(journal.getIssnPrinted, journal.getIssnOnline, journal.getIssnLinking)

  val key = issnCandidates
    .find(candidate => candidate != null && candidate.nonEmpty)
    .getOrElse(publication.getId)
  (key, publication)
}
|
2020-05-19 09:24:45 +02:00
|
|
|
|
2020-05-23 08:46:49 +02:00
|
|
|
|
2020-05-19 09:24:45 +02:00
|
|
|
|
|
|
|
|
2020-05-26 09:15:33 +02:00
|
|
|
/** Builds the identifier of an organization from its GRID id.
  *
  * The GRID id is lower-cased and trimmed before being hashed with `DHPUtils.md5`.
  * Lower-casing uses `Locale.ROOT` so the identifier does not depend on the JVM default
  * locale (e.g. under a Turkish locale "I" would otherwise become a dotless "i").
  *
  * @param gridId the GRID identifier; must not be null
  * @return `20|grid________::` followed by the digest of the normalized id
  */
def generateGridAffiliationId(gridId: String): String = {
  val normalized = gridId.toLowerCase(java.util.Locale.ROOT).trim
  s"20|grid________::${DHPUtils.md5(normalized)}"
}
|
2020-05-19 09:24:45 +02:00
|
|
|
|
|
|
|
|
2020-05-26 09:15:33 +02:00
|
|
|
/** Normalizes the instances of a dataset in place.
  *
  * When [[extractInstance]] finds an instance with a usable instance type, that type is
  * copied onto every instance; every instance is then hosted by
  * `ModelConstants.UNKNOWN_REPOSITORY`.
  *
  * @param result the dataset to fix (mutated)
  * @return the same `result` object
  */
def fixResult(result: Dataset): Dataset = {
  val sharedType = extractInstance(result).map(found => found.getInstancetype)

  result.getInstance().asScala.foreach { instance =>
    sharedType.foreach(instanceType => instance.setInstancetype(instanceType))
    instance.setHostedby(ModelConstants.UNKNOWN_REPOSITORY)
  }
  result
}
|
2020-05-19 09:24:45 +02:00
|
|
|
|
2020-06-04 14:39:20 +02:00
|
|
|
|
2021-07-14 11:43:00 +02:00
|
|
|
/** License prefixes identifying Creative Commons licenses. */
private val openLicensePrefixes: List[String] = List(
  "cc",
  "http://creativecommons.org/licenses",
  "https://creativecommons.org/licenses"
)

/** License URLs that are considered OPEN when matched exactly. */
private val openLicenseUrls: Set[String] = Set(
  //ACS Publications Author choice licenses (considered OPEN also by Unpaywall)
  "http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html",
  "http://pubs.acs.org/page/policy/authorchoice_termsofuse.html",
  "http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html",
  //APA (considered OPEN also by Unpaywall)
  "http://www.apa.org/pubs/journals/resources/open-access.aspx"
)

/** OUP license: OPEN only after 12 months from the publication date, otherwise embargoed. */
private val oupStandardLicense: String =
  "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"

/** Date layouts accepted for the publication date, tried in this order. */
private val publicationDateFormats: List[DateTimeFormatter] = List(
  DateTimeFormatter.ofPattern("yyyy-MM-dd"),
  DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'")
)

/** Open Access qualifier flagged with the hybrid route. */
private def hybridOpenAccess(): AccessRight = {
  val openAccess: AccessRight = getOpenAccessQualifier()
  openAccess.setOpenAccessRoute(OpenAccessRoute.hybrid)
  openAccess
}

/** Parses `date` with the first matching layout; None when it is null or matches no layout. */
private def parsePublicationDate(date: String): Option[LocalDate] =
  publicationDateFormats.iterator
    .map(format => scala.util.Try(LocalDate.parse(date, format)))
    .collectFirst { case scala.util.Success(parsed) => parsed }

/** Derives the access right of an instance from its license and publication date.
  *
  *  - no license field, or a license field without value: UNKNOWN
  *    (a null value used to raise a NullPointerException);
  *  - Creative Commons, ACS author-choice and APA open-access licenses: OPEN, hybrid route;
  *  - the OUP standard publication model: OPEN (hybrid) when the publication date is more
  *    than 365 days in the past, EMBARGO otherwise, CLOSED when the date cannot be parsed;
  *  - anything else: CLOSED.
  *
  * @param lic  the license field, may be null
  * @param date the publication date, `yyyy-MM-dd` or `yyyy-MM-dd'T'HH:mm:ss'Z'`
  * @return the access right qualifier
  */
def decideAccessRight(lic: Field[String], date: String): AccessRight = {
  val license: String = if (lic == null) null else lic.getValue

  if (license == null) {
    //Default value Unknown
    getUnknownQualifier()
  } else if (openLicensePrefixes.exists(prefix => license.startsWith(prefix)) || openLicenseUrls.contains(license)) {
    hybridOpenAccess()
  } else if (license == oupStandardLicense) {
    val today = LocalDate.now
    parsePublicationDate(date) match {
      // Same threshold as (days / 365.0) > 1.
      case Some(published) if today.toEpochDay - published.toEpochDay > 365 => hybridOpenAccess()
      case Some(_) => getEmbargoedAccessQualifier()
      case None => getClosedAccessQualifier()
    }
  } else {
    getClosedAccessQualifier()
  }
}
|
|
|
|
|
2020-06-04 14:39:20 +02:00
|
|
|
|
|
|
|
|
2021-01-12 15:36:38 +01:00
|
|
|
/** Builds an access right in the `ModelConstants.DNET_ACCESS_MODES` scheme (used as both scheme id and name). */
private def accessMode(classId: String, className: String): AccessRight = {
  val scheme = ModelConstants.DNET_ACCESS_MODES
  OafMapperUtils.accessRight(classId, className, scheme, scheme)
}

/** @return a new "OPEN" / "Open Access" access right */
def getOpenAccessQualifier(): AccessRight = accessMode("OPEN", "Open Access")

/** @return a new "RESTRICTED" / "Restricted" access right */
def getRestrictedQualifier(): AccessRight = accessMode("RESTRICTED", "Restricted")

/** @return a new "UNKNOWN" / "not available" access right */
def getUnknownQualifier(): AccessRight = accessMode("UNKNOWN", "not available")

/** @return a new "EMBARGO" / "Embargo" access right */
def getEmbargoedAccessQualifier(): AccessRight = accessMode("EMBARGO", "Embargo")

/** @return a new "CLOSED" / "Closed Access" access right */
def getClosedAccessQualifier(): AccessRight = accessMode("CLOSED", "Closed Access")
|
|
|
|
|
|
|
|
|
2021-03-23 09:23:22 +01:00
|
|
|
/** Finds the first instance of a result that carries a usable instance type.
  *
  * An instance type is usable when it is not null and its class id is neither null nor
  * empty. A null instance list, null instances and null class ids are tolerated; a null
  * class id used to raise a NullPointerException.
  *
  * @param r the result to inspect
  * @return the first matching instance, or None
  */
def extractInstance(r: Result): Option[Instance] =
  Option(r.getInstance()).flatMap { instances =>
    instances.asScala.find { instance =>
      val instanceType = if (instance == null) null else instance.getInstancetype
      instanceType != null && instanceType.getClassid != null && instanceType.getClassid.nonEmpty
    }
  }
|
|
|
|
|
2020-06-04 14:39:20 +02:00
|
|
|
/** Completes a publication with instance type, hostedby and best access right.
  *
  *  - when [[extractInstance]] finds a usable instance type, it is copied onto every instance;
  *  - when a hosting item is available, every instance is hosted by it (key from
  *    [[generateDSId]], value from its official name) and, if the item is open access, the
  *    instance becomes Open Access with the gold route; otherwise the instance is hosted by
  *    `ModelConstants.UNKNOWN_REPOSITORY`;
  *  - the best access right is recomputed from the instances.
  *
  * @param input the keyed publication and the (possibly null) keyed hosting item joined to it
  * @return the same publication object, mutated
  */
def fixPublication(input: ((String, Publication), (String, HostedByItemType))): Publication = {
  val publication = input._1._2
  val hostingItem: Option[HostedByItemType] =
    Option(input._2).flatMap(entry => Option(entry._2))
  val sharedType = extractInstance(publication).map(found => found.getInstancetype)

  publication.getInstance().asScala.foreach { instance =>
    sharedType.foreach(instanceType => instance.setInstancetype(instanceType))

    val hostedBy: KeyValue = hostingItem match {
      case Some(item) =>
        val kv = new KeyValue
        kv.setValue(item.officialname)
        kv.setKey(generateDSId(item.id))
        if (item.openAccess) {
          instance.setAccessright(getOpenAccessQualifier())
          instance.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold)
        }
        kv
      case None =>
        ModelConstants.UNKNOWN_REPOSITORY
    }
    instance.setHostedby(hostedBy)
  }

  publication.setBestaccessright(OafMapperUtils.createBestAccessRights(publication.getInstance()))
  publication
}
|
2020-05-22 15:15:09 +02:00
|
|
|
|
2020-05-26 09:15:33 +02:00
|
|
|
/** Builds a data source identifier from a `<namespace>::<local id>` string.
  *
  * The text before the first `::` is kept verbatim and the text after it is replaced by
  * its `DHPUtils.md5` digest.
  *
  * @param input the original identifier
  * @return `10|<namespace>::<digest of the local id>`
  */
def generateDSId(input: String): String = {
  val namespace = StringUtils.substringBefore(input, "::")
  val localId = StringUtils.substringAfter(input, "::")
  "10|" + namespace + "::" + DHPUtils.md5(localId)
}
|
2020-05-22 15:15:09 +02:00
|
|
|
|
2020-05-26 09:15:33 +02:00
|
|
|
/** Builds a [[DataInfo]] with the default trust value `"0.9"`. */
def generateDataInfo(): DataInfo = generateDataInfo("0.9")
|
2020-05-22 15:15:09 +02:00
|
|
|
|
2020-05-26 09:15:33 +02:00
|
|
|
/** Tells whether a publication is worth keeping.
  *
  * A publication is rejected when any of the following holds:
  *  - it is null, or its id is null or empty;
  *  - it has no title other than the `[NO TITLE AVAILABLE]` placeholder;
  *  - its publisher is one of the known test accounts (fixes #4360);
  *  - it has no author with a valid name (see [[isValidAuthorName]]);
  *  - it is one of the "Addie Jackson" / "Elsevier BV" records (fixes #4368).
  *
  * Unlike the previous implementation, authors with a null full name fall back to
  * "name surname" instead of raising a NullPointerException, missing name parts are
  * skipped instead of being rendered as the literal "null", and a null publisher no
  * longer breaks the #4368 check.
  *
  * @param publication the publication to check, may be null
  * @return true when the publication must be kept
  */
def filterPublication(publication: Publication): Boolean = {
  if (publication == null) {
    //Case empty publication
    false
  } else {
    val publisher: String =
      if (publication.getPublisher != null) publication.getPublisher.getValue else null

    def hasId: Boolean = publication.getId != null && publication.getId.nonEmpty

    //Case publication with no title
    def hasUsableTitle: Boolean =
      publication.getTitle != null && publication.getTitle.asScala.exists { title =>
        title.getValue != null && title.getValue.nonEmpty &&
        !title.getValue.equalsIgnoreCase("[NO TITLE AVAILABLE]")
      }

    // fixes #4360 (test publisher)
    def fromTestPublisher: Boolean =
      publisher != null &&
        (publisher.equalsIgnoreCase("Test accounts") || publisher.equalsIgnoreCase("CrossRef Test Account"))

    // Display name of every author: the full name when present, otherwise "name surname"
    // built only from the parts that are actually available.
    lazy val authorNames: List[String] =
      if (publication.getAuthor == null) Nil
      else
        publication.getAuthor.asScala.toList.filter(_ != null).map { author =>
          val fullname = author.getFullname
          if (fullname != null && fullname.nonEmpty) fullname
          else
            List(author.getName, author.getSurname)
              .filter(part => part != null && part.trim.nonEmpty)
              .mkString(" ")
        }

    //Publication with no Author / only invalid authors
    def hasValidAuthor: Boolean = authorNames.exists(isValidAuthorName)

    // fixes #4368
    def isKnownBogusRecord: Boolean =
      authorNames.exists(name => name.equalsIgnoreCase("Addie Jackson")) &&
        "Elsevier BV".equalsIgnoreCase(publisher)

    hasId && hasUsableTitle && !fromTestPublisher && hasValidAuthor && !isKnownBogusRecord
  }
}
|
2020-05-19 09:24:45 +02:00
|
|
|
|
|
|
|
|
2020-05-26 09:15:33 +02:00
|
|
|
/** Tells whether an author name is usable.
  *
  * The name is lower-cased and trimmed, then rejected when nothing is left (null, empty
  * or whitespace-only input) or when it is one of the placeholders listed in
  * [[invalidName]]. Whitespace-only names used to be accepted.
  *
  * @param fullName the author name to check, may be null
  * @return true when the name is valid
  */
def isValidAuthorName(fullName: String): Boolean = {
  val normalized = if (fullName == null) "" else fullName.toLowerCase.trim
  normalized.nonEmpty && !invalidName.contains(normalized)
}
|
2020-05-19 09:24:45 +02:00
|
|
|
|
|
|
|
|
2020-05-26 09:15:33 +02:00
|
|
|
/** Builds a [[DataInfo]] for imported records.
  *
  * The flags `deletedbyinference`, `inferred` and `invisible` are all false, and the
  * provenance action is `ModelConstants.SYSIMPORT_ACTIONSET` in the
  * `ModelConstants.DNET_PROVENANCE_ACTIONS` scheme.
  *
  * @param trust the trust value to store
  * @return the new DataInfo
  */
def generateDataInfo(trust: String): DataInfo = {
  val provenance = OafMapperUtils.qualifier(
    ModelConstants.SYSIMPORT_ACTIONSET,
    ModelConstants.SYSIMPORT_ACTIONSET,
    ModelConstants.DNET_PROVENANCE_ACTIONS,
    ModelConstants.DNET_PROVENANCE_ACTIONS
  )

  val info = new DataInfo
  info.setDeletedbyinference(false)
  info.setInferred(false)
  info.setInvisible(false)
  info.setTrust(trust)
  info.setProvenanceaction(provenance)
  info
}
|
2020-05-19 09:24:45 +02:00
|
|
|
|
|
|
|
|
2020-06-25 10:48:15 +02:00
|
|
|
|
|
|
|
/** Creates a structured property with a fully specified qualifier.
  *
  * @param value      the property value
  * @param classId    qualifier class id
  * @param className  qualifier class name
  * @param schemeId   qualifier scheme id
  * @param schemeName qualifier scheme name
  * @return the new property, without data info
  */
def createSP(value: String, classId: String, className: String, schemeId: String, schemeName: String): StructuredProperty = {
  val qualifier = OafMapperUtils.qualifier(classId, className, schemeId, schemeName)
  val property = new StructuredProperty
  property.setQualifier(qualifier)
  property.setValue(value)
  property
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** Creates a structured property with a fully specified qualifier and the given data info.
  *
  * @param value      the property value
  * @param classId    qualifier class id
  * @param className  qualifier class name
  * @param schemeId   qualifier scheme id
  * @param schemeName qualifier scheme name
  * @param dataInfo   data info attached to the property
  * @return the new property
  */
def createSP(value: String, classId: String, className: String, schemeId: String, schemeName: String, dataInfo: DataInfo): StructuredProperty = {
  // Same construction as the overload without data info, plus the data info itself.
  val property = createSP(value, classId, className, schemeId, schemeName)
  property.setDataInfo(dataInfo)
  property
}
|
|
|
|
|
2020-05-26 09:15:33 +02:00
|
|
|
/** Creates a structured property whose qualifier uses `classId` as both class id and
  * class name, and `schemeId` as both scheme id and scheme name.
  *
  * @param value    the property value
  * @param classId  qualifier class id and name
  * @param schemeId qualifier scheme id and name
  * @return the new property, without data info
  */
def createSP(value: String, classId: String, schemeId: String): StructuredProperty =
  createSP(value, classId, classId, schemeId, schemeId)
|
2020-05-23 08:46:49 +02:00
|
|
|
|
2020-06-25 10:48:15 +02:00
|
|
|
|
|
|
|
|
2020-05-26 09:15:33 +02:00
|
|
|
/** Creates a structured property whose qualifier uses `classId` as both class id and
  * class name, and `schemeId` as both scheme id and scheme name, with the given data info.
  *
  * @param value    the property value
  * @param classId  qualifier class id and name
  * @param schemeId qualifier scheme id and name
  * @param dataInfo data info attached to the property
  * @return the new property
  */
def createSP(value: String, classId: String, schemeId: String, dataInfo: DataInfo): StructuredProperty = {
  val property = createSP(value, classId, schemeId)
  property.setDataInfo(dataInfo)
  property
}
|
2020-05-23 08:46:49 +02:00
|
|
|
|
2020-05-26 09:15:33 +02:00
|
|
|
/** Builds the "collected from" entry for Crossref.
  *
  * @return a KeyValue whose value is [[CROSSREF]] and whose key is `10|`, [[OPENAIRE_PREFIX]],
  *         [[SEPARATOR]] and the `DHPUtils.md5` digest of the lower-cased [[CROSSREF]]
  */
def createCrossrefCollectedFrom(): KeyValue = {
  val digest = DHPUtils.md5(CROSSREF.toLowerCase)
  val collectedFrom = new KeyValue
  collectedFrom.setValue(CROSSREF)
  collectedFrom.setKey(s"10|${OPENAIRE_PREFIX}${SEPARATOR}${digest}")
  collectedFrom
}
|
2020-05-23 08:46:49 +02:00
|
|
|
|
|
|
|
|
2020-05-26 09:15:33 +02:00
|
|
|
/** Builds the "collected from" entry for UnpayWall.
  *
  * @return a KeyValue whose value is [[UNPAYWALL]] and whose key is `10|`, [[OPENAIRE_PREFIX]],
  *         [[SEPARATOR]] and the `DHPUtils.md5` digest of the lower-cased [[UNPAYWALL]]
  */
def createUnpayWallCollectedFrom(): KeyValue = {
  val digest = DHPUtils.md5(UNPAYWALL.toLowerCase)
  val collectedFrom = new KeyValue
  collectedFrom.setValue(UNPAYWALL)
  collectedFrom.setKey(s"10|${OPENAIRE_PREFIX}${SEPARATOR}${digest}")
  collectedFrom
}
|
2020-05-23 08:46:49 +02:00
|
|
|
|
2020-05-26 09:15:33 +02:00
|
|
|
/** Builds the "collected from" entry for ORCID.
  *
  * @return a KeyValue whose value is the upper-cased `ModelConstants.ORCID` and whose key is
  *         `10|`, [[OPENAIRE_PREFIX]], [[SEPARATOR]] and the `DHPUtils.md5` digest of the
  *         lower-cased `ModelConstants.ORCID`
  */
def createORIDCollectedFrom(): KeyValue = {
  val digest = DHPUtils.md5(ModelConstants.ORCID.toLowerCase)
  val collectedFrom = new KeyValue
  collectedFrom.setValue(StringUtils.upperCase(ModelConstants.ORCID))
  collectedFrom.setKey(s"10|${OPENAIRE_PREFIX}${SEPARATOR}${digest}")
  collectedFrom
}
|
2020-05-23 08:46:49 +02:00
|
|
|
|
|
|
|
|
|
|
|
/** Builds the identifier of a result from its DOI.
  *
  * The DOI is lower-cased with `Locale.ROOT`, so the identifier does not depend on the
  * JVM default locale, and then hashed with `DHPUtils.md5`.
  *
  * @param oaf the result the identifier is generated for (not read; kept for callers)
  * @param doi the DOI of the result; must not be null
  * @return `50|`, [[doiBoostNSPREFIX]], [[SEPARATOR]] and the digest of the lower-cased DOI
  */
def generateIdentifier(oaf: Result, doi: String): String = {
  val id = DHPUtils.md5(doi.toLowerCase(java.util.Locale.ROOT))
  s"50|${doiBoostNSPREFIX}${SEPARATOR}${id}"
}
|
2020-05-23 08:46:49 +02:00
|
|
|
|
|
|
|
|
2020-06-04 14:39:20 +02:00
|
|
|
|
|
|
|
|
2020-05-26 09:15:33 +02:00
|
|
|
/** Builds the "collected from" entry for Microsoft Academic Graph.
  *
  * @return a KeyValue whose value is [[MAG_NAME]] and whose key is `10|`, [[OPENAIRE_PREFIX]],
  *         [[SEPARATOR]] and the `DHPUtils.md5` digest of [[MAG]]
  */
def createMAGCollectedFrom(): KeyValue = {
  val digest = DHPUtils.md5(MAG)
  val collectedFrom = new KeyValue
  collectedFrom.setValue(MAG_NAME)
  collectedFrom.setKey(s"10|${OPENAIRE_PREFIX}${SEPARATOR}${digest}")
  collectedFrom
}
|
2020-05-23 08:46:49 +02:00
|
|
|
|
2020-05-26 09:15:33 +02:00
|
|
|
/** Wraps a value into a new [[Field]].
  *
  * @param value the value to wrap
  * @tparam T the type of the value
  * @return a Field with only its value set
  */
def asField[T](value: T): Field[T] = {
  val field = new Field[T]
  field.setValue(value)
  field
}
|
2020-05-19 09:24:45 +02:00
|
|
|
|
2021-06-29 18:35:28 +02:00
|
|
|
/** Tells whether a string is null, empty or made only of characters removed by `trim`. */
def isEmpty(x: String): Boolean = Option(x).forall(text => text.trim.isEmpty)
|
|
|
|
|
|
|
|
/** Normalizes a DOI, possibly embedded in a URL or surrounded by noise.
  *
  * All whitespace is removed, the text is lower-cased (with `Locale.ROOT`, so the result
  * does not depend on the JVM default locale), the first match of [[DOI_PREFIX_REGEX]] is
  * replaced by [[DOI_PREFIX]], and everything before the first occurrence of
  * [[DOI_PREFIX]] is dropped.
  *
  * @param input the raw DOI, may be null
  * @return the normalized DOI starting with [[DOI_PREFIX]], or null when `input` is null
  *         or contains no DOI prefix
  */
def normalizeDoi(input: String): String = {
  if (input == null) null
  else {
    val cleaned = input
      .replaceAll("\\s", "")
      .toLowerCase(java.util.Locale.ROOT)
      .replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX)

    // An empty string has no prefix either, so this single check also covers blank input.
    val start = cleaned.indexOf(DOI_PREFIX)
    if (start < 0) null else cleaned.substring(start)
  }
}
|
|
|
|
|
|
|
|
|
2020-05-19 09:24:45 +02:00
|
|
|
|
|
|
|
}
|