forked from antonis.lempesis/dnet-hadoop
Merge pull request 'Change the access right in DoiBoost' (#126) from doiboosi_accessright into beta
Reviewed-on: D-Net/dnet-hadoop#126
This commit is contained in:
commit
b346feed36
|
@ -1,12 +1,16 @@
|
|||
package eu.dnetlib.doiboost
|
||||
|
||||
import java.time.LocalDate
|
||||
import java.time.format.DateTimeFormatter
|
||||
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction
|
||||
import eu.dnetlib.dhp.schema.oaf.{AccessRight, DataInfo, Dataset, Field, Instance, KeyValue, Oaf, Organization, Publication, Qualifier, Relation, Result, StructuredProperty}
|
||||
import eu.dnetlib.dhp.schema.oaf.{AccessRight, DataInfo, Dataset, Field, Instance, KeyValue, Oaf, OpenAccessRoute, Organization, Publication, Qualifier, Relation, Result, StructuredProperty}
|
||||
import eu.dnetlib.dhp.utils.DHPUtils
|
||||
import org.apache.commons.lang3.StringUtils
|
||||
import com.fasterxml.jackson.databind.ObjectMapper
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil.{getClosedAccessQualifier, getEmbargoedAccessQualifier, getUnknownQualifier}
|
||||
import org.json4s
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.jackson.JsonMethods.parse
|
||||
|
@ -118,14 +122,92 @@ object DoiBoostMappingUtil {
|
|||
}
|
||||
|
||||
|
||||
/**
  * Derives the access right of an instance from its license URL and publication date.
  *
  * Rules, checked in this order:
  *  - no license (null field, or field without a value): Unknown;
  *  - Creative Commons licenses, the ACS AuthorChoice licenses and the APA open-access
  *    license: OPEN with the `hybrid` open access route;
  *  - OUP standard publication model: OPEN/hybrid when more than 365 days have elapsed
  *    since `date`, EMBARGO otherwise, CLOSED when `date` cannot be parsed;
  *  - any other license: CLOSED.
  *
  * @param lic  license of the instance; may be null
  * @param date publication date, formatted as `yyyy-MM-dd` or `yyyy-MM-dd'T'HH:mm:ss'Z'`
  * @return the access right selected by the rules above
  */
def decideAccessRight(lic : Field[String], date:String) : AccessRight = {

  val oupStandardModel = "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"

  // Licenses considered OPEN regardless of the publication date.
  def isOpenLicense(license: String): Boolean =
    //CC licenses
    license.startsWith("cc") ||
      license.startsWith("http://creativecommons.org/licenses") ||
      license.startsWith("https://creativecommons.org/licenses") ||
      //ACS Publications Author choice licenses (considered OPEN also by Unpaywall)
      license.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html") ||
      license.equals("http://pubs.acs.org/page/policy/authorchoice_termsofuse.html") ||
      license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") ||
      //APA (considered OPEN also by Unpaywall)
      license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx")

  // OPEN access right flagged with the hybrid route.
  def hybridOpenAccess(): AccessRight = {
    val oaq: AccessRight = getOpenAccessQualifier()
    oaq.setOpenAccessRoute(OpenAccessRoute.hybrid)
    oaq
  }

  // Parses `date` with the given pattern; None when it is null or does not match.
  def parseDate(pattern: String): Option[LocalDate] =
    scala.util.Try(LocalDate.parse(date, DateTimeFormatter.ofPattern(pattern))).toOption

  // A missing field and a field without a value both mean "no license information".
  val license: String = if (lic == null) null else lic.getValue

  if (license == null) {
    //Default value Unknown
    getUnknownQualifier()
  } else if (isOpenLicense(license)) {
    hybridOpenAccess()
  } else if (license.equals(oupStandardModel)) {
    //OUP (BUT ONLY AFTER 12 MONTHS FROM THE PUBLICATION DATE, OTHERWISE THEY ARE EMBARGOED)
    val now = LocalDate.now
    parseDate("yyyy-MM-dd").orElse(parseDate("yyyy-MM-dd'T'HH:mm:ss'Z'")) match {
      // Same threshold as the former (days / 365.0) > 1 check.
      case Some(pubDate) if now.toEpochDay - pubDate.toEpochDay > 365 => hybridOpenAccess()
      case Some(_) => getEmbargoedAccessQualifier()
      // The date matches neither supported format.
      case None => getClosedAccessQualifier()
    }
  } else {
    getClosedAccessQualifier()
  }
}
|
||||
|
||||
|
||||
|
||||
/**
  * Builds the "Open Access" access right.
  *
  * The class id comes from ModelConstants.ACCESS_RIGHT_OPEN; ModelConstants.DNET_ACCESS_MODES
  * is passed for both scheme arguments.
  *
  * @return a new AccessRight created by OafMapperUtils.accessRight
  */
def getOpenAccessQualifier():AccessRight = {
  // A single call is enough: the earlier duplicate built an instance that was discarded.
  OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN,"Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)
}
|
||||
|
||||
/**
  * Builds the "Restricted" access right.
  *
  * ModelConstants.DNET_ACCESS_MODES is passed for both scheme arguments.
  *
  * @return a new AccessRight created by OafMapperUtils.accessRight
  */
def getRestrictedQualifier():AccessRight = {
  // A single call is enough: the earlier duplicate built an instance that was discarded.
  OafMapperUtils.accessRight("RESTRICTED","Restricted",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)
}
|
||||
|
||||
|
||||
/**
  * Builds the access right used when nothing is known about the access mode.
  *
  * Uses ModelConstants.UNKNOWN and ModelConstants.NOT_AVAILABLE as the first two arguments
  * and ModelConstants.DNET_ACCESS_MODES for both scheme arguments.
  *
  * @return a new AccessRight created by OafMapperUtils.accessRight
  */
def getUnknownQualifier():AccessRight = {
  val accessModes = ModelConstants.DNET_ACCESS_MODES
  OafMapperUtils.accessRight(ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, accessModes, accessModes)
}
|
||||
|
||||
|
||||
/**
  * Builds the "Embargo" access right.
  *
  * ModelConstants.DNET_ACCESS_MODES is passed for both scheme arguments.
  *
  * @return a new AccessRight created by OafMapperUtils.accessRight
  */
def getEmbargoedAccessQualifier():AccessRight = {
  val accessModes = ModelConstants.DNET_ACCESS_MODES
  OafMapperUtils.accessRight("EMBARGO", "Embargo", accessModes, accessModes)
}
|
||||
|
||||
/**
  * Builds the "Closed Access" access right.
  *
  * ModelConstants.DNET_ACCESS_MODES is passed for both scheme arguments.
  *
  * @return a new AccessRight created by OafMapperUtils.accessRight
  */
def getClosedAccessQualifier():AccessRight = {
  val accessModes = ModelConstants.DNET_ACCESS_MODES
  OafMapperUtils.accessRight("CLOSED", "Closed Access", accessModes, accessModes)
}
|
||||
|
||||
|
||||
|
@ -150,10 +232,11 @@ object DoiBoostMappingUtil {
|
|||
if (item != null) {
|
||||
hb.setValue(item.officialname)
|
||||
hb.setKey(generateDSId(item.id))
|
||||
if (item.openAccess)
|
||||
if (item.openAccess) {
|
||||
i.setAccessright(getOpenAccessQualifier())
|
||||
val ar = getOpenAccessQualifier()
|
||||
publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename))
|
||||
i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold)
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
hb = ModelConstants.UNKNOWN_REPOSITORY
|
||||
|
@ -161,17 +244,8 @@ object DoiBoostMappingUtil {
|
|||
i.setHostedby(hb)
|
||||
})
|
||||
|
||||
val ar = publication.getInstance().asScala.filter(i => i.getInstancetype != null && i.getAccessright!= null && i.getAccessright.getClassid!= null).map(f=> f.getAccessright.getClassid)
|
||||
if (ar.nonEmpty) {
|
||||
if(ar.contains(ModelConstants.ACCESS_RIGHT_OPEN)){
|
||||
val ar = getOpenAccessQualifier()
|
||||
publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename))
|
||||
}
|
||||
else {
|
||||
val ar = getRestrictedQualifier()
|
||||
publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename))
|
||||
}
|
||||
}
|
||||
publication.setBestaccessright(OafMapperUtils.createBestAccessRights(publication.getInstance()))
|
||||
|
||||
publication
|
||||
}
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants
|
|||
import eu.dnetlib.dhp.schema.oaf._
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils}
|
||||
import eu.dnetlib.dhp.utils.DHPUtils
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil.{decideAccessRight, _}
|
||||
import org.apache.commons.lang.StringUtils
|
||||
import org.json4s
|
||||
import org.json4s.DefaultFormats
|
||||
|
@ -168,12 +168,22 @@ case object Crossref2Oaf {
|
|||
// Mapping instance
|
||||
val instance = new Instance()
|
||||
val license = for {
|
||||
JString(lic) <- json \ "license" \ "URL"
|
||||
} yield asField(lic)
|
||||
val l = license.filter(d => StringUtils.isNotBlank(d.getValue))
|
||||
if (l.nonEmpty)
|
||||
instance.setLicense(l.head)
|
||||
|
||||
JObject(license) <- json \ "license"
|
||||
JField("URL", JString(lic)) <- license
|
||||
JField("content-version", JString(content_version)) <- license
|
||||
} yield (asField(lic), content_version)
|
||||
val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue))
|
||||
if (l.nonEmpty){
|
||||
if (l exists (d => d._2.equals("vor"))){
|
||||
for(d <- l){
|
||||
if (d._2.equals("vor")){
|
||||
instance.setLicense(d._1)
|
||||
}
|
||||
}
|
||||
}
|
||||
else{
|
||||
instance.setLicense(l.head._1)}
|
||||
}
|
||||
|
||||
// Ticket #6281 added pid to Instance
|
||||
instance.setPid(result.getPid)
|
||||
|
@ -185,7 +195,7 @@ case object Crossref2Oaf {
|
|||
OafMapperUtils.qualifier("0001", "peerReviewed", ModelConstants.DNET_REVIEW_LEVELS, ModelConstants.DNET_REVIEW_LEVELS))
|
||||
}
|
||||
|
||||
instance.setAccessright(getRestrictedQualifier())
|
||||
instance.setAccessright(decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue))
|
||||
instance.setInstancetype(OafMapperUtils.qualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE))
|
||||
result.setResourcetype(OafMapperUtils.qualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE))
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@ import org.slf4j.{Logger, LoggerFactory}
|
|||
|
||||
import scala.collection.JavaConverters._
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
|
||||
import eu.dnetlib.doiboost.uw.UnpayWallToOAF.get_unpaywall_color
|
||||
|
||||
|
||||
|
||||
|
@ -23,6 +24,21 @@ case class OALocation(evidence:Option[String], host_type:Option[String], is_best
|
|||
object UnpayWallToOAF {
|
||||
val logger: Logger = LoggerFactory.getLogger(getClass)
|
||||
|
||||
|
||||
/**
  * Maps the Unpaywall `oa_status` value to an open access route.
  *
  * The comparison is case-insensitive.
  *
  * @param input value of the `oa_status` field; may be null
  * @return None for a null or closed status; Some(green | bronze | hybrid) for the matching
  *         status; Some(gold) for every other value
  */
def get_unpaywall_color(input:String):Option[OpenAccessRoute] = {
  Option(input).flatMap { status =>
    // Unpaywall reports "closed"; the historical spelling "close" is still accepted.
    // Without the "closed" check such records fell through to the gold default.
    if (status.equalsIgnoreCase("closed") || status.equalsIgnoreCase("close"))
      None
    else if (status.equalsIgnoreCase("green"))
      Some(OpenAccessRoute.green)
    else if (status.equalsIgnoreCase("bronze"))
      Some(OpenAccessRoute.bronze)
    else if (status.equalsIgnoreCase("hybrid"))
      Some(OpenAccessRoute.hybrid)
    else
      // Default kept from the original mapping: any remaining status is treated as gold.
      Some(OpenAccessRoute.gold)
  }
}
|
||||
|
||||
def get_color(is_oa:Boolean, location: OALocation, journal_is_oa:Boolean):Option[OpenAccessRoute] = {
|
||||
if (is_oa) {
|
||||
if (location.host_type.isDefined) {
|
||||
|
@ -65,7 +81,7 @@ object UnpayWallToOAF {
|
|||
|
||||
val oaLocation:OALocation = (json \ "best_oa_location").extractOrElse[OALocation](null)
|
||||
|
||||
val colour = get_color(is_oa, oaLocation, journal_is_oa)
|
||||
val colour = get_unpaywall_color((json \ "oa_status").extractOrElse[String](null))
|
||||
|
||||
pub.setCollectedfrom(List(createUnpayWallCollectedFrom()).asJava)
|
||||
pub.setDataInfo(generateDataInfo())
|
||||
|
|
|
@ -99,7 +99,7 @@
|
|||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
--conf spark.sql.shuffle.partitions=7680
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
|
@ -124,7 +124,7 @@
|
|||
--executor-memory=${sparkExecutorIntersectionMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
--conf spark.sql.shuffle.partitions=7680
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
|
|
|
@ -492,6 +492,124 @@ class CrossrefMappingTest {
|
|||
|
||||
}
|
||||
|
||||
/**
  * Converts the fixture `publication_license_vor.json` and checks that some instance carries
  * the "https://www.springer.com/vor" license, some instance is CLOSED and some instance has
  * no open access route.
  */
@Test
def testLicenseVorClosed() :Unit = {
  val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_vor.json")).mkString

  assertNotNull(json)
  assertFalse(json.isEmpty);

  val resultList: List[Oaf] = Crossref2Oaf.convert(json)
  assertTrue(resultList.nonEmpty)

  // First converted object that is a Result.
  val item : Result = resultList.collect { case r: Result => r }.head

  // Dump the converted record before asserting on it.
  mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
  println(mapper.writeValueAsString(item))

  val instances = item.getInstance().asScala
  assertTrue(instances.exists(_.getLicense.getValue.equals("https://www.springer.com/vor")))
  assertTrue(instances.exists(_.getAccessright.getClassid.equals("CLOSED")))
  assertTrue(instances.exists(_.getAccessright.getOpenAccessRoute == null))
}
|
||||
|
||||
/**
  * Converts the fixture `publication_license_open.json` and checks that some instance carries
  * the ACS AuthorChoice CC-BY license, some instance is OPEN and some instance has the hybrid
  * open access route.
  */
@Test
def testLicenseOpen() :Unit = {
  val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_open.json")).mkString

  assertNotNull(json)
  assertFalse(json.isEmpty);

  val resultList: List[Oaf] = Crossref2Oaf.convert(json)
  assertTrue(resultList.nonEmpty)

  // First converted object that is a Result.
  val item : Result = resultList.collect { case r: Result => r }.head

  val instances = item.getInstance().asScala
  assertTrue(instances.exists(_.getLicense.getValue.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html")))
  assertTrue(instances.exists(_.getAccessright.getClassid.equals("OPEN")))
  assertTrue(instances.exists(_.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid))

  // Dump the converted record once the assertions have passed.
  mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
  println(mapper.writeValueAsString(item))
}
|
||||
|
||||
/**
  * Converts the fixture `publication_license_embargo_open.json` and checks that some instance
  * carries the OUP standard publication model license, some instance is OPEN and some instance
  * has the hybrid open access route.
  */
@Test
def testLicenseEmbargoOpen() :Unit = {
  val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_embargo_open.json")).mkString

  assertNotNull(json)
  assertFalse(json.isEmpty);

  val resultList: List[Oaf] = Crossref2Oaf.convert(json)
  assertTrue(resultList.nonEmpty)

  // First converted object that is a Result.
  val item : Result = resultList.collect { case r: Result => r }.head

  val instances = item.getInstance().asScala
  assertTrue(instances.exists(_.getLicense.getValue.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model")))
  assertTrue(instances.exists(_.getAccessright.getClassid.equals("OPEN")))
  assertTrue(instances.exists(_.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid))

  // Dump the converted record once the assertions have passed.
  mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
  println(mapper.writeValueAsString(item))
}
|
||||
|
||||
/**
  * Converts the fixture `publication_license_embargo.json` and checks that some instance
  * carries the OUP standard publication model license, some instance is EMBARGO and some
  * instance has no open access route.
  */
@Test
def testLicenseEmbargo() :Unit = {
  val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_embargo.json")).mkString

  assertNotNull(json)
  assertFalse(json.isEmpty);

  val resultList: List[Oaf] = Crossref2Oaf.convert(json)
  assertTrue(resultList.nonEmpty)

  // First converted object that is a Result.
  val item : Result = resultList.collect { case r: Result => r }.head

  val instances = item.getInstance().asScala
  assertTrue(instances.exists(_.getLicense.getValue.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model")))
  assertTrue(instances.exists(_.getAccessright.getClassid.equals("EMBARGO")))
  assertTrue(instances.exists(_.getAccessright.getOpenAccessRoute == null))

  // Dump the converted record once the assertions have passed.
  mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
  println(mapper.writeValueAsString(item))
}
|
||||
|
||||
|
||||
/**
  * Converts the fixture `publication_license_embargo_datetime.json` and checks that some
  * instance carries the OUP standard publication model license, some instance is EMBARGO and
  * some instance has no open access route.
  */
@Test
def testLicenseEmbargoDateTime() :Unit = {
  val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_embargo_datetime.json")).mkString

  assertNotNull(json)
  assertFalse(json.isEmpty);

  val resultList: List[Oaf] = Crossref2Oaf.convert(json)
  assertTrue(resultList.nonEmpty)

  // First converted object that is a Result.
  val item : Result = resultList.collect { case r: Result => r }.head

  val instances = item.getInstance().asScala
  assertTrue(instances.exists(_.getLicense.getValue.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model")))
  assertTrue(instances.exists(_.getAccessright.getClassid.equals("EMBARGO")))
  assertTrue(instances.exists(_.getAccessright.getOpenAccessRoute == null))

  // Dump the converted record once the assertions have passed.
  mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
  println(mapper.writeValueAsString(item))
}
|
||||
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue