forked from D-Net/dnet-hadoop
improvement DOIBoost
This commit is contained in:
parent b87b3ddb6b
commit 13815d5d13
@@ -122,8 +122,8 @@ object DoiBoostMappingUtil {
       hb.setValue(item.officialName)
       hb.setKey(generateDSId(item.id))
       if (item.openAccess)
-        i.setAccessright(createQualifier("Open", "dnet:access_modes"))
-        publication.setBestaccessright(createQualifier("Open", "dnet:access_modes"))
+        i.setAccessright(createQualifier("OPEN", "dnet:access_modes"))
+        publication.setBestaccessright(createQualifier("OPEN", "dnet:access_modes"))
       }
       else {
         hb.setValue("Unknown Repository")
@@ -134,8 +134,8 @@ object DoiBoostMappingUtil {
 
     val ar = publication.getInstance().asScala.filter(i => i.getInstancetype != null && i.getAccessright!= null && i.getAccessright.getClassid!= null).map(f=> f.getAccessright.getClassid)
     if (ar.nonEmpty) {
-      if(ar.contains("Open")){
-        publication.setBestaccessright(createQualifier("Open", "dnet:access_modes"))
+      if(ar.contains("OPEN")){
+        publication.setBestaccessright(createQualifier("OPEN", "dnet:access_modes"))
       }
       else {
         publication.setBestaccessright(createQualifier(ar.head, "dnet:access_modes"))
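The two hunks above switch the access-right class id from "Open" to the uppercase "OPEN" term of the dnet:access_modes scheme. Below is a minimal, self-contained Scala sketch of the same best-access-right selection; Qualifier and createQualifier here are illustrative stand-ins, not the actual dnet-hadoop model classes.

object AccessRightSketch {
  // Stand-in types for illustration only (not the real dnet-hadoop API).
  case class Qualifier(classid: String, schemeid: String)

  def createQualifier(classid: String, schemeid: String): Qualifier =
    Qualifier(classid, schemeid)

  // Mirrors the patched logic: prefer OPEN when any instance carries it,
  // otherwise fall back to the first available access-right class id.
  def bestAccessRight(classIds: List[String]): Option[Qualifier] =
    if (classIds.isEmpty) None
    else if (classIds.contains("OPEN")) Some(createQualifier("OPEN", "dnet:access_modes"))
    else Some(createQualifier(classIds.head, "dnet:access_modes"))
}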
@@ -168,9 +168,10 @@ case object Crossref2Oaf {
       instance.setRefereed(asField("peerReviewed"))
 
 
-      instance.setAccessright(createQualifier("Restricted", "dnet:access_modes"))
+      instance.setAccessright(createQualifier("RESTRICTED", "dnet:access_modes"))
       result.setInstance(List(instance).asJava)
       instance.setInstancetype(createQualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), "dnet:publication_resource", "dnet:publication_resource"))
+      result.setResourcetype(createQualifier(cobjCategory.substring(0, 4),"dnet:dataCite_resource"))
 
       instance.setCollectedfrom(createCrossrefCollectedFrom())
       if (StringUtils.isNotBlank(issuedDate)) {
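Both the instance type and the newly added resource type above are derived from cobjCategory, which is expected in a "NNNN label" form. The sketch below shows how the substring split works with an illustrative value; the example string is an assumption, not taken from this diff.

object CobjCategorySplit extends App {
  val cobjCategory = "0001 Article"            // illustrative "NNNN label" value
  val classid   = cobjCategory.substring(0, 4) // "0001"    -> qualifier class id
  val classname = cobjCategory.substring(5)    // "Article" -> qualifier class name
  println(s"$classid -> $classname")
}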
@@ -199,7 +200,6 @@ case object Crossref2Oaf {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
     lazy val json: json4s.JValue = parse(input)
 
 
     var resultList: List[Oaf] = List()
-
 
@@ -344,7 +344,7 @@ case object Crossref2Oaf {
   }
 
   def convertDataset(dataset: Dataset): Unit = {
-    //TODO probably we need to add relation and other stuff here
+    // TODO check if there are other info to map into the Dataset
   }
 
 
@@ -46,7 +46,7 @@ object UnpayWallToOAF {
     val i :Instance= new Instance()
 
     i.setCollectedfrom(createUnpayWallCollectedFrom())
-    i.setAccessright(createQualifier("Open", "dnet:access_modes"))
+    i.setAccessright(createQualifier("OPEN", "dnet:access_modes"))
     i.setUrl(List(oaLocation.url.get).asJava)
 
     if (oaLocation.license.isDefined)
File diff suppressed because one or more lines are too long
@@ -1,4 +1,4 @@
-<workflow-app name="import MAG into HDFS" xmlns="uri:oozie:workflow:0.5">
+<workflow-app name="Create DOIBoostActionSet" xmlns="uri:oozie:workflow:0.5">
     <parameters>
         <property>
             <name>crossrefPublicationPath</name>
@@ -22,7 +22,7 @@
         </property>
     </parameters>
 
-    <start to="PreprocessMag"/>
+    <start to="ResetWorkingPath"/>
 
 
     <kill name="Kill">
@@ -43,7 +43,7 @@
             --conf spark.sql.shuffle.partitions=3840
             ${sparkExtraOPT}
             </spark-opts>
-            <arg>--sourcePath</arg><arg>${sourcePath}/json.gz</arg>
+            <arg>--sourcePath</arg><arg>${sourcePath}/uw_extracted</arg>
             <arg>--targetPath</arg><arg>${targetPath}</arg>
             <arg>--master</arg><arg>yarn-cluster</arg>
         </spark>
pom.xml (1 change)
@@ -193,6 +193,7 @@
             <groupId>net.sf.saxon</groupId>
             <artifactId>Saxon-HE</artifactId>
             <version>9.9.1-6</version>
+            <scope>provided</scope>
         </dependency>
 
         <dependency>