forked from D-Net/dnet-hadoop
improvement DOIBoost
This commit is contained in:
parent
b87b3ddb6b
commit
13815d5d13
|
@ -122,8 +122,8 @@ object DoiBoostMappingUtil {
|
|||
hb.setValue(item.officialName)
|
||||
hb.setKey(generateDSId(item.id))
|
||||
if (item.openAccess)
|
||||
i.setAccessright(createQualifier("Open", "dnet:access_modes"))
|
||||
publication.setBestaccessright(createQualifier("Open", "dnet:access_modes"))
|
||||
i.setAccessright(createQualifier("OPEN", "dnet:access_modes"))
|
||||
publication.setBestaccessright(createQualifier("OPEN", "dnet:access_modes"))
|
||||
}
|
||||
else {
|
||||
hb.setValue("Unknown Repository")
|
||||
|
@ -134,8 +134,8 @@ object DoiBoostMappingUtil {
|
|||
|
||||
val ar = publication.getInstance().asScala.filter(i => i.getInstancetype != null && i.getAccessright!= null && i.getAccessright.getClassid!= null).map(f=> f.getAccessright.getClassid)
|
||||
if (ar.nonEmpty) {
|
||||
if(ar.contains("Open")){
|
||||
publication.setBestaccessright(createQualifier("Open", "dnet:access_modes"))
|
||||
if(ar.contains("OPEN")){
|
||||
publication.setBestaccessright(createQualifier("OPEN", "dnet:access_modes"))
|
||||
}
|
||||
else {
|
||||
publication.setBestaccessright(createQualifier(ar.head, "dnet:access_modes"))
|
||||
|
|
|
@ -168,9 +168,10 @@ case object Crossref2Oaf {
|
|||
instance.setRefereed(asField("peerReviewed"))
|
||||
|
||||
|
||||
instance.setAccessright(createQualifier("Restricted", "dnet:access_modes"))
|
||||
instance.setAccessright(createQualifier("RESTRICTED", "dnet:access_modes"))
|
||||
result.setInstance(List(instance).asJava)
|
||||
instance.setInstancetype(createQualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), "dnet:publication_resource", "dnet:publication_resource"))
|
||||
result.setResourcetype(createQualifier(cobjCategory.substring(0, 4),"dnet:dataCite_resource"))
|
||||
|
||||
instance.setCollectedfrom(createCrossrefCollectedFrom())
|
||||
if (StringUtils.isNotBlank(issuedDate)) {
|
||||
|
@ -199,7 +200,6 @@ case object Crossref2Oaf {
|
|||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
lazy val json: json4s.JValue = parse(input)
|
||||
|
||||
|
||||
var resultList: List[Oaf] = List()
|
||||
|
||||
|
||||
|
@ -344,7 +344,7 @@ case object Crossref2Oaf {
|
|||
}
|
||||
|
||||
def convertDataset(dataset: Dataset): Unit = {
|
||||
//TODO probably we need to add relation and other stuff here
|
||||
// TODO check whether there is other info to map into the Dataset
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@ object UnpayWallToOAF {
|
|||
val i :Instance= new Instance()
|
||||
|
||||
i.setCollectedfrom(createUnpayWallCollectedFrom())
|
||||
i.setAccessright(createQualifier("Open", "dnet:access_modes"))
|
||||
i.setAccessright(createQualifier("OPEN", "dnet:access_modes"))
|
||||
i.setUrl(List(oaLocation.url.get).asJava)
|
||||
|
||||
if (oaLocation.license.isDefined)
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1,4 +1,4 @@
|
|||
<workflow-app name="import MAG into HDFS" xmlns="uri:oozie:workflow:0.5">
|
||||
<workflow-app name="Create DOIBoostActionSet" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>crossrefPublicationPath</name>
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
</property>
|
||||
</parameters>
|
||||
|
||||
<start to="PreprocessMag"/>
|
||||
<start to="ResetWorkingPath"/>
|
||||
|
||||
|
||||
<kill name="Kill">
|
||||
|
|
|
@ -43,7 +43,7 @@
|
|||
--conf spark.sql.shuffle.partitions=3840
|
||||
${sparkExtraOPT}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/json.gz</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/uw_extracted</arg>
|
||||
<arg>--targetPath</arg><arg>${targetPath}</arg>
|
||||
<arg>--master</arg><arg>yarn-cluster</arg>
|
||||
</spark>
|
||||
|
|
Loading…
Reference in New Issue