enrichment steps #38

Merged
claudio.atzori merged 334 commits from miriam.baglioni/dnet-hadoop:master into enrichment_wfs 2020-08-11 16:40:26 +02:00
8 changed files with 12 additions and 12 deletions
Showing only changes of commit 13815d5d13 - Show all commits

View File

@ -122,8 +122,8 @@ object DoiBoostMappingUtil {
hb.setValue(item.officialName)
hb.setKey(generateDSId(item.id))
if (item.openAccess)
i.setAccessright(createQualifier("Open", "dnet:access_modes"))
publication.setBestaccessright(createQualifier("Open", "dnet:access_modes"))
i.setAccessright(createQualifier("OPEN", "dnet:access_modes"))
publication.setBestaccessright(createQualifier("OPEN", "dnet:access_modes"))
}
else {
hb.setValue("Unknown Repository")
@ -134,8 +134,8 @@ object DoiBoostMappingUtil {
val ar = publication.getInstance().asScala.filter(i => i.getInstancetype != null && i.getAccessright!= null && i.getAccessright.getClassid!= null).map(f=> f.getAccessright.getClassid)
if (ar.nonEmpty) {
if(ar.contains("Open")){
publication.setBestaccessright(createQualifier("Open", "dnet:access_modes"))
if(ar.contains("OPEN")){
publication.setBestaccessright(createQualifier("OPEN", "dnet:access_modes"))
}
else {
publication.setBestaccessright(createQualifier(ar.head, "dnet:access_modes"))

View File

@ -168,9 +168,10 @@ case object Crossref2Oaf {
instance.setRefereed(asField("peerReviewed"))
instance.setAccessright(createQualifier("Restricted", "dnet:access_modes"))
instance.setAccessright(createQualifier("RESTRICTED", "dnet:access_modes"))
result.setInstance(List(instance).asJava)
instance.setInstancetype(createQualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), "dnet:publication_resource", "dnet:publication_resource"))
result.setResourcetype(createQualifier(cobjCategory.substring(0, 4),"dnet:dataCite_resource"))
instance.setCollectedfrom(createCrossrefCollectedFrom())
if (StringUtils.isNotBlank(issuedDate)) {
@ -199,7 +200,6 @@ case object Crossref2Oaf {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = parse(input)
var resultList: List[Oaf] = List()
@ -344,7 +344,7 @@ case object Crossref2Oaf {
}
/** Hook for Dataset-specific mapping of a converted record.
  *
  * As visible here, the body contains only TODO notes and no statements, so the
  * call leaves `dataset` untouched and returns Unit.
  *
  * @param dataset the Dataset instance that would receive any additional mapped fields
  */
def convertDataset(dataset: Dataset): Unit = {
// TODO: we probably need to add relations and other related data here
// TODO: check whether there is other info to map into the Dataset
}

View File

@ -46,7 +46,7 @@ object UnpayWallToOAF {
val i :Instance= new Instance()
i.setCollectedfrom(createUnpayWallCollectedFrom())
i.setAccessright(createQualifier("Open", "dnet:access_modes"))
i.setAccessright(createQualifier("OPEN", "dnet:access_modes"))
i.setUrl(List(oaLocation.url.get).asJava)
if (oaLocation.license.isDefined)

File diff suppressed because one or more lines are too long

View File

@ -1,4 +1,4 @@
<workflow-app name="import MAG into HDFS" xmlns="uri:oozie:workflow:0.5">
<workflow-app name="Create DOIBoostActionSet" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>crossrefPublicationPath</name>

View File

@ -22,7 +22,7 @@
</property>
</parameters>
<start to="PreprocessMag"/>
<start to="ResetWorkingPath"/>
<kill name="Kill">

View File

@ -43,7 +43,7 @@
--conf spark.sql.shuffle.partitions=3840
${sparkExtraOPT}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/json.gz</arg>
<arg>--sourcePath</arg><arg>${sourcePath}/uw_extracted</arg>
<arg>--targetPath</arg><arg>${targetPath}</arg>
<arg>--master</arg><arg>yarn-cluster</arg>
</spark>

View File

@ -193,6 +193,7 @@
<groupId>net.sf.saxon</groupId>
<artifactId>Saxon-HE</artifactId>
<version>9.9.1-6</version>
<scope>provided</scope>
</dependency>
<dependency>