orcid-no-doi #123

Merged
claudio.atzori merged 13 commits from enrico.ottonello/dnet-hadoop:orcid-no-doi into master 2021-07-15 17:53:59 +02:00
2 changed files with 5 additions and 5 deletions
Showing only changes of commit 66604bb2b4 - Show all commits

View File

@@ -188,7 +188,7 @@ public class SparkGenEnrichedOrcidWorks {
OBJECT_MAPPER.writeValueAsString(new AtomicAction<>(Publication.class, p))))
.mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2())))
.saveAsNewAPIHadoopFile(
-workingPath.concat(outputEnrichedWorksPath),
+outputEnrichedWorksPath,
Text.class,
Text.class,
SequenceFileOutputFormat.class,

View File

@@ -11,7 +11,7 @@
<description>path where to store the action set</description>
</property>
<property>
-<name>processOutputFolder</name>
+<name>processOutputPath</name>
<value>process_no_doi_dataset_prod</value>
<description>temporary path where to store the action set</description>
</property>
@@ -71,7 +71,7 @@
<action name="ResetWorkingPath">
<fs>
-<delete path='${workingPath}/${processOutputFolder}'/>
+<delete path='${processOutputPath}'/>
</fs>
<ok to="GenOrcidNoDoiDataset"/>
<error to="Kill"/>
@@ -97,7 +97,7 @@
<arg>--workingPath</arg><arg>${workingPath}/</arg>
<arg>--hdfsServerUri</arg><arg>${nameNode}</arg>
<arg>--orcidDataFolder</arg><arg>last_orcid_dataset</arg>
-<arg>--outputEnrichedWorksPath</arg><arg>${processOutputFolder}</arg>
+<arg>--outputEnrichedWorksPath</arg><arg>${processOutputPath}</arg>
</spark>
<ok to="importOrcidNoDoi"/>
<error to="Kill"/>
@@ -105,7 +105,7 @@
<action name="importOrcidNoDoi">
<distcp xmlns="uri:oozie:distcp-action:0.2">
-<arg>${workingPath}/${processOutputFolder}/*</arg>
+<arg>${processOutputPath}/*</arg>
<arg>${outputPath}</arg>
</distcp>
<ok to="End"/>