forked from D-Net/dnet-hadoop
This commit is contained in:
parent
1566fd590e
commit
3da12be81f
|
@ -35,11 +35,13 @@ public class Mapper implements Serializable {
|
|||
externals.add(kv);
|
||||
}
|
||||
out.setUrl(Constants.PUBLICATION_URL + input.getId().substring(3));
|
||||
externals.add(KeyValue.newInstance("result type", "publication"));
|
||||
break;
|
||||
case "dataset":
|
||||
eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input;
|
||||
Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue()));
|
||||
out.setUrl(Constants.DATASET_URL + input.getId().substring(3));
|
||||
externals.add(KeyValue.newInstance("result type", "dataset"));
|
||||
break;
|
||||
case "software":
|
||||
|
||||
|
@ -59,10 +61,12 @@ public class Mapper implements Serializable {
|
|||
.ifPresent(
|
||||
value -> externals.add(KeyValue.newInstance("programming language", value.getClassname())));
|
||||
out.setUrl(Constants.SOFTWARE_URL + input.getId().substring(3));
|
||||
externals.add(KeyValue.newInstance("result type", "software"));
|
||||
break;
|
||||
|
||||
case "other":
|
||||
out.setUrl(Constants.ORP_URL + input.getId().substring(3));
|
||||
externals.add(KeyValue.newInstance("result type", "other"));
|
||||
break;
|
||||
|
||||
}
|
||||
|
@ -100,13 +104,10 @@ public class Mapper implements Serializable {
|
|||
final List<String> descriptionList = new ArrayList<>();
|
||||
Optional
|
||||
.ofNullable(input.getDescription())
|
||||
.ifPresent(value -> {
|
||||
Iterator<Field<String>> it = value.iterator();
|
||||
if (it.hasNext()) {
|
||||
out.setName(it.next().getValue());
|
||||
}
|
||||
it.forEachRemaining(v -> externals.add(KeyValue.newInstance("description", v.getValue())));
|
||||
});
|
||||
.ifPresent(value ->
|
||||
|
||||
getDescription(out, externals, value)
|
||||
);
|
||||
|
||||
Optional
|
||||
.ofNullable(input.getEmbargoenddate())
|
||||
|
@ -133,7 +134,6 @@ public class Mapper implements Serializable {
|
|||
.ofNullable(v.getUrl())
|
||||
.ifPresent(u -> u.forEach(url -> urlSet.add(url)));
|
||||
|
||||
|
||||
}));
|
||||
|
||||
Optional
|
||||
|
@ -180,7 +180,6 @@ public class Mapper implements Serializable {
|
|||
.add(
|
||||
KeyValue
|
||||
.newInstance("subject", s.getQualifier().getClassid() + ":" + s.getValue()))));
|
||||
externals.add(KeyValue.newInstance("resource type", input.getResourcetype().getClassid()));
|
||||
|
||||
cfSet.forEach(cf -> externals.add(KeyValue.newInstance("collected from", cf)));
|
||||
hbSet.forEach(hb -> externals.add(KeyValue.newInstance("hosted by", hb)));
|
||||
|
@ -193,31 +192,13 @@ public class Mapper implements Serializable {
|
|||
return out;
|
||||
}
|
||||
|
||||
private static eu.dnetlib.dhp.schema.dump.oaf.Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) {
|
||||
eu.dnetlib.dhp.schema.dump.oaf.Author a = new eu.dnetlib.dhp.schema.dump.oaf.Author();
|
||||
Optional
|
||||
.ofNullable(oa.getAffiliation())
|
||||
.ifPresent(
|
||||
value -> a
|
||||
.setAffiliation(
|
||||
value
|
||||
.stream()
|
||||
.map(aff -> aff.getValue())
|
||||
.collect(Collectors.toList())));
|
||||
a.setFullname(oa.getFullname());
|
||||
a.setName(oa.getName());
|
||||
a.setSurname(oa.getSurname());
|
||||
a.setRank(oa.getRank());
|
||||
Optional
|
||||
.ofNullable(oa.getPid())
|
||||
.ifPresent(
|
||||
value -> a
|
||||
.setPid(
|
||||
value
|
||||
.stream()
|
||||
.map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
||||
.collect(Collectors.toList())));
|
||||
return a;
|
||||
/**
 * Distributes a list of description fields between the catalogue entry and its extras.
 * The value of the first field, when there is one, is stored through {@code out.setNotes};
 * every remaining field is appended to {@code externals} as a "description" key/value pair.
 * An empty list leaves both {@code out} and {@code externals} unchanged.
 *
 * @param out catalogue entry that receives the first description as its notes
 * @param externals list that collects the remaining descriptions
 * @param value description fields to distribute; iterated without a null check
 */
private static void getDescription(CatalogueEntry out, List<KeyValue> externals, List<Field<String>> value) {
|
||||
Iterator<Field<String>> it = value.iterator();
|
||||
if (it.hasNext()) {
|
||||
out.setNotes(it.next().getValue());
|
||||
}
|
||||
// The same iterator is reused, so only the fields after the first one are added here.
it.forEachRemaining(v -> externals.add(KeyValue.newInstance("description", v.getValue())));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -122,7 +122,7 @@
|
|||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/publication</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
||||
<arg>--communityName</arg><arg>${communityName}</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
|
@ -148,7 +148,7 @@
|
|||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
||||
<arg>--communityName</arg><arg>${communityName}</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
|
@ -174,7 +174,7 @@
|
|||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
||||
<arg>--communityName</arg><arg>${communityName}</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
|
@ -200,7 +200,7 @@
|
|||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/software</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
||||
<arg>--communityName</arg><arg>${communityName}</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
|
|
|
@ -69,17 +69,17 @@ public class DumpJobTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testDataset() throws Exception {
|
||||
public void testSoftware() throws Exception {
|
||||
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset.json")
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/software.json")
|
||||
.getPath();
|
||||
|
||||
SparkDumpRISISCatalogue.main(new String[] {
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-outputPath", workingDir.toString() + "/result",
|
||||
"-sourcePath", sourcePath,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
|
||||
"-communityName", "risis"
|
||||
});
|
||||
|
||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue