From d35ffd201de3ff0ea302589522f16df6a6d38768 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Sat, 1 Jul 2023 12:32:48 +0200 Subject: [PATCH] refactoring --- .../eu/dnetlib/dhp/oa/graph/dump/MakeTar.java | 2 +- .../dhp/oa/graph/dump/ResultMapper.java | 604 ++++++++++-------- .../dhp/oa/graph/dump/SendToZenodoHDFS.java | 2 +- .../oa/graph/dump/complete/MergedRels.java | 25 - .../oa/graph/dump/eosc/SaveCommunityMap.java | 2 +- .../eosc/SparkExtendResultWithRelation.java | 2 +- .../dump/eosc/SparkPrepareResultProject.java | 2 +- .../graph/dump/eosc/SparkSelectRelation.java | 2 +- .../dump/eosc/SparkUpdateProjectInfo.java | 2 +- ...arameters.json => eosc_cm_parameters.json} | 0 ..._input_extendwithrelation_parameters.json} | 0 ...son => eosc_input_maketar_parameters.json} | 0 ...> eosc_input_relationdump_parameters.json} | 0 ...son => eosc_project_input_parameters.json} | 0 ...json => eosc_project_prep_parameters.json} | 0 ...ad_zenodo.json => eosc_upload_zenodo.json} | 0 .../graph/dump/funder_result_parameters.json | 26 - .../oa/graph/dump/input_collect_and_save.json | 30 - .../graph/dump/input_complete_parameters.json | 30 - .../oa/graph/dump/input_entity_parameter.json | 24 - .../dump/input_organization_parameters.json | 36 -- .../dump/input_parameter_select_relation.json | 20 - .../dhp/oa/graph/dump/input_parameters.json | 47 -- .../graph/dump/input_parameters_link_prj.json | 41 -- .../{eoscdump => }/oozie_app/workflow.xml | 0 .../graph/dump/project_subset_parameters.json | 27 - .../oozie_app/config-default.xml | 30 - .../dump/projectsubset/oozie_app/workflow.xml | 171 ----- .../dhp/oa/graph/dump/split_parameters.json | 37 -- .../dump/wf/main/oozie_app/config-default.xml | 30 - .../graph/dump/wf/main/oozie_app/import.txt | 4 - .../graph/dump/wf/main/oozie_app/workflow.xml | 306 --------- .../community/oozie_app/config-default.xml | 30 - .../community/oozie_app/workflow.xml | 362 ----------- .../complete/oozie_app/config-default.xml | 30 - .../complete/oozie_app/workflow.xml | 539 ---------------- .../funder/oozie_app/config-default.xml | 30 - .../funder/oozie_app/workflow.xml | 255 -------- 38 files changed, 338 insertions(+), 2410 deletions(-) delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/MergedRels.java rename dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{input_cm_parameters.json => eosc_cm_parameters.json} (100%) rename dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{input_extendwithrelation_parameters.json => eosc_input_extendwithrelation_parameters.json} (100%) rename dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{input_maketar_parameters.json => eosc_input_maketar_parameters.json} (100%) rename dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{input_relationdump_parameters.json => eosc_input_relationdump_parameters.json} (100%) rename dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{project_input_parameters.json => eosc_project_input_parameters.json} (100%) rename dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{project_prep_parameters.json => eosc_project_prep_parameters.json} (100%) rename dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{upload_zenodo.json => eosc_upload_zenodo.json} (100%) delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_complete_parameters.json delete mode 
100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameter_select_relation.json delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json rename dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{eoscdump => }/oozie_app/workflow.xml (100%) delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_subset_parameters.json delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/config-default.xml delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/workflow.xml delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/config-default.xml delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/import.txt delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/config-default.xml delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/config-default.xml delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/config-default.xml delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java index cb2e29b..2ca3a61 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java @@ -26,7 +26,7 @@ public class MakeTar implements Serializable { .toString( MakeTar.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_maketar_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/eosc_input_maketar_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 0178fa8..a4de2a0 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -36,280 +36,30 @@ public class ResultMapper implements Serializable { Optional ort = Optional.ofNullable(input.getResulttype()); if (ort.isPresent()) { try { - - addTypeSpecificInformation(out, input, ort); - - Optional - .ofNullable(input.getAuthor()) - .ifPresent( - ats -> out.setAuthor(ats.stream().map(ResultMapper::getAuthor).collect(Collectors.toList()))); - - // I do not map Access Right UNKNOWN or OTHER - - Optional oar = Optional.ofNullable(input.getBestaccessright()); - 
if (oar.isPresent() && Constants.ACCESS_RIGHTS_COAR_MAP.containsKey(oar.get().getClassid())) { - String code = Constants.ACCESS_RIGHTS_COAR_MAP.get(oar.get().getClassid()); - out - .setBestaccessright( - - BestAccessRight - .newInstance( - code, - Constants.COAR_CODE_LABEL_MAP.get(code), - Constants.COAR_ACCESS_RIGHT_SCHEMA)); - } - - final List contributorList = new ArrayList<>(); - Optional - .ofNullable(input.getContributor()) - .ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue()))); - out.setContributor(contributorList); - - Optional - .ofNullable(input.getCountry()) - .ifPresent( - value -> out - .setCountry( - value - .stream() - .map( - c -> { - if (c.getClassid().equals((ModelConstants.UNKNOWN))) { - return null; - } - ResultCountry country = new ResultCountry(); - country.setCode(c.getClassid()); - country.setLabel(c.getClassname()); - Optional - .ofNullable(c.getDataInfo()) - .ifPresent( - provenance -> country - .setProvenance( - Provenance - .newInstance( - provenance - .getProvenanceaction() - .getClassname(), - c.getDataInfo().getTrust()))); - return country; - }) - .filter(Objects::nonNull) - .collect(Collectors.toList()))); - - final List coverageList = new ArrayList<>(); - Optional - .ofNullable(input.getCoverage()) - .ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue()))); - out.setCoverage(coverageList); - + addTypeSpecificInformation(out, input, ort.get()); + mapAuthor(out, input); + mapAccessRight(out, input); + mapContributor(out, input); + mapCountry(out, input); + mapCoverage(out, input); out.setDateofcollection(input.getDateofcollection()); - - final List descriptionList = new ArrayList<>(); - Optional - .ofNullable(input.getDescription()) - .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue()))); - out.setDescription(descriptionList); - - if (Optional.ofNullable(input.getEmbargoenddate()).isPresent()) { - out.setEmbargoenddate(input.getEmbargoenddate().getValue()); - } - - if (Optional.ofNullable(input.getMeasures()).isPresent()) { - Indicator i = new Indicator(); - UsageCounts uc = new UsageCounts(); - input.getMeasures().forEach(m -> { - if (m.getId().equals("downloads")) { - uc.setDownloads(m.getUnit().get(0).getValue()); - } - if (m.getId().equals("views")) { - uc.setViews(m.getUnit().get(0).getValue()); - } - }); - if (!uc.isEmpty()) { - i.setUsageCounts(uc); - out.setIndicator(i); - } - } - final List formatList = new ArrayList<>(); - Optional - .ofNullable(input.getFormat()) - .ifPresent(value -> value.forEach(f -> formatList.add(f.getValue()))); - out.setFormat(formatList); + mapDescription(out, input); + mapEmbargo(out, input); + mapMeasure(out, input); + mapFormat(out, input); out.setId(input.getId()); - out.setOriginalId(new ArrayList<>()); - Optional - .ofNullable(input.getOriginalId()) - .ifPresent( - v -> out - .setOriginalId( - input - .getOriginalId() - .stream() - .filter(s -> !s.startsWith("50|")) - .collect(Collectors.toList()))); - - if (Optional - .ofNullable(input.getInstance()) - .isPresent()) { - out - .setInstance( - input - .getInstance() - .stream() - .map(i -> getCommunityInstance(i)) - .collect(Collectors.toList())); - } - - if (Optional.ofNullable(input.getLanguage()).isPresent()) { - out - .setLanguage( - Language.newInstance(input.getLanguage().getClassid(), input.getLanguage().getClassname())); - } - - if (Optional.ofNullable(input.getLastupdatetimestamp()).isPresent()) { - out.setLastupdatetimestamp(input.getLastupdatetimestamp()); - } - - if 
(Optional.ofNullable(input.getTitle()).isPresent()) { - List iTitle = input - .getTitle() - .stream() - .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) - .collect(Collectors.toList()); - if (!iTitle.isEmpty()) { - out.setMaintitle(iTitle.get(0).getValue()); - } - - iTitle = input - .getTitle() - .stream() - .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) - .collect(Collectors.toList()); - if (!iTitle.isEmpty()) { - out.setSubtitle(iTitle.get(0).getValue()); - } - - } - - Optional - .ofNullable(input.getPid()) - .ifPresent( - value -> out - .setPid( - value - .stream() - .map( - p -> ResultPid - .newInstance(p.getQualifier().getClassid(), p.getValue())) - .collect(Collectors.toList()))); - - if (Optional.ofNullable(input.getDateofacceptance()).isPresent()) { - out.setPublicationdate(input.getDateofacceptance().getValue()); - } - - if (Optional.ofNullable(input.getPublisher()).isPresent()) { - out.setPublisher(input.getPublisher().getValue()); - } - - Optional - .ofNullable(input.getSource()) - .ifPresent( - value -> out.setSource(value.stream().map(Field::getValue).collect(Collectors.toList()))); - - if (Optional.ofNullable(input.getSubject()).isPresent()) { - out.setSubject(createSubjectMap(input)); - out - .setKeywords( - input - .getSubject() - .stream() - .filter( - s -> s.getQualifier().getClassid().equalsIgnoreCase("keyword") && - !s.getValue().equalsIgnoreCase("EOSC::RO-crate")) - .map(s -> s.getValue()) - .collect(Collectors.toList())); - - if (Optional.ofNullable(input.getEoscifguidelines()).isPresent()) { - out - .setEoscIF( - input - .getEoscifguidelines() - .stream() - .map( - eig -> EoscInteroperabilityFramework - .newInstance( - eig.getCode(), eig.getLabel(), eig.getUrl(), - eig.getSemanticRelation())) - .collect(Collectors.toList())); - } - - } - + mapOriginalId(out, input); + mapInstance(out, input); + mapLanguage(out, input); + mapLastUpdateTimestamp(out, input); + mapTitle(out, input); + mapPid(out, input); + mapAcceptanceDate(out, input); + mapPublisher(out, input); + mapSource(out, input); + mapSubject(out, input); out.setType(input.getResulttype().getClassid()); - - Set communities = communityMap.keySet(); - List contextList = Optional - .ofNullable( - input - .getContext()) - .map( - value -> value - .stream() - .map(c -> { - String communityId = c.getId(); - if (communityId.contains("::")) { - communityId = communityId.substring(0, communityId.indexOf("::")); - } - if (communities.contains(communityId)) { - Context context = new Context(); - context.setCode(communityId); - context.setLabel(communityMap.get(communityId)); - Optional> dataInfo = Optional.ofNullable(c.getDataInfo()); - if (dataInfo.isPresent()) { - List provenance = new ArrayList<>(); - provenance - .addAll( - dataInfo - .get() - .stream() - .map( - di -> Optional - .ofNullable(di.getProvenanceaction()) - .map( - provenanceaction -> Provenance - .newInstance( - provenanceaction.getClassname(), - di.getTrust())) - .orElse(null)) - .filter(Objects::nonNull) - .collect(Collectors.toSet())); - - try { - context.setProvenance(getUniqueProvenance(provenance)); - } catch (NoAvailableEntityTypeException e) { - e.printStackTrace(); - } - } - return context; - } - return null; - }) - .filter(Objects::nonNull) - .collect(Collectors.toList())) - .orElse(new ArrayList<>()); - - if (!contextList.isEmpty()) { - Set hashValue = new HashSet<>(); - List remainigContext = new ArrayList<>(); - contextList.forEach(c -> { - if (!hashValue.contains(c.hashCode())) { - 
remainigContext.add(c); - hashValue.add(c.hashCode()); - } - }); - out.setContext(remainigContext); - } - + mapContext(communityMap, out, input); } catch (ClassCastException cce) { return null; } @@ -319,6 +69,314 @@ public class ResultMapper implements Serializable { } + private static void mapContext(Map communityMap, Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + Set communities = communityMap.keySet(); + List contextList = Optional + .ofNullable( + input + .getContext()) + .map( + value -> value + .stream() + .map(c -> { + String communityId = c.getId(); + if (communityId.contains("::")) { + communityId = communityId.substring(0, communityId.indexOf("::")); + } + if (communities.contains(communityId)) { + Context context = new Context(); + context.setCode(communityId); + context.setLabel(communityMap.get(communityId)); + Optional> dataInfo = Optional.ofNullable(c.getDataInfo()); + if (dataInfo.isPresent()) { + List provenance = new ArrayList<>(); + provenance + .addAll( + dataInfo + .get() + .stream() + .map( + di -> Optional + .ofNullable(di.getProvenanceaction()) + .map( + provenanceaction -> Provenance + .newInstance( + provenanceaction.getClassname(), + di.getTrust())) + .orElse(null)) + .filter(Objects::nonNull) + .collect(Collectors.toSet())); + + try { + context.setProvenance(getUniqueProvenance(provenance)); + } catch (NoAvailableEntityTypeException e) { + e.printStackTrace(); + } + } + return context; + } + return null; + }) + .filter(Objects::nonNull) + .collect(Collectors.toList())) + .orElse(new ArrayList<>()); + + if (!contextList.isEmpty()) { + Set hashValue = new HashSet<>(); + List remainingContext = new ArrayList<>(); + contextList.forEach(c -> { + if (!hashValue.contains(c.hashCode())) { + remainingContext.add(c); + hashValue.add(c.hashCode()); + } + }); + out.setContext(remainingContext); + } + } + + private static void mapSubject(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + if (Optional.ofNullable(input.getSubject()).isPresent()) { + out.setSubject(createSubjectMap(input)); + out + .setKeywords( + input + .getSubject() + .stream() + .filter( + s -> s.getQualifier().getClassid().equalsIgnoreCase("keyword") && + !s.getValue().equalsIgnoreCase("EOSC::RO-crate")) + .map(s -> s.getValue()) + .collect(Collectors.toList())); + + if (Optional.ofNullable(input.getEoscifguidelines()).isPresent()) { + out + .setEoscIF( + input + .getEoscifguidelines() + .stream() + .map( + eig -> EoscInteroperabilityFramework + .newInstance( + eig.getCode(), eig.getLabel(), eig.getUrl(), + eig.getSemanticRelation())) + .collect(Collectors.toList())); + } + + } + } + + private static void mapSource(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + Optional + .ofNullable(input.getSource()) + .ifPresent( + value -> out.setSource(value.stream().map(Field::getValue).collect(Collectors.toList()))); + } + + private static void mapPublisher(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + if (Optional.ofNullable(input.getPublisher()).isPresent()) { + out.setPublisher(input.getPublisher().getValue()); + } + } + + private static void mapAcceptanceDate(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + if (Optional.ofNullable(input.getDateofacceptance()).isPresent()) { + out.setPublicationdate(input.getDateofacceptance().getValue()); + } + } + + private static void mapPid(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + Optional + .ofNullable(input.getPid()) + .ifPresent( + value -> out + .setPid( + value + .stream() + .map( + p -> ResultPid + 
.newInstance(p.getQualifier().getClassid(), p.getValue())) + .collect(Collectors.toList()))); + } + + private static void mapTitle(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + if (Optional.ofNullable(input.getTitle()).isPresent()) { + List iTitle = input + .getTitle() + .stream() + .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) + .collect(Collectors.toList()); + if (!iTitle.isEmpty()) { + out.setMaintitle(iTitle.get(0).getValue()); + } + + iTitle = input + .getTitle() + .stream() + .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) + .collect(Collectors.toList()); + if (!iTitle.isEmpty()) { + out.setSubtitle(iTitle.get(0).getValue()); + } + + } + } + + private static void mapLastUpdateTimestamp(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + if (Optional.ofNullable(input.getLastupdatetimestamp()).isPresent()) { + out.setLastupdatetimestamp(input.getLastupdatetimestamp()); + } + } + + private static void mapLanguage(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + if (Optional.ofNullable(input.getLanguage()).isPresent()) { + out + .setLanguage( + Language.newInstance(input.getLanguage().getClassid(), input.getLanguage().getClassname())); + } + } + + private static void mapInstance(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + if (Optional + .ofNullable(input.getInstance()) + .isPresent()) { + out + .setInstance( + input + .getInstance() + .stream() + .map(i -> getCommunityInstance(i)) + .collect(Collectors.toList())); + } + } + + private static void mapOriginalId(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + out.setOriginalId(new ArrayList<>()); + Optional + .ofNullable(input.getOriginalId()) + .ifPresent( + v -> out + .setOriginalId( + input + .getOriginalId() + .stream() + .filter(s -> !s.startsWith("50|")) + .collect(Collectors.toList()))); + } + + private static void mapFormat(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + final List formatList = new ArrayList<>(); + Optional + .ofNullable(input.getFormat()) + .ifPresent(value -> value.forEach(f -> formatList.add(f.getValue()))); + out.setFormat(formatList); + } + + private static void mapMeasure(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + if (Optional.ofNullable(input.getMeasures()).isPresent()) { + Indicator i = new Indicator(); + UsageCounts uc = new UsageCounts(); + input.getMeasures().forEach(m -> { + if (m.getId().equals("downloads")) { + uc.setDownloads(m.getUnit().get(0).getValue()); + } + if (m.getId().equals("views")) { + uc.setViews(m.getUnit().get(0).getValue()); + } + }); + if (!uc.isEmpty()) { + i.setUsageCounts(uc); + out.setIndicator(i); + } + } + } + + private static void mapEmbargo(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + if (Optional.ofNullable(input.getEmbargoenddate()).isPresent()) { + out.setEmbargoenddate(input.getEmbargoenddate().getValue()); + } + } + + private static void mapDescription(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + final List descriptionList = new ArrayList<>(); + Optional + .ofNullable(input.getDescription()) + .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue()))); + out.setDescription(descriptionList); + } + + private static void mapCoverage(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + final List coverageList = new ArrayList<>(); + Optional + .ofNullable(input.getCoverage()) + .ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue()))); + out.setCoverage(coverageList); + } + + private static void 
mapCountry(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + Optional + .ofNullable(input.getCountry()) + .ifPresent( + value -> out + .setCountry( + value + .stream() + .map( + c -> { + if (c.getClassid().equals((ModelConstants.UNKNOWN))) { + return null; + } + ResultCountry country = new ResultCountry(); + country.setCode(c.getClassid()); + country.setLabel(c.getClassname()); + Optional + .ofNullable(c.getDataInfo()) + .ifPresent( + provenance -> country + .setProvenance( + Provenance + .newInstance( + provenance + .getProvenanceaction() + .getClassname(), + c.getDataInfo().getTrust()))); + return country; + }) + .filter(Objects::nonNull) + .collect(Collectors.toList()))); + } + + private static void mapContributor(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + final List contributorList = new ArrayList<>(); + Optional + .ofNullable(input.getContributor()) + .ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue()))); + out.setContributor(contributorList); + } + + private static void mapAccessRight(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + // I do not map Access Right UNKNOWN or OTHER + + Optional oar = Optional.ofNullable(input.getBestaccessright()); + if (oar.isPresent() && Constants.ACCESS_RIGHTS_COAR_MAP.containsKey(oar.get().getClassid())) { + String code = Constants.ACCESS_RIGHTS_COAR_MAP.get(oar.get().getClassid()); + out + .setBestaccessright( + + BestAccessRight + .newInstance( + code, + Constants.COAR_CODE_LABEL_MAP.get(code), + Constants.COAR_ACCESS_RIGHT_SCHEMA)); + } + } + + private static void mapAuthor(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + Optional + .ofNullable(input.getAuthor()) + .ifPresent( + ats -> out.setAuthor(ats.stream().map(ResultMapper::getAuthor).collect(Collectors.toList()))); + } + private static Map> createSubjectMap( eu.dnetlib.dhp.schema.oaf.Result input) { Map> map = new HashMap<>(); @@ -345,8 +403,8 @@ public class ResultMapper implements Serializable { } private static void addTypeSpecificInformation(Result out, eu.dnetlib.dhp.schema.oaf.Result input, - Optional ort) throws NoAvailableEntityTypeException { - switch (ort.get().getClassid()) { + eu.dnetlib.dhp.schema.oaf.Qualifier ort) throws NoAvailableEntityTypeException { + switch (ort.getClassid()) { case "publication": Optional journal = Optional .ofNullable(((Publication) input).getJournal()); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java index 685af91..ac81971 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java @@ -25,7 +25,7 @@ public class SendToZenodoHDFS implements Serializable { .toString( SendToZenodoHDFS.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/upload_zenodo.json"))); + "/eu/dnetlib/dhp/oa/graph/dump/eosc_upload_zenodo.json"))); parser.parseArgument(args); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/MergedRels.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/MergedRels.java deleted file mode 100644 index 30088e8..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/MergedRels.java +++ /dev/null @@ -1,25 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import java.io.Serializable; - -public class MergedRels implements Serializable { - private String organizationId; - private String representativeId; - - public String 
getOrganizationId() { - return organizationId; - } - - public void setOrganizationId(String organizationId) { - this.organizationId = organizationId; - } - - public String getRepresentativeId() { - return representativeId; - } - - public void setRepresentativeId(String representativeId) { - this.representativeId = representativeId; - } -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java index 88be031..f866435 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java @@ -57,7 +57,7 @@ public class SaveCommunityMap implements Serializable { .toString( SaveCommunityMap.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/eosc_cm_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkExtendResultWithRelation.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkExtendResultWithRelation.java index 088dbed..5eed398 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkExtendResultWithRelation.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkExtendResultWithRelation.java @@ -35,7 +35,7 @@ public class SparkExtendResultWithRelation implements Serializable { .toString( SparkExtendResultWithRelation.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_extendwithrelation_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/eosc_input_extendwithrelation_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkPrepareResultProject.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkPrepareResultProject.java index 3e75f31..5ea3f60 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkPrepareResultProject.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkPrepareResultProject.java @@ -49,7 +49,7 @@ public class SparkPrepareResultProject implements Serializable { .toString( SparkPrepareResultProject.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/project_prep_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/eosc_project_prep_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkSelectRelation.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkSelectRelation.java index e820393..d7be673 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkSelectRelation.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkSelectRelation.java @@ -36,7 +36,7 @@ public class SparkSelectRelation implements Serializable { .toString( SparkSelectRelation.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/eosc_input_relationdump_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git 
a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java index 3caa06b..376a677 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java @@ -33,7 +33,7 @@ public class SparkUpdateProjectInfo implements Serializable { .toString( SparkUpdateProjectInfo.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/eosc_project_input_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_cm_parameters.json similarity index 100% rename from dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json rename to dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_cm_parameters.json diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_extendwithrelation_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_input_extendwithrelation_parameters.json similarity index 100% rename from dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_extendwithrelation_parameters.json rename to dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_input_extendwithrelation_parameters.json diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_maketar_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_input_maketar_parameters.json similarity index 100% rename from dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_maketar_parameters.json rename to dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_input_maketar_parameters.json diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_input_relationdump_parameters.json similarity index 100% rename from dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json rename to dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_input_relationdump_parameters.json diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_project_input_parameters.json similarity index 100% rename from dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json rename to dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_project_input_parameters.json diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_prep_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_project_prep_parameters.json similarity index 100% rename from dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_prep_parameters.json rename to dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_project_prep_parameters.json diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/upload_zenodo.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_upload_zenodo.json similarity index 100% rename from dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/upload_zenodo.json rename to 
dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_upload_zenodo.json diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json deleted file mode 100644 index 5669dca..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json +++ /dev/null @@ -1,26 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "gp", - "paramLongName": "graphPath", - "paramDescription": "the relationPath", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json deleted file mode 100644 index 2b42217..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json +++ /dev/null @@ -1,30 +0,0 @@ -[ - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "ra", - "paramLongName": "resultAggregation", - "paramDescription": "true if all the result type should be saved under the generic result name. 
false to get a different dump for each result type", - "paramRequired": true -} -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_complete_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_complete_parameters.json deleted file mode 100644 index a59a5ce..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_complete_parameters.json +++ /dev/null @@ -1,30 +0,0 @@ -[ - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - } -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json deleted file mode 100644 index 87de13d..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json +++ /dev/null @@ -1,24 +0,0 @@ -[ - - { - "paramName":"is", - "paramLongName":"isLookUpUrl", - "paramDescription": "URL of the isLookUp Service", - "paramRequired": false - }, - { - "paramName": "hdfs", - "paramLongName": "hdfsPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "nn", - "paramLongName": "nameNode", - "paramDescription": "the name node", - "paramRequired": true - } -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json deleted file mode 100644 index c27a923..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json +++ /dev/null @@ -1,36 +0,0 @@ -[ - - { - "paramName":"ocm", - "paramLongName":"organizationCommunityMap", - "paramDescription": "the organization community map association", - "paramRequired": false - }, - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName":"cmp", - "paramLongName":"communityMapPath", - "paramDescription": "the path to the serialization of the community map", - "paramRequired": true - } -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameter_select_relation.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameter_select_relation.json deleted file mode 100644 index 1a67134..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameter_select_relation.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of 
the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json deleted file mode 100644 index e86f6e3..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json +++ /dev/null @@ -1,47 +0,0 @@ -[ - - { - "paramName":"cmp", - "paramLongName":"communityMapPath", - "paramDescription": "the path to the serialization of the community map", - "paramRequired": true - }, - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, - { - "paramName":"dt", - "paramLongName":"dumpType", - "paramDescription": "the type of the dump (complete for the whole graph, community for the products related to communities, funder for the results with at least a link to project", - "paramRequired": false - }, { - "paramName":"cid", - "paramLongName":"communityId", - "paramDescription": "the id of the community to be dumped", - "paramRequired": false -} -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json deleted file mode 100644 index 6c45538..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json +++ /dev/null @@ -1,41 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, - { - "paramName":"gp", - "paramLongName":"graphPath", - "paramDescription": "the path to the relations", - "paramRequired": true - }, - { - "paramName":"cmp", - "paramLongName":"communityMapPath", - "paramDescription": "the path to the relations", - "paramRequired": true - } -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eoscdump/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/workflow.xml similarity index 100% rename from 
dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eoscdump/oozie_app/workflow.xml rename to dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/workflow.xml diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_subset_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_subset_parameters.json deleted file mode 100644 index ed23136..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_subset_parameters.json +++ /dev/null @@ -1,27 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "pl", - "paramLongName": "projectListPath", - "paramDescription": "the path of the association result projectlist", - "paramRequired": true - } -] - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/config-default.xml deleted file mode 100644 index d262cb6..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/workflow.xml deleted file mode 100644 index bfb443e..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/workflow.xml +++ /dev/null @@ -1,171 +0,0 @@ - - - - sourcePath - the source path - - - projectListPath - the path to the project list - - - outputPath - the output path - - - accessToken - the access token used for the deposition in Zenodo - - - connectionUrl - the connection url for Zenodo - - - metadata - the metadata associated to the deposition - - - depositionType - the type of deposition we want to perform. 
"new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) - - - conceptRecordId - for new version, the id of the record for the old deposition - - - depositionId - the depositionId of a deposition open that has to be added content - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - yarn - cluster - Dump table project - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/project - --resultTableNameeu.dnetlib.dhp.schema.oaf.Project - --outputPath${workingDir}/project - --communityMapPathnoneed - - - - - - - yarn - cluster - Dump table project - eu.dnetlib.dhp.oa.graph.dump.projectssubset.ProjectsSubsetSparkJob - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/project - --outputPath${workingDir}/tar/project - --projectListPath${projectListPath} - - - - - - - eu.dnetlib.dhp.oa.graph.dump.eosc.MakeTar - --hdfsPath${outputPath} - --nameNode${nameNode} - --sourcePath${workingDir}/tar - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS - --hdfsPath${outputPath} - --nameNode${nameNode} - --accessToken${accessToken} - --connectionUrl${connectionUrl} - --metadata${metadata} - --conceptRecordId${conceptRecordId} - --depositionType${depositionType} - --depositionId${depositionId} - - - - - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json deleted file mode 100644 index 
dec82bc..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json +++ /dev/null @@ -1,37 +0,0 @@ - -[ - - { - "paramName":"cmp", - "paramLongName":"communityMapPath", - "paramDescription": "the path to the serialization of the community map", - "paramRequired": false - }, - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, { - "paramName":"cid", - "paramLongName":"communityId", - "paramDescription": "the id of the community to be dumped", - "paramRequired": false -} -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/config-default.xml deleted file mode 100644 index d262cb6..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/import.txt b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/import.txt deleted file mode 100644 index bf55947..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/import.txt +++ /dev/null @@ -1,4 +0,0 @@ -## This is a classpath-based import file (this header is required) -dump_complete classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app -dump_funder classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app -dump_community classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml deleted file mode 100644 index 5b57282..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml +++ /dev/null @@ -1,306 +0,0 @@ - - - - - singleDeposition - Indicates if it is a single community deposition - - - communityId - the id of the community to be dumped if a dump for a single community should be done - - - dumpType - the type of the dump one of {complete, community, funder} - - - onlyUpload - true if the dump is already done and should only be upload in zenodo - - - upload - true if the dump should be upload in zenodo - - - sourcePath - the source path - - - isLookUpUrl - the isLookup service endpoint - - - outputPath - the output path - - - resultAggregation - true if all the result type have to be dumped under result. 
false otherwise - - - accessToken - the access token used for the deposition in Zenodo - - - connectionUrl - the connection url for Zenodo - - - metadata - the metadata associated to the deposition - - - depositionType - the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) - - - conceptRecordId - for new version, the id of the record for the old deposition - - - depositionId - the depositionId of a deposition open that has to be added content - - - organizationCommunityMap - the organization community map - - - - hiveDbName - the target hive database name - - - hiveJdbcUrl - hive server jdbc url - - - hiveMetastoreUris - hive server metastore URIs - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - ${wf:conf('onlyUpload') eq true} - - - - - - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.eosc.SaveCommunityMap - --outputPath${workingDir}/communityMap - --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - --singleDeposition${singleDeposition} - --communityId${communityId} - - - - - - - - ${wf:conf('dumpType') eq "funder"} - ${wf:conf('dumpType') eq "community"} - - - - - - - - ${wf:appPath()}/dump_complete - - - - - communityMapPath - ${workingDir}/communityMap - - - outputPath - ${workingDir}/tar - - - sourcePath - ${sourcePath} - - - organizationCommunityMap - ${organizationCommunityMap} - - - isLookUpUrl - ${isLookUpUrl} - - - resultAggregation - ${resultAggregation} - - - - - - - - - - - ${wf:appPath()}/dump_community - - - - - sourcePath - ${sourcePath} - - - communityMapPath - ${workingDir}/communityMap - - - outputPath - ${workingDir}/tar - - - - - - - - - - ${wf:appPath()}/dump_funder - - - - - communityMapPath - ${workingDir}/communityMap - - - outputPath - ${workingDir}/tar - - - sourcePath - ${sourcePath} - - - dumpType - ${dumpType} - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.eosc.MakeTar - --hdfsPath${outputPath} - --nameNode${nameNode} - --sourcePath${workingDir}/tar - - - - - - - - ${wf:conf('upload') eq true} - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS - --hdfsPath${outputPath} - --nameNode${nameNode} - --accessToken${accessToken} - --connectionUrl${connectionUrl} - --metadata${metadata} - --conceptRecordId${conceptRecordId} - --depositionType${depositionType} - --depositionId${depositionId} - - - - - - - - diff --git 
a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/config-default.xml deleted file mode 100644 index e5ec3d0..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml deleted file mode 100644 index fdaf099..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml +++ /dev/null @@ -1,362 +0,0 @@ - - - - - sourcePath - the source path - - - outputPath - the output path - - - hiveDbName - the target hive database name - - - hiveJdbcUrl - hive server jdbc url - - - hiveMetastoreUris - hive server metastore URIs - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - yarn - cluster - Dump table publication for community/funder related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/dump/publication - --communityMapPath${communityMapPath} - --dumpType${dumpType} - - - - - - - - yarn - cluster - Dump table dataset for community/funder related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf 
Then eu.dnetlib.dhp.oa.graph.dump.eosc.SparkPrepareResultProject ("Prepare association result subset of project info") runs with --sourcePath ${sourcePath} --outputPath ${workingDir}/preparedInfo.

Four actions extend the dumped results with information about the projects, all via eu.dnetlib.dhp.oa.graph.dump.eosc.SparkUpdateProjectInfo with --preparedInfoPath ${workingDir}/preparedInfo:
- publication: --sourcePath ${workingDir}/dump/publication --outputPath ${workingDir}/ext/publication
- dataset: --sourcePath ${workingDir}/dump/dataset --outputPath ${workingDir}/ext/dataset
- otherresearchproduct: --sourcePath ${workingDir}/dump/otherresearchproduct --outputPath ${workingDir}/ext/orp
- software: --sourcePath ${workingDir}/dump/software --outputPath ${workingDir}/ext/software
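The extension step is essentially a left join between the dumped results and the prepared result-project associations. A sketch with assumed column names (id, resultId), not the actual schema:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class UpdateProjectInfoSketch {

	public static void main(String[] args) {
		SparkSession spark = SparkSession.builder().appName("extend-with-projects-sketch").getOrCreate();
		Dataset<Row> dumped = spark.read().json(args[0]); // ${workingDir}/dump/publication
		Dataset<Row> prepared = spark.read().json(args[1]); // ${workingDir}/preparedInfo
		// attach the project info to each result; results without projects survive the left join
		dumped
			.join(prepared, dumped.col("id").equalTo(prepared.col("resultId")), "left")
			.drop("resultId")
			.write()
			.mode("overwrite")
			.json(args[2]); // ${workingDir}/ext/publication
	}
}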
Finally eu.dnetlib.dhp.oa.graph.dump.community.SparkSplitForCommunity ("Split dumped result for community") runs with --sourcePath ${workingDir}/ext --outputPath ${outputPath} --communityMapPath ${communityMapPath}, and the sub-workflow ends.
\ No newline at end of file
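The split step fans the extended results out into one folder per community. Schematically (the semicolon-separated community list and the context-based membership test are assumptions of this sketch):

import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import static org.apache.spark.sql.functions.array_contains;
import static org.apache.spark.sql.functions.col;

public class SplitForCommunitySketch {

	public static void main(String[] args) {
		SparkSession spark = SparkSession.builder().appName("split-sketch").getOrCreate();
		Dataset<Row> ext = spark.read().json(args[0]); // ${workingDir}/ext
		String outputPath = args[1]; // ${outputPath}
		List<String> communities = Arrays.asList(args[2].split(";")); // stand-in for the community map keys
		for (String community : communities) {
			// one output folder per community, holding only its own results
			ext
				.filter(array_contains(col("context.id"), community))
				.write()
				.mode("overwrite")
				.option("compression", "gzip")
				.json(outputPath + "/" + community);
		}
	}
}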
diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/config-default.xml
deleted file mode 100644
index e5ec3d0..0000000
--- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/config-default.xml
+++ /dev/null
@@ -1,30 +0,0 @@
Deleted properties (identical to the community config-default.xml above): jobTracker=yarnRM, nameNode=hdfs://nameservice1, oozie.use.system.libpath=true, hiveMetastoreUris=thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083, hiveJdbcUrl=jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000, hiveDbName=openaire, oozie.launcher.mapreduce.user.classpath.first=true.
\ No newline at end of file

diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml
deleted file mode 100644
index 569b143..0000000
--- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml
+++ /dev/null
@@ -1,539 +0,0 @@
Deleted sub-workflow for the complete dump. Parameters: sourcePath, outputPath, resultAggregation (true if all the result types have to be dumped under result, false otherwise), organizationCommunityMap (the organization community map), plus the same Hive, Spark and Oozie parameters and global configuration as the community sub-workflow.

Seven entity tables are dumped with eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob (shared Spark options as above; --communityMapPath ${communityMapPath} unless noted):
- publication: --sourcePath ${sourcePath}/publication --resultTableName eu.dnetlib.dhp.schema.oaf.Publication --outputPath ${workingDir}/result/publication
- dataset: --sourcePath ${sourcePath}/dataset --resultTableName eu.dnetlib.dhp.schema.oaf.Dataset --outputPath ${workingDir}/result/dataset
- otherresearchproduct: --sourcePath ${sourcePath}/otherresearchproduct --resultTableName eu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath ${workingDir}/result/otherresearchproduct
- software: --sourcePath ${sourcePath}/software --resultTableName eu.dnetlib.dhp.schema.oaf.Software --outputPath ${workingDir}/result/software
- organization: --sourcePath ${sourcePath}/organization --resultTableName eu.dnetlib.dhp.schema.oaf.Organization --outputPath ${outputPath}/organization
- project: --sourcePath ${sourcePath}/project --resultTableName eu.dnetlib.dhp.schema.oaf.Project --outputPath ${outputPath}/project
- datasource: --sourcePath ${sourcePath}/datasource --resultTableName eu.dnetlib.dhp.schema.oaf.Datasource --outputPath ${outputPath}/datasource --communityMapPath ${workingDir}/communityMap
Relations are then selected and dumped:
- eu.dnetlib.dhp.oa.graph.dump.complete.SparkSelectValidRelationsJob ("Select valid table relation", with the extra option --conf spark.sql.shuffle.partitions=3840): --sourcePath ${sourcePath} --outputPath ${workingDir}/validrelation
- eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpRelationJob ("Dump table relation"): --sourcePath ${workingDir}/validrelation --outputPath ${workingDir}/relation/relation --removeSet ${removeSet}
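The --removeSet option acts as a blacklist of relation semantics. A sketch of the filtering it implies (the semicolon separator and the relClass field name are assumptions):

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class DumpRelationSketch {

	public static void main(String[] args) {
		SparkSession spark = SparkSession.builder().appName("relation-dump-sketch").getOrCreate();
		Dataset<Row> relations = spark.read().json(args[0]); // ${workingDir}/validrelation
		Set<String> removeSet = new HashSet<>(Arrays.asList(args[2].split(";")));
		// drop the relations whose semantics appear in the remove set
		relations
			.filter((FilterFunction<Row>) r -> !removeSet.contains(r.<String> getAs("relClass")))
			.write()
			.mode("overwrite")
			.json(args[1]); // ${workingDir}/relation/relation
	}
}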
Context entities and their relations are produced by two Java actions and one Spark action:
- eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities: --hdfsPath ${outputPath}/communities_infrastructures/communities_infrastructure.json.gz --nameNode ${nameNode} --isLookUpUrl ${isLookUpUrl}
- eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation: --hdfsPath ${workingDir}/relation/context --nameNode ${nameNode} --isLookUpUrl ${isLookUpUrl}
- eu.dnetlib.dhp.oa.graph.dump.complete.SparkOrganizationRelation ("Dump table relation"): --sourcePath ${sourcePath}/relation --outputPath ${workingDir}/relation/contextOrg --organizationCommunityMap ${organizationCommunityMap} --communityMapPath ${communityMapPath}
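The organizationCommunityMap parameter is a JSON object from organization id to the list of its communities. A sketch of how such a map can be materialized into one link per pair (Jackson is already a project dependency; the map shape and the placeholder ids are assumptions):

import java.util.List;
import java.util.Map;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;

public class OrganizationMapSketch {

	public static void main(String[] args) throws Exception {
		String json = "{\"20|someOrgId\": [\"community1\", \"community2\"]}"; // placeholder map
		Map<String, List<String>> map = new ObjectMapper()
			.readValue(json, new TypeReference<Map<String, List<String>>>() {
			});
		// one organization-community link per entry would be turned into relations here
		map.forEach((org, communities) -> communities.forEach(c -> System.out.println(org + " <-> " + c)));
	}
}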
Relations are also extracted from the result entities with eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities:
- publication: --sourcePath ${sourcePath}/publication --resultTableName eu.dnetlib.dhp.schema.oaf.Publication --outputPath ${workingDir}/relation/publication --communityMapPath ${communityMapPath}
- dataset: --sourcePath ${sourcePath}/dataset --resultTableName eu.dnetlib.dhp.schema.oaf.Dataset --outputPath ${workingDir}/relation/dataset --communityMapPath ${communityMapPath}
- otherresearchproduct: --sourcePath ${sourcePath}/otherresearchproduct --resultTableName eu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath ${workingDir}/relation/orp --communityMapPath ${communityMapPath}
- software: --sourcePath ${sourcePath}/software --resultTableName eu.dnetlib.dhp.schema.oaf.Software --outputPath ${workingDir}/relation/software --communityMapPath ${communityMapPath}

Finally eu.dnetlib.dhp.oa.graph.dump.complete.SparkCollectAndSave ("Collect Results and Relations and put them in the right path") runs with --sourcePath ${workingDir} --outputPath ${outputPath} --resultAggregation ${resultAggregation}. The kill node reports: Sub-workflow dump complete failed with error message ${wf:errorMessage()}.
\ No newline at end of file
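The resultAggregation flag decides whether the four result tables land in a single result folder or keep one folder each. Schematically (a sketch only; paths follow the actions above, and a shared schema across the four tables is assumed):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class CollectAndSaveSketch {

	public static void main(String[] args) {
		SparkSession spark = SparkSession.builder().appName("collect-and-save-sketch").getOrCreate();
		String workingDir = args[0]; // ${workingDir}
		String outputPath = args[1]; // ${outputPath}
		boolean resultAggregation = Boolean.parseBoolean(args[2]);
		Dataset<Row> pub = spark.read().json(workingDir + "/result/publication");
		Dataset<Row> dat = spark.read().json(workingDir + "/result/dataset");
		Dataset<Row> orp = spark.read().json(workingDir + "/result/otherresearchproduct");
		Dataset<Row> sw = spark.read().json(workingDir + "/result/software");
		if (resultAggregation) {
			// all the result types dumped under a single result folder
			pub.unionByName(dat).unionByName(orp).unionByName(sw)
				.write().mode("overwrite").json(outputPath + "/result");
		} else {
			// each result type keeps its own folder
			pub.write().mode("overwrite").json(outputPath + "/publication");
			dat.write().mode("overwrite").json(outputPath + "/dataset");
			orp.write().mode("overwrite").json(outputPath + "/otherresearchproduct");
			sw.write().mode("overwrite").json(outputPath + "/software");
		}
	}
}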
diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/config-default.xml
deleted file mode 100644
index e5ec3d0..0000000
--- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/config-default.xml
+++ /dev/null
@@ -1,30 +0,0 @@
Deleted properties (identical to the two config-default.xml files above): jobTracker=yarnRM, nameNode=hdfs://nameservice1, oozie.use.system.libpath=true, hiveMetastoreUris=thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083, hiveJdbcUrl=jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000, hiveDbName=openaire, oozie.launcher.mapreduce.user.classpath.first=true.
\ No newline at end of file

diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml
deleted file mode 100644
index d4c4cd7..0000000
--- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml
+++ /dev/null
@@ -1,255 +0,0 @@
Deleted sub-workflow for the funder dump. Parameters: sourcePath (the source path), outputPath (the output path), plus the same Hive, Spark and Oozie parameters, global configuration and kill node as the community sub-workflow.

First eu.dnetlib.dhp.oa.graph.dump.eosc.SparkPrepareResultProject ("Prepare association result subset of project info") runs with --sourcePath ${sourcePath} --outputPath ${workingDir}/preparedInfo.
The results linked to projects are then selected, per table, with eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject ("Dump funder results"), always with --graphPath ${workingDir}/preparedInfo --communityMapPath ${communityMapPath}:
- publication: --sourcePath ${sourcePath}/publication --resultTableName eu.dnetlib.dhp.schema.oaf.Publication --outputPath ${workingDir}/result/publication
- dataset: --sourcePath ${sourcePath}/dataset --resultTableName eu.dnetlib.dhp.schema.oaf.Dataset --outputPath ${workingDir}/result/dataset
- otherresearchproduct: --sourcePath ${sourcePath}/otherresearchproduct --resultTableName eu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputPath ${workingDir}/result/otherresearchproduct
- software: --sourcePath ${sourcePath}/software --resultTableName eu.dnetlib.dhp.schema.oaf.Software --outputPath ${workingDir}/result/software
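Selecting the results linked to a project is, in essence, a semi join against the prepared associations. A sketch with assumed column names (id, resultId), not the actual implementation:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class ResultLinkedToProjectSketch {

	public static void main(String[] args) {
		SparkSession spark = SparkSession.builder().appName("linked-to-project-sketch").getOrCreate();
		Dataset<Row> results = spark.read().json(args[0]); // ${sourcePath}/publication
		Dataset<Row> prepared = spark.read().json(args[1]); // ${workingDir}/preparedInfo
		// keep only the results with at least one project association
		results
			.join(prepared, results.col("id").equalTo(prepared.col("resultId")), "left_semi")
			.write()
			.mode("overwrite")
			.json(args[2]); // ${workingDir}/result/publication
	}
}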
Finally eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults ("Dump funder results") runs with --sourcePath ${workingDir}/result --outputPath ${outputPath}, and the sub-workflow ends.
\ No newline at end of file