partial implementation of entities from db

This commit is contained in:
Michele Artini 2020-01-20 16:04:19 +01:00
parent 81f82b5d34
commit b35c59eb42
7 changed files with 702 additions and 538 deletions

View File

@ -76,6 +76,41 @@
</configuration> </configuration>
</plugin> </plugin>
</plugins> </plugins>
<pluginManagement>
<plugins>
<!--This plugin's configuration is used to store Eclipse m2e settings only. It has no influence on the Maven build itself.-->
<plugin>
<groupId>org.eclipse.m2e</groupId>
<artifactId>lifecycle-mapping</artifactId>
<version>1.0.0</version>
<configuration>
<lifecycleMappingMetadata>
<pluginExecutions>
<pluginExecution>
<pluginExecutionFilter>
<groupId>
org.apache.maven.plugins
</groupId>
<artifactId>
maven-plugin-plugin
</artifactId>
<versionRange>
[3.2,)
</versionRange>
<goals>
<goal>descriptor</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore></ignore>
</action>
</pluginExecution>
</pluginExecutions>
</lifecycleMappingMetadata>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build> </build>
</project> </project>

View File

@ -5,43 +5,48 @@ import java.util.List;
public class Relation implements Serializable { public class Relation implements Serializable {
private String source; /**
*
*/
private static final long serialVersionUID = -9103706796710618813L;
private String target; private String source;
private List<Provenance> provenance; private String target;
private RelationSemantic semantic; private List<Provenance> provenance;
public String getSource() { private RelationSemantic semantic;
return source;
}
public void setSource(String source) { public String getSource() {
this.source = source; return source;
} }
public String getTarget() { public void setSource(final String source) {
return target; this.source = source;
} }
public void setTarget(String target) { public String getTarget() {
this.target = target; return target;
} }
public List<Provenance> getProvenance() { public void setTarget(final String target) {
return provenance; this.target = target;
} }
public void setProvenance(List<Provenance> provenance) { public List<Provenance> getProvenance() {
this.provenance = provenance; return provenance;
} }
public RelationSemantic getSemantic() { public void setProvenance(final List<Provenance> provenance) {
return semantic; this.provenance = provenance;
} }
public void setSemantic(RelationSemantic semantic) { public RelationSemantic getSemantic() {
this.semantic = semantic; return semantic;
} }
public void setSemantic(final RelationSemantic semantic) {
this.semantic = semantic;
}
} }

View File

@ -2,11 +2,17 @@ package eu.dnetlib.dhp.migration;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.sql.Array;
import java.sql.ResultSet; import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.function.Consumer; import java.util.function.Consumer;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -15,14 +21,21 @@ import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class MigrateDbEntitiesApplication extends AbstractMigrateApplication implements Closeable { public class MigrateDbEntitiesApplication extends AbstractMigrateApplication implements Closeable {
private static final Qualifier ENTITYREGISTRY_PROVENANCE_ACTION = MigrationUtils
.qualifier("sysimport:crosswalk:entityregistry", "sysimport:crosswalk:entityregistry", "dnet:provenance_actions", "dnet:provenance_actions");
private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class); private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class);
private final DbClient dbClient; private final DbClient dbClient;
private final long lastUpdateTimestamp;
public static void main(final String[] args) throws Exception { public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(MigrateDbEntitiesApplication.class.getResourceAsStream("/eu/dnetlib/dhp/migration/migrate_db_entities_parameters.json"))); IOUtils.toString(MigrateDbEntitiesApplication.class.getResourceAsStream("/eu/dnetlib/dhp/migration/migrate_db_entities_parameters.json")));
@ -51,6 +64,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
final String dbPassword) throws Exception { final String dbPassword) throws Exception {
super(hdfsPath, hdfsNameNode, hdfsUser); super(hdfsPath, hdfsNameNode, hdfsUser);
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
this.lastUpdateTimestamp = new Date().getTime();
} }
public void execute(final String sqlFile, final Consumer<ResultSet> consumer) throws Exception { public void execute(final String sqlFile, final Consumer<ResultSet> consumer) throws Exception {
@ -61,7 +75,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
public void processDatasource(final ResultSet rs) { public void processDatasource(final ResultSet rs) {
try { try {
final DataInfo info = MigrationUtils.dataInfo(null, null, null, null, null, null); // TODO final DataInfo info = prepareDataInfo(rs);
final Datasource ds = new Datasource(); final Datasource ds = new Datasource();
@ -74,8 +88,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
ds.setExtraInfo(null); // TODO ds.setExtraInfo(null); // TODO
ds.setOaiprovenance(null); // TODO ds.setOaiprovenance(null); // TODO
ds.setDatasourcetype(null); // Qualifier datasourcetype) { ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
ds.setOpenairecompatibility(null); // Qualifier openairecompatibility) { ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
ds.setOfficialname(MigrationUtils.field(rs.getString("officialname"), info)); ds.setOfficialname(MigrationUtils.field(rs.getString("officialname"), info));
ds.setEnglishname(MigrationUtils.field(rs.getString("englishname"), info)); ds.setEnglishname(MigrationUtils.field(rs.getString("englishname"), info));
ds.setWebsiteurl(MigrationUtils.field(rs.getString("websiteurl"), info)); ds.setWebsiteurl(MigrationUtils.field(rs.getString("websiteurl"), info));
@ -86,7 +100,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
ds.setLongitude(MigrationUtils.field(Double.toString(rs.getDouble("longitude")), info)); ds.setLongitude(MigrationUtils.field(Double.toString(rs.getDouble("longitude")), info));
ds.setDateofvalidation(MigrationUtils.field(rs.getDate("dateofvalidation").toString(), info)); ds.setDateofvalidation(MigrationUtils.field(rs.getDate("dateofvalidation").toString(), info));
ds.setDescription(MigrationUtils.field(rs.getString("description"), info)); ds.setDescription(MigrationUtils.field(rs.getString("description"), info));
ds.setSubjects(null); // List<StructuredProperty> subjects) { ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
ds.setOdnumberofitems(MigrationUtils.field(Double.toString(rs.getInt("odnumberofitems")), info)); ds.setOdnumberofitems(MigrationUtils.field(Double.toString(rs.getInt("odnumberofitems")), info));
ds.setOdnumberofitemsdate(MigrationUtils.field(rs.getDate("odnumberofitemsdate").toString(), info)); ds.setOdnumberofitemsdate(MigrationUtils.field(rs.getDate("odnumberofitemsdate").toString(), info));
ds.setOdpolicies(MigrationUtils.field(rs.getString("odpolicies"), info)); ds.setOdpolicies(MigrationUtils.field(rs.getString("odpolicies"), info));
@ -110,12 +124,15 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
ds.setPolicies(null); // List<KeyValue> // TODO ds.setPolicies(null); // List<KeyValue> // TODO
ds.setJournal(null); // Journal // TODO ds.setJournal(null); // Journal // TODO
ds.setDataInfo(info);
ds.setLastupdatetimestamp(lastUpdateTimestamp);
// rs.getString("datasourceid"); // rs.getString("datasourceid");
rs.getArray("identities"); rs.getArray("identities");
// rs.getString("officialname"); // rs.getString("officialname");
// rs.getString("englishname"); // rs.getString("englishname");
// rs.getString("contactemail"); // rs.getString("contactemail");
rs.getString("openairecompatibility"); // COMPLEX ...@@@... // rs.getString("openairecompatibility"); // COMPLEX ...@@@...
// rs.getString("websiteurl"); // rs.getString("websiteurl");
// rs.getString("logourl"); // rs.getString("logourl");
// rs.getArray("accessinfopackage"); // rs.getArray("accessinfopackage");
@ -124,15 +141,15 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
// rs.getString("namespaceprefix"); // rs.getString("namespaceprefix");
// rs.getInt("odnumberofitems"); // NULL // rs.getInt("odnumberofitems"); // NULL
// rs.getDate("odnumberofitemsdate"); // NULL // rs.getDate("odnumberofitemsdate"); // NULL
rs.getArray("subjects"); // rs.getArray("subjects");
// rs.getString("description"); // rs.getString("description");
// rs.getString("odpolicies"); // NULL // rs.getString("odpolicies"); // NULL
// rs.getArray("odlanguages"); // rs.getArray("odlanguages");
// rs.getArray("odcontenttypes"); // rs.getArray("odcontenttypes");
rs.getBoolean("inferred"); // false // rs.getBoolean("inferred"); // false
rs.getBoolean("deletedbyinference");// false // rs.getBoolean("deletedbyinference");// false
rs.getDouble("trust"); // 0.9 // rs.getDouble("trust"); // 0.9
rs.getString("inferenceprovenance"); // NULL // rs.getString("inferenceprovenance"); // NULL
// rs.getDate("dateofcollection"); // rs.getDate("dateofcollection");
// rs.getDate("dateofvalidation"); // rs.getDate("dateofvalidation");
// rs.getDate("releasestartdate"); // rs.getDate("releasestartdate");
@ -152,21 +169,22 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
rs.getArray("policies"); rs.getArray("policies");
// rs.getString("collectedfromid"); // rs.getString("collectedfromid");
// rs.getString("collectedfromname"); // rs.getString("collectedfromname");
rs.getString("datasourcetype"); // COMPLEX XXX@@@@.... // rs.getString("datasourcetype"); // COMPLEX XXX@@@@....
rs.getString("provenanceaction"); // 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' // rs.getString("provenanceaction"); //
// AS provenanceaction, // 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions'
// AS provenanceaction,
rs.getString("journal"); // CONCAT(d.issn, '@@@', d.eissn, '@@@', d.lissn) AS journal rs.getString("journal"); // CONCAT(d.issn, '@@@', d.eissn, '@@@', d.lissn) AS journal
emitOaf(ds); emitOaf(ds);
} catch (final Exception e) { } catch (final Exception e) {
// TODO: handle exception throw new RuntimeException(e);
} }
} }
public void processProject(final ResultSet rs) { public void processProject(final ResultSet rs) {
try { try {
final DataInfo info = MigrationUtils.dataInfo(null, null, null, null, null, null); // TODO final DataInfo info = prepareDataInfo(rs);
final Project p = new Project(); final Project p = new Project();
@ -192,9 +210,9 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
p.setEcsc39(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecsc39")), info)); p.setEcsc39(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecsc39")), info));
p.setOamandatepublications(MigrationUtils.field(Boolean.toString(rs.getBoolean("oamandatepublications")), info)); p.setOamandatepublications(MigrationUtils.field(Boolean.toString(rs.getBoolean("oamandatepublications")), info));
p.setEcarticle29_3(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info)); p.setEcarticle29_3(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info));
p.setSubjects(null); // List<StructuredProperty> //TODO p.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
p.setFundingtree(null); // List<Field<String>> //TODO p.setFundingtree(null); // List<Field<String>> //TODO
p.setContracttype(null); // Qualifier //TODO p.setContracttype(prepareQualifierSplitting(rs.getString("contracttype")));
p.setOptional1(MigrationUtils.field(rs.getString("optional1"), info)); p.setOptional1(MigrationUtils.field(rs.getString("optional1"), info));
p.setOptional2(MigrationUtils.field(rs.getString("optional2"), info)); p.setOptional2(MigrationUtils.field(rs.getString("optional2"), info));
p.setJsonextrainfo(MigrationUtils.field(rs.getString("jsonextrainfo"), info)); p.setJsonextrainfo(MigrationUtils.field(rs.getString("jsonextrainfo"), info));
@ -207,6 +225,9 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
p.setTotalcost(new Float(rs.getDouble("totalcost"))); p.setTotalcost(new Float(rs.getDouble("totalcost")));
p.setFundedamount(new Float(rs.getDouble("fundedamount"))); p.setFundedamount(new Float(rs.getDouble("fundedamount")));
p.setDataInfo(info);
p.setLastupdatetimestamp(lastUpdateTimestamp);
// rs.getString("projectid"); // rs.getString("projectid");
// rs.getString("code"); // rs.getString("code");
// rs.getString("websiteurl"); // rs.getString("websiteurl");
@ -222,13 +243,13 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
// rs.getBoolean("ecarticle29_3"); // rs.getBoolean("ecarticle29_3");
// rs.getDate("dateofcollection"); // rs.getDate("dateofcollection");
// rs.getDate("dateoftransformation"); // rs.getDate("dateoftransformation");
rs.getBoolean("inferred"); // rs.getBoolean("inferred");
rs.getBoolean("deletedbyinference"); // rs.getBoolean("deletedbyinference");
rs.getDouble("trust"); // rs.getDouble("trust");
rs.getString("inferenceprovenance"); // rs.getString("inferenceprovenance");
// rs.getString("optional1"); // rs.getString("optional1");
// rs.getString("optional2"); // rs.getString("optional2");
rs.getString("jsonextrainfo"); // rs.getString("jsonextrainfo");
// rs.getString("contactfullname"); // rs.getString("contactfullname");
// rs.getString("contactfax"); // rs.getString("contactfax");
// rs.getString("contactphone"); // rs.getString("contactphone");
@ -248,14 +269,14 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
emitOaf(p); emitOaf(p);
} catch (final Exception e) { } catch (final Exception e) {
// TODO: handle exception throw new RuntimeException(e);
} }
} }
public void processOrganization(final ResultSet rs) { public void processOrganization(final ResultSet rs) {
try { try {
final DataInfo info = MigrationUtils.dataInfo(null, null, null, null, null, null); // TODO final DataInfo info = prepareDataInfo(rs);
final Organization o = new Organization(); final Organization o = new Organization();
@ -269,7 +290,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
o.setOaiprovenance(null); // OAIProvenance // TODO o.setOaiprovenance(null); // OAIProvenance // TODO
o.setLegalshortname(MigrationUtils.field("legalshortname", info)); o.setLegalshortname(MigrationUtils.field("legalshortname", info));
o.setLegalname(MigrationUtils.field("legalname", info)); o.setLegalname(MigrationUtils.field("legalname", info));
o.setAlternativeNames(null); // List<Field<String>> //TODO o.setAlternativeNames(new ArrayList<>());
o.setWebsiteurl(MigrationUtils.field("websiteurl", info)); o.setWebsiteurl(MigrationUtils.field("websiteurl", info));
o.setLogourl(MigrationUtils.field("logourl", info)); o.setLogourl(MigrationUtils.field("logourl", info));
o.setEclegalbody(MigrationUtils.field(Boolean.toString(rs.getBoolean("eclegalbody")), info)); o.setEclegalbody(MigrationUtils.field(Boolean.toString(rs.getBoolean("eclegalbody")), info));
@ -283,7 +304,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
o.setEcenterprise(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecenterprise")), info)); o.setEcenterprise(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecenterprise")), info));
o.setEcsmevalidated(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info)); o.setEcsmevalidated(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info));
o.setEcnutscode(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecnutscode")), info)); o.setEcnutscode(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecnutscode")), info));
o.setCountry(null); // Qualifier country) { o.setCountry(prepareQualifierSplitting(rs.getString("country")));
o.setDataInfo(info);
o.setLastupdatetimestamp(lastUpdateTimestamp);
// rs.getString("organizationid"); // rs.getString("organizationid");
// rs.getString("legalshortname"); // rs.getString("legalshortname");
@ -300,87 +324,160 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
// rs.getBoolean("ecenterprise"); // rs.getBoolean("ecenterprise");
// rs.getBoolean("ecsmevalidated"); // rs.getBoolean("ecsmevalidated");
// rs.getBoolean("ecnutscode"); // rs.getBoolean("ecnutscode");
rs.getDate("dateofcollection"); // rs.getDate("dateofcollection");
rs.getDate("dateoftransformation"); // rs.getDate("dateoftransformation");
rs.getBoolean("inferred"); // rs.getBoolean("inferred");
rs.getBoolean("deletedbyinference"); // rs.getBoolean("deletedbyinference");
rs.getDouble("trust"); // rs.getDouble("trust");
rs.getString("inferenceprovenance"); // rs.getString("inferenceprovenance");
// rs.getString("collectedfromid"); // rs.getString("collectedfromid");
// rs.getString("collectedfromname"); // rs.getString("collectedfromname");
rs.getString("country"); // rs.getString("country");
rs.getString("provenanceaction"); rs.getString("provenanceaction");
rs.getArray("pid"); rs.getArray("pid");
emitOaf(o); emitOaf(o);
} catch (final Exception e) { } catch (final Exception e) {
// TODO: handle exception throw new RuntimeException(e);
} }
} }
public void processDatasourceOrganization(final ResultSet rs) { public void processDatasourceOrganization(final ResultSet rs) {
try { try {
final Relation r = new Relation(); final DataInfo info = prepareDataInfo(rs);
final String orgId = MigrationUtils.createOpenaireId("20", rs.getString("organization"));
final String dsId = MigrationUtils.createOpenaireId("10", rs.getString("datasource"));
r.setRelType(null); // TODO final Relation r1 = new Relation();
r.setSubRelType(null); // TODO r1.setRelType("datasourceOrganization");
r.setRelClass(null); // TODO r1.setSubRelType("provision");
r.setSource(null); // TODO r1.setRelClass("isProvidedBy");
r.setTarget(null); // TODO r1.setSource(dsId);
r.setCollectedFrom(MigrationUtils.listKeyValues("", "")); r1.setTarget(orgId);
r1.setCollectedFrom(null);// TODO
r1.setDataInfo(info);
r1.setLastupdatetimestamp(lastUpdateTimestamp);
emitOaf(r1);
rs.getString("datasource"); final Relation r2 = new Relation();
rs.getString("organization"); r2.setRelType("datasourceOrganization");
rs.getDate("startdate"); // NULL r2.setSubRelType("provision");
rs.getDate("enddate"); // NULL r2.setRelClass("provides");
rs.getBoolean("inferred"); // false r2.setSource(orgId);
rs.getBoolean("deletedbyinference"); // false r2.setTarget(dsId);
rs.getDouble("trust"); // 0.9 r2.setCollectedFrom(null); // TODO
rs.getString("inferenceprovenance"); // NULL r2.setDataInfo(info);
rs.getString("semantics"); // 'providedBy@@@provided r1.setLastupdatetimestamp(lastUpdateTimestamp);
// by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies' AS emitOaf(r2);
// semantics,
rs.getString("provenanceaction"); // d.provenanceaction || '@@@' || d.provenanceaction || // rs.getString("datasource");
// '@@@dnet:provenanceActions@@@dnet:provenanceActions' AS provenanceaction // rs.getString("organization");
// rs.getDate("startdate"); // NULL
// rs.getDate("enddate"); // NULL
// rs.getBoolean("inferred"); // false
// rs.getBoolean("deletedbyinference"); // false
// rs.getDouble("trust"); // 0.9
// rs.getString("inferenceprovenance"); // NULL
// rs.getString("semantics"); // 'providedBy@@@provided
// by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies' AS
// semantics,
// rs.getString("provenanceaction"); // d.provenanceaction || '@@@' || d.provenanceaction ||
// '@@@dnet:provenanceActions@@@dnet:provenanceActions' AS provenanceaction
emitOaf(r);
} catch (final Exception e) { } catch (final Exception e) {
// TODO: handle exception throw new RuntimeException(e);
} }
} }
public void processProjectOrganization(final ResultSet rs) { public void processProjectOrganization(final ResultSet rs) {
try { try {
final Relation r = new Relation(); final DataInfo info = prepareDataInfo(rs);
final String orgId = MigrationUtils.createOpenaireId("20", rs.getString("resporganization"));
final String projectId = MigrationUtils.createOpenaireId("40", rs.getString("project"));
r.setRelType(null); // TODO final Relation r1 = new Relation();
r.setSubRelType(null); // TODO r1.setRelType("projectOrganization");
r.setRelClass(null); // TODO r1.setSubRelType("participation");
r.setSource(null); // TODO r1.setRelClass("isParticipant");
r.setTarget(null); // TODO r1.setSource(projectId);
r.setCollectedFrom(null); r1.setTarget(orgId);
r1.setCollectedFrom(null);// TODO
r1.setDataInfo(info);
r1.setLastupdatetimestamp(lastUpdateTimestamp);
emitOaf(r1);
final Relation r2 = new Relation();
r2.setRelType("projectOrganization");
r2.setSubRelType("participation");
r2.setRelClass("hasParticipant");
r2.setSource(orgId);
r2.setTarget(projectId);
r2.setCollectedFrom(null); // TODO
r2.setDataInfo(info);
r1.setLastupdatetimestamp(lastUpdateTimestamp);
emitOaf(r2);
// rs.getString("project");
// rs.getString("resporganization");
// rs.getInt("participantnumber");
// rs.getDouble("contribution");
// rs.getDate("startdate");// null
// rs.getDate("enddate");// null
// rs.getBoolean("inferred");// false
// rs.getBoolean("deletedbyinference"); // false
// rs.getDouble("trust");
// rs.getString("inferenceprovenance"); // NULL
// rs.getString("semantics"); // po.semanticclass || '@@@' || po.semanticclass ||
// '@@@dnet:project_organization_relations@@@dnet:project_organization_relations' AS semantics,
// rs.getString("provenanceaction"); //
// 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions'
// AS provenanceaction
rs.getString("project");
rs.getString("resporganization");
rs.getInt("participantnumber");
rs.getDouble("contribution");
rs.getDate("startdate");// null
rs.getDate("enddate");// null
rs.getBoolean("inferred");// false
rs.getBoolean("deletedbyinference"); // false
rs.getDouble("trust");
rs.getString("inferenceprovenance"); // NULL
rs.getString("semantics"); // po.semanticclass || '@@@' || po.semanticclass ||
// '@@@dnet:project_organization_relations@@@dnet:project_organization_relations' AS semantics,
rs.getString("provenanceaction"); // 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions'
// AS provenanceaction
emitOaf(r);
} catch (final Exception e) { } catch (final Exception e) {
// TODO: handle exception throw new RuntimeException(e);
} }
} }
/**
 * Builds the {@link DataInfo} describing the current row of the given result set.
 *
 * <p>Reads, in this order, the columns "deletedbyinference", "inferenceprovenance",
 * "inferred" and "trust" (the latter as a string) and hands them to
 * {@code MigrationUtils.dataInfo} together with a constant {@code false} flag and the
 * entity-registry provenance action.
 *
 * @param rs result set positioned on the row to read
 * @return the data info produced by {@code MigrationUtils.dataInfo}
 * @throws SQLException if any of the columns cannot be read
 */
private DataInfo prepareDataInfo(final ResultSet rs) throws SQLException {
	// Arguments are evaluated left to right, so the columns are read in the same order as before.
	return MigrationUtils.dataInfo(
			Boolean.valueOf(rs.getBoolean("deletedbyinference")),
			rs.getString("inferenceprovenance"),
			Boolean.valueOf(rs.getBoolean("inferred")),
			false, // NOTE(review): meaning of this flag is defined by MigrationUtils.dataInfo - confirm
			ENTITYREGISTRY_PROVENANCE_ACTION,
			rs.getString("trust"));
}
/**
 * Parses a string of four "@@@"-separated tokens into a {@link Qualifier}.
 *
 * @param s the encoded qualifier; may be null or blank
 * @return the qualifier built from the four tokens, or {@code null} when the input is blank
 *         or does not split into exactly four tokens
 */
private Qualifier prepareQualifierSplitting(final String s) {
	if (StringUtils.isBlank(s)) {
		return null;
	}

	final String[] tokens = s.split("@@@");
	if (tokens.length != 4) {
		return null;
	}

	return MigrationUtils.qualifier(tokens[0], tokens[1], tokens[2], tokens[3]);
}
/**
 * Parses a string of the form {@code value###t1@@@t2@@@t3@@@t4} into a
 * {@link StructuredProperty}.
 *
 * @param s the encoded property; may be null or blank
 * @param dataInfo the data info passed through to the created property
 * @return the structured property, or {@code null} when the input is blank, does not split
 *         into exactly two parts on "###", or its second part does not split into exactly
 *         four tokens on "@@@"
 */
private StructuredProperty prepareStructProp(final String s, final DataInfo dataInfo) {
	if (StringUtils.isBlank(s)) {
		return null;
	}

	final String[] halves = s.split("###");
	if (halves.length != 2) {
		return null;
	}

	final String[] tokens = halves[1].split("@@@");
	if (tokens.length != 4) {
		return null;
	}

	return MigrationUtils.structuredProperty(halves[0], tokens[0], tokens[1], tokens[2], tokens[3], dataInfo);
}
/**
 * Converts an SQL array of encoded strings into a list of {@link StructuredProperty}.
 *
 * <p>Each element is parsed with {@code prepareStructProp}; elements for which that method
 * returns {@code null} are skipped.
 *
 * @param array the SQL array whose content is cast to {@code String[]}; may be null
 * @param dataInfo the data info passed through to every created property
 * @return a new, possibly empty, list of the parsed properties (never null)
 * @throws SQLException if the array content cannot be retrieved
 */
private List<StructuredProperty> prepareListOfStructProps(final Array array, final DataInfo dataInfo) throws SQLException {
	final List<StructuredProperty> props = new ArrayList<>();
	if (array == null) {
		return props;
	}

	final String[] encoded = (String[]) array.getArray();
	for (int i = 0; i < encoded.length; i++) {
		final StructuredProperty parsed = prepareStructProp(encoded[i], dataInfo);
		if (parsed == null) {
			continue;
		}
		props.add(parsed);
	}
	return props;
}
@Override @Override
public void close() throws IOException { public void close() throws IOException {
super.close(); super.close();

View File

@ -10,6 +10,37 @@
<artifactId>dhp-dedup</artifactId> <artifactId>dhp-dedup</artifactId>
<build>
<plugins>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>4.0.1</version>
<executions>
<execution>
<id>scala-compile-first</id>
<phase>initialize</phase>
<goals>
<goal>add-source</goal>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>scala-test-compile</id>
<phase>process-test-resources</phase>
<goals>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
<configuration>
<scalaVersion>${scala.version}</scalaVersion>
</configuration>
</plugin>
</plugins>
</build>
<dependencies> <dependencies>
<dependency> <dependency>

View File

@ -1,23 +1,31 @@
package eu.dnetlib.dhp.graph; package eu.dnetlib.dhp.graph;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.*;
import java.util.Map; import java.util.Map;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Software;
public class GraphMappingUtils { public class GraphMappingUtils {
public final static Map<String, Class> types = Maps.newHashMap(); public final static Map<String, Class> types = Maps.newHashMap();
static { static {
types.put("datasource", Datasource.class); types.put("datasource", Datasource.class);
types.put("organization", Organization.class); types.put("organization", Organization.class);
types.put("project", Project.class); types.put("project", Project.class);
types.put("dataset", Dataset.class); types.put("dataset", Dataset.class);
types.put("otherresearchproduct", OtherResearchProduct.class); types.put("otherresearchproduct", OtherResearchProduct.class);
types.put("software", Software.class); types.put("software", Software.class);
types.put("publication", Publication.class); types.put("publication", Publication.class);
types.put("relation", Relation.class); types.put("relation", Relation.class);
} }
} }

View File

@ -1,7 +1,5 @@
package eu.dnetlib.dhp.graph; package eu.dnetlib.dhp.graph;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
@ -9,42 +7,47 @@ import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import scala.Tuple2; import scala.Tuple2;
public class SparkGraphImporterJob { public class SparkGraphImporterJob {
public static void main(String[] args) throws Exception { public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkGraphImporterJob.class.getResourceAsStream("/eu/dnetlib/dhp/graph/input_graph_parameters.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(
parser.parseArgument(args); IOUtils.toString(SparkGraphImporterJob.class.getResourceAsStream("/eu/dnetlib/dhp/graph/input_graph_parameters.json")));
final SparkSession spark = SparkSession parser.parseArgument(args);
.builder() final SparkSession spark = SparkSession
.appName(SparkGraphImporterJob.class.getSimpleName()) .builder()
.master(parser.get("master")) .appName(SparkGraphImporterJob.class.getSimpleName())
.config("hive.metastore.uris", parser.get("hive_metastore_uris")) .master(parser.get("master"))
.enableHiveSupport() .config("hive.metastore.uris", parser.get("hive_metastore_uris"))
.getOrCreate(); .enableHiveSupport()
.getOrCreate();
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
final String inputPath = parser.get("sourcePath"); final String inputPath = parser.get("sourcePath");
final String hiveDbName = parser.get("hive_db_name"); final String hiveDbName = parser.get("hive_db_name");
spark.sql(String.format("CREATE DATABASE IF NOT EXISTS %s", hiveDbName)); spark.sql(String.format("CREATE DATABASE IF NOT EXISTS %s", hiveDbName));
// Read the input file and convert it into RDD of serializable object // Read the input file and convert it into RDD of serializable object
GraphMappingUtils.types.forEach((name, clazz) -> { GraphMappingUtils.types.forEach((name, clazz) -> {
final JavaRDD<Tuple2<String, String>> inputRDD = sc.sequenceFile(inputPath + "/" + name, Text.class, Text.class) final JavaRDD<Tuple2<String, String>> inputRDD = sc.sequenceFile(inputPath + "/" + name, Text.class, Text.class)
.map(item -> new Tuple2<>(item._1.toString(), item._2.toString())); .map(item -> new Tuple2<>(item._1.toString(), item._2.toString()));
spark.createDataset(inputRDD spark.createDataset(inputRDD
.filter(s -> s._1().equals(clazz.getName())) .filter(s -> s._1().equals(clazz.getName()))
.map(Tuple2::_2) .map(Tuple2::_2)
.map(s -> new ObjectMapper().readValue(s, clazz)) .map(s -> new ObjectMapper().readValue(s, clazz))
.rdd(), Encoders.bean(clazz)) .rdd(), Encoders.bean(clazz))
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.saveAsTable(hiveDbName + "." + name); .saveAsTable(hiveDbName + "." + name);
}); });
} }
} }

759
pom.xml
View File

@ -1,426 +1,411 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> <project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId> <artifactId>dhp</artifactId>
<version>1.0.5-SNAPSHOT</version> <version>1.0.5-SNAPSHOT</version>
<packaging>pom</packaging> <packaging>pom</packaging>
<url>http://www.d-net.research-infrastructures.eu</url> <url>http://www.d-net.research-infrastructures.eu</url>
<licenses> <licenses>
<license> <license>
<name>The Apache Software License, Version 2.0</name> <name>The Apache Software License, Version 2.0</name>
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url> <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
<distribution>repo</distribution> <distribution>repo</distribution>
<comments>A business-friendly OSS license</comments> <comments>A business-friendly OSS license</comments>
</license> </license>
</licenses> </licenses>
<modules> <modules>
<module>dhp-build</module> <module>dhp-build</module>
<module>dhp-schemas</module> <module>dhp-schemas</module>
<module>dhp-common</module> <module>dhp-common</module>
<module>dhp-workflows</module> <module>dhp-workflows</module>
</modules> </modules>
<issueManagement> <issueManagement>
<system>Redmine</system> <system>Redmine</system>
<url>https://issue.openaire.research-infrastructures.eu/projects/openaire</url> <url>https://issue.openaire.research-infrastructures.eu/projects/openaire</url>
</issueManagement> </issueManagement>
<ciManagement> <ciManagement>
<system>jenkins</system> <system>jenkins</system>
<url>https://jenkins-dnet.d4science.org/</url> <url>https://jenkins-dnet.d4science.org/</url>
</ciManagement> </ciManagement>
<scm> <scm>
<connection>scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git</connection> <connection>scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git</connection>
<developerConnection>scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git</developerConnection> <developerConnection>scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git</developerConnection>
<url>https://code-repo.d4science.org/D-Net/dnet-hadoop/</url> <url>https://code-repo.d4science.org/D-Net/dnet-hadoop/</url>
<tag>HEAD</tag> <tag>HEAD</tag>
</scm> </scm>
<pluginRepositories> <pluginRepositories>
</pluginRepositories> </pluginRepositories>
<repositories> <repositories>
<repository> <repository>
<id>dnet45-releases</id> <id>dnet45-releases</id>
<name>D-Net 45 releases</name> <name>D-Net 45 releases</name>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url> <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
<layout>default</layout> <layout>default</layout>
<snapshots> <snapshots>
<enabled>false</enabled> <enabled>false</enabled>
</snapshots> </snapshots>
<releases> <releases>
<enabled>true</enabled> <enabled>true</enabled>
</releases> </releases>
</repository> </repository>
<repository> <repository>
<id>cloudera</id> <id>cloudera</id>
<name>Cloudera Repository</name> <name>Cloudera Repository</name>
<url>https://repository.cloudera.com/artifactory/cloudera-repos</url> <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
<releases> <releases>
<enabled>true</enabled> <enabled>true</enabled>
</releases> </releases>
<snapshots> <snapshots>
<enabled>false</enabled> <enabled>false</enabled>
</snapshots> </snapshots>
</repository> </repository>
</repositories> </repositories>
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>junit</groupId> <groupId>junit</groupId>
<artifactId>junit</artifactId> <artifactId>junit</artifactId>
<version>4.12</version> <version>4.12</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.mockito</groupId> <groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId> <artifactId>mockito-core</artifactId>
<version>2.7.22</version> <version>2.7.22</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
</dependencies> </dependencies>
<dependencyManagement> <dependencyManagement>
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>org.apache.hadoop</groupId> <groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId> <artifactId>hadoop-hdfs</artifactId>
<version>${dhp.hadoop.version}</version> <version>${dhp.hadoop.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.hadoop</groupId> <groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId> <artifactId>hadoop-client</artifactId>
<version>${dhp.hadoop.version}</version> <version>${dhp.hadoop.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId> <artifactId>spark-core_2.11</artifactId>
<version>${dhp.spark.version}</version> <version>${dhp.spark.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId> <artifactId>spark-sql_2.11</artifactId>
<version>${dhp.spark.version}</version> <version>${dhp.spark.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-graphx_2.11</artifactId> <artifactId>spark-graphx_2.11</artifactId>
<version>${dhp.spark.version}</version> <version>${dhp.spark.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.commons</groupId> <groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId> <artifactId>commons-lang3</artifactId>
<version>${dhp.commons.lang.version}</version> <version>${dhp.commons.lang.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>commons-codec</groupId> <groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId> <artifactId>commons-codec</artifactId>
<version>1.9</version> <version>1.9</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>commons-io</groupId> <groupId>commons-io</groupId>
<artifactId>commons-io</artifactId> <artifactId>commons-io</artifactId>
<version>2.4</version> <version>2.4</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>commons-cli</groupId> <groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId> <artifactId>commons-cli</artifactId>
<version>1.2</version> <version>1.2</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>net.sf.saxon</groupId> <groupId>net.sf.saxon</groupId>
<artifactId>Saxon-HE</artifactId> <artifactId>Saxon-HE</artifactId>
<version>9.5.1-5</version> <version>9.5.1-5</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>dom4j</groupId> <groupId>dom4j</groupId>
<artifactId>dom4j</artifactId> <artifactId>dom4j</artifactId>
<version>1.6.1</version> <version>1.6.1</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>xml-apis</groupId> <groupId>xml-apis</groupId>
<artifactId>xml-apis</artifactId> <artifactId>xml-apis</artifactId>
<version>1.4.01</version> <version>1.4.01</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>jaxen</groupId> <groupId>jaxen</groupId>
<artifactId>jaxen</artifactId> <artifactId>jaxen</artifactId>
<version>1.1.6</version> <version>1.1.6</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>net.schmizz</groupId> <groupId>net.schmizz</groupId>
<artifactId>sshj</artifactId> <artifactId>sshj</artifactId>
<version>0.10.0</version> <version>0.10.0</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.fasterxml.jackson.core</groupId> <groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId> <artifactId>jackson-core</artifactId>
<version>${dhp.jackson.version}</version> <version>${dhp.jackson.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.fasterxml.jackson.core</groupId> <groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId> <artifactId>jackson-annotations</artifactId>
<version>${dhp.jackson.version}</version> <version>${dhp.jackson.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.fasterxml.jackson.core</groupId> <groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId> <artifactId>jackson-databind</artifactId>
<version>${dhp.jackson.version}</version> <version>${dhp.jackson.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>eu.dnetlib</groupId> <groupId>eu.dnetlib</groupId>
<artifactId>dnet-pace-core</artifactId> <artifactId>dnet-pace-core</artifactId>
<version>4.0.0-SNAPSHOT</version> <version>4.0.0-SNAPSHOT</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>javax.persistence</groupId> <groupId>javax.persistence</groupId>
<artifactId>javax.persistence-api</artifactId> <artifactId>javax.persistence-api</artifactId>
<version>2.2</version> <version>2.2</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.rabbitmq</groupId> <groupId>com.rabbitmq</groupId>
<artifactId>amqp-client</artifactId> <artifactId>amqp-client</artifactId>
<version>5.6.0</version> <version>5.6.0</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.jayway.jsonpath</groupId> <groupId>com.jayway.jsonpath</groupId>
<artifactId>json-path</artifactId> <artifactId>json-path</artifactId>
<version>2.4.0</version> <version>2.4.0</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.arakelian</groupId> <groupId>com.arakelian</groupId>
<artifactId>java-jq</artifactId> <artifactId>java-jq</artifactId>
<version>0.10.1</version> <version>0.10.1</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>edu.cmu</groupId> <groupId>edu.cmu</groupId>
<artifactId>secondstring</artifactId> <artifactId>secondstring</artifactId>
<version>1.0.0</version> <version>1.0.0</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.oozie</groupId> <groupId>org.mongodb</groupId>
<artifactId>oozie-client</artifactId> <artifactId>mongo-java-driver</artifactId>
<version>${dhp.oozie.version}</version> <version>${mongodb.driver.version}</version>
<scope>provided</scope> </dependency>
<exclusions>
<!-- conflicts -->
<exclusion>
<artifactId>slf4j-simple</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</dependencyManagement>
<build> <dependency>
<directory>target</directory> <groupId>org.apache.oozie</groupId>
<outputDirectory>target/classes</outputDirectory> <artifactId>oozie-client</artifactId>
<finalName>${project.artifactId}-${project.version}</finalName> <version>${dhp.oozie.version}</version>
<testOutputDirectory>target/test-classes</testOutputDirectory> <scope>provided</scope>
<pluginManagement> <exclusions>
<plugins> <!-- conflicts -->
<plugin> <exclusion>
<groupId>org.apache.maven.plugins</groupId> <artifactId>slf4j-simple</artifactId>
<artifactId>maven-compiler-plugin</artifactId> <groupId>org.slf4j</groupId>
<version>${maven.compiler.plugin.version}</version> </exclusion>
<configuration> </exclusions>
<source>1.8</source> </dependency>
<target>1.8</target> </dependencies>
<encoding>${project.build.sourceEncoding}</encoding> </dependencyManagement>
</configuration>
</plugin>
<plugin> <build>
<groupId>org.apache.maven.plugins</groupId> <directory>target</directory>
<artifactId>maven-jar-plugin</artifactId> <outputDirectory>target/classes</outputDirectory>
<version>3.0.2</version> <finalName>${project.artifactId}-${project.version}</finalName>
</plugin> <testOutputDirectory>target/test-classes</testOutputDirectory>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>${maven.compiler.plugin.version}</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>${project.build.sourceEncoding}</encoding>
</configuration>
</plugin>
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId> <artifactId>maven-jar-plugin</artifactId>
<version>3.0.1</version> <version>3.0.2</version>
<executions> </plugin>
<execution>
<id>attach-sources</id>
<phase>verify</phase>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId> <artifactId>maven-source-plugin</artifactId>
<version>2.19.1</version> <version>3.0.1</version>
<configuration> <executions>
<redirectTestOutputToFile>true</redirectTestOutputToFile> <execution>
</configuration> <id>attach-sources</id>
</plugin> <phase>verify</phase>
<plugin> <goals>
<groupId>org.apache.maven.plugins</groupId> <goal>jar-no-fork</goal>
<artifactId>maven-javadoc-plugin</artifactId> </goals>
<version>2.10.4</version> </execution>
<configuration> </executions>
<detectLinks>true</detectLinks> </plugin>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<plugin> <plugin>
<groupId>org.codehaus.mojo</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>build-helper-maven-plugin</artifactId> <artifactId>maven-surefire-plugin</artifactId>
<version>1.12</version> <version>2.19.1</version>
</plugin> <configuration>
</plugins> <redirectTestOutputToFile>true</redirectTestOutputToFile>
</pluginManagement> </configuration>
<plugins> </plugin>
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-release-plugin</artifactId> <artifactId>maven-javadoc-plugin</artifactId>
<version>2.5.3</version> <version>2.10.4</version>
</plugin> <configuration>
<plugin> <detectLinks>true</detectLinks>
<groupId>org.jacoco</groupId> </configuration>
<artifactId>jacoco-maven-plugin</artifactId> </plugin>
<version>0.7.9</version> <plugin>
<configuration> <groupId>org.apache.maven.plugins</groupId>
<excludes> <artifactId>maven-dependency-plugin</artifactId>
<exclude>**/schemas/*</exclude> <version>3.0.0</version>
<exclude>**/com/cloudera/**/*</exclude> </plugin>
<exclude>**/org/apache/avro/io/**/*</exclude>
</excludes>
</configuration>
<executions>
<execution>
<id>default-prepare-agent</id>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
<execution>
<id>default-report</id>
<phase>prepare-package</phase>
<goals>
<goal>report</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>4.0.1</version>
<executions>
<execution>
<id>scala-compile-first</id>
<phase>initialize</phase>
<goals>
<goal>add-source</goal>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>scala-test-compile</id>
<phase>process-test-resources</phase>
<goals>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
<configuration>
<scalaVersion>${scala.version}</scalaVersion>
</configuration>
</plugin>
</plugins>
<extensions> <plugin>
<extension> <groupId>org.codehaus.mojo</groupId>
<groupId>org.apache.maven.wagon</groupId> <artifactId>build-helper-maven-plugin</artifactId>
<artifactId>wagon-ssh</artifactId> <version>1.12</version>
<version>2.10</version> </plugin>
</extension> </plugins>
</extensions> </pluginManagement>
</build> <plugins>
<distributionManagement> <plugin>
<snapshotRepository> <groupId>org.apache.maven.plugins</groupId>
<id>dnet45-snapshots</id> <artifactId>maven-release-plugin</artifactId>
<name>DNet45 Snapshots</name> <version>2.5.3</version>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url> </plugin>
<layout>default</layout> <plugin>
</snapshotRepository> <groupId>org.jacoco</groupId>
<repository> <artifactId>jacoco-maven-plugin</artifactId>
<id>dnet45-releases</id> <version>0.7.9</version>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url> <configuration>
</repository> <excludes>
</distributionManagement> <exclude>**/schemas/*</exclude>
<reporting> <exclude>**/com/cloudera/**/*</exclude>
<plugins> <exclude>**/org/apache/avro/io/**/*</exclude>
<plugin> </excludes>
<groupId>org.apache.maven.plugins</groupId> </configuration>
<artifactId>maven-javadoc-plugin</artifactId> <executions>
<version>2.10.4</version> <execution>
<configuration> <id>default-prepare-agent</id>
<detectLinks>true</detectLinks> <goals>
</configuration> <goal>prepare-agent</goal>
</plugin> </goals>
</plugins> </execution>
</reporting> <execution>
<id>default-report</id>
<phase>prepare-package</phase>
<goals>
<goal>report</goal>
</goals>
</execution>
</executions>
</plugin>
<properties> </plugins>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> <extensions>
<maven.compiler.plugin.version>3.6.0</maven.compiler.plugin.version> <extension>
<maven.failsave.plugin.version>2.22.2</maven.failsave.plugin.version> <groupId>org.apache.maven.wagon</groupId>
<dhp.cdh.version>cdh5.9.2</dhp.cdh.version> <artifactId>wagon-ssh</artifactId>
<dhp.hadoop.version>2.6.0-${dhp.cdh.version}</dhp.hadoop.version> <version>2.10</version>
<dhp.oozie.version>4.1.0-${dhp.cdh.version}</dhp.oozie.version> </extension>
<dhp.spark.version>2.4.0.cloudera2</dhp.spark.version> </extensions>
<dhp.jackson.version>2.9.6</dhp.jackson.version> </build>
<dhp.commons.lang.version>3.5</dhp.commons.lang.version> <distributionManagement>
<scala.version>2.11.12</scala.version> <snapshotRepository>
</properties> <id>dnet45-snapshots</id>
<name>DNet45 Snapshots</name>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url>
<layout>default</layout>
</snapshotRepository>
<repository>
<id>dnet45-releases</id>
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
</repository>
</distributionManagement>
<reporting>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.4</version>
<configuration>
<detectLinks>true</detectLinks>
</configuration>
</plugin>
</plugins>
</reporting>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<maven.compiler.plugin.version>3.6.0</maven.compiler.plugin.version>
<maven.failsave.plugin.version>2.22.2</maven.failsave.plugin.version>
<dhp.cdh.version>cdh5.9.2</dhp.cdh.version>
<dhp.hadoop.version>2.6.0-${dhp.cdh.version}</dhp.hadoop.version>
<dhp.oozie.version>4.1.0-${dhp.cdh.version}</dhp.oozie.version>
<dhp.spark.version>2.4.0.cloudera2</dhp.spark.version>
<dhp.jackson.version>2.9.6</dhp.jackson.version>
<dhp.commons.lang.version>3.5</dhp.commons.lang.version>
<scala.version>2.11.12</scala.version>
<mongodb.driver.version>3.4.2</mongodb.driver.version>
</properties>
</project> </project>