master #2

Merged
sandro.labruzzo merged 16 commits from michele.artini/dnet-hadoop:master into master 2020-02-17 10:43:09 +01:00
7 changed files with 702 additions and 538 deletions
Showing only changes of commit b35c59eb42 - Show all commits

View File

@ -76,6 +76,41 @@
</configuration> </configuration>
</plugin> </plugin>
</plugins> </plugins>
<pluginManagement>
<plugins>
<!--This plugin's configuration is used to store Eclipse m2e settings only. It has no influence on the Maven build itself.-->
<plugin>
<groupId>org.eclipse.m2e</groupId>
<artifactId>lifecycle-mapping</artifactId>
<version>1.0.0</version>
<configuration>
<lifecycleMappingMetadata>
<pluginExecutions>
<pluginExecution>
<pluginExecutionFilter>
<groupId>
org.apache.maven.plugins
</groupId>
<artifactId>
maven-plugin-plugin
</artifactId>
<versionRange>
[3.2,)
</versionRange>
<goals>
<goal>descriptor</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore></ignore>
</action>
</pluginExecution>
</pluginExecutions>
</lifecycleMappingMetadata>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build> </build>
</project> </project>

View File

@ -5,6 +5,11 @@ import java.util.List;
public class Relation implements Serializable { public class Relation implements Serializable {
/**
*
*/
private static final long serialVersionUID = -9103706796710618813L;
private String source; private String source;
private String target; private String target;
@ -17,7 +22,7 @@ public class Relation implements Serializable {
return source; return source;
} }
public void setSource(String source) { public void setSource(final String source) {
this.source = source; this.source = source;
} }
@ -25,7 +30,7 @@ public class Relation implements Serializable {
return target; return target;
} }
public void setTarget(String target) { public void setTarget(final String target) {
this.target = target; this.target = target;
} }
@ -33,7 +38,7 @@ public class Relation implements Serializable {
return provenance; return provenance;
} }
public void setProvenance(List<Provenance> provenance) { public void setProvenance(final List<Provenance> provenance) {
this.provenance = provenance; this.provenance = provenance;
} }
@ -41,7 +46,7 @@ public class Relation implements Serializable {
return semantic; return semantic;
} }
public void setSemantic(RelationSemantic semantic) { public void setSemantic(final RelationSemantic semantic) {
this.semantic = semantic; this.semantic = semantic;
} }
} }

View File

@ -2,11 +2,17 @@ package eu.dnetlib.dhp.migration;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.sql.Array;
import java.sql.ResultSet; import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.function.Consumer; import java.util.function.Consumer;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -15,14 +21,21 @@ import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class MigrateDbEntitiesApplication extends AbstractMigrateApplication implements Closeable { public class MigrateDbEntitiesApplication extends AbstractMigrateApplication implements Closeable {
private static final Qualifier ENTITYREGISTRY_PROVENANCE_ACTION = MigrationUtils
.qualifier("sysimport:crosswalk:entityregistry", "sysimport:crosswalk:entityregistry", "dnet:provenance_actions", "dnet:provenance_actions");
private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class); private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class);
private final DbClient dbClient; private final DbClient dbClient;
private final long lastUpdateTimestamp;
public static void main(final String[] args) throws Exception { public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(MigrateDbEntitiesApplication.class.getResourceAsStream("/eu/dnetlib/dhp/migration/migrate_db_entities_parameters.json"))); IOUtils.toString(MigrateDbEntitiesApplication.class.getResourceAsStream("/eu/dnetlib/dhp/migration/migrate_db_entities_parameters.json")));
@ -51,6 +64,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
final String dbPassword) throws Exception { final String dbPassword) throws Exception {
super(hdfsPath, hdfsNameNode, hdfsUser); super(hdfsPath, hdfsNameNode, hdfsUser);
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
this.lastUpdateTimestamp = new Date().getTime();
} }
public void execute(final String sqlFile, final Consumer<ResultSet> consumer) throws Exception { public void execute(final String sqlFile, final Consumer<ResultSet> consumer) throws Exception {
@ -61,7 +75,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
public void processDatasource(final ResultSet rs) { public void processDatasource(final ResultSet rs) {
try { try {
final DataInfo info = MigrationUtils.dataInfo(null, null, null, null, null, null); // TODO final DataInfo info = prepareDataInfo(rs);
final Datasource ds = new Datasource(); final Datasource ds = new Datasource();
@ -74,8 +88,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
ds.setExtraInfo(null); // TODO ds.setExtraInfo(null); // TODO
ds.setOaiprovenance(null); // TODO ds.setOaiprovenance(null); // TODO
ds.setDatasourcetype(null); // Qualifier datasourcetype) { ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
ds.setOpenairecompatibility(null); // Qualifier openairecompatibility) { ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
ds.setOfficialname(MigrationUtils.field(rs.getString("officialname"), info)); ds.setOfficialname(MigrationUtils.field(rs.getString("officialname"), info));
ds.setEnglishname(MigrationUtils.field(rs.getString("englishname"), info)); ds.setEnglishname(MigrationUtils.field(rs.getString("englishname"), info));
ds.setWebsiteurl(MigrationUtils.field(rs.getString("websiteurl"), info)); ds.setWebsiteurl(MigrationUtils.field(rs.getString("websiteurl"), info));
@ -86,7 +100,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
ds.setLongitude(MigrationUtils.field(Double.toString(rs.getDouble("longitude")), info)); ds.setLongitude(MigrationUtils.field(Double.toString(rs.getDouble("longitude")), info));
ds.setDateofvalidation(MigrationUtils.field(rs.getDate("dateofvalidation").toString(), info)); ds.setDateofvalidation(MigrationUtils.field(rs.getDate("dateofvalidation").toString(), info));
ds.setDescription(MigrationUtils.field(rs.getString("description"), info)); ds.setDescription(MigrationUtils.field(rs.getString("description"), info));
ds.setSubjects(null); // List<StructuredProperty> subjects) { ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
ds.setOdnumberofitems(MigrationUtils.field(Double.toString(rs.getInt("odnumberofitems")), info)); ds.setOdnumberofitems(MigrationUtils.field(Double.toString(rs.getInt("odnumberofitems")), info));
ds.setOdnumberofitemsdate(MigrationUtils.field(rs.getDate("odnumberofitemsdate").toString(), info)); ds.setOdnumberofitemsdate(MigrationUtils.field(rs.getDate("odnumberofitemsdate").toString(), info));
ds.setOdpolicies(MigrationUtils.field(rs.getString("odpolicies"), info)); ds.setOdpolicies(MigrationUtils.field(rs.getString("odpolicies"), info));
@ -110,12 +124,15 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
ds.setPolicies(null); // List<KeyValue> // TODO ds.setPolicies(null); // List<KeyValue> // TODO
ds.setJournal(null); // Journal // TODO ds.setJournal(null); // Journal // TODO
ds.setDataInfo(info);
ds.setLastupdatetimestamp(lastUpdateTimestamp);
// rs.getString("datasourceid"); // rs.getString("datasourceid");
rs.getArray("identities"); rs.getArray("identities");
// rs.getString("officialname"); // rs.getString("officialname");
// rs.getString("englishname"); // rs.getString("englishname");
// rs.getString("contactemail"); // rs.getString("contactemail");
rs.getString("openairecompatibility"); // COMPLEX ...@@@... // rs.getString("openairecompatibility"); // COMPLEX ...@@@...
// rs.getString("websiteurl"); // rs.getString("websiteurl");
// rs.getString("logourl"); // rs.getString("logourl");
// rs.getArray("accessinfopackage"); // rs.getArray("accessinfopackage");
@ -124,15 +141,15 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
// rs.getString("namespaceprefix"); // rs.getString("namespaceprefix");
// rs.getInt("odnumberofitems"); // NULL // rs.getInt("odnumberofitems"); // NULL
// rs.getDate("odnumberofitemsdate"); // NULL // rs.getDate("odnumberofitemsdate"); // NULL
rs.getArray("subjects"); // rs.getArray("subjects");
// rs.getString("description"); // rs.getString("description");
// rs.getString("odpolicies"); // NULL // rs.getString("odpolicies"); // NULL
// rs.getArray("odlanguages"); // rs.getArray("odlanguages");
// rs.getArray("odcontenttypes"); // rs.getArray("odcontenttypes");
rs.getBoolean("inferred"); // false // rs.getBoolean("inferred"); // false
rs.getBoolean("deletedbyinference");// false // rs.getBoolean("deletedbyinference");// false
rs.getDouble("trust"); // 0.9 // rs.getDouble("trust"); // 0.9
rs.getString("inferenceprovenance"); // NULL // rs.getString("inferenceprovenance"); // NULL
// rs.getDate("dateofcollection"); // rs.getDate("dateofcollection");
// rs.getDate("dateofvalidation"); // rs.getDate("dateofvalidation");
// rs.getDate("releasestartdate"); // rs.getDate("releasestartdate");
@ -152,21 +169,22 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
rs.getArray("policies"); rs.getArray("policies");
// rs.getString("collectedfromid"); // rs.getString("collectedfromid");
// rs.getString("collectedfromname"); // rs.getString("collectedfromname");
rs.getString("datasourcetype"); // COMPLEX XXX@@@@.... // rs.getString("datasourcetype"); // COMPLEX XXX@@@@....
rs.getString("provenanceaction"); // 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' // rs.getString("provenanceaction"); //
// 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions'
// AS provenanceaction, // AS provenanceaction,
rs.getString("journal"); // CONCAT(d.issn, '@@@', d.eissn, '@@@', d.lissn) AS journal rs.getString("journal"); // CONCAT(d.issn, '@@@', d.eissn, '@@@', d.lissn) AS journal
emitOaf(ds); emitOaf(ds);
} catch (final Exception e) { } catch (final Exception e) {
// TODO: handle exception throw new RuntimeException(e);
} }
} }
public void processProject(final ResultSet rs) { public void processProject(final ResultSet rs) {
try { try {
final DataInfo info = MigrationUtils.dataInfo(null, null, null, null, null, null); // TODO final DataInfo info = prepareDataInfo(rs);
final Project p = new Project(); final Project p = new Project();
@ -192,9 +210,9 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
p.setEcsc39(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecsc39")), info)); p.setEcsc39(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecsc39")), info));
p.setOamandatepublications(MigrationUtils.field(Boolean.toString(rs.getBoolean("oamandatepublications")), info)); p.setOamandatepublications(MigrationUtils.field(Boolean.toString(rs.getBoolean("oamandatepublications")), info));
p.setEcarticle29_3(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info)); p.setEcarticle29_3(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info));
p.setSubjects(null); // List<StructuredProperty> //TODO p.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
p.setFundingtree(null); // List<Field<String>> //TODO p.setFundingtree(null); // List<Field<String>> //TODO
p.setContracttype(null); // Qualifier //TODO p.setContracttype(prepareQualifierSplitting(rs.getString("contracttype")));
p.setOptional1(MigrationUtils.field(rs.getString("optional1"), info)); p.setOptional1(MigrationUtils.field(rs.getString("optional1"), info));
p.setOptional2(MigrationUtils.field(rs.getString("optional2"), info)); p.setOptional2(MigrationUtils.field(rs.getString("optional2"), info));
p.setJsonextrainfo(MigrationUtils.field(rs.getString("jsonextrainfo"), info)); p.setJsonextrainfo(MigrationUtils.field(rs.getString("jsonextrainfo"), info));
@ -207,6 +225,9 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
p.setTotalcost(new Float(rs.getDouble("totalcost"))); p.setTotalcost(new Float(rs.getDouble("totalcost")));
p.setFundedamount(new Float(rs.getDouble("fundedamount"))); p.setFundedamount(new Float(rs.getDouble("fundedamount")));
p.setDataInfo(info);
p.setLastupdatetimestamp(lastUpdateTimestamp);
// rs.getString("projectid"); // rs.getString("projectid");
// rs.getString("code"); // rs.getString("code");
// rs.getString("websiteurl"); // rs.getString("websiteurl");
@ -222,13 +243,13 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
// rs.getBoolean("ecarticle29_3"); // rs.getBoolean("ecarticle29_3");
// rs.getDate("dateofcollection"); // rs.getDate("dateofcollection");
// rs.getDate("dateoftransformation"); // rs.getDate("dateoftransformation");
rs.getBoolean("inferred"); // rs.getBoolean("inferred");
rs.getBoolean("deletedbyinference"); // rs.getBoolean("deletedbyinference");
rs.getDouble("trust"); // rs.getDouble("trust");
rs.getString("inferenceprovenance"); // rs.getString("inferenceprovenance");
// rs.getString("optional1"); // rs.getString("optional1");
// rs.getString("optional2"); // rs.getString("optional2");
rs.getString("jsonextrainfo"); // rs.getString("jsonextrainfo");
// rs.getString("contactfullname"); // rs.getString("contactfullname");
// rs.getString("contactfax"); // rs.getString("contactfax");
// rs.getString("contactphone"); // rs.getString("contactphone");
@ -248,14 +269,14 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
emitOaf(p); emitOaf(p);
} catch (final Exception e) { } catch (final Exception e) {
// TODO: handle exception throw new RuntimeException(e);
} }
} }
public void processOrganization(final ResultSet rs) { public void processOrganization(final ResultSet rs) {
try { try {
final DataInfo info = MigrationUtils.dataInfo(null, null, null, null, null, null); // TODO final DataInfo info = prepareDataInfo(rs);
final Organization o = new Organization(); final Organization o = new Organization();
@ -269,7 +290,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
o.setOaiprovenance(null); // OAIProvenance // TODO o.setOaiprovenance(null); // OAIProvenance // TODO
o.setLegalshortname(MigrationUtils.field("legalshortname", info)); o.setLegalshortname(MigrationUtils.field("legalshortname", info));
o.setLegalname(MigrationUtils.field("legalname", info)); o.setLegalname(MigrationUtils.field("legalname", info));
o.setAlternativeNames(null); // List<Field<String>> //TODO o.setAlternativeNames(new ArrayList<>());
o.setWebsiteurl(MigrationUtils.field("websiteurl", info)); o.setWebsiteurl(MigrationUtils.field("websiteurl", info));
o.setLogourl(MigrationUtils.field("logourl", info)); o.setLogourl(MigrationUtils.field("logourl", info));
o.setEclegalbody(MigrationUtils.field(Boolean.toString(rs.getBoolean("eclegalbody")), info)); o.setEclegalbody(MigrationUtils.field(Boolean.toString(rs.getBoolean("eclegalbody")), info));
@ -283,7 +304,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
o.setEcenterprise(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecenterprise")), info)); o.setEcenterprise(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecenterprise")), info));
o.setEcsmevalidated(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info)); o.setEcsmevalidated(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info));
o.setEcnutscode(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecnutscode")), info)); o.setEcnutscode(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecnutscode")), info));
o.setCountry(null); // Qualifier country) { o.setCountry(prepareQualifierSplitting(rs.getString("country")));
o.setDataInfo(info);
o.setLastupdatetimestamp(lastUpdateTimestamp);
// rs.getString("organizationid"); // rs.getString("organizationid");
// rs.getString("legalshortname"); // rs.getString("legalshortname");
@ -300,87 +324,160 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
// rs.getBoolean("ecenterprise"); // rs.getBoolean("ecenterprise");
// rs.getBoolean("ecsmevalidated"); // rs.getBoolean("ecsmevalidated");
// rs.getBoolean("ecnutscode"); // rs.getBoolean("ecnutscode");
rs.getDate("dateofcollection"); // rs.getDate("dateofcollection");
rs.getDate("dateoftransformation"); // rs.getDate("dateoftransformation");
rs.getBoolean("inferred"); // rs.getBoolean("inferred");
rs.getBoolean("deletedbyinference"); // rs.getBoolean("deletedbyinference");
rs.getDouble("trust"); // rs.getDouble("trust");
rs.getString("inferenceprovenance"); // rs.getString("inferenceprovenance");
// rs.getString("collectedfromid"); // rs.getString("collectedfromid");
// rs.getString("collectedfromname"); // rs.getString("collectedfromname");
rs.getString("country"); // rs.getString("country");
rs.getString("provenanceaction"); rs.getString("provenanceaction");
rs.getArray("pid"); rs.getArray("pid");
emitOaf(o); emitOaf(o);
} catch (final Exception e) { } catch (final Exception e) {
// TODO: handle exception throw new RuntimeException(e);
} }
} }
public void processDatasourceOrganization(final ResultSet rs) { public void processDatasourceOrganization(final ResultSet rs) {
try { try {
final Relation r = new Relation(); final DataInfo info = prepareDataInfo(rs);
final String orgId = MigrationUtils.createOpenaireId("20", rs.getString("organization"));
final String dsId = MigrationUtils.createOpenaireId("10", rs.getString("datasource"));
r.setRelType(null); // TODO final Relation r1 = new Relation();
r.setSubRelType(null); // TODO r1.setRelType("datasourceOrganization");
r.setRelClass(null); // TODO r1.setSubRelType("provision");
r.setSource(null); // TODO r1.setRelClass("isProvidedBy");
r.setTarget(null); // TODO r1.setSource(dsId);
r.setCollectedFrom(MigrationUtils.listKeyValues("", "")); r1.setTarget(orgId);
r1.setCollectedFrom(null);// TODO
r1.setDataInfo(info);
r1.setLastupdatetimestamp(lastUpdateTimestamp);
emitOaf(r1);
rs.getString("datasource"); final Relation r2 = new Relation();
rs.getString("organization"); r2.setRelType("datasourceOrganization");
rs.getDate("startdate"); // NULL r2.setSubRelType("provision");
rs.getDate("enddate"); // NULL r2.setRelClass("provides");
rs.getBoolean("inferred"); // false r2.setSource(orgId);
rs.getBoolean("deletedbyinference"); // false r2.setTarget(dsId);
rs.getDouble("trust"); // 0.9 r2.setCollectedFrom(null); // TODO
rs.getString("inferenceprovenance"); // NULL r2.setDataInfo(info);
rs.getString("semantics"); // 'providedBy@@@provided r1.setLastupdatetimestamp(lastUpdateTimestamp);
emitOaf(r2);
// rs.getString("datasource");
// rs.getString("organization");
// rs.getDate("startdate"); // NULL
// rs.getDate("enddate"); // NULL
// rs.getBoolean("inferred"); // false
// rs.getBoolean("deletedbyinference"); // false
// rs.getDouble("trust"); // 0.9
// rs.getString("inferenceprovenance"); // NULL
// rs.getString("semantics"); // 'providedBy@@@provided
// by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies' AS // by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies' AS
// semantics, // semantics,
rs.getString("provenanceaction"); // d.provenanceaction || '@@@' || d.provenanceaction || // rs.getString("provenanceaction"); // d.provenanceaction || '@@@' || d.provenanceaction ||
// '@@@dnet:provenanceActions@@@dnet:provenanceActions' AS provenanceaction // '@@@dnet:provenanceActions@@@dnet:provenanceActions' AS provenanceaction
emitOaf(r);
} catch (final Exception e) { } catch (final Exception e) {
// TODO: handle exception throw new RuntimeException(e);
} }
} }
public void processProjectOrganization(final ResultSet rs) { public void processProjectOrganization(final ResultSet rs) {
try { try {
final Relation r = new Relation(); final DataInfo info = prepareDataInfo(rs);
final String orgId = MigrationUtils.createOpenaireId("20", rs.getString("resporganization"));
final String projectId = MigrationUtils.createOpenaireId("40", rs.getString("project"));
r.setRelType(null); // TODO final Relation r1 = new Relation();
r.setSubRelType(null); // TODO r1.setRelType("projectOrganization");
r.setRelClass(null); // TODO r1.setSubRelType("participation");
r.setSource(null); // TODO r1.setRelClass("isParticipant");
r.setTarget(null); // TODO r1.setSource(projectId);
r.setCollectedFrom(null); r1.setTarget(orgId);
r1.setCollectedFrom(null);// TODO
r1.setDataInfo(info);
r1.setLastupdatetimestamp(lastUpdateTimestamp);
emitOaf(r1);
rs.getString("project"); final Relation r2 = new Relation();
rs.getString("resporganization"); r2.setRelType("projectOrganization");
rs.getInt("participantnumber"); r2.setSubRelType("participation");
rs.getDouble("contribution"); r2.setRelClass("hasParticipant");
rs.getDate("startdate");// null r2.setSource(orgId);
rs.getDate("enddate");// null r2.setTarget(projectId);
rs.getBoolean("inferred");// false r2.setCollectedFrom(null); // TODO
rs.getBoolean("deletedbyinference"); // false r2.setDataInfo(info);
rs.getDouble("trust"); r1.setLastupdatetimestamp(lastUpdateTimestamp);
rs.getString("inferenceprovenance"); // NULL emitOaf(r2);
rs.getString("semantics"); // po.semanticclass || '@@@' || po.semanticclass ||
// rs.getString("project");
// rs.getString("resporganization");
// rs.getInt("participantnumber");
// rs.getDouble("contribution");
// rs.getDate("startdate");// null
// rs.getDate("enddate");// null
// rs.getBoolean("inferred");// false
// rs.getBoolean("deletedbyinference"); // false
// rs.getDouble("trust");
// rs.getString("inferenceprovenance"); // NULL
// rs.getString("semantics"); // po.semanticclass || '@@@' || po.semanticclass ||
// '@@@dnet:project_organization_relations@@@dnet:project_organization_relations' AS semantics, // '@@@dnet:project_organization_relations@@@dnet:project_organization_relations' AS semantics,
rs.getString("provenanceaction"); // 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' // rs.getString("provenanceaction"); //
// 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions'
// AS provenanceaction // AS provenanceaction
emitOaf(r);
} catch (final Exception e) { } catch (final Exception e) {
// TODO: handle exception throw new RuntimeException(e);
} }
} }
/**
 * Builds the {@link DataInfo} for the current row from its inference-related columns.
 *
 * <p>Reads {@code deletedbyinference}, {@code inferenceprovenance}, {@code inferred} and
 * {@code trust} (the latter as a string) and combines them with the fixed entity-registry
 * provenance action.
 *
 * @param rs result set positioned on the row to read
 * @return the data-info object produced by {@code MigrationUtils.dataInfo}
 * @throws SQLException if any of the columns cannot be read
 */
private DataInfo prepareDataInfo(final ResultSet rs) throws SQLException {
	// Columns are read in the same order as before, in case the driver is order-sensitive.
	final Boolean deletedFlag = rs.getBoolean("deletedbyinference");
	final String provenanceOfInference = rs.getString("inferenceprovenance");
	final Boolean inferredFlag = rs.getBoolean("inferred");
	final String trustValue = rs.getString("trust");

	return MigrationUtils.dataInfo(
			deletedFlag,
			provenanceOfInference,
			inferredFlag,
			false, // NOTE(review): meaning of this fourth flag is defined by MigrationUtils.dataInfo - confirm
			ENTITYREGISTRY_PROVENANCE_ACTION,
			trustValue);
}
/**
 * Parses a {@code "@@@"}-separated string into a {@link Qualifier}.
 *
 * @param s the encoded qualifier; may be {@code null} or blank
 * @return the qualifier built from the four tokens, or {@code null} when the input is blank
 *         or does not split into exactly four parts
 */
private Qualifier prepareQualifierSplitting(final String s) {
	if (StringUtils.isBlank(s)) {
		return null;
	}

	final String[] tokens = s.split("@@@");
	if (tokens.length != 4) {
		return null;
	}

	return MigrationUtils.qualifier(tokens[0], tokens[1], tokens[2], tokens[3]);
}
/**
 * Parses a string of the form {@code value###a@@@b@@@c@@@d} into a {@link StructuredProperty}.
 *
 * @param s the encoded property; may be {@code null} or blank
 * @param dataInfo the data-info passed through to the created property
 * @return the structured property, or {@code null} when the input is blank, does not split
 *         into exactly two parts on {@code "###"}, or its second part does not split into
 *         exactly four parts on {@code "@@@"}
 */
private StructuredProperty prepareStructProp(final String s, final DataInfo dataInfo) {
	if (StringUtils.isBlank(s)) {
		return null;
	}

	final String[] valueAndQualifier = s.split("###");
	if (valueAndQualifier.length != 2) {
		return null;
	}

	final String[] qualifierTokens = valueAndQualifier[1].split("@@@");
	if (qualifierTokens.length != 4) {
		return null;
	}

	return MigrationUtils.structuredProperty(
			valueAndQualifier[0],
			qualifierTokens[0],
			qualifierTokens[1],
			qualifierTokens[2],
			qualifierTokens[3],
			dataInfo);
}
/**
 * Converts an SQL array of encoded strings into a list of {@link StructuredProperty}.
 *
 * <p>Each element is parsed with {@code prepareStructProp}; elements that yield {@code null}
 * are left out of the result.
 *
 * @param array the SQL array to convert; may be {@code null}. Its content is cast to
 *        {@code String[]}, so it is assumed to hold strings
 * @param dataInfo the data-info attached to every created property
 * @return a new mutable list, empty when {@code array} is {@code null} or nothing could be parsed
 * @throws SQLException if the array content cannot be retrieved
 */
private List<StructuredProperty> prepareListOfStructProps(final Array array, final DataInfo dataInfo) throws SQLException {
	final List<StructuredProperty> properties = new ArrayList<>();
	if (array == null) {
		return properties;
	}

	final String[] encoded = (String[]) array.getArray();
	for (int i = 0; i < encoded.length; i++) {
		final StructuredProperty parsed = prepareStructProp(encoded[i], dataInfo);
		if (parsed == null) {
			continue;
		}
		properties.add(parsed);
	}
	return properties;
}
@Override @Override
public void close() throws IOException { public void close() throws IOException {
super.close(); super.close();

View File

@ -10,6 +10,37 @@
<artifactId>dhp-dedup</artifactId> <artifactId>dhp-dedup</artifactId>
<build>
<plugins>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>4.0.1</version>
<executions>
<execution>
<id>scala-compile-first</id>
<phase>initialize</phase>
<goals>
<goal>add-source</goal>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>scala-test-compile</id>
<phase>process-test-resources</phase>
<goals>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
<configuration>
<scalaVersion>${scala.version}</scalaVersion>
</configuration>
</plugin>
</plugins>
</build>
<dependencies> <dependencies>
<dependency> <dependency>

View File

@ -1,10 +1,18 @@
package eu.dnetlib.dhp.graph; package eu.dnetlib.dhp.graph;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.*;
import java.util.Map; import java.util.Map;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Software;
public class GraphMappingUtils { public class GraphMappingUtils {
public final static Map<String, Class> types = Maps.newHashMap(); public final static Map<String, Class> types = Maps.newHashMap();

View File

@ -1,7 +1,5 @@
package eu.dnetlib.dhp.graph; package eu.dnetlib.dhp.graph;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
@ -9,13 +7,18 @@ import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import scala.Tuple2; import scala.Tuple2;
public class SparkGraphImporterJob { public class SparkGraphImporterJob {
public static void main(String[] args) throws Exception { public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkGraphImporterJob.class.getResourceAsStream("/eu/dnetlib/dhp/graph/input_graph_parameters.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(SparkGraphImporterJob.class.getResourceAsStream("/eu/dnetlib/dhp/graph/input_graph_parameters.json")));
parser.parseArgument(args); parser.parseArgument(args);
final SparkSession spark = SparkSession final SparkSession spark = SparkSession
.builder() .builder()

37
pom.xml
View File

@ -1,4 +1,6 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> <project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
@ -232,6 +234,12 @@
<version>1.0.0</version> <version>1.0.0</version>
</dependency> </dependency>
<dependency>
<groupId>org.mongodb</groupId>
<artifactId>mongo-java-driver</artifactId>
<version>${mongodb.driver.version}</version>
</dependency>
<dependency> <dependency>
<groupId>org.apache.oozie</groupId> <groupId>org.apache.oozie</groupId>
<artifactId>oozie-client</artifactId> <artifactId>oozie-client</artifactId>
@ -349,31 +357,7 @@
</execution> </execution>
</executions> </executions>
</plugin> </plugin>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>4.0.1</version>
<executions>
<execution>
<id>scala-compile-first</id>
<phase>initialize</phase>
<goals>
<goal>add-source</goal>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>scala-test-compile</id>
<phase>process-test-resources</phase>
<goals>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
<configuration>
<scalaVersion>${scala.version}</scalaVersion>
</configuration>
</plugin>
</plugins> </plugins>
<extensions> <extensions>
@ -421,6 +405,7 @@
<dhp.jackson.version>2.9.6</dhp.jackson.version> <dhp.jackson.version>2.9.6</dhp.jackson.version>
<dhp.commons.lang.version>3.5</dhp.commons.lang.version> <dhp.commons.lang.version>3.5</dhp.commons.lang.version>
<scala.version>2.11.12</scala.version> <scala.version>2.11.12</scala.version>
<mongodb.driver.version>3.4.2</mongodb.driver.version>
</properties> </properties>
</project> </project>