dump #50

Merged
claudio.atzori merged 98 commits from miriam.baglioni/dnet-hadoop:dump into master 2020-11-04 18:07:01 +01:00
4 changed files with 11 additions and 13 deletions
Showing only changes of commit dc5096a327 - Show all commits

View File

@ -3,11 +3,13 @@ package eu.dnetlib.dhp.oa.graph.dump;
import java.io.Serializable; import java.io.Serializable;
import java.util.Optional; import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*; import org.apache.hadoop.fs.*;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.api.MissingConceptDoiException; import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
import eu.dnetlib.dhp.common.api.ZenodoAPIClient; import eu.dnetlib.dhp.common.api.ZenodoAPIClient;

View File

@ -23,13 +23,11 @@ import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
/** /**
* Creates new Relations (as in eu.dnetlib.dhp.schema.dump.oaf.graph.Relation) from the information in the Entity. * Creates new Relations (as in eu.dnetlib.dhp.schema.dump.oaf.graph.Relation) from the information in the Entity. The
* The new Relations are created for the datasource in the collectedfrom and hostedby elements and for the context * new Relations are created for the datasource in the collectedfrom and hostedby elements and for the context related
* related to communities and research initiatives/infrastructures. * to communities and research initiatives/infrastructures. For collectedfrom elements it creates: datasource -> provides
* * -> result and result -> isProvidedBy -> datasource For hostedby elements it creates: datasource -> hosts -> result
* For collectedfrom elements it creates: datasource -> provides -> result and result -> isProvidedBy -> datasource * and result -> isHostedBy -> datasource For context elements it creates: context <-> isRelatedTo <-> result
* For hostedby elements it creates: datasource -> hosts -> result and result -> isHostedBy -> datasource
* For context elements it creates: context <-> isRelatedTo <-> result
*/ */
public class Extractor implements Serializable { public class Extractor implements Serializable {

View File

@ -16,9 +16,8 @@ import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
import eu.dnetlib.dhp.schema.dump.oaf.graph.*; import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
/** /**
* It processes the ContextInfo information to produce a new Context Entity or a set of Relations between the * It processes the ContextInfo information to produce a new Context Entity or a set of Relations between the generic
* generic context entity and datasource/projects related to the context. * context entity and datasource/projects related to the context.
*
*/ */
public class Process implements Serializable { public class Process implements Serializable {
private static final Logger log = LoggerFactory.getLogger(Process.class); private static final Logger log = LoggerFactory.getLogger(Process.class);

View File

@ -1,5 +1,4 @@
package eu.dnetlib.dhp.oa.graph.dump.graph; package eu.dnetlib.dhp.oa.graph.dump.graph;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
@ -28,8 +27,8 @@ import eu.dnetlib.dhp.schema.dump.oaf.graph.RelType;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
/** /**
* Create new Relations between Context Entities and Organizations whose products are associated to the context. * Create new Relations between Context Entities and Organizations whose products are associated to the context. It
* It produces relations such as: organization <-> isRelatedTo <-> context * produces relations such as: organization <-> isRelatedTo <-> context
*/ */
public class SparkOrganizationRelation implements Serializable { public class SparkOrganizationRelation implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkOrganizationRelation.class); private static final Logger log = LoggerFactory.getLogger(SparkOrganizationRelation.class);