stats
This commit is contained in:
parent
d2b7541583
commit
f8cf7ffbcb
|
@ -4,9 +4,9 @@ package eu.dnetlib.dhp.collection.plugin.base;
|
|||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Map;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
@ -26,7 +26,6 @@ import org.apache.spark.api.java.JavaSparkContext;
|
|||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.dom4j.Attribute;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.DocumentHelper;
|
||||
|
@ -166,7 +165,7 @@ public class BaseAnalyzerJob {
|
|||
|
||||
final Set<String> paths = new LinkedHashSet<>();
|
||||
final Set<String> types = new LinkedHashSet<>();
|
||||
final Map<String, Map<String, String>> colls = new HashMap<>();
|
||||
final List<BaseCollectionInfo> colls = new ArrayList<>();
|
||||
|
||||
for (final Object o : record.selectNodes("//*|//@*")) {
|
||||
paths.add(((Node) o).getPath());
|
||||
|
@ -178,12 +177,13 @@ public class BaseAnalyzerJob {
|
|||
|
||||
if ("collection".equals(nodeName)) {
|
||||
final String collName = n.getText().trim();
|
||||
|
||||
if (StringUtils.isNotBlank(collName)) {
|
||||
final Map<String, String> attrs = new HashMap<>();
|
||||
for (final Object ao : n.attributes()) {
|
||||
attrs.put(((Attribute) ao).getName(), ((Attribute) ao).getValue());
|
||||
}
|
||||
colls.put(collName, attrs);
|
||||
final BaseCollectionInfo coll = new BaseCollectionInfo();
|
||||
coll.setId(collName);
|
||||
coll.setOpendoarId(n.valueOf("@opendoar_id").trim());
|
||||
coll.setRorId(n.valueOf("@ror_id").trim());
|
||||
colls.add(coll);
|
||||
}
|
||||
} else if ("type".equals(nodeName)) {
|
||||
types.add("TYPE: " + n.getText().trim());
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.plugin.base;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Immutable-in-spirit value holder describing one BASE collection entry:
 * the collection identifier plus its optional OpenDOAR and ROR identifiers.
 * Serializable so it can travel inside Spark datasets/RDDs.
 */
public class BaseCollectionInfo implements Serializable {

	private static final long serialVersionUID = 5766333937429419647L;

	// collection identifier (text content of the <collection> element)
	private String id;
	// value of the @opendoar_id attribute, may be empty
	private String opendoarId;
	// value of the @ror_id attribute, may be empty
	private String rorId;

	/** @return the collection identifier */
	public String getId() {
		return id;
	}

	/** @return the OpenDOAR identifier, possibly blank */
	public String getOpendoarId() {
		return opendoarId;
	}

	/** @return the ROR identifier, possibly blank */
	public String getRorId() {
		return rorId;
	}

	/** @param id the collection identifier */
	public void setId(final String id) {
		this.id = id;
	}

	/** @param opendoarId the OpenDOAR identifier */
	public void setOpendoarId(final String opendoarId) {
		this.opendoarId = opendoarId;
	}

	/** @param rorId the ROR identifier */
	public void setRorId(final String rorId) {
		this.rorId = rorId;
	}

}
|
|
@ -3,16 +3,14 @@ package eu.dnetlib.dhp.collection.plugin.base;
|
|||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class BaseRecordInfo implements Serializable {
|
||||
|
||||
private static final long serialVersionUID = -8848232018350074593L;
|
||||
|
||||
private String id;
|
||||
private Map<String, Map<String, String>> collections = new HashMap<>();
|
||||
private List<BaseCollectionInfo> collections = new ArrayList<>();
|
||||
private List<String> paths = new ArrayList<>();
|
||||
private List<String> types = new ArrayList<>();
|
||||
|
||||
|
@ -40,11 +38,11 @@ public class BaseRecordInfo implements Serializable {
|
|||
this.types = types;
|
||||
}
|
||||
|
||||
public Map<String, Map<String, String>> getCollections() {
|
||||
public List<BaseCollectionInfo> getCollections() {
|
||||
return this.collections;
|
||||
}
|
||||
|
||||
public void setCollections(final Map<String, Map<String, String>> collections) {
|
||||
public void setCollections(final List<BaseCollectionInfo> collections) {
|
||||
this.collections = collections;
|
||||
}
|
||||
|
||||
|
|
|
@ -121,11 +121,11 @@ public class BaseCollectorIteratorTest {
|
|||
}
|
||||
|
||||
final JavaRDD<BaseRecordInfo> rdd = JavaSparkContext
|
||||
.fromSparkContext(spark.sparkContext())
|
||||
.parallelize(ls);
|
||||
.fromSparkContext(spark.sparkContext())
|
||||
.parallelize(ls);
|
||||
|
||||
final Dataset<BaseRecordInfo> df = spark
|
||||
.createDataset(rdd.rdd(), Encoders.bean(BaseRecordInfo.class));
|
||||
.createDataset(rdd.rdd(), Encoders.bean(BaseRecordInfo.class));
|
||||
|
||||
df.printSchema();
|
||||
|
||||
|
|
Loading…
Reference in New Issue