initial stage

This commit is contained in:
Giambattista Bloisi 2024-06-29 10:13:01 +02:00
parent c683be854a
commit 387ddce398
2 changed files with 1 addition and 6 deletions

View File

@ -116,23 +116,18 @@ def isEmpty(current_value: Dict[str, Any], labels: List[str]) -> bool:
# #
def filter_product(p: dict) -> bool:
    """Report whether product ``p`` fails the required-field checks.

    Each check delegates to ``isEmpty`` (defined elsewhere in this file) with
    the label path of one field. The function returns ``True`` as soon as any
    check reports the field as empty, and ``False`` when every applicable
    check passes.
    NOTE(review): presumably ``True`` means "drop this product" -- confirm
    against the caller.

    Checks applied:
      * every product: ``titles/none`` and ``firstPublishDate``;
      * ``product_type == "literature"``: ``abstracts/none`` and
        ``contributions/person/local_identifier``;
      * ``product_type`` in ``"research data"`` / ``"other"``:
        ``contributions/person/local_identifier`` only;
      * any other product type: no additional checks.

    Args:
        p: The product record, a dict that must contain ``product_type``.

    Returns:
        ``True`` if a required field is empty, ``False`` otherwise.

    Raises:
        KeyError: If ``p`` has no ``product_type`` key and the title and
            date checks did not already return.
    """
    # Fields required regardless of product type.
    if isEmpty(p, ["titles", "none"]):
        return True
    if isEmpty(p, ["firstPublishDate"]):
        return True

    if p['product_type'] == "literature":
        # Literature needs both an abstract and an identified creator.
        if isEmpty(p, ["abstracts", "none"]):
            return True
        if isEmpty(p, ["contributions", "person", "local_identifier"]):
            return True
    elif p['product_type'] in ["research data", "other"]:
        # Data and "other" products only need an identified creator.
        if isEmpty(p, ["contributions", "person", "local_identifier"]):
            return True
    return False

View File

@ -185,7 +185,7 @@ for config_name, config in configs.items():
raise_on_error=False, raise_on_error=False,
chunk_size=5000, chunk_size=5000,
max_chunk_bytes=50 * 1024 * 1024, max_chunk_bytes=50 * 1024 * 1024,
timeout=180): timeout="5m"):
if success: if success:
succeeded = succeeded + 1 succeeded = succeeded + 1
else: else: