Eliminare 'ScholexplorerPropagation.json'

Removed because it is not rendered.
This commit is contained in:
Miriam Baglioni 2020-06-29 18:09:20 +02:00
parent afec5e4cf9
commit 6bc22d7e39
1 changed file with 0 additions and 113 deletions

View File

@ -1,113 +0,0 @@
{
"paragraphs": [
{
"text": "%pyspark\nimport json\nimport sys\nimport re\nfrom pyspark.sql.types import *\nfrom pyspark.sql import SQLContext\nfrom pyspark.sql.functions import *\nimport copy\n\nsqlContext = SQLContext(sc)\n\npaper_dataset_propagation = {\n \"documents\": {\n \"prob\": 1.0,\n \"path\":set()\n },\n \"isderivedfrom\": {\n \"prob\": 0.9,\n \"path\":set()\n },\n \"issourceof\": {\n \"prob\": 0.7,\n \"path\":set()\n },\n \"reviews\": {\n \"prob\": 0.8,\n \"path\":set()\n },\n \"references\": {\n \"prob\": 1.0,\n \"path\":set()\n },\n \"issupplementedby\": {\n \"prob\": 0.8,\n \"path\":set()\n },\n \"cites\": {\n \"prob\": 0.8,\n \"path\":set()\n }\n}\n\ndataset_dataset_propagation= {\n \"issupplementedby\": {\n \"prob\": 1.0\n },\n \"documents\": {\n \"prob\": 0.9\n },\n \"iscitedby\": {\n \"prob\": 0.9\n },\n \"haspart\": {\n \"prob\": 0.7 },\n \"isdocumentedby\": {\n \"prob\": 0.7 },\n \"continues\": {\n \"prob\": 0.8 },\n \"cites\": {\n \"prob\": 1.0 },\n \"issupplementto\": {\n \"prob\": 0.8 },\n \"isnewversionof\": {\n \"prob\": 0.9 },\n \"ispartof\": {\n \"prob\": 0.8 },\n \"references\": {\n \"prob\": 1.0 },\n \"isreferencedby\": {\n \"prob\": 0.9 },\n \"iscontinuedby\": {\n \"prob\": 0.7 },\n \"isvariantformof\": {\n \"prob\": 0.9 }\n }\n\n\n \ndef propagateDataset(x):\n propagation = copy.deepcopy(x[1][0]) #dictionary {\"publicationId\":{propagation_probabilities and path}}\n dsprob = x[1][1] #dictionary {\"datasetId\":{dataset_probabilities}}\n source = dsprob.keys().pop()\n todelpid = set()\n for pid in propagation:\n entry = propagation[pid]\n if source in propagation[pid]['path']:\n todelpid.add(pid)\n continue\n for use in entry:\n if use == 'path':\n continue\n new_p = entry[use] * dsprob[source][\"prob\"]\n if new_p > 0.3:\n entry[use] = new_p\n propagation[pid]['path'].add(x[0])\n else:\n todelpid.add(pid)\n for pid in todelpid:\n del propagation[pid]\n return (source, propagation)\n\ndef reduceRelation(a, b):\n if a is None:\n return b\n if b is None:\n 
return a \n for pid in b:\n if not pid in a:\n a[pid] = copy.deepcopy(b[pid])\n else:\n probabilities = b[pid]\n for prob in probabilities:\n if prob =='path':\n for e in probabilities['path']:\n a[pid]['path'].add(e)\n continue\n if prob in a[pid]:\n if a[pid][prob] < probabilities[prob]:\n a[pid][prob] = probabilities[prob]\n else:\n a[pid][prob] = probabilities[prob]\n return a \n \ndef hasDescription(x):\n if 'description' in x and not x['description'] is None:\n for dic in x['description']:\n if dic['value'] is not None and dic['value'].strip() != \"\":\n return True\n return False\n ",
"user": "miriam.baglioni",
"dateUpdated": "2020-06-29T14:55:43+0000",
"config": {
"editorSetting": {
"language": "python",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
},
"colWidth": 12,
"editorMode": "ace/mode/python",
"fontSize": 9,
"results": {},
"enabled": true
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "SUCCESS",
"msg": []
},
"apps": [],
"jobName": "paragraph_1593089330199_-1573015420",
"id": "20200521-082800_526102814",
"dateCreated": "2020-06-25T12:48:50+0000",
"status": "READY",
"errorMessage": "",
"progressUpdateIntervalMs": 500,
"focus": true,
"$$hashKey": "object:1124"
},
{
"text": "%pyspark\n\nload_datasets = sc.textFile('/user/sandro.labruzzo/scholix/graph/dataset').map(json.loads).filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference'])\nload_publications = sc.textFile('/user/sandro.labruzzo/scholix/graph/publication').map(json.loads).filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference'])\nrelations_rdd = spark.read.parquet('/user/sandro.labruzzo/scholix/graph/relation').rdd.filter(lambda x: x['dataInfo'] is None or not x['dataInfo']['deletedbyinference'])\n\n#relations from publication to dataset in the graph subset \npubs_relation = relations_rdd.filter(lambda x: x['source'][:2] == '50' and x['target'][:2] == '60' and x['relType'].lower() in paper_dataset_propagation)\n\n#relation from dataset to dataset (no self loops) in the graph subset\ndats_relation = relations_rdd.filter(lambda x: x['source'][:2] == '60' and x['target'][:2] == '60' and x['source'] != x['target'] and x['relType'].lower() in dataset_dataset_propagation)\n\n#distinct publication subset appearing in a relation to at least one dataset\npubs_subgraph = pubs_relation.map(lambda x: (x['source'],1)).reduceByKey(lambda a,b : a+b).join(load_publications.map(lambda x:(x['id'],x))).map(lambda x: x[1][1])\n\n#publications with abstract\npubs_with_abst = pubs_subgraph.filter(hasDescription).map(lambda x:(x['id'],x))\n\n#relations from publication with abstract to dataset\nrel_pubs_dats_abst = pubs_relation.map(lambda x: (x['source'],x)).join(pubs_with_abst).map(lambda x: x[1][0]).map(lambda x: (x['target'], x)).join(load_datasets.map(lambda x: (x['id'], 1))).map(lambda x: x[1][0])\n\n\npublication_dataset = rel_pubs_dats_abst.map(lambda x: (x['target'], {x['source']:copy.deepcopy(paper_dataset_propagation[x['relType'].lower()])}))\ndataset_dataset = dats_relation.map(lambda x: (x['source'], {x['target']:copy.deepcopy(dataset_dataset_propagation[x['relType'].lower()])}))\n\n\npl1 = 
publication_dataset.reduceByKey(reduceRelation)\n\npreviuos_propagation = pl1\npl1.count()\ncount = 2\nhops = 3\nwhile (True):\n if count > hops:\n break\n pl_step1 = previuos_propagation.join(dataset_dataset)\n pl_step2 = pl_step1.map(propagateDataset).filter(lambda x: len(x[1]) > 0)\n if pl_step2.count() == 0:\n break\n pl_step3 = pl_step2.reduceByKey(reduceRelation)\n current_propagation = pl_step3.union(previuos_propagation).reduceByKey(reduceRelation)\n current_propagation.count()\n count += 1\n previuos_propagation = current_propagation\n\n",
"user": "miriam.baglioni",
"dateUpdated": "2020-06-29T14:52:36+0000",
"config": {
"editorSetting": {
"language": "python",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
},
"colWidth": 12,
"editorMode": "ace/mode/python",
"fontSize": 9,
"results": {},
"enabled": true
},
"settings": {
"params": {},
"forms": {}
},
"apps": [],
"jobName": "paragraph_1593089330225_1214619039",
"id": "20200521-084556_457403103",
"dateCreated": "2020-06-25T12:48:50+0000",
"status": "READY",
"errorMessage": "",
"progressUpdateIntervalMs": 500,
"$$hashKey": "object:1125"
},
{
"text": "%pyspark\n",
"user": "miriam.baglioni",
"dateUpdated": "2020-06-29T14:53:46+0000",
"config": {
"colWidth": 12,
"fontSize": 9,
"enabled": true,
"results": {},
"editorSetting": {
"language": "scala",
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
},
"editorMode": "ace/mode/scala"
},
"settings": {
"params": {},
"forms": {}
},
"apps": [],
"jobName": "paragraph_1593442426323_1460687479",
"id": "20200629-145346_169818547",
"dateCreated": "2020-06-29T14:53:46+0000",
"status": "READY",
"progressUpdateIntervalMs": 500,
"focus": true,
"$$hashKey": "object:1868"
}
],
"name": "ScholexplorerPropagation",
"id": "2FB9ZGBK4",
"noteParams": {},
"noteForms": {},
"angularObjects": {
"md:shared_process": [],
"spark:miriam.baglioni:": []
},
"config": {
"isZeppelinNotebookCronEnable": false,
"looknfeel": "default",
"personalizedMode": "false"
},
"info": {}
}