5 年之前 · 19c4ad5bfb
--- a/.DS_Store
+++ b/.DS_Store
--- a/README.md
+++ b/README.md
@@ -49,39 +49,41 @@ will automatically download a model for you and install it.
 
				 `cd ~/covid19_ISMIR`
			
 
				 
			
 
				 - **Step 1:** Modify the value of each variable in the env_variables.sh file, then run
			
 
				-
			
 
				+> Assumption: You have already created/downloaded the JSON key for your Google Cloud service account. See [creating service account keys](https://cloud.google.com/iam/docs/creating-managing-service-account-keys#iam-service-account-keys-create-python)
			
 
				 ```
			
 
				 ./env_variables.sh
			
 
				 ```
			
 
				 
			
 
				 - **Step 2:** Download the required files to your bucket and load the required model in your local environment  
			
 
				 (this step will take ~10 min)
			
 
				-
			
 
				+> Optional: If you have already downloaded the scispacy model, you should modify the file ./content/download_content.sh to not repeat that step
			
 
				 ```
			
 
				-sh ~/data/download_content.sh
			
 
				+sh ~/content/download_content.sh
			
 
				 pip install -U ./scispacy_models/en_core_sci_lg-0.2.4.tar.gz
			
 
				 ```
			
 
				 
			
 
				 - **Step 3:** Start the extraction of text from the pdf documents  
			
 
				 
			
 
				-`python3 extraction.py`
			
 
				+`python3 ./scripts/extraction.py`
			
 
				 
			
 
				 ## Pre-processing data
			
 
				 Following the extraction of text, it's time to translate it from Italian to English and curate it.
			
 
				 
			
 
				-`python3 preprocessing.py`
			
 
				+`python3 ./scripts/preprocessing.py`
			
 
				 
			
 
				 ## Storing data
			
 
				 Following the pre-processing, it's time to store the data in a more searchable format: a data warehouse - 
			
 
				 [BigQuery](https://cloud.google.com/bigquery) - for the text, and a No-SQL database - 
			
 
				 [Datastore](https://cloud.google.com/datastore) - for the (UMLS) medical entities. 
			
 
				 
			
 
				-`python3 storing.py`
			
 
				+`python3 ./scripts/storing.py`
			
 
				 
			
 
				 ## Test
			
 
				 Last but not least, you can query your databases using this script.
			
 
				 
			
 
				-`python3 retrieving.py`
			
 
				+`python3 ./scripts/retrieving.py`
			
 
				+
			
 
				+---
			
 
				 
			
 
				 ## Contributing
			
 
				 > To get started...
			
--- a/content/.DS_Store
+++ b/content/.DS_Store
--- a/content/UMLS_tuis.csv
+++ b/content/UMLS_tuis.csv
--- a/content/download_content.sh
+++ b/content/download_content.sh
--- a/content/images/.DS_Store
+++ b/content/images/.DS_Store
--- a/content/images/bq_snapshot.gif
+++ b/content/images/bq_snapshot.gif
--- a/content/images/covid19_repo_architecture_3_24_2020.png
+++ b/content/images/covid19_repo_architecture_3_24_2020.png
--- a/content/images/datastore_snapshot.gif
+++ b/content/images/datastore_snapshot.gif
--- a/scripts/__init__.py
+++ b/scripts/__init__.py
--- a/scripts/extraction.py
+++ b/scripts/extraction.py
@@ -1,6 +1,6 @@
 
				 from google.cloud import storage, vision
			
 
				 from google.oauth2 import service_account
			
 
				-from utils.preprocessing_fcn import async_detect_document, read_json_result, upload_blob
			
 
				+from covid19_ISMIR.utils.preprocessing_fcn import async_detect_document, read_json_result, upload_blob
			
 
				 
			
 
				 import logging
			
 
				 import time
			
--- a/scripts/preprocessing.py
+++ b/scripts/preprocessing.py
@@ -1,6 +1,6 @@
 
				 from google.cloud import storage
			
 
				 from google.oauth2 import service_account
			
 
				-from utils.preprocessing_fcn import batch_translate_text, upload_blob
			
 
				+from covid19_ISMIR.utils.preprocessing_fcn import batch_translate_text, upload_blob
			
 
				 import logging
			
 
				 
			
 
				 import re
			
--- a/scripts/retrieving.py
+++ b/scripts/retrieving.py
@@ -1,7 +1,7 @@
 
				 from google.cloud import storage, bigquery, datastore
			
 
				 from google.oauth2 import service_account
			
 
				-from utils.bq_fcn import returnQueryResults
			
 
				-from utils.ner_fcn import getCases
			
 
				+from covid19_ISMIR.utils.bq_fcn import returnQueryResults
			
 
				+from covid19_ISMIR.utils.ner_fcn import getCases
			
 
				 
			
 
				 import logging
			
 
				 import os
			
--- a/scripts/storing.py
+++ b/scripts/storing.py
@@ -1,7 +1,7 @@
 
				 from google.cloud import storage, bigquery, datastore
			
 
				 from google.oauth2 import service_account
			
 
				-from utils.bq_fcn import bqCreateDataset, bqCreateTable, exportItems2BQ
			
 
				-from utils.ner_fcn import loadModel, addTask, extractMedEntities
			
 
				+from covid19_ISMIR.utils.bq_fcn import bqCreateDataset, bqCreateTable, exportItems2BQ
			
 
				+from covid19_ISMIR.utils.ner_fcn import loadModel, addTask, extractMedEntities
			
 
				 import en_core_sci_lg
			
 
				 
			
 
				 import logging