5 жил өмнө · 19c4ad5bfb
--- a/.DS_Store
+++ b/.DS_Store
--- a/README.md
+++ b/README.md
@@ -49,39 +49,41 @@ will automatically download a model for you and install it.
 
															 `cd ~/covid19_ISMIR`
														
 
															 - **Step 1:** Modify the value of each variable in the env_variables.sh file, then run
														
 
															-
														
 
															+> Assumption: You have already created/downloaded the JSON key for your Google Cloud Service Account. Useful [link](https://cloud.google.com/iam/docs/creating-managing-service-account-keys#iam-service-account-keys-create-python)
														
 
															 ```
														
 
															 ./env_variables.sh
														
 
															 ```
														
 
															 - **Step 2:** Download the required files to your bucket and load the required model in your local environment  
														
 
															 (this step will take ~10 min)
														
 
															-
														
 
															+> Optional: If you have already downloaded the scispacy model, you should modify the file ./content/download_content.sh to not repeat that step
														
 
															 ```
														
 
															-sh ~/data/download_content.sh
														
 
															+sh ~/content/download_content.sh
														
 
															 pip install -U ./scispacy_models/en_core_sci_lg-0.2.4.tar.gz
														
 
															 ```
														
 
															 - **Step 3:** Start the extraction of text from the pdf documents  
														
 
															-`python3 extraction.py`
														
 
															+`python3 ./scripts/extraction.py`
														
 
															 ## Pre-processing data
														
 
															 Following the extraction of text, it's time to translate it from Italian to English and curate it.
														
 
															-`python3 preprocessing.py`
														
 
															+`python3 ./scripts/preprocessing.py`
														
 
															 ## Storing data
														
 
															 Following the pre-processing, it's time to store the data in a more searchable format: a data warehouse - 
														
 
															 [BigQuery](https://cloud.google.com/bigquery) - for the text, and a No-SQL database - 
														
 
															 [Datastore](https://cloud.google.com/datastore) - for the (UMLS) medical entities. 
														
 
															-`python3 storing.py`
														
 
															+`python3 ./scripts/storing.py`
														
 
															 ## Test
														
 
															 Last but not least, you can query your databases using this script.
														
 
															-`python3 retrieving.py`
														
 
															+`python3 ./scripts/retrieving.py`
														
 
															+
														
 
															+---
														
 
															 ## Contributing
														
 
															 > To get started...
														
--- a/content/.DS_Store
+++ b/content/.DS_Store
--- a/content/UMLS_tuis.csv
+++ b/content/UMLS_tuis.csv
--- a/content/download_content.sh
+++ b/content/download_content.sh
--- a/content/images/.DS_Store
+++ b/content/images/.DS_Store
--- a/content/images/bq_snapshot.gif
+++ b/content/images/bq_snapshot.gif
--- a/content/images/covid19_repo_architecture_3_24_2020.png
+++ b/content/images/covid19_repo_architecture_3_24_2020.png
--- a/content/images/datastore_snapshot.gif
+++ b/content/images/datastore_snapshot.gif
--- a/scripts/__init__.py
+++ b/scripts/__init__.py
--- a/scripts/extraction.py
+++ b/scripts/extraction.py
@@ -1,6 +1,6 @@
 
															 from google.cloud import storage, vision
														
 
															 from google.oauth2 import service_account
														
 
															-from utils.preprocessing_fcn import async_detect_document, read_json_result, upload_blob
														
 
															+from covid19_ISMIR.utils.preprocessing_fcn import async_detect_document, read_json_result, upload_blob
														
 
															 import logging
														
 
															 import time
														
--- a/scripts/preprocessing.py
+++ b/scripts/preprocessing.py
@@ -1,6 +1,6 @@
 
															 from google.cloud import storage
														
 
															 from google.oauth2 import service_account
														
 
															-from utils.preprocessing_fcn import batch_translate_text, upload_blob
														
 
															+from covid19_ISMIR.utils.preprocessing_fcn import batch_translate_text, upload_blob
														
 
															 import logging
														
 
															 import re
														
--- a/scripts/retrieving.py
+++ b/scripts/retrieving.py
@@ -1,7 +1,7 @@
 
															 from google.cloud import storage, bigquery, datastore
														
 
															 from google.oauth2 import service_account
														
 
															-from utils.bq_fcn import returnQueryResults
														
 
															-from utils.ner_fcn import getCases
														
 
															+from covid19_ISMIR.utils.bq_fcn import returnQueryResults
														
 
															+from covid19_ISMIR.utils.ner_fcn import getCases
														
 
															 import logging
														
 
															 import os
														
--- a/scripts/storing.py
+++ b/scripts/storing.py
@@ -1,7 +1,7 @@
 
															 from google.cloud import storage, bigquery, datastore
														
 
															 from google.oauth2 import service_account
														
 
															-from utils.bq_fcn import bqCreateDataset, bqCreateTable, exportItems2BQ
														
 
															-from utils.ner_fcn import loadModel, addTask, extractMedEntities
														
 
															+from covid19_ISMIR.utils.bq_fcn import bqCreateDataset, bqCreateTable, exportItems2BQ
														
 
															+from covid19_ISMIR.utils.ner_fcn import loadModel, addTask, extractMedEntities
														
 
															 import en_core_sci_lg
														
 
															 import logging