Aziz Ketari hai 4 anos
pai
achega
494528ea1c

BIN=BIN
.DS_Store


+ 2 - 4
scripts/extraction.py

@@ -13,11 +13,9 @@ key_path = os.getenv('SA_KEY_PATH')
 
 credentials = service_account.Credentials.from_service_account_file(key_path)
 
-storage_client = storage.Client(credentials=credentials,
-                                project_id=project_id)
+storage_client = storage.Client(credentials=credentials)
 
-vision_client = vision.Client(credentials=credentials,
-                              project_id=project_id)
+vision_client = vision.ImageAnnotatorClient(credentials=credentials)
 
 lst_pdf_blobs = storage_client.list_blobs(bucket_or_name=bucket_name,
                                           prefix='pdf')

+ 4 - 3
scripts/preprocessing.py

@@ -1,4 +1,4 @@
-from google.cloud import storage
+from google.cloud import storage, translate
 from google.oauth2 import service_account
 from utils.preprocessing_fcn import batch_translate_text, upload_blob
 import logging
@@ -14,8 +14,9 @@ key_path = os.getenv('SA_KEY_PATH')
 
 credentials = service_account.Credentials.from_service_account_file(key_path)
 
-storage_client = storage.Client(credentials=credentials,
-                                project_id=project_id)
+storage_client = storage.Client(credentials=credentials)
+
+translate_client = translate.TranslationServiceClient(credentials=credentials)
 
 lst_json_blobs = storage_client.list_blobs(bucket_or_name=bucket_name,
                                            prefix='json')

+ 2 - 4
scripts/retrieving.py

@@ -16,11 +16,9 @@ case_id = os.getenv('TEST_CASE')
 
 credentials = service_account.Credentials.from_service_account_file(key_path)
 
-bq_client = bigquery.Client(credentials=credentials,
-                            project_id=project_id)
+bq_client = bigquery.Client(credentials=credentials)
 
-datastore_client = datastore.Client(credentials=credentials,
-                                    project_id=project_id)
+datastore_client = datastore.Client(credentials=credentials)
 
 # Returns a list of results
 try:

+ 2 - 7
scripts/storing.py

@@ -5,12 +5,9 @@ from utils.ner_fcn import loadModel, addTask, extractMedEntities
 import en_core_sci_lg
 
 import logging
-import re
 import time
 import os
 import pandas as pd
-import sys
-import argparse
 
 project_id = os.getenv('PROJECT_ID')
 bucket_name = os.getenv('BUCKET_NAME')
@@ -21,11 +18,9 @@ table_name = os.getenv('BQ_TABLE_NAME')
 
 credentials = service_account.Credentials.from_service_account_file(key_path)
 
-storage_client = storage.Client(credentials=credentials,
-                                project_id=project_id)
+storage_client = storage.Client(credentials=credentials)
 
-datastore_client = datastore.Client(credentials=credentials,
-                                    project_id=project_id)
+datastore_client = datastore.Client(credentials=credentials)
 
 gcs_source_prefix = 'raw_txt'
 lst_blobs = storage_client.list_blobs(bucket_or_name=bucket_name,

+ 9 - 9
scripts/utils/bq_fcn.py

@@ -3,15 +3,15 @@ from google.oauth2 import service_account
 import logging
 import os
 
-project_id = os.getenv('PROJECT_ID')
-bucket_name = os.getenv('BUCKET_NAME')
-location = os.getenv('LOCATION')
-key_path = os.getenv('SA_KEY_PATH')
-
-credentials = service_account.Credentials.from_service_account_file(key_path)
-
-bq_client = bigquery.Client(credentials=credentials,
-                            project_id=project_id)
+# project_id = os.getenv('PROJECT_ID')
+# bucket_name = os.getenv('BUCKET_NAME')
+# location = os.getenv('LOCATION')
+# key_path = os.getenv('SA_KEY_PATH')
+#
+# credentials = service_account.Credentials.from_service_account_file(key_path)
+#
+# bq_client = bigquery.Client(credentials=credentials,
+#                             project_id=project_id)
 
 
 def bqCreateDataset(dataset_name):

+ 9 - 9
scripts/utils/ner_fcn.py

@@ -10,15 +10,15 @@ from scispacy.abbreviation import AbbreviationDetector
 
 
 # DEVELOPER: change path to key
-project_id = os.getenv('PROJECT_ID')
-bucket_name = os.getenv('BUCKET_NAME')
-location = os.getenv('LOCATION')
-key_path = os.getenv('SA_KEY_PATH')
-
-credentials = service_account.Credentials.from_service_account_file(key_path)
-
-datastore_client = datastore.Client(credentials=credentials,
-                                    project_id=credentials.project_id)
+# project_id = os.getenv('PROJECT_ID')
+# bucket_name = os.getenv('BUCKET_NAME')
+# location = os.getenv('LOCATION')
+# key_path = os.getenv('SA_KEY_PATH')
+
+# credentials = service_account.Credentials.from_service_account_file(key_path)
+#
+# datastore_client = datastore.Client(credentials=credentials,
+#                                     project_id=credentials.project_id)
 
 
 def loadModel(model=en_core_sci_lg):