# storing.py (2.9 KB)
  1. from google.cloud import storage, bigquery, datastore
  2. from google.oauth2 import service_account
  3. from utils.bq_fcn import populateBQ
  4. from utils.ner_fcn import populateDatastore
  5. import logging
  6. import argparse
  7. import os
  8. import time
  9. # Importing the models
  10. logging.getLogger().setLevel(logging.INFO)
  11. # Create the parser
  12. parser = argparse.ArgumentParser(description='Select the model of interest.')
  13. # Add the arguments
  14. parser.add_argument('store_bigquery',
  15. metavar='bool',
  16. choices=['True', 'False'],
  17. help='Store data in BigQuery. Options: True or False')
  18. parser.add_argument('store_datastore',
  19. metavar='bool',
  20. choices=['True', 'False'],
  21. help='Store data in Datastore. Options: True or False')
  22. model_choices = ['en_core_sci_sm', 'en_core_sci_lg', 'en_ner_bc5cdr_md']
  23. parser.add_argument('model_name',
  24. metavar='name',
  25. type=str,
  26. help='Model options: en_core_sci_sm, en_core_sci_lg, en_ner_bc5cdr_md')
  27. # Execute the parse_args() method
  28. args = parser.parse_args()
  29. if args.store_datastore == 'True' and not args.model_name:
  30. parser.error('--storing in datastore can only be done when --model_name is set to a specific model.')
  31. elif args.store_datastore == 'True' and args.model_name not in model_choices:
  32. parser.error('--storing in datastore can only be done when --model_name is among the supported models: {}.'.format(model_choices))
  33. model_name = args.model_name
  34. project_id = os.getenv('PROJECT_ID')
  35. bucket_name = os.getenv('BUCKET_NAME')
  36. location = os.getenv('LOCATION')
  37. key_path = os.getenv('SA_KEY_PATH')
  38. dataset_name = os.getenv('BQ_DATASET_NAME')
  39. table_name = os.getenv('BQ_TABLE_NAME')
  40. credentials = service_account.Credentials.from_service_account_file(key_path)
  41. storage_client = storage.Client(credentials=credentials)
  42. datastore_client = datastore.Client(credentials=credentials)
  43. bq_client = bigquery.Client(credentials=credentials)
  44. if args.store_bigquery == 'True':
  45. start_time = time.time()
  46. populateBQ(bq_client=bq_client,storage_client=storage_client,
  47. bucket_name=bucket_name, dataset_name=dataset_name,
  48. table_name=table_name)
  49. total_time = time.time() - start_time
  50. logging.info(
  51. 'The export to BigQuery was completed successfully and took {} seconds.'.format(round(total_time, 1)))
  52. else:
  53. logging.info('The export to BigQuery was disable.')
  54. if args.store_datastore == 'True':
  55. start_time = time.time()
  56. populateDatastore(datastore_client=datastore_client, storage_client=storage_client,
  57. src_bucket=bucket_name, model_name=model_name)
  58. total_time = time.time() - start_time
  59. logging.info(
  60. "The export to Datastore was completed successfully and took {} seconds.".format(round(total_time, 1)))
  61. else:
  62. logging.info('The export to Datastore was disable.')