3030import os
3131import netrc
3232import requests
33+ import socket
3334import json
3435import struct
3536import ctypes
@@ -353,7 +354,6 @@ def __build_auth_header():
353354 """
354355 Build authentication header for use with provisioning system
355356 """
356-
357357 global service_url , ps_access_token , ps_refresh_token , ps_token_exp
358358 headers = None
359359 if ps_access_token :
@@ -371,6 +371,21 @@ def __build_auth_header():
371371 return headers
372372
373373
374+ ###############################################################################
375+ # Overriding DNS
376+ ###############################################################################
377+
# Maps a hostname to the address that lookups for that hostname are redirected to.
local_dns = {}

# Handle on the resolver that was installed before this module patched it; the
# override below delegates every lookup to it.
socket_getaddrinfo = socket.getaddrinfo


def __override_getaddrinfo(*args, **kwargs):
    """
    Drop-in replacement for socket.getaddrinfo that consults local_dns first

    If the requested host is a key in local_dns, the lookup is performed on the
    mapped address instead; otherwise the call is passed through untouched.
    All arguments may be supplied positionally or by keyword (host, port,
    family, type, proto, flags), exactly as socket.getaddrinfo allows, because
    this function replaces it for every caller in the process.

    Returns whatever the original socket.getaddrinfo returns, and raises
    whatever it raises.
    """
    # The host is normally the first positional argument, but the real
    # function also accepts it as the keyword "host".
    if args:
        host = args[0]
    else:
        host = kwargs.get("host")

    if host in local_dns:
        target = local_dns[host]
        logger.info("Overriding {} to {}".format(host, target))
        # Substitute the host in whichever form the caller supplied it
        if args:
            args = (target,) + tuple(args[1:])
        else:
            kwargs["host"] = target

    return socket_getaddrinfo(*args, **kwargs)


# Install the override for the whole process (module-level side effect)
socket.getaddrinfo = __override_getaddrinfo
387+
388+
374389###############################################################################
375390# Default Record Processing
376391###############################################################################
@@ -527,6 +542,7 @@ def init (url=service_url, verbose=False, loglevel=logging.CRITICAL, organizatio
527542 set_verbose (verbose )
528543 set_url (url ) # configure domain
529544 authenticate (organization ) # configure credentials (if any) for organization
545+ local_dns .clear () # clear cache of DNS lookups for clusters
530546 scaleout (desired_nodes , time_to_live ) # set cluster to desired number of nodes (if permitted based on credentials)
531547 check_version (plugins = plugins ) # verify compatibility between client and server versions
532548
@@ -748,8 +764,7 @@ def update_available_servers (desired_nodes=None, time_to_live=None):
748764 >>> import sliderule
749765 >>> num_servers, max_workers = sliderule.update_available_servers(10)
750766 '''
751-
752- global service_url , service_org , request_timeout
767+ global service_url , service_org , request_timeout , local_dns
753768
754769 # Update number of nodes
755770 if type (desired_nodes ) == int :
@@ -793,20 +808,31 @@ def scaleout(desired_nodes, time_to_live):
793808 return # nothing needs to be done
794809 if desired_nodes < 0 :
795810 raise FatalError ("Number of desired nodes must be greater than zero ({})" .format (desired_nodes ))
811+ # Send Initial Request for Desired Cluster State
796812 update_available_servers (desired_nodes = desired_nodes , time_to_live = time_to_live )
797813 start = time .time ()
798814 available_nodes ,_ = update_available_servers ()
799815 scale_up_needed = False
816+ dns_overridden = False
817+ # Wait for Cluster to Reach Desired State
800818 while available_nodes < desired_nodes :
801819 scale_up_needed = True
802820 logger .info ("Waiting while cluster scales to desired capacity (currently at {} nodes, desired is {} nodes)... {} seconds" .format (available_nodes , desired_nodes , int (time .time () - start )))
803821 time .sleep (10.0 )
804822 available_nodes ,_ = update_available_servers ()
805- if available_nodes == 0 :
806- time .sleep (20.0 ) # wait an extra 20 seconds for cluster to start if cluster is not running
823+ # Override DNS if Cluster is Starting
824+ if available_nodes == 0 and not dns_overridden :
825+ headers = __build_auth_header ()
826+ host = "https://ps." + service_url + "/api/org_ip_adr/" + service_org + "/"
827+ rsps = session .get (host , headers = headers , timeout = request_timeout ).json ()
828+ if rsps ["status" ] == "SUCCESS" :
829+ dns_overridden = True
830+ local_dns [service_org + "." + service_url ] = rsps ["ip_address" ]
831+ # Timeout Occurred
807832 if int (time .time () - start ) > MAX_PS_CLUSTER_WAIT_SECS :
808833 logger .error ("Maximum time allowed waiting for cluster has been exceeded" )
809834 break
835+ # Log Final Message if Cluster Needed State Change
810836 if scale_up_needed :
811837 logger .info ("Cluster has reached capacity of {} nodes... {} seconds" .format (available_nodes , int (time .time () - start )))
812838
0 commit comments