Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 001446e

Browse files
committed
Added local DNS caching to speed up cluster startup time from the client's perspective
1 parent e912311 commit 001446e

File tree

1 file changed

+31
-5
lines changed

1 file changed

+31
-5
lines changed

sliderule/sliderule.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import os
3131
import netrc
3232
import requests
33+
import socket
3334
import json
3435
import struct
3536
import ctypes
@@ -353,7 +354,6 @@ def __build_auth_header():
353354
"""
354355
Build authentication header for use with provisioning system
355356
"""
356-
357357
global service_url, ps_access_token, ps_refresh_token, ps_token_exp
358358
headers = None
359359
if ps_access_token:
@@ -371,6 +371,21 @@ def __build_auth_header():
371371
return headers
372372

373373

374+
###############################################################################
375+
# Overriding DNS
376+
###############################################################################
377+
378+
# Hostname -> IP address overrides consulted before normal resolution.
local_dns = {}
# Keep a handle on the resolver being replaced so the wrapper can delegate to it.
socket_getaddrinfo = socket.getaddrinfo


def __override_getaddrinfo(host, *args, **kwargs):
    """
    Resolve host through local_dns when an override exists, otherwise defer to the original resolver

    Takes the same arguments as socket.getaddrinfo. Everything after the host
    (port, family, type, proto, flags) is forwarded untouched, whether it was
    passed positionally or by keyword; only the host is substituted when it
    has an entry in local_dns.
    """
    if host in local_dns:
        logger.info("Overriding {} to {}".format(host, local_dns[host]))
        return socket_getaddrinfo(local_dns[host], *args, **kwargs)
    return socket_getaddrinfo(host, *args, **kwargs)


# Install the wrapper on the socket module itself, so every caller of
# socket.getaddrinfo in this process goes through the override table.
socket.getaddrinfo = __override_getaddrinfo
387+
388+
374389
###############################################################################
375390
# Default Record Processing
376391
###############################################################################
@@ -527,6 +542,7 @@ def init (url=service_url, verbose=False, loglevel=logging.CRITICAL, organizatio
527542
set_verbose(verbose)
528543
set_url(url) # configure domain
529544
authenticate(organization) # configure credentials (if any) for organization
545+
local_dns.clear() # clear cache of DNS lookups for clusters
530546
scaleout(desired_nodes, time_to_live) # set cluster to desired number of nodes (if permitted based on credentials)
531547
check_version(plugins=plugins) # verify compatibility between client and server versions
532548

@@ -748,8 +764,7 @@ def update_available_servers (desired_nodes=None, time_to_live=None):
748764
>>> import sliderule
749765
>>> num_servers, max_workers = sliderule.update_available_servers(10)
750766
'''
751-
752-
global service_url, service_org, request_timeout
767+
global service_url, service_org, request_timeout, local_dns
753768

754769
# Update number of nodes
755770
if type(desired_nodes) == int:
@@ -793,20 +808,31 @@ def scaleout(desired_nodes, time_to_live):
793808
return # nothing needs to be done
794809
if desired_nodes < 0:
795810
raise FatalError("Number of desired nodes must be greater than zero ({})".format(desired_nodes))
811+
# Send Initial Request for Desired Cluster State
796812
update_available_servers(desired_nodes=desired_nodes, time_to_live=time_to_live)
797813
start = time.time()
798814
available_nodes,_ = update_available_servers()
799815
scale_up_needed = False
816+
dns_overridden = False
817+
# Wait for Cluster to Reach Desired State
800818
while available_nodes < desired_nodes:
801819
scale_up_needed = True
802820
logger.info("Waiting while cluster scales to desired capacity (currently at {} nodes, desired is {} nodes)... {} seconds".format(available_nodes, desired_nodes, int(time.time() - start)))
803821
time.sleep(10.0)
804822
available_nodes,_ = update_available_servers()
805-
if available_nodes == 0:
806-
time.sleep(20.0) # wait an extra 20 seconds for cluster to start if cluster is not running
823+
# Override DNS if Cluster is Starting
824+
if available_nodes == 0 and not dns_overridden:
825+
headers = __build_auth_header()
826+
host = "https://ps." + service_url + "/api/org_ip_adr/" + service_org + "/"
827+
rsps = session.get(host, headers=headers, timeout=request_timeout).json()
828+
if rsps["status"] == "SUCCESS":
829+
dns_overridden = True
830+
local_dns[service_org + "." + service_url] = rsps["ip_address"]
831+
# Timeout Occurred
807832
if int(time.time() - start) > MAX_PS_CLUSTER_WAIT_SECS:
808833
logger.error("Maximum time allowed waiting for cluster has been exceeded")
809834
break
835+
# Log Final Message if Cluster Needed State Change
810836
if scale_up_needed:
811837
logger.info("Cluster has reached capacity of {} nodes... {} seconds".format(available_nodes, int(time.time() - start)))
812838

0 commit comments

Comments
 (0)