@@ -843,6 +843,10 @@ PHP_REDIS_API redisCluster *cluster_create(double timeout, double read_timeout,
843
843
ALLOC_HASHTABLE (c -> nodes );
844
844
zend_hash_init (c -> nodes , 0 , NULL , ht_free_node , 0 );
845
845
846
+ /* Allocate our preferred nodes HashTable */
847
+ ALLOC_HASHTABLE (c -> preferred_nodes );
848
+ zend_hash_init (c -> preferred_nodes , 0 , NULL , NULL , 0 );
849
+
846
850
return c ;
847
851
}
848
852
@@ -861,10 +865,12 @@ cluster_free(redisCluster *c, int free_ctx)
861
865
/* Call hash table destructors */
862
866
zend_hash_destroy (c -> seeds );
863
867
zend_hash_destroy (c -> nodes );
868
+ zend_hash_destroy (c -> preferred_nodes );
864
869
865
870
/* Free hash tables themselves */
866
871
efree (c -> seeds );
867
872
efree (c -> nodes );
873
+ efree (c -> preferred_nodes );
868
874
869
875
/* Free any error we've got */
870
876
if (c -> err ) zend_string_release (c -> err );
@@ -1235,10 +1241,74 @@ PHP_REDIS_API void cluster_disconnect(redisCluster *c, int force) {
1235
1241
} ZEND_HASH_FOREACH_END ();
1236
1242
}
1237
1243
1244
+ int preferred_compare (const void * const first , const void * const second )
1245
+ {
1246
+ const preferredNode * a = (const preferredNode * )first ;
1247
+ const preferredNode * b = (const preferredNode * )second ;
1248
+ if (a -> preferred == 1 && b -> preferred == 0 )
1249
+ return -1 ;
1250
+ else if (a -> preferred == 0 && b -> preferred == 1 )
1251
+ return 1 ;
1252
+ else if (a -> original_order < b -> original_order )
1253
+ return -1 ;
1254
+ else if (a -> original_order > b -> original_order )
1255
+ return 1 ;
1256
+ else
1257
+ return 0 ;
1258
+ }
1259
+
1260
+ /* This method takes the randomised list of nodes and sorts preferred nodes to
1261
+ * the top. */
1262
+ static void preferredsort (int * array , size_t len , redisCluster * c ,
1263
+ unsigned short slot )
1264
+ {
1265
+ int i , temp , key_len , * prefnodes ;
1266
+ size_t r ;
1267
+ RedisSock * redis_sock ;
1268
+ char key [1024 ];
1269
+ zval * node ;
1270
+
1271
+ struct preferredNode ab [len ];
1272
+
1273
+ // array: key => order; value => node-idx (0=master)
1274
+ for (i = 0 ; i < len ; i ++ ) {
1275
+ // Get node host+port string.
1276
+ redis_sock = cluster_slot_sock (c , c -> cmd_slot , array [i ]);
1277
+
1278
+ ab [i ].idx = array [i ];
1279
+ ab [i ].original_order = i ;
1280
+ ab [i ].preferred = 0 ;
1281
+ if (!redis_sock ) {
1282
+ continue ;
1283
+ }
1284
+
1285
+ // Is it in the preferred_nodes map?
1286
+ key_len = snprintf (key , sizeof (key ), "%s:%d" , ZSTR_VAL (redis_sock -> host ), redis_sock -> port );
1287
+ // Perhaps the preferred_nodes table should be keyed on the host:port string to make this
1288
+ // easier and faster.
1289
+ ZEND_HASH_FOREACH_VAL (c -> preferred_nodes , node ) {
1290
+ ZVAL_DEREF (node );
1291
+
1292
+ if (strcmp (Z_STRVAL_P (node ), key ) == 0 ) {
1293
+ ab [i ].preferred = 1 ;
1294
+ break ;
1295
+ }
1296
+
1297
+ } ZEND_HASH_FOREACH_END ();
1298
+ }
1299
+
1300
+ // Sort preferred nodes to the top of the list.
1301
+ qsort (ab , len , sizeof (* ab ), preferred_compare );
1302
+ for (i = 0 ; i < len ; i ++ ) {
1303
+ array [i ] = ab [i ].idx ;
1304
+ }
1305
+ }
1306
+
1307
+
1238
1308
/* This method attempts to write our command at random to the master and any
1239
1309
 * attached slaves, until we either successfully do so, or fail. */
1240
1310
static int cluster_dist_write (redisCluster * c , const char * cmd , size_t sz ,
1241
- int nomaster )
1311
+ int nomaster , int preferred )
1242
1312
{
1243
1313
int i , count = 1 , * nodes ;
1244
1314
RedisSock * redis_sock ;
@@ -1256,6 +1326,11 @@ static int cluster_dist_write(redisCluster *c, const char *cmd, size_t sz,
1256
1326
for (i = 0 ; i < count ; i ++ ) nodes [i ] = i ;
1257
1327
fyshuffle (nodes , count );
1258
1328
1329
+ /* Shift preferred nodes to the top of the list if we're in preferred
1330
+ * mode */
1331
+ if (preferred && zend_hash_num_elements (c -> preferred_nodes ) > 0 )
1332
+ preferredsort (nodes , count , c , c -> cmd_slot );
1333
+
1259
1334
/* Iterate through our nodes until we find one we can write to or fail */
1260
1335
for (i = 0 ; i < count ; i ++ ) {
1261
1336
/* Skip if this is the master node and we don't want to query that */
@@ -1306,6 +1381,8 @@ static int cluster_dist_write(redisCluster *c, const char *cmd, size_t sz,
1306
1381
* REDIS_FAILOVER_DISTRIBUTE_SLAVES:
1307
1382
* We pick at random from slave nodes of a given master. This option is
1308
1383
* used to load balance read queries against N slaves.
1384
+ * REDIS_FAILOVER_PREFERRED:
1385
+ * Similar to DISTRIBUTE, but with a list of nodes we prefer over others.
1309
1386
*
1310
1387
* Once we are able to find a node we can write to, we check for MOVED or
1311
1388
* ASKING redirection, such that the keyspace can be updated.
@@ -1315,7 +1392,7 @@ static int cluster_sock_write(redisCluster *c, const char *cmd, size_t sz,
1315
1392
{
1316
1393
redisClusterNode * seed_node ;
1317
1394
RedisSock * redis_sock ;
1318
- int failover , nomaster ;
1395
+ int failover , nomaster , preferred ;
1319
1396
1320
1397
/* First try the socket requested */
1321
1398
redis_sock = c -> cmd_sock ;
@@ -1343,12 +1420,13 @@ static int cluster_sock_write(redisCluster *c, const char *cmd, size_t sz,
1343
1420
} else if (failover == REDIS_FAILOVER_ERROR ) {
1344
1421
/* Try the master, then fall back to any slaves we may have */
1345
1422
if (CLUSTER_SEND_PAYLOAD (redis_sock , cmd , sz ) ||
1346
- !cluster_dist_write (c , cmd , sz , 1 )) return 0 ;
1423
+ !cluster_dist_write (c , cmd , sz , 1 , 0 )) return 0 ;
1347
1424
} else {
1348
1425
/* Include or exclude master node depending on failover option and
1349
1426
* attempt to make our write */
1350
1427
nomaster = failover == REDIS_FAILOVER_DISTRIBUTE_SLAVES ;
1351
- if (!cluster_dist_write (c , cmd , sz , nomaster )) {
1428
+ preferred = failover == REDIS_FAILOVER_PREFERRED ;
1429
+ if (!cluster_dist_write (c , cmd , sz , nomaster , preferred )) {
1352
1430
/* We were able to write to a master or slave at random */
1353
1431
return 0 ;
1354
1432
}
0 commit comments