Module tests.test_zone
Functions
def get_zone_replica_count(client, volume_name, zone_name, chk_running=False)
def get_zone_replica_count(client, volume_name, zone_name, chk_running=False):  # NOQA
    volume = client.by_id_volume(volume_name)
    zone_replica_count = 0
    for replica in volume.replicas:
        if chk_running and not replica.running:
            continue
        replica_host_id = replica.hostId
        replica_host_zone = client.by_id_node(replica_host_id).zone
        if replica_host_zone == zone_name:
            zone_replica_count += 1
    return zone_replica_count
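For example, a zone-spread assertion built on this helper might look like the following minimal sketch (illustrative only; the volume name "my-vol" is hypothetical, while ZONE1 and ZONE2 are this module's constants):

# Illustrative sketch: count only the running replicas per zone for a
# hypothetical volume "my-vol" and assert one replica landed in each zone.
z1 = get_zone_replica_count(client, "my-vol", ZONE1, chk_running=True)
z2 = get_zone_replica_count(client, "my-vol", ZONE2, chk_running=True)
assert z1 == z2 == 1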
def k8s_node_zone_tags(client, core_api)

@pytest.fixture
def k8s_node_zone_tags(client, core_api):  # NOQA
    k8s_zone_label = get_k8s_zone_label()
    lh_nodes = client.list_node()

    node_index = 0
    for node in lh_nodes:
        node_name = node.name

        if node_index % 2 == 0:
            zone = ZONE1
        else:
            zone = ZONE2

        payload = {
            "metadata": {
                "labels": {
                    k8s_zone_label: zone}
            }
        }

        core_api.patch_node(node_name, body=payload)
        node_index += 1

    yield

    lh_nodes = client.list_node()
    node_index = 0
    for node in lh_nodes:
        node_name = node.name

        payload = {
            "metadata": {
                "labels": {
                    k8s_zone_label: None}
            }
        }

        core_api.patch_node(node_name, body=payload)
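A test consumes this fixture by declaring it as a parameter: pytest applies the alternating ZONE1/ZONE2 labels before the test body runs and clears them afterwards. A minimal sketch, with a hypothetical test name and body:

def test_with_zone_labels(client, core_api, volume_name, k8s_node_zone_tags):  # NOQA
    # The fixture has already labeled the Kubernetes nodes, but Longhorn
    # syncs each node's zone from the label asynchronously, so wait for it.
    wait_longhorn_nodes_zone_not_empty(client)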
def test_replica_auto_balance_node_duplicates_in_multiple_zones(client, core_api, volume_name)
@pytest.mark.v2_volume_test  # NOQA
def test_replica_auto_balance_node_duplicates_in_multiple_zones(client, core_api, volume_name):  # NOQA
    """
    Scenario: replica auto-balance to nodes with duplicated replicas in the
              zone.

    Given set `replica-soft-anti-affinity` to `true`.
    And set `replica-zone-soft-anti-affinity` to `true`.
    And set volume spec `replicaAutoBalance` to `least-effort`.
    And set node-1 to zone-1.
        set node-2 to zone-2.
    And disable scheduling for node-3.
    And create a volume with 3 replicas.
    And attach the volume to self-node.
    And zone-1 and zone-2 should contain 3 replicas in total.

    When set node-3 to the zone with duplicated replicas.
    And enable scheduling for node-3.

    Then count replicas running on each node.
    And 1 replica running on node-1,
        1 replica running on node-2,
        1 replica running on node-3.
    And count replicas running in each zone.
    And total of 3 replicas running in zone-1 and zone-2.
    """
    update_setting(client, SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY, "true")
    update_setting(client, SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY, "true")
    update_setting(client, SETTING_REPLICA_AUTO_BALANCE, "least-effort")

    n1, n2, n3 = client.list_node()

    node_zone_map = {
        n1.name: ZONE1,
        n2.name: ZONE2,
        n3.name: "temp"
    }
    set_and_wait_k8s_nodes_zone_label(core_api, node_zone_map)

    client.update(n3, allowScheduling=False)

    n_replicas = 3
    volume = create_and_check_volume(client, volume_name,
                                     num_of_replicas=n_replicas)
    volume.attach(hostId=get_self_host_id())

    z1_r_count = get_zone_replica_count(client, volume_name, ZONE1)
    z2_r_count = get_zone_replica_count(client, volume_name, ZONE2)
    assert z1_r_count + z2_r_count == n_replicas

    # The GKE zone label is periodically updated with the actual zone.
    # Invoke _set_and_wait_k8s_node_zone_label to refresh the zone label
    # with each retry iteration to maintain the expected zone label.
    def _set_and_wait_k8s_node_zone_label():
        node_zone_map = {}
        if z1_r_count == 2:
            node_zone_map = {
                n1.name: ZONE1,
                n2.name: ZONE2,
                n3.name: ZONE1
            }
        else:
            node_zone_map = {
                n1.name: ZONE1,
                n2.name: ZONE2,
                n3.name: ZONE2
            }
        set_and_wait_k8s_nodes_zone_label(core_api, node_zone_map)
    _set_and_wait_k8s_node_zone_label()

    client.update(n3, allowScheduling=True)

    for _ in range(RETRY_COUNTS):
        if is_k8s_node_gke_cos(core_api):
            _set_and_wait_k8s_node_zone_label()

        n1_r_count = get_host_replica_count(
            client, volume_name, n1.name, chk_running=True)
        n2_r_count = get_host_replica_count(
            client, volume_name, n2.name, chk_running=True)
        n3_r_count = get_host_replica_count(
            client, volume_name, n3.name, chk_running=True)

        z1_r_count = get_zone_replica_count(
            client, volume_name, ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(
            client, volume_name, ZONE2, chk_running=True)

        if n1_r_count == n2_r_count == n3_r_count == 1 and \
                z1_r_count + z2_r_count == n_replicas:
            break

        time.sleep(RETRY_INTERVAL)

    assert n1_r_count == 1
    assert n2_r_count == 1
    assert n3_r_count == 1
    assert z1_r_count + z2_r_count == n_replicas
Scenario: replica auto-balance to nodes with duplicated replicas in the zone.

Given set `replica-soft-anti-affinity` to `true`.
And set `replica-zone-soft-anti-affinity` to `true`.
And set volume spec `replicaAutoBalance` to `least-effort`.
And set node-1 to zone-1.
    set node-2 to zone-2.
And disable scheduling for node-3.
And create a volume with 3 replicas.
And attach the volume to self-node.
And zone-1 and zone-2 should contain 3 replicas in total.

When set node-3 to the zone with duplicated replicas.
And enable scheduling for node-3.

Then count replicas running on each node.
And 1 replica running on node-1,
    1 replica running on node-2,
    1 replica running on node-3.
And count replicas running in each zone.
And total of 3 replicas running in zone-1 and zone-2.
def test_replica_auto_balance_should_respect_node_selector(client, core_api, volume_name, pod)
@pytest.mark.v2_volume_test  # NOQA
def test_replica_auto_balance_should_respect_node_selector(client, core_api, volume_name, pod):  # NOQA
    """
    Background:
    Given Setting (replica-soft-anti-affinity) is (true).
    And Setting (replica-zone-soft-anti-affinity) is (true).
    And Node (node-1, node-2) has tag (tag-0).
    And Node (node-1) is in zone (lh-zone-1).
        Node (node-2) is in zone (lh-zone-2).
        Node (node-3) is in zone (should-not-schedule).

    Scenario Outline: replica auto-balance should respect node-selector.

    Issue: https://github.com/longhorn/longhorn/issues/5971

    Given Volume created.
    And Volume replica number is (3).
    And Volume has node selector (tag-0).
    And Volume attached (node-1).
    And Replica is in zone (lh-zone-1, lh-zone-2).

    When Setting (replica-auto-balance) is (least-effort).

    Then Replica is in zone (lh-zone-1, lh-zone-2) (loop 10 sec).
    """
    update_setting(client, SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY, "true")
    update_setting(client, SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY, "true")

    n1, n2, n3 = client.list_node()

    selected_nodes = [n1, n2]
    node_tag = "tag0"
    for node in selected_nodes:
        set_node_tags(client, node, tags=[node_tag])
        wait_for_node_tag_update(client, node.name, [node_tag])

    # The GKE zone label is periodically updated with the actual zone.
    # Invoke _set_and_wait_k8s_node_zone_label to refresh the zone label
    # with each retry iteration to maintain the expected zone label.
    def _set_and_wait_k8s_node_zone_label():
        node_zone_map = {
            n1.name: ZONE1,
            n2.name: ZONE2,
            n3.name: "should-not-schedule"
        }
        set_and_wait_k8s_nodes_zone_label(core_api, node_zone_map)
    _set_and_wait_k8s_node_zone_label()

    n_replicas = 3
    client.create_volume(name=volume_name,
                         numberOfReplicas=n_replicas,
                         nodeSelector=[node_tag],
                         dataEngine=DATA_ENGINE)
    volume = wait_for_volume_detached(client, volume_name)
    volume.attach(hostId=selected_nodes[0].name)

    z1_r_count = get_zone_replica_count(client, volume_name, ZONE1)
    z2_r_count = get_zone_replica_count(client, volume_name, ZONE2)
    assert z1_r_count + z2_r_count == n_replicas

    update_setting(client, SETTING_REPLICA_AUTO_BALANCE, "least-effort")

    # Check over 10 seconds for unexpected re-scheduling.
    for _ in range(10):
        if is_k8s_node_gke_cos(core_api):
            _set_and_wait_k8s_node_zone_label()

        time.sleep(1)
        check_z1_r_count = get_zone_replica_count(client, volume_name, ZONE1)
        check_z2_r_count = get_zone_replica_count(client, volume_name, ZONE2)
        assert check_z1_r_count == z1_r_count
        assert check_z2_r_count == z2_r_count
Background:
Given Setting (replica-soft-anti-affinity) is (true).
And Setting (replica-zone-soft-anti-affinity) is (true).
And Node (node-1, node-2) has tag (tag-0).
And Node (node-1) is in zone (lh-zone-1).
    Node (node-2) is in zone (lh-zone-2).
    Node (node-3) is in zone (should-not-schedule).
Scenario Outline: replica auto-balance should respect node-selector.
Issue: https://github.com/longhorn/longhorn/issues/5971
Given Volume created.
And Volume replica number is (3).
And Volume has node selector (tag-0).
And Volume attached (node-1).
And Replica is in zone (lh-zone-1, lh-zone-2).
When Setting (replica-auto-balance) is (least-effort).
Then Replica is in zone (lh-zone-1, lh-zone-2) (loop 10 sec).
def test_replica_auto_balance_when_disabled_disk_scheduling_in_zone(client, core_api, volume_name)
@pytest.mark.v2_volume_test  # NOQA
def test_replica_auto_balance_when_disabled_disk_scheduling_in_zone(client, core_api, volume_name):  # NOQA
    """
    Scenario: replica auto-balance when disk scheduling is disabled on nodes
              in a zone.

    Issue: https://github.com/longhorn/longhorn/issues/6508

    Given `replica-soft-anti-affinity` setting is `true`.
    And node-1 is in zone-1.
        node-2 is in zone-2.
        node-3 is in zone-3.
    And disk scheduling is disabled on node-3.
    And create a volume with 3 replicas.
    And attach the volume to test pod node.
    And 3 replicas running in zone-1 and zone-2.
        0 replicas running in zone-3.

    When set `replica-auto-balance` to `best-effort`.

    Then 3 replicas running in zone-1 and zone-2.
         0 replicas running in zone-3.
    And replica count remains stable across zones and nodes.
    """
    # Set `replica-soft-anti-affinity` to `true`.
    update_setting(client, SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY, "true")

    # Assign nodes to respective zones
    node1, node2, node3 = client.list_node()

    # The GKE zone label is periodically updated with the actual zone.
    # Invoke _set_and_wait_k8s_node_zone_label to refresh the zone label
    # with each retry iteration to maintain the expected zone label.
    def _set_and_wait_k8s_node_zone_label():
        node_zone_map = {
            node1.name: ZONE1,
            node2.name: ZONE2,
            node3.name: ZONE3
        }
        set_and_wait_k8s_nodes_zone_label(core_api, node_zone_map)
    _set_and_wait_k8s_node_zone_label()

    # Disable disk scheduling on node 3
    cleanup_node_disks(client, node3.name)

    # Create a volume with 3 replicas
    num_of_replicas = 3
    volume = client.create_volume(name=volume_name,
                                  numberOfReplicas=num_of_replicas,
                                  dataEngine=DATA_ENGINE)

    # Wait for the volume to detach and attach it to the test pod node
    volume = wait_for_volume_detached(client, volume_name)
    volume.attach(hostId=get_self_host_id())

    # Define a function to assert replica count
    def assert_replica_count(is_stable=False):
        assert_tolerated = 0
        for _ in range(RETRY_COUNTS):
            if is_k8s_node_gke_cos(core_api):
                _set_and_wait_k8s_node_zone_label()

            time.sleep(RETRY_INTERVAL)

            zone3_replica_count = get_zone_replica_count(
                client, volume_name, ZONE3, chk_running=True)
            assert zone3_replica_count == 0

            total_replica_count = \
                get_zone_replica_count(
                    client, volume_name, ZONE1, chk_running=True) + \
                get_zone_replica_count(
                    client, volume_name, ZONE2, chk_running=True)

            if is_stable:
                try:
                    assert total_replica_count == num_of_replicas
                except AssertionError as e:
                    # The GKE zone label undergoes periodic updates to reflect
                    # the current zone. Consequently, we cannot guarantee the
                    # exact zone of the replica node. Therefore, we'll allow
                    # for one assertion error to accommodate GKE's update
                    # process.
                    if is_k8s_node_gke_cos(core_api) and assert_tolerated < 1:
                        assert_tolerated += 1
                    else:
                        raise AssertionError(e)
            elif total_replica_count == num_of_replicas:
                break

        assert total_replica_count == 3

    # Perform the initial assertion to ensure the replica count is as expected
    assert_replica_count()

    # Update the replica-auto-balance setting to `best-effort`
    update_setting(client, SETTING_REPLICA_AUTO_BALANCE, "best-effort")

    # Perform the final assertion to ensure the replica count is as expected,
    # and stable after the setting update
    assert_replica_count(is_stable=True)
Scenario: replica auto-balance when disk scheduling is disabled on nodes in a zone.
Issue: https://github.com/longhorn/longhorn/issues/6508
Given `replica-soft-anti-affinity` setting is `true`.
And node-1 is in zone-1.
    node-2 is in zone-2.
    node-3 is in zone-3.
And disk scheduling is disabled on node-3.
And create a volume with 3 replicas.
And attach the volume to test pod node.
And 3 replicas running in zone-1 and zone-2.
    0 replicas running in zone-3.

When set `replica-auto-balance` to `best-effort`.

Then 3 replicas running in zone-1 and zone-2.
     0 replicas running in zone-3.
And replica count remains stable across zones and nodes.
def test_replica_auto_balance_when_no_storage_available_in_zone(client, core_api, volume_name)
@pytest.mark.v2_volume_test  # NOQA
def test_replica_auto_balance_when_no_storage_available_in_zone(client, core_api, volume_name):  # NOQA
    """
    Scenario: replica auto-balance when there is no storage available on
              nodes in a zone.

    Issue: https://github.com/longhorn/longhorn/issues/6671

    Given `replica-soft-anti-affinity` setting is `true`.
    And node-1 is in zone-1.
        node-2 is in zone-2.
        node-3 is in zone-3.
    And fill up the storage on node-3.
    And create a volume with 3 replicas.
    And attach the volume to test pod node.
    And 3 replicas running in zone-1 and zone-2.
        0 replicas running in zone-3.

    When set `replica-auto-balance` to `best-effort`.

    Then 3 replicas running in zone-1 and zone-2.
         0 replicas running in zone-3.
    And replica count remains stable across zones and nodes.
    """
    # Set `replica-soft-anti-affinity` to `true`.
    update_setting(client, SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY, "true")

    # Assign nodes to respective zones
    node1, node2, node3 = client.list_node()

    # The GKE zone label is periodically updated with the actual zone.
    # Invoke _set_and_wait_k8s_node_zone_label to refresh the zone label
    # with each retry iteration to maintain the expected zone label.
    def _set_and_wait_k8s_node_zone_label():
        node_zone_map = {
            node1.name: ZONE1,
            node2.name: ZONE2,
            node3.name: ZONE3
        }
        set_and_wait_k8s_nodes_zone_label(core_api, node_zone_map)
    _set_and_wait_k8s_node_zone_label()

    # Fill up the storage on node 3
    for _, disk in node3.disks.items():
        disk.storageReserved = disk.storageMaximum

    update_disks = get_update_disks(node3.disks)
    update_node_disks(client, node3.name, disks=update_disks, retry=True)

    # Create a volume with 3 replicas
    num_of_replicas = 3
    volume = client.create_volume(name=volume_name,
                                  numberOfReplicas=num_of_replicas,
                                  dataEngine=DATA_ENGINE)

    # Wait for the volume to detach and attach it to the test pod node
    volume = wait_for_volume_detached(client, volume_name)
    volume.attach(hostId=get_self_host_id())

    # Define a function to assert replica count
    def assert_replica_count(is_stable=False):
        assert_tolerated = 0
        for _ in range(RETRY_COUNTS):
            if is_k8s_node_gke_cos(core_api):
                _set_and_wait_k8s_node_zone_label()

            time.sleep(RETRY_INTERVAL)

            zone3_replica_count = get_zone_replica_count(
                client, volume_name, ZONE3, chk_running=True)
            assert zone3_replica_count == 0

            total_replica_count = \
                get_zone_replica_count(
                    client, volume_name, ZONE1, chk_running=True) + \
                get_zone_replica_count(
                    client, volume_name, ZONE2, chk_running=True)

            if is_stable:
                try:
                    assert total_replica_count == num_of_replicas
                except AssertionError as e:
                    # The GKE zone label undergoes periodic updates to reflect
                    # the current zone. Consequently, we cannot guarantee the
                    # exact zone of the replica node. Therefore, we'll allow
                    # for one assertion error to accommodate GKE's update
                    # process.
                    if is_k8s_node_gke_cos(core_api) and assert_tolerated < 1:
                        assert_tolerated += 1
                    else:
                        raise AssertionError(e)
            elif total_replica_count == num_of_replicas:
                break

        assert total_replica_count == 3

    # Perform the initial assertion to ensure the replica count is as expected
    assert_replica_count()

    # Update the replica-auto-balance setting to `best-effort`
    update_setting(client, SETTING_REPLICA_AUTO_BALANCE, "best-effort")

    # Perform the final assertion to ensure the replica count is as expected,
    # and stable after the setting update
    assert_replica_count(is_stable=True)
Scenario: replica auto-balance when there is no storage available on nodes in a zone.
Issue: https://github.com/longhorn/longhorn/issues/6671
Given `replica-soft-anti-affinity` setting is `true`.
And node-1 is in zone-1.
    node-2 is in zone-2.
    node-3 is in zone-3.
And fill up the storage on node-3.
And create a volume with 3 replicas.
And attach the volume to test pod node.
And 3 replicas running in zone-1 and zone-2.
    0 replicas running in zone-3.

When set `replica-auto-balance` to `best-effort`.

Then 3 replicas running in zone-1 and zone-2.
     0 replicas running in zone-3.
And replica count remains stable across zones and nodes.
def test_replica_auto_balance_when_replica_on_unschedulable_node(client, core_api, volume_name, request)
@pytest.mark.v2_volume_test  # NOQA
def test_replica_auto_balance_when_replica_on_unschedulable_node(client, core_api, volume_name, request):  # NOQA
    """
    Scenario: replica auto-balance when replica already running on
              an unschedulable node.

    Issue: https://github.com/longhorn/longhorn/issues/4502

    Given set `replica-soft-anti-affinity` to `true`.
    And set `replica-zone-soft-anti-affinity` to `true`.
    And set volume spec `replicaAutoBalance` to `least-effort`.
    And set node-1 to zone-1.
        set node-2 to zone-2.
        set node-3 to zone-3.
    And node-2 tagged `AVAIL`.
        node-3 tagged `AVAIL`.
    And create a volume with 2 replicas and nodeSelector `AVAIL`.
    And attach the volume to self-node.
    And 0 replicas running in zone-1.
        1 replica running in zone-2.
        1 replica running in zone-3.

    When cordon node-2.

    Then replicas should remain balanced with,
        0 replicas running in zone-1.
        1 replica running in zone-2.
        1 replica running in zone-3.
    """
    update_setting(client, SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY, "true")
    update_setting(client, SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY, "true")
    update_setting(client, SETTING_REPLICA_AUTO_BALANCE, "least-effort")

    n1, n2, n3 = client.list_node()

    # The GKE zone label is periodically updated with the actual zone.
    # Invoke _set_and_wait_k8s_node_zone_label to refresh the zone label
    # with each retry iteration to maintain the expected zone label.
    def _set_and_wait_k8s_node_zone_label():
        node_zone_map = {
            n1.name: ZONE1,
            n2.name: ZONE2,
            n3.name: ZONE3
        }
        set_and_wait_k8s_nodes_zone_label(core_api, node_zone_map)
    _set_and_wait_k8s_node_zone_label()

    client.update(n2, allowScheduling=True, tags=["AVAIL"])
    client.update(n3, allowScheduling=True, tags=["AVAIL"])

    n_replicas = 2
    volume = client.create_volume(name=volume_name,
                                  numberOfReplicas=n_replicas,
                                  nodeSelector=["AVAIL"],
                                  dataLocality="best-effort",
                                  dataEngine=DATA_ENGINE)
    volume = wait_for_volume_detached(client, volume_name)
    volume.attach(hostId=get_self_host_id())

    for _ in range(RETRY_COUNTS):
        if is_k8s_node_gke_cos(core_api):
            _set_and_wait_k8s_node_zone_label()

        z1_r_count = get_zone_replica_count(
            client, volume_name, ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(
            client, volume_name, ZONE2, chk_running=True)
        z3_r_count = get_zone_replica_count(
            client, volume_name, ZONE3, chk_running=True)

        if z1_r_count == 0 and (z2_r_count and z3_r_count == 1):
            break

        time.sleep(RETRY_INTERVAL)

    assert z1_r_count == 0 and (z2_r_count and z3_r_count == 1)

    # Set cordon on node
    def finalizer():
        set_node_cordon(core_api, n2.name, False)
    request.addfinalizer(finalizer)

    set_node_cordon(core_api, n2.name, True)

    assert_tolerated = 0
    for _ in range(RETRY_COUNTS):
        if is_k8s_node_gke_cos(core_api):
            _set_and_wait_k8s_node_zone_label()

        z1_r_count = get_zone_replica_count(
            client, volume_name, ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(
            client, volume_name, ZONE2, chk_running=True)
        z3_r_count = get_zone_replica_count(
            client, volume_name, ZONE3, chk_running=True)

        try:
            assert z1_r_count == 0 and (z2_r_count and z3_r_count == 1)
        except AssertionError as e:
            # The GKE zone label undergoes periodic updates to reflect
            # the current zone. Consequently, we cannot guarantee the
            # exact zone of the replica node. Therefore, we'll allow
            # for one assertion error to accommodate GKE's update process.
            if is_k8s_node_gke_cos(core_api) and assert_tolerated < 1:
                assert_tolerated += 1
            else:
                raise AssertionError(e)

        volume = client.by_id_volume(volume_name)
        for status in volume.rebuildStatus:
            assert not status.isRebuilding

        time.sleep(RETRY_INTERVAL)
Scenario: replica auto-balance when replica already running on an unschedulable node.
Issue: https://github.com/longhorn/longhorn/issues/4502
Given set `replica-soft-anti-affinity` to `true`.
And set `replica-zone-soft-anti-affinity` to `true`.
And set volume spec `replicaAutoBalance` to `least-effort`.
And set node-1 to zone-1.
    set node-2 to zone-2.
    set node-3 to zone-3.
And node-2 tagged `AVAIL`.
    node-3 tagged `AVAIL`.
And create a volume with 2 replicas and nodeSelector `AVAIL`.
And attach the volume to self-node.
And 0 replicas running in zone-1.
    1 replica running in zone-2.
    1 replica running in zone-3.

When cordon node-2.

Then replicas should remain balanced with,
    0 replicas running in zone-1.
    1 replica running in zone-2.
    1 replica running in zone-3.
def test_replica_auto_balance_zone_best_effort(client, core_api, volume_name)
@pytest.mark.v2_volume_test  # NOQA
def test_replica_auto_balance_zone_best_effort(client, core_api, volume_name):  # NOQA
    """
    Scenario: replica auto-balance zones with best-effort.

    Given set `replica-soft-anti-affinity` to `true`.
    And set `replica-zone-soft-anti-affinity` to `true`.
    And set volume spec `replicaAutoBalance` to `best-effort`.
    And set node-1 to zone-1.
        set node-2 to zone-2.
        set node-3 to zone-3.
    And disable scheduling for node-2.
        disable scheduling for node-3.
    And create a volume with 6 replicas.
    And attach the volume to self-node.
    And 6 replicas running in zone-1.
        0 replicas running in zone-2.
        0 replicas running in zone-3.

    When enable scheduling for node-2.
    Then count replicas running on each node.
    And 3 replicas running in zone-1.
        3 replicas running in zone-2.
        0 replicas running in zone-3.

    When enable scheduling for node-3.
    Then count replicas running on each node.
    And 2 replicas running in zone-1.
        2 replicas running in zone-2.
        2 replicas running in zone-3.
    """
    update_setting(client, SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY, "true")
    update_setting(client, SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY, "true")
    update_setting(client, SETTING_REPLICA_AUTO_BALANCE, "best-effort")

    n1, n2, n3 = client.list_node()

    # The GKE zone label is periodically updated with the actual zone.
    # Invoke _set_and_wait_k8s_node_zone_label to refresh the zone label
    # with each retry iteration to maintain the expected zone label.
    def _set_and_wait_k8s_node_zone_label():
        node_zone_map = {
            n1.name: ZONE1,
            n2.name: ZONE2,
            n3.name: ZONE3
        }
        set_and_wait_k8s_nodes_zone_label(core_api, node_zone_map)
    _set_and_wait_k8s_node_zone_label()

    client.update(n2, allowScheduling=False)
    client.update(n3, allowScheduling=False)

    n_replicas = 6
    volume = create_and_check_volume(client, volume_name,
                                     num_of_replicas=n_replicas)
    volume.attach(hostId=get_self_host_id())

    for _ in range(RETRY_COUNTS):
        z1_r_count = get_zone_replica_count(
            client, volume_name, ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(
            client, volume_name, ZONE2, chk_running=True)
        z3_r_count = get_zone_replica_count(
            client, volume_name, ZONE3, chk_running=True)

        if z1_r_count == 6 and z2_r_count == z3_r_count == 0:
            break

        if is_k8s_node_gke_cos(core_api):
            _set_and_wait_k8s_node_zone_label()
        time.sleep(RETRY_INTERVAL)

    assert z1_r_count == 6
    assert z2_r_count == 0
    assert z3_r_count == 0

    client.update(n2, allowScheduling=True)

    for _ in range(RETRY_COUNTS):
        z1_r_count = get_zone_replica_count(
            client, volume_name, ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(
            client, volume_name, ZONE2, chk_running=True)
        z3_r_count = get_zone_replica_count(
            client, volume_name, ZONE3, chk_running=True)

        if z1_r_count == z2_r_count == 3 and z3_r_count == 0:
            break

        if is_k8s_node_gke_cos(core_api):
            _set_and_wait_k8s_node_zone_label()
        time.sleep(RETRY_INTERVAL_LONG)

    assert z1_r_count == 3
    assert z2_r_count == 3
    assert z3_r_count == 0

    client.update(n3, allowScheduling=True)

    for _ in range(RETRY_COUNTS):
        z1_r_count = get_zone_replica_count(
            client, volume_name, ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(
            client, volume_name, ZONE2, chk_running=True)
        z3_r_count = get_zone_replica_count(
            client, volume_name, ZONE3, chk_running=True)

        if z1_r_count == z2_r_count == z3_r_count == 2:
            break

        if is_k8s_node_gke_cos(core_api):
            _set_and_wait_k8s_node_zone_label()
        time.sleep(RETRY_INTERVAL_LONG)

    assert z1_r_count == 2
    assert z2_r_count == 2
    assert z3_r_count == 2
Scenario: replica auto-balance zones with best-effort.
Given set `replica-soft-anti-affinity` to `true`.
And set `replica-zone-soft-anti-affinity` to `true`.
And set volume spec `replicaAutoBalance` to `best-effort`.
And set node-1 to zone-1.
    set node-2 to zone-2.
    set node-3 to zone-3.
And disable scheduling for node-2.
    disable scheduling for node-3.
And create a volume with 6 replicas.
And attach the volume to self-node.
And 6 replicas running in zone-1.
    0 replicas running in zone-2.
    0 replicas running in zone-3.

When enable scheduling for node-2.
Then count replicas running on each node.
And 3 replicas running in zone-1.
    3 replicas running in zone-2.
    0 replicas running in zone-3.

When enable scheduling for node-3.
Then count replicas running on each node.
And 2 replicas running in zone-1.
    2 replicas running in zone-2.
    2 replicas running in zone-3.
def test_replica_auto_balance_zone_best_effort_with_data_locality(client, core_api, volume_name, pod)
@pytest.mark.v2_volume_test  # NOQA
def test_replica_auto_balance_zone_best_effort_with_data_locality(client, core_api, volume_name, pod):  # NOQA
    """
    Background:
    Given set `replica-soft-anti-affinity` to `true`.
    And set `replica-zone-soft-anti-affinity` to `true`.
    And set `default-data-locality` to `best-effort`.
    And set `replicaAutoBalance` to `best-effort`.
    And set node-1 to zone-1.
        set node-2 to zone-1.
        set node-3 to zone-2.
    And create volume with 2 replicas.
    And create pv for volume.
    And create pvc for volume.

    Scenario Outline: replica auto-balance zones with best-effort should not
                      remove pod local replicas when data locality is enabled
                      (best-effort).

    Given create and wait pod on <pod-node>.
    And disable scheduling and evict node-3.
    And count replicas on each node.
    And 1 replica running on <pod-node>.
        1 replica running on <duplicate-node>.
        0 replicas running on node-3.

    When enable scheduling for node-3.
    Then count replicas on each node.
    And 1 replica running on <pod-node>.
        0 replicas running on <duplicate-node>.
        1 replica running on node-3.
    And count replicas in each zone.
    And 1 replica running in zone-1.
        1 replica running in zone-2.
    And loop 3 times with each wait 5 seconds and count replicas on each
        node, to ensure no additional scheduling is happening.
        1 replica running on <pod-node>.
        0 replicas running on <duplicate-node>.
        1 replica running on node-3.
    And delete pod.

    Examples:
        | pod-node | duplicate-node |
        | node-1   | node-2         |
        | node-2   | node-1         |
        | node-1   | node-2         |
    """
    update_setting(client, SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY, "true")
    update_setting(client, SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY, "true")
    update_setting(client, SETTING_DEFAULT_DATA_LOCALITY, "best-effort")
    update_setting(client, SETTING_REPLICA_AUTO_BALANCE, "best-effort")

    n1, n2, n3 = client.list_node()

    # The GKE zone label is periodically updated with the actual zone.
    # Invoke _set_and_wait_k8s_node_zone_label to refresh the zone label
    # with each retry iteration to maintain the expected zone label.
    def _set_and_wait_k8s_node_zone_label():
        node_zone_map = {
            n1.name: ZONE1,
            n2.name: ZONE1,
            n3.name: ZONE2
        }
        set_and_wait_k8s_nodes_zone_label(core_api, node_zone_map)
    _set_and_wait_k8s_node_zone_label()

    n_replicas = 2
    volume = create_and_check_volume(client, volume_name,
                                     num_of_replicas=n_replicas)
    create_pv_for_volume(client, core_api, volume, volume_name)
    create_pvc_for_volume(client, core_api, volume, volume_name)

    pod['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": volume_name
        }
    }]

    for i in range(1, 4):
        pod_node_name = n2.name if i % 2 == 0 else n1.name
        pod['spec']['nodeSelector'] = {
            "kubernetes.io/hostname": pod_node_name
        }
        create_and_wait_pod(core_api, pod)

        client.update(n3, allowScheduling=False, evictionRequested=True)

        duplicate_node = [n1.name, n2.name]
        duplicate_node.remove(pod_node_name)
        for _ in range(RETRY_COUNTS):
            if is_k8s_node_gke_cos(core_api):
                _set_and_wait_k8s_node_zone_label()

            pod_node_r_count = get_host_replica_count(
                client, volume_name, pod_node_name, chk_running=True)
            duplicate_node_r_count = get_host_replica_count(
                client, volume_name, duplicate_node[0], chk_running=True)
            balance_node_r_count = get_host_replica_count(
                client, volume_name, n3.name, chk_running=False)

            if pod_node_r_count == duplicate_node_r_count == 1 and \
                    balance_node_r_count == 0:
                break

            time.sleep(RETRY_INTERVAL)

        assert pod_node_r_count == 1
        assert duplicate_node_r_count == 1
        assert balance_node_r_count == 0

        client.update(n3, allowScheduling=True)

        for _ in range(RETRY_COUNTS):
            if is_k8s_node_gke_cos(core_api):
                _set_and_wait_k8s_node_zone_label()

            pod_node_r_count = get_host_replica_count(
                client, volume_name, pod_node_name, chk_running=True)
            duplicate_node_r_count = get_host_replica_count(
                client, volume_name, duplicate_node[0], chk_running=False)
            balance_node_r_count = get_host_replica_count(
                client, volume_name, n3.name, chk_running=True)

            if pod_node_r_count == balance_node_r_count == 1 and \
                    duplicate_node_r_count == 0:
                break

            time.sleep(RETRY_INTERVAL)

        assert pod_node_r_count == 1
        assert duplicate_node_r_count == 0
        assert balance_node_r_count == 1

        z1_r_count = get_zone_replica_count(
            client, volume_name, ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(
            client, volume_name, ZONE2, chk_running=True)
        assert z1_r_count == z2_r_count == 1

        # Loop 3 times, waiting 5 seconds each, to ensure there is no
        # re-scheduling happening.
        for _ in range(3):
            if is_k8s_node_gke_cos(core_api):
                _set_and_wait_k8s_node_zone_label()

            time.sleep(5)
            assert pod_node_r_count == get_host_replica_count(
                client, volume_name, pod_node_name, chk_running=True)
            assert duplicate_node_r_count == get_host_replica_count(
                client, volume_name, duplicate_node[0], chk_running=False)
            assert balance_node_r_count == get_host_replica_count(
                client, volume_name, n3.name, chk_running=True)

        delete_and_wait_pod(core_api, pod['metadata']['name'])
Background:
Given set `replica-soft-anti-affinity` to `true`.
And set `replica-zone-soft-anti-affinity` to `true`.
And set `default-data-locality` to `best-effort`.
And set `replicaAutoBalance` to `best-effort`.
And set node-1 to zone-1.
    set node-2 to zone-1.
    set node-3 to zone-2.
And create volume with 2 replicas.
And create pv for volume.
And create pvc for volume.

Scenario Outline: replica auto-balance zones with best-effort should not
remove pod local replicas when data locality is enabled (best-effort).

Given create and wait pod on <pod-node>.
And disable scheduling and evict node-3.
And count replicas on each node.
And 1 replica running on <pod-node>.
    1 replica running on <duplicate-node>.
    0 replicas running on node-3.

When enable scheduling for node-3.
Then count replicas on each node.
And 1 replica running on <pod-node>.
    0 replicas running on <duplicate-node>.
    1 replica running on node-3.
And count replicas in each zone.
And 1 replica running in zone-1.
    1 replica running in zone-2.
And loop 3 times with each wait 5 seconds and count replicas on each node,
to ensure no additional scheduling is happening.
    1 replica running on <pod-node>.
    0 replicas running on <duplicate-node>.
    1 replica running on node-3.
And delete pod.

Examples:
    | pod-node | duplicate-node |
    | node-1   | node-2         |
    | node-2   | node-1         |
    | node-1   | node-2         |
def test_replica_auto_balance_zone_best_effort_with_uneven_node_in_zones(client, core_api, volume_name, pod)
@pytest.mark.skip(reason="REQUIRE_5_NODES")  # NOQA
def test_replica_auto_balance_zone_best_effort_with_uneven_node_in_zones(client, core_api, volume_name, pod):  # NOQA
    """
    Given set `replica-soft-anti-affinity` to `true`.
    And set `replica-zone-soft-anti-affinity` to `true`.
    And set `replicaAutoBalance` to `best-effort`.
    And set node-1 to zone-1.
        set node-2 to zone-1.
        set node-3 to zone-1.
        set node-4 to zone-2.
        set node-5 to zone-2.
    And disable scheduling for node-2.
        disable scheduling for node-3.
        disable scheduling for node-4.
        disable scheduling for node-5.
    And create volume with 4 replicas.
    And attach the volume to node-1.

    Scenario: replica auto-balance zones with best-effort should balance
              replicas in zone.

    Given 4 replicas running on node-1.
        0 replicas running on node-2.
        0 replicas running on node-3.
        0 replicas running on node-4.
        0 replicas running on node-5.

    When enable scheduling for node-4.
    Then count replicas in each zone.
    And 2 replicas running in zone-1.
        2 replicas running in zone-2.

    When enable scheduling for node-2.
        enable scheduling for node-3.
    Then count replicas on each node.
    And 1 replica running on node-1.
        1 replica running on node-2.
        1 replica running on node-3.
        1 replica running on node-4.
        0 replicas running on node-5.

    When enable scheduling for node-5.
    Then count replicas in each zone.
    And 2 replicas running in zone-1.
        2 replicas running in zone-2.
    """
    update_setting(client, SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY, "true")
    update_setting(client, SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY, "true")
    update_setting(client, SETTING_DEFAULT_DATA_LOCALITY, "best-effort")
    update_setting(client, SETTING_REPLICA_AUTO_BALANCE, "best-effort")

    n1, n2, n3, n4, n5 = client.list_node()

    # The GKE zone label is periodically updated with the actual zone.
    # Invoke _set_and_wait_k8s_node_zone_label to refresh the zone label
    # with each retry iteration to maintain the expected zone label.
    def _set_and_wait_k8s_node_zone_label():
        node_zone_map = {
            n1.name: ZONE1,
            n2.name: ZONE1,
            n3.name: ZONE1,
            n4.name: ZONE2,
            n5.name: ZONE2
        }
        set_and_wait_k8s_nodes_zone_label(core_api, node_zone_map)
    _set_and_wait_k8s_node_zone_label()

    client.update(n2, allowScheduling=False)
    client.update(n3, allowScheduling=False)
    client.update(n4, allowScheduling=False)
    client.update(n5, allowScheduling=False)

    n_replicas = 4
    volume = create_and_check_volume(client, volume_name,
                                     num_of_replicas=n_replicas)
    volume.attach(hostId=n1.name)

    for _ in range(RETRY_COUNTS):
        n1_r_count = get_host_replica_count(
            client, volume_name, n1.name, chk_running=True)
        n2_r_count = get_host_replica_count(
            client, volume_name, n2.name, chk_running=False)
        n3_r_count = get_host_replica_count(
            client, volume_name, n3.name, chk_running=False)
        n4_r_count = get_host_replica_count(
            client, volume_name, n4.name, chk_running=False)
        n5_r_count = get_host_replica_count(
            client, volume_name, n5.name, chk_running=False)

        if n1_r_count == 4 and \
                n2_r_count == n3_r_count == n4_r_count == n5_r_count == 0:
            break

        if is_k8s_node_gke_cos(core_api):
            _set_and_wait_k8s_node_zone_label()
        time.sleep(RETRY_INTERVAL)

    assert n1_r_count == 4
    assert n2_r_count == 0
    assert n3_r_count == 0
    assert n4_r_count == 0
    assert n5_r_count == 0

    client.update(n4, allowScheduling=True)

    for _ in range(RETRY_COUNTS):
        z1_r_count = get_zone_replica_count(
            client, volume_name, ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(
            client, volume_name, ZONE2, chk_running=True)

        if z1_r_count == z2_r_count == 2:
            break

        if is_k8s_node_gke_cos(core_api):
            _set_and_wait_k8s_node_zone_label()
        time.sleep(RETRY_INTERVAL)

    assert z1_r_count == 2
    assert z2_r_count == 2

    client.update(n2, allowScheduling=True)
    client.update(n3, allowScheduling=True)

    for _ in range(RETRY_COUNTS):
        n1_r_count = get_host_replica_count(
            client, volume_name, n1.name, chk_running=True)
        n2_r_count = get_host_replica_count(
            client, volume_name, n2.name, chk_running=True)
        n3_r_count = get_host_replica_count(
            client, volume_name, n3.name, chk_running=True)
        n4_r_count = get_host_replica_count(
            client, volume_name, n4.name, chk_running=True)
        n5_r_count = get_host_replica_count(
            client, volume_name, n5.name, chk_running=False)

        if n1_r_count == n2_r_count == n3_r_count == n4_r_count == 1 and \
                n5_r_count == 0:
            break

        if is_k8s_node_gke_cos(core_api):
            _set_and_wait_k8s_node_zone_label()
        time.sleep(RETRY_INTERVAL)

    assert n1_r_count == 1
    assert n2_r_count == 1
    assert n3_r_count == 1
    assert n4_r_count == 1
    assert n5_r_count == 0

    client.update(n5, allowScheduling=True)

    for _ in range(RETRY_COUNTS):
        z1_r_count = get_zone_replica_count(
            client, volume_name, ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(
            client, volume_name, ZONE2, chk_running=True)

        if z1_r_count == z2_r_count == 2:
            break

        if is_k8s_node_gke_cos(core_api):
            _set_and_wait_k8s_node_zone_label()
        time.sleep(RETRY_INTERVAL)

    assert z1_r_count == 2
    assert z2_r_count == 2
Given set `replica-soft-anti-affinity` to `true`.
And set `replica-zone-soft-anti-affinity` to `true`.
And set `replicaAutoBalance` to `best-effort`.
And set node-1 to zone-1.
    set node-2 to zone-1.
    set node-3 to zone-1.
    set node-4 to zone-2.
    set node-5 to zone-2.
And disable scheduling for node-2.
    disable scheduling for node-3.
    disable scheduling for node-4.
    disable scheduling for node-5.
And create volume with 4 replicas.
And attach the volume to node-1.

Scenario: replica auto-balance zones with best-effort should balance replicas in zone.

Given 4 replicas running on node-1.
    0 replicas running on node-2.
    0 replicas running on node-3.
    0 replicas running on node-4.
    0 replicas running on node-5.

When enable scheduling for node-4.
Then count replicas in each zone.
And 2 replicas running in zone-1.
    2 replicas running in zone-2.

When enable scheduling for node-2.
    enable scheduling for node-3.
Then count replicas on each node.
And 1 replica running on node-1.
    1 replica running on node-2.
    1 replica running on node-3.
    1 replica running on node-4.
    0 replicas running on node-5.

When enable scheduling for node-5.
Then count replicas in each zone.
And 2 replicas running in zone-1.
    2 replicas running in zone-2.
def test_replica_auto_balance_zone_least_effort(client, core_api, volume_name)
@pytest.mark.v2_volume_test  # NOQA
def test_replica_auto_balance_zone_least_effort(client, core_api, volume_name):  # NOQA
    """
    Scenario: replica auto-balance zones with least-effort.

    Given set `replica-soft-anti-affinity` to `true`.
    And set `replica-zone-soft-anti-affinity` to `true`.
    And set volume spec `replicaAutoBalance` to `least-effort`.
    And set node-1 to zone-1.
        set node-2 to zone-2.
        set node-3 to zone-3.
    And disable scheduling for node-2.
        disable scheduling for node-3.
    And create a volume with 6 replicas.
    And attach the volume to self-node.
    And 6 replicas running in zone-1.
        0 replicas running in zone-2.
        0 replicas running in zone-3.

    When enable scheduling for node-2.
    Then count replicas running on each node.
    And zone-1 replica count != zone-2 replica count.
        zone-2 replica count != 0.
        zone-3 replica count == 0.

    When enable scheduling for node-3.
    Then count replicas running on each node.
    And zone-1 replica count != zone-3 replica count.
        zone-2 replica count != 0.
        zone-3 replica count != 0.
    """
    update_setting(client, SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY, "true")
    update_setting(client, SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY, "true")
    update_setting(client, SETTING_REPLICA_AUTO_BALANCE, "least-effort")

    n1, n2, n3 = client.list_node()

    # The GKE zone label is periodically updated with the actual zone.
    # Invoke _set_and_wait_k8s_node_zone_label to refresh the zone label
    # with each retry iteration to maintain the expected zone label.
    def _set_and_wait_k8s_node_zone_label():
        node_zone_map = {
            n1.name: ZONE1,
            n2.name: ZONE2,
            n3.name: ZONE3
        }
        set_and_wait_k8s_nodes_zone_label(core_api, node_zone_map)
    _set_and_wait_k8s_node_zone_label()

    client.update(n2, allowScheduling=False)
    client.update(n3, allowScheduling=False)

    n_replicas = 6
    volume = create_and_check_volume(client, volume_name,
                                     num_of_replicas=n_replicas)
    volume.attach(hostId=get_self_host_id())

    for _ in range(RETRY_COUNTS):
        z1_r_count = get_zone_replica_count(
            client, volume_name, ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(
            client, volume_name, ZONE2, chk_running=True)
        z3_r_count = get_zone_replica_count(
            client, volume_name, ZONE3, chk_running=True)

        if z1_r_count == 6 and z2_r_count == z3_r_count == 0:
            break

        if is_k8s_node_gke_cos(core_api):
            _set_and_wait_k8s_node_zone_label()
        time.sleep(RETRY_INTERVAL)

    assert z1_r_count == 6
    assert z2_r_count == 0
    assert z3_r_count == 0

    client.update(n2, allowScheduling=True)

    for _ in range(RETRY_COUNTS):
        z1_r_count = get_zone_replica_count(
            client, volume_name, ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(
            client, volume_name, ZONE2, chk_running=True)
        z3_r_count = get_zone_replica_count(
            client, volume_name, ZONE3, chk_running=True)

        all_r_count = z1_r_count + z2_r_count + z3_r_count
        if z2_r_count != 0 and all_r_count == n_replicas:
            break

        if is_k8s_node_gke_cos(core_api):
            _set_and_wait_k8s_node_zone_label()
        time.sleep(RETRY_INTERVAL)

    assert z1_r_count != z2_r_count
    assert z2_r_count != 0
    assert z3_r_count == 0

    client.update(n3, allowScheduling=True)

    for _ in range(RETRY_COUNTS):
        z1_r_count = get_zone_replica_count(
            client, volume_name, ZONE1, chk_running=True)
        z2_r_count = get_zone_replica_count(
            client, volume_name, ZONE2, chk_running=True)
        z3_r_count = get_zone_replica_count(
            client, volume_name, ZONE3, chk_running=True)

        all_r_count = z1_r_count + z2_r_count + z3_r_count
        if z3_r_count != 0 and all_r_count == n_replicas:
            break

        if is_k8s_node_gke_cos(core_api):
            _set_and_wait_k8s_node_zone_label()
        time.sleep(RETRY_INTERVAL)

    assert z1_r_count != z3_r_count
    assert z2_r_count != 0
    assert z3_r_count != 0
Scenario: replica auto-balance zones with least-effort.
Given set `replica-soft-anti-affinity` to `true`.
And set `replica-zone-soft-anti-affinity` to `true`.
And set volume spec `replicaAutoBalance` to `least-effort`.
And set node-1 to zone-1.
    set node-2 to zone-2.
    set node-3 to zone-3.
And disable scheduling for node-2.
    disable scheduling for node-3.
And create a volume with 6 replicas.
And attach the volume to self-node.
And 6 replicas running in zone-1.
    0 replicas running in zone-2.
    0 replicas running in zone-3.

When enable scheduling for node-2.
Then count replicas running on each node.
And zone-1 replica count != zone-2 replica count.
    zone-2 replica count != 0.
    zone-3 replica count == 0.

When enable scheduling for node-3.
Then count replicas running on each node.
And zone-1 replica count != zone-3 replica count.
    zone-2 replica count != 0.
    zone-3 replica count != 0.
def test_replica_zone_anti_affinity(client, core_api, volume_name, k8s_node_zone_tags)
@pytest.mark.v2_volume_test  # NOQA
@pytest.mark.node  # NOQA
def test_replica_zone_anti_affinity(client, core_api, volume_name, k8s_node_zone_tags):  # NOQA
    """
    Test replica scheduler with zone anti-affinity

    1. Set zone anti-affinity to hard.
    2. Label nodes 1 & 2 with same zone label "zone1".
       Label node 3 with zone label "zone2".
    3. Create a volume with 3 replicas.
    4. Wait for volume condition `scheduled` to be false.
    5. Label node 2 with zone label "zone3".
    6. Wait for volume condition `scheduled` to be success.
    7. Clear the volume.
    8. Set zone anti-affinity to soft.
    9. Change the zone labels on node 1 & 2 & 3 to "zone1".
    10. Create a volume.
    11. Wait for volume condition `scheduled` to be success.
    12. Clean up the replica count, the zone labels and the volume.
    """
    wait_longhorn_nodes_zone_not_empty(client)

    replica_node_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(replica_node_soft_anti_affinity_setting, value="false")

    replica_zone_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY)
    client.update(replica_zone_soft_anti_affinity_setting, value="false")

    volume = create_and_check_volume(client, volume_name)

    lh_nodes = client.list_node()

    count = 0
    node_zone_map = {}
    for node in lh_nodes:
        count += 1
        node_zone_map[node.name] = "lh-zone" + str(count)
    set_and_wait_k8s_nodes_zone_label(core_api, node_zone_map)

    wait_for_volume_condition_scheduled(client, volume_name, "status",
                                        CONDITION_STATUS_TRUE)

    replica_zone_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY)
    client.update(replica_zone_soft_anti_affinity_setting, value="true")

    volume = client.by_id_volume(volume_name)
    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    node_zone_map = {}
    for node in lh_nodes:
        node_zone_map[node.name] = "lh-zone1"
    set_and_wait_k8s_nodes_zone_label(core_api, node_zone_map)

    volume = create_and_check_volume(client, volume_name)
    wait_for_volume_condition_scheduled(client, volume_name, "status",
                                        CONDITION_STATUS_TRUE)
Test replica scheduler with zone anti-affinity
1. Set zone anti-affinity to hard.
2. Label nodes 1 & 2 with same zone label "zone1".
   Label node 3 with zone label "zone2".
3. Create a volume with 3 replicas.
4. Wait for volume condition `scheduled` to be false.
5. Label node 2 with zone label "zone3".
6. Wait for volume condition `scheduled` to be success.
7. Clear the volume.
8. Set zone anti-affinity to soft.
9. Change the zone labels on node 1 & 2 & 3 to "zone1".
10. Create a volume.
11. Wait for volume condition `scheduled` to be success.
12. Clean up the replica count, the zone labels and the volume.
def test_zone_tags(client, core_api, volume_name, k8s_node_zone_tags)
@pytest.mark.v2_volume_test  # NOQA
def test_zone_tags(client, core_api, volume_name, k8s_node_zone_tags):  # NOQA
    """
    Test anti affinity zone feature

    1. Add Kubernetes zone labels to the nodes
       1. Only two zones now: zone1 and zone2
    2. Create a volume with two replicas
    3. Verify zone1 and zone2 each has one replica.
    4. Remove a random replica and wait for volume to finish rebuild
    5. Verify zone1 and zone2 each has one replica.
    6. Repeat step 4-5 a few times.
    7. Update volume to 3 replicas, make sure they're scheduled on 3 nodes
    8. Remove a random replica and wait for volume to finish rebuild
    9. Make sure replicas are on different nodes
    10. Repeat step 8-9 a few times
    """
    wait_longhorn_nodes_zone_not_empty(client)

    volume = create_and_check_volume(client, volume_name, num_of_replicas=2)

    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)

    volume = client.by_id_volume(volume_name)

    zone1_replica_count = get_zone_replica_count(client, volume_name, ZONE1)
    zone2_replica_count = get_zone_replica_count(client, volume_name, ZONE2)
    assert zone1_replica_count == zone2_replica_count

    for i in range(randrange(3, 5)):
        volume = client.by_id_volume(volume_name)
        replica_count = len(volume.replicas)
        assert replica_count == 2

        replica_id = randrange(0, replica_count)
        replica_name = volume.replicas[replica_id].name
        volume.replicaRemove(name=replica_name)

        wait_for_volume_degraded(client, volume_name)
        wait_for_volume_healthy(client, volume_name)
        wait_for_volume_replica_count(client, volume_name, replica_count)

        volume = client.by_id_volume(volume_name)
        replica_names = map(lambda replica: replica.name, volume["replicas"])
        wait_new_replica_ready(client, volume_name, replica_names)

        zone1_replica_count = \
            get_zone_replica_count(client, volume_name, ZONE1)
        zone2_replica_count = \
            get_zone_replica_count(client, volume_name, ZONE2)
        assert zone1_replica_count == zone2_replica_count

    volume.updateReplicaCount(replicaCount=3)
    wait_for_volume_degraded(client, volume_name)
    wait_for_volume_replica_count(client, volume_name, 3)
    wait_for_volume_healthy(client, volume_name)

    volume = client.by_id_volume(volume_name)
    lh_node_names = list(map(lambda node: node.name, client.list_node()))
    for replica in volume.replicas:
        lh_node_names.remove(replica.hostId)
    assert lh_node_names == []

    for i in range(randrange(3, 5)):
        volume = client.by_id_volume(volume_name)
        replica_count = len(volume.replicas)
        assert replica_count == 3

        replica_id = randrange(0, replica_count)
        replica_name = volume.replicas[replica_id].name
        volume.replicaRemove(name=replica_name)

        wait_for_volume_degraded(client, volume_name)
        wait_for_volume_healthy(client, volume_name)
        wait_for_volume_replica_count(client, volume_name, replica_count)

        volume = client.by_id_volume(volume_name)
        lh_node_names = list(map(lambda node: node.name, client.list_node()))
        for replica in volume.replicas:
            lh_node_names.remove(replica.hostId)
        assert lh_node_names == []
Test anti affinity zone feature
1. Add Kubernetes zone labels to the nodes
   1. Only two zones now: zone1 and zone2
2. Create a volume with two replicas
3. Verify zone1 and zone2 each has one replica.
4. Remove a random replica and wait for volume to finish rebuild
5. Verify zone1 and zone2 each has one replica.
6. Repeat step 4-5 a few times.
7. Update volume to 3 replicas, make sure they're scheduled on 3 nodes
8. Remove a random replica and wait for volume to finish rebuild
9. Make sure replicas are on different nodes
10. Repeat step 8-9 a few times
def wait_longhorn_nodes_zone_not_empty(client)
def wait_longhorn_nodes_zone_not_empty(client):  # NOQA
    lh_nodes = client.list_node()
    node_names = map(lambda node: node.name, lh_nodes)

    for node_name in node_names:
        for j in range(RETRY_COUNTS):
            lh_node = client.by_id_node(node_name)
            if lh_node.zone != '':
                break
            time.sleep(RETRY_INTERVAL)

        assert lh_node.zone != ''
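Since Longhorn populates each node's `zone` field asynchronously from the Kubernetes zone label, zone-sensitive tests in this module (for example `test_zone_tags`) call this helper as a guard before asserting anything about zones. A short illustrative sketch of the pattern:

# Guard first: block until every Longhorn node reports a non-empty zone,
# then continue with zone-dependent steps.
wait_longhorn_nodes_zone_not_empty(client)
volume = create_and_check_volume(client, volume_name, num_of_replicas=2)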