forked from Mirror/frr
tests: Fix ospf[6]_gr_topo1 tests to work better under load
Two things: a) Each test was setting up for graceful restart with calls to `graceful-restart prepare ip[v6] ospf`, then sleeping for 3 or 5 seconds, then killing the ospf process. Under heavy load there is no guarantee that zebra has received/processed this signal. Write some code to ensure that this happens. b) Tests are issuing commands in this order: 1) issue gr prepare command 2) kill router 3) <ensure routes were still installed in zebra> 4) start router 5) <ensure routes were still installed in zebra>. Imagine that the system is under some load and there is a small amount of time before step 5 happens. In this case ospf could have come up, started neighbor relations, and also started installing routes. If zebra receives a new route before step 5 is issued, then the route could be in a state where it is not installed, because it is being sent to the kernel for installation. This would fail the test because it would only look once. This is fixed by giving time on restart for the routes to reach the installed state. Signed-off-by: Donald Sharp <sharpd@nvidia.com>
This commit is contained in:
parent
76ab1a9702
commit
6255aad0bc
|
@ -175,8 +175,19 @@ def check_routers(initial_convergence=False, exiting=None, restarting=None):
|
|||
for rname in ["rt1", "rt2", "rt3", "rt4", "rt5", "rt6", "rt7"]:
|
||||
# Check the RIB first, which should be preserved across restarts in
|
||||
# all routers of the routing domain.
|
||||
# If we are not on initial convergence *but* we are checking
|
||||
# after a restart. Looking in the zebra rib for installed
|
||||
# is a recipe for test failure. Why? because if we are restarting
|
||||
# then ospf is in the process of establishing neighbors and passing
|
||||
# new routes to zebra. Zebra will not mark the route as installed
|
||||
# when it receives a replacement from ospf until it has finished
|
||||
# processing it. Let's give it a few seconds to allow this to happen
|
||||
# under load.
|
||||
if initial_convergence == True:
|
||||
tries = 240
|
||||
else:
|
||||
if restarting != None:
|
||||
tries = 40
|
||||
else:
|
||||
tries = 1
|
||||
router_compare_json_output(
|
||||
|
@ -212,6 +223,26 @@ def check_routers(initial_convergence=False, exiting=None, restarting=None):
|
|||
)
|
||||
|
||||
|
||||
def ensure_gr_is_in_zebra(rname):
    """Wait until zebra on ``rname`` reports Graceful Restart for ospf6.

    Runs ``show zebra client`` through vtysh on the router, narrows the
    output to the "Capabilities " line that follows "Client: ospf6", and
    looks for the text "Graceful Restart" in it. The query is attempted at
    most 10 times, sleeping 2 seconds after every unsuccessful attempt.

    Args:
        rname: key of the router in ``get_topogen().net``.

    Raises:
        AssertionError: if no attempt produced a "Graceful Restart" match.
    """
    tgen = get_topogen()
    query = 'vtysh -c "show zebra client" | grep "Client: ospf6$" -A 40 | grep "Capabilities "'

    gr_seen = False
    for _attempt in range(10):
        if "Graceful Restart" in tgen.net[rname].cmd(query):
            gr_seen = True
            break
        # Capability not visible yet; pause before asking zebra again.
        sleep(2)

    assert gr_seen, "%s does not appear to have Graceful Restart setup" % rname
|
||||
|
||||
|
||||
#
|
||||
# Test initial network convergence
|
||||
#
|
||||
|
@ -238,10 +269,9 @@ def test_gr_rt1():
|
|||
pytest.skip(tgen.errors)
|
||||
|
||||
tgen.net["rt1"].cmd('vtysh -c "graceful-restart prepare ipv6 ospf"')
|
||||
sleep(5)
|
||||
ensure_gr_is_in_zebra("rt1")
|
||||
kill_router_daemons(tgen, "rt1", ["ospf6d"], save_config=False)
|
||||
check_routers(exiting="rt1")
|
||||
|
||||
start_router_daemons(tgen, "rt1", ["ospf6d"])
|
||||
check_routers(restarting="rt1")
|
||||
|
||||
|
@ -258,7 +288,7 @@ def test_gr_rt2():
|
|||
pytest.skip(tgen.errors)
|
||||
|
||||
tgen.net["rt2"].cmd('vtysh -c "graceful-restart prepare ipv6 ospf"')
|
||||
sleep(5)
|
||||
ensure_gr_is_in_zebra("rt2")
|
||||
kill_router_daemons(tgen, "rt2", ["ospf6d"], save_config=False)
|
||||
check_routers(exiting="rt2")
|
||||
|
||||
|
@ -278,7 +308,7 @@ def test_gr_rt3():
|
|||
pytest.skip(tgen.errors)
|
||||
|
||||
tgen.net["rt3"].cmd('vtysh -c "graceful-restart prepare ipv6 ospf"')
|
||||
sleep(5)
|
||||
ensure_gr_is_in_zebra("rt3")
|
||||
kill_router_daemons(tgen, "rt3", ["ospf6d"], save_config=False)
|
||||
check_routers(exiting="rt3")
|
||||
|
||||
|
@ -298,7 +328,7 @@ def test_gr_rt4():
|
|||
pytest.skip(tgen.errors)
|
||||
|
||||
tgen.net["rt4"].cmd('vtysh -c "graceful-restart prepare ipv6 ospf"')
|
||||
sleep(5)
|
||||
ensure_gr_is_in_zebra("rt4")
|
||||
kill_router_daemons(tgen, "rt4", ["ospf6d"], save_config=False)
|
||||
check_routers(exiting="rt4")
|
||||
|
||||
|
@ -318,7 +348,7 @@ def test_gr_rt5():
|
|||
pytest.skip(tgen.errors)
|
||||
|
||||
tgen.net["rt5"].cmd('vtysh -c "graceful-restart prepare ipv6 ospf"')
|
||||
sleep(5)
|
||||
ensure_gr_is_in_zebra("rt5")
|
||||
kill_router_daemons(tgen, "rt5", ["ospf6d"], save_config=False)
|
||||
check_routers(exiting="rt5")
|
||||
|
||||
|
@ -338,7 +368,7 @@ def test_gr_rt6():
|
|||
pytest.skip(tgen.errors)
|
||||
|
||||
tgen.net["rt6"].cmd('vtysh -c "graceful-restart prepare ipv6 ospf"')
|
||||
sleep(5)
|
||||
ensure_gr_is_in_zebra("rt6")
|
||||
kill_router_daemons(tgen, "rt6", ["ospf6d"], save_config=False)
|
||||
check_routers(exiting="rt6")
|
||||
|
||||
|
@ -358,7 +388,7 @@ def test_gr_rt7():
|
|||
pytest.skip(tgen.errors)
|
||||
|
||||
tgen.net["rt7"].cmd('vtysh -c "graceful-restart prepare ipv6 ospf"')
|
||||
sleep(5)
|
||||
ensure_gr_is_in_zebra("rt7")
|
||||
kill_router_daemons(tgen, "rt7", ["ospf6d"], save_config=False)
|
||||
check_routers(exiting="rt7")
|
||||
|
||||
|
|
|
@ -184,8 +184,19 @@ def check_routers(initial_convergence=False, exiting=None, restarting=None):
|
|||
for rname in ["rt1", "rt2", "rt3", "rt4", "rt5", "rt6", "rt7"]:
|
||||
# Check the RIB first, which should be preserved across restarts in
|
||||
# all routers of the routing domain.
|
||||
# If we are not on initial convergence *but* we are checking
|
||||
# after a restart. Looking in the zebra rib for installed
|
||||
# is a recipe for test failure. Why? because if we are restarting
|
||||
# then ospf is in the process of establishing neighbors and passing
|
||||
# new routes to zebra. Zebra will not mark the route as installed
|
||||
# when it receives a replacement from ospf until it has finished
|
||||
# processing it. Let's give it a few seconds to allow this to happen
|
||||
# under load.
|
||||
if initial_convergence == True:
|
||||
tries = 240
|
||||
else:
|
||||
if restarting != None:
|
||||
tries = 40
|
||||
else:
|
||||
tries = 1
|
||||
router_compare_json_output(
|
||||
|
@ -215,6 +226,26 @@ def check_routers(initial_convergence=False, exiting=None, restarting=None):
|
|||
)
|
||||
|
||||
|
||||
def ensure_gr_is_in_zebra(rname):
    """Wait until zebra on ``rname`` reports Graceful Restart for ospf.

    Runs ``show zebra client`` through vtysh on the router, narrows the
    output to the "Capabilities " line that follows "Client: ospf", and
    looks for the text "Graceful Restart" in it. The query is attempted at
    most 10 times, sleeping 2 seconds after every unsuccessful attempt.

    Args:
        rname: key of the router in ``get_topogen().net``.

    Raises:
        AssertionError: if no attempt produced a "Graceful Restart" match.
    """
    tgen = get_topogen()
    query = 'vtysh -c "show zebra client" | grep "Client: ospf$" -A 40 | grep "Capabilities "'

    gr_seen = False
    for _attempt in range(10):
        if "Graceful Restart" in tgen.net[rname].cmd(query):
            gr_seen = True
            break
        # Capability not visible yet; pause before asking zebra again.
        sleep(2)

    assert gr_seen, "%s does not appear to have Graceful Restart setup" % rname
|
||||
|
||||
|
||||
#
|
||||
# Test initial network convergence
|
||||
#
|
||||
|
@ -241,7 +272,7 @@ def test_gr_rt1():
|
|||
pytest.skip(tgen.errors)
|
||||
|
||||
tgen.net["rt1"].cmd('vtysh -c "graceful-restart prepare ip ospf"')
|
||||
sleep(3)
|
||||
ensure_gr_is_in_zebra("rt1")
|
||||
kill_router_daemons(tgen, "rt1", ["ospfd"], save_config=False)
|
||||
check_routers(exiting="rt1")
|
||||
|
||||
|
@ -261,7 +292,7 @@ def test_gr_rt2():
|
|||
pytest.skip(tgen.errors)
|
||||
|
||||
tgen.net["rt2"].cmd('vtysh -c "graceful-restart prepare ip ospf"')
|
||||
sleep(3)
|
||||
ensure_gr_is_in_zebra("rt2")
|
||||
kill_router_daemons(tgen, "rt2", ["ospfd"], save_config=False)
|
||||
check_routers(exiting="rt2")
|
||||
|
||||
|
@ -281,7 +312,7 @@ def test_gr_rt3():
|
|||
pytest.skip(tgen.errors)
|
||||
|
||||
tgen.net["rt3"].cmd('vtysh -c "graceful-restart prepare ip ospf"')
|
||||
sleep(3)
|
||||
ensure_gr_is_in_zebra("rt3")
|
||||
kill_router_daemons(tgen, "rt3", ["ospfd"], save_config=False)
|
||||
check_routers(exiting="rt3")
|
||||
|
||||
|
@ -301,7 +332,7 @@ def test_gr_rt4():
|
|||
pytest.skip(tgen.errors)
|
||||
|
||||
tgen.net["rt4"].cmd('vtysh -c "graceful-restart prepare ip ospf"')
|
||||
sleep(3)
|
||||
ensure_gr_is_in_zebra("rt4")
|
||||
kill_router_daemons(tgen, "rt4", ["ospfd"], save_config=False)
|
||||
check_routers(exiting="rt4")
|
||||
|
||||
|
@ -321,7 +352,7 @@ def test_gr_rt5():
|
|||
pytest.skip(tgen.errors)
|
||||
|
||||
tgen.net["rt5"].cmd('vtysh -c "graceful-restart prepare ip ospf"')
|
||||
sleep(3)
|
||||
ensure_gr_is_in_zebra("rt5")
|
||||
kill_router_daemons(tgen, "rt5", ["ospfd"], save_config=False)
|
||||
check_routers(exiting="rt5")
|
||||
|
||||
|
@ -341,7 +372,7 @@ def test_gr_rt6():
|
|||
pytest.skip(tgen.errors)
|
||||
|
||||
tgen.net["rt6"].cmd('vtysh -c "graceful-restart prepare ip ospf"')
|
||||
sleep(3)
|
||||
ensure_gr_is_in_zebra("rt6")
|
||||
kill_router_daemons(tgen, "rt6", ["ospfd"], save_config=False)
|
||||
check_routers(exiting="rt6")
|
||||
|
||||
|
@ -361,7 +392,7 @@ def test_gr_rt7():
|
|||
pytest.skip(tgen.errors)
|
||||
|
||||
tgen.net["rt7"].cmd('vtysh -c "graceful-restart prepare ip ospf"')
|
||||
sleep(3)
|
||||
ensure_gr_is_in_zebra("rt7")
|
||||
kill_router_daemons(tgen, "rt7", ["ospfd"], save_config=False)
|
||||
check_routers(exiting="rt7")
|
||||
|
||||
|
|
Loading…
Reference in a new issue