Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions ruby/lib/minitest/queue.rb
Original file line number Diff line number Diff line change
Expand Up @@ -180,14 +180,14 @@ def handle_test_result(reporter, example, result)
# When we do a bisect, we don't care about the result other than the test we're running the bisect on
result.mark_as_flaked!
failed = false
elsif failed
queue.report_failure!
else
queue.report_success!
end

if failed && CI::Queue.requeueable?(result) && queue.requeue(example)
result.requeue!
elsif failed
queue.report_failure!
else
queue.report_success!
end
reporter.record(result)
end
Expand Down
21 changes: 21 additions & 0 deletions ruby/test/ci/queue/redis_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,27 @@ def test_initialise_from_rediss_uri
assert_instance_of CI::Queue::Redis::Worker, queue
end

def test_circuit_breaker_does_not_count_requeued_failures
# Bug: report_failure! was called before the requeue check, so successfully
# requeued tests incremented the consecutive failure counter. With
# max_consecutive_failures=3 and 3+ deterministic failures that are all
# requeued, the circuit breaker fired prematurely and the worker exited,
# stranding requeued tests in the queue with no worker to process them.
queue = worker(1, max_requeues: 5, requeue_tolerance: 1.0, max_consecutive_failures: 3)

# All tests fail (deterministic failures that get requeued)
tests_processed = poll(queue, false)

# With 4 tests and max_requeues=5, all tests should be processed multiple
# times (requeued after each failure). The circuit breaker should NOT fire
# because requeued failures are transient, not consecutive "real" failures.
assert tests_processed.size > TEST_LIST.size,
"Expected tests to be requeued and re-processed, but only #{tests_processed.size} " \
"tests were processed (circuit breaker likely fired prematurely). " \
"Circuit breaker open? #{queue.config.circuit_breakers.any?(&:open?)}"
end


private

def shuffled_test_list
Expand Down
28 changes: 27 additions & 1 deletion ruby/test/integration/minitest_redis_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -296,9 +296,35 @@ def test_circuit_breaker
)
end

# Requeued failures don't count toward the circuit breaker. The worker
# processes more tests than the old behavior (which stopped at 3).
# Exact count depends on queue ordering, but must be > 3.
output = normalize(out.lines.last.strip)
ran_count = output.match(/Ran (\d+) tests/)[1].to_i
assert ran_count > 3,
"Expected more than 3 tests to run (requeues shouldn't trip breaker), got: #{output}"
end

def test_circuit_breaker_without_requeues
out, err = capture_subprocess_io do
system(
@exe, 'run',
'--queue', @redis_url,
'--seed', 'foobar',
'--build', '1',
'--worker', '1',
'--timeout', '1',
'--max-requeues', '0',
'--max-consecutive-failures', '3',
'-Itest',
'test/failing_test.rb',
chdir: 'test/fixtures/',
)
end

assert_equal "This worker is exiting early because it encountered too many consecutive test failures, probably because of some corrupted state.\n", err
output = normalize(out.lines.last.strip)
assert_equal 'Ran 3 tests, 3 assertions, 0 failures, 0 errors, 0 skips, 3 requeues in X.XXs', output
assert_equal 'Ran 3 tests, 3 assertions, 3 failures, 0 errors, 0 skips, 0 requeues in X.XXs', output
end

def test_redis_runner
Expand Down
8 changes: 6 additions & 2 deletions ruby/test/support/queue_helpers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@ def poll(queue, success = true)
test_order << test
failed = !(success.respond_to?(:call) ? success.call(test) : success)
if failed
queue.report_failure!
queue.requeue(test) || queue.acknowledge(test.id)
if queue.requeue(test)
# Requeued — don't report to circuit breaker
else
queue.report_failure!
queue.acknowledge(test.id)
end
else
queue.report_success!
queue.acknowledge(test.id)
Expand Down
Loading