Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions app/jobs/pollable_job_wrapper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@ def error(job, exception)
end

def failure(job)
if @handler.respond_to?(:recover_from_failure)
begin
@handler.recover_from_failure
rescue StandardError => e
logger.error("failure recovery failed: #{e.class}: #{e.message}")
end
end

change_state(job, PollableJobModel::FAILED_STATE)
end

Expand Down
8 changes: 8 additions & 0 deletions app/jobs/v3/services/synchronize_broker_catalog_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ def display_name
'service_broker.catalog.synchronize'
end

def recover_from_failure
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this tested in a unit test?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not yet, will add that.

ServiceBroker.where(guid: broker_guid, state: ServiceBrokerStateEnum::SYNCHRONIZING).
update(state: ServiceBrokerStateEnum::SYNCHRONIZATION_FAILED)
rescue StandardError => e
logger = Steno.logger('cc.background')
logger.error("Failed to recover broker state for #{broker_guid}: #{e.class}: #{e.message}")
end

private

attr_reader :broker_guid, :user_audit_info
Expand Down
29 changes: 26 additions & 3 deletions app/jobs/v3/services/update_broker_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ def display_name
'service_broker.update'
end

def recover_from_failure
ServiceBroker.where(guid: broker_guid, state: ServiceBrokerStateEnum::SYNCHRONIZING).
update(state: ServiceBrokerStateEnum::SYNCHRONIZATION_FAILED)
rescue StandardError => e
logger = Steno.logger('cc.background')
logger.error("Failed to recover broker state for #{broker_guid}: #{e.class}: #{e.message}")
end

private

attr_reader :update_request_guid, :broker_guid, :previous_broker_state, :user_audit_info
Expand Down Expand Up @@ -66,17 +74,32 @@ def perform

@warnings
rescue StandardError => e
ServiceBroker.where(id: update_request.service_broker_id).update(state: previous_broker_state)
rollback_broker_state

raise V3::ServiceBrokerUpdate::InvalidServiceBroker.new(e.message) if e.is_a?(Sequel::ValidationFailed)

raise e
raise
ensure
update_request.destroy
destroy_update_request
end

private

def rollback_broker_state
return unless update_request

ServiceBroker.where(id: update_request.service_broker_id, state: ServiceBrokerStateEnum::SYNCHRONIZING).
update(state: previous_broker_state)
rescue StandardError
# Best effort only; wrapper failure hook will retry
end

def destroy_update_request
update_request&.destroy
rescue StandardError
# Don't mask original failure
end

def update_params
params = {}
params[:name] = update_request.name unless update_request.name.nil?
Expand Down
4 changes: 4 additions & 0 deletions app/jobs/wrapping_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ def error(job, e)
handler.error(job, e) if handler.respond_to?(:error)
end

def recover_from_failure
handler.recover_from_failure if handler.respond_to?(:recover_from_failure)
end

def display_name
handler.respond_to?(:display_name) ? handler.display_name : handler.class.name
end
Expand Down
51 changes: 51 additions & 0 deletions spec/unit/jobs/pollable_job_wrapper_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,57 @@ class BigException < StandardError
execute_all_jobs(expected_successes: 0, expected_failures: 1)
end
end

describe '#failure' do
let(:delayed_job) { instance_double(Delayed::Backend::Sequel::Job, guid: 'job-guid') }
let!(:pollable_job) do
VCAP::CloudController::PollableJobModel.make(delayed_job_guid: 'job-guid', state: VCAP::CloudController::PollableJobModel::PROCESSING_STATE)
end

context 'when handler implements recover_from_failure' do
let(:handler) do
instance_double(VCAP::CloudController::V3::UpdateBrokerJob, recover_from_failure: nil, warnings: nil)
end
let(:wrapper) { PollableJobWrapper.new(handler) }

it 'calls recover_from_failure and marks the pollable job failed' do
wrapper.failure(delayed_job)

expect(handler).to have_received(:recover_from_failure)
expect(pollable_job.reload.state).to eq(VCAP::CloudController::PollableJobModel::FAILED_STATE)
end

context 'when recover_from_failure raises an error' do
let(:logger) { instance_double(Steno::Logger, error: nil) }

before do
allow(handler).to receive(:recover_from_failure).and_raise(StandardError.new('recovery failed'))
allow(Steno).to receive(:logger).with('cc.pollable.job.wrapper').and_return(logger)
end

it 'logs the error without re-raising' do
expect { wrapper.failure(delayed_job) }.not_to raise_error
expect(logger).to have_received(:error).with(/failure recovery failed/)
end

it 'still marks the pollable job as failed' do
wrapper.failure(delayed_job)

expect(pollable_job.reload.state).to eq(VCAP::CloudController::PollableJobModel::FAILED_STATE)
end
end
end

context 'when handler does not implement recover_from_failure' do
let(:handler) { double('HandlerWithoutRecovery', warnings: nil) }
let(:wrapper) { PollableJobWrapper.new(handler) }

it 'still marks the pollable job as failed without error' do
expect { wrapper.failure(delayed_job) }.not_to raise_error
expect(pollable_job.reload.state).to eq(VCAP::CloudController::PollableJobModel::FAILED_STATE)
end
end
end
end
end

Expand Down
64 changes: 64 additions & 0 deletions spec/unit/jobs/v3/services/synchronize_broker_catalog_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,70 @@ def incompatible_catalog
allow(catalog).to receive_messages(valid?: true, compatible?: false, incompatibility_errors: incompatibility_errors)
end
end

describe '#recover_from_failure' do
let(:broker) do
ServiceBroker.create(
name: 'test-broker',
broker_url: 'http://example.org/broker-url',
auth_username: 'username',
auth_password: 'password'
)
end
let(:user_audit_info) { instance_double(UserAuditInfo, { user_guid: Sham.guid }) }

subject(:job) do
SynchronizeBrokerCatalogJob.new(broker.guid, user_audit_info:)
end

context 'when broker is in SYNCHRONIZING state' do
before do
broker.update(state: ServiceBrokerStateEnum::SYNCHRONIZING)
end

it 'sets the broker to SYNCHRONIZATION_FAILED' do
expect do
job.recover_from_failure
end.to change { broker.reload.state }.
from(ServiceBrokerStateEnum::SYNCHRONIZING).
to(ServiceBrokerStateEnum::SYNCHRONIZATION_FAILED)
end
end

shared_examples 'does not change the broker state' do |expected_state|
it 'leaves the state unchanged' do
broker.update(state: expected_state)
job.recover_from_failure

expect(broker.reload.state).to eq(expected_state)
end
end

context 'when broker is in a different state' do
include_examples 'does not change the broker state', ServiceBrokerStateEnum::AVAILABLE
include_examples 'does not change the broker state', ServiceBrokerStateEnum::DELETE_IN_PROGRESS
end

context 'when database error occurs during recovery' do
let(:dataset) { instance_double(Sequel::Dataset) }
let(:logger) { instance_double(Steno::Logger, error: nil) }

before do
broker.update(state: ServiceBrokerStateEnum::SYNCHRONIZING)
allow(ServiceBroker).to receive(:where).
with(guid: broker.guid, state: ServiceBrokerStateEnum::SYNCHRONIZING).
and_return(dataset)
allow(dataset).to receive(:update).and_raise(Sequel::DatabaseError.new(RuntimeError.new('connection lost')))
allow(Steno).to receive(:logger).with('cc.background').and_return(logger)
end

it 'logs the error and does not raise' do
expect { job.recover_from_failure }.not_to raise_error
expect(logger).to have_received(:error).with(/Failed to recover broker state/)
expect(broker.reload.state).to eq(ServiceBrokerStateEnum::SYNCHRONIZING)
end
end
end
end
end
end
Expand Down
79 changes: 79 additions & 0 deletions spec/unit/jobs/v3/services/update_broker_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,29 @@ module V3
end
end

context 'when database disconnects during state rollback' do
let(:catalog_error) { StandardError.new('Catalog fetch failed') }
let(:mock_dataset) { instance_double(Sequel::Postgres::Dataset) }

before do
allow_any_instance_of(VCAP::CloudController::V3::ServiceBrokerCatalogUpdater).to receive(:refresh).and_raise(catalog_error)
allow(mock_dataset).to receive(:update).and_raise(Sequel::DatabaseDisconnectError.new('connection lost'))
allow(ServiceBroker).to receive(:where).and_call_original
allow(ServiceBroker).to receive(:where).
with(id: update_broker_request.service_broker_id, state: ServiceBrokerStateEnum::SYNCHRONIZING).
and_return(mock_dataset)
end

it 're-raises the original error instead of the rollback database error' do
expect { job.perform }.to raise_error(catalog_error)
end

it 'still cleans up the update request' do
expect { job.perform }.to raise_error(catalog_error)
expect(ServiceBrokerUpdateRequest.where(id: update_broker_request.id).all).to be_empty
end
end

context 'when the broker ceases to exist during the job' do
it 'raises a ServiceBrokerGone error' do
broker.destroy
Expand All @@ -457,6 +480,62 @@ module V3
end
end

describe '#recover_from_failure' do
let(:previous_state) { ServiceBrokerStateEnum::AVAILABLE }

subject(:job) do
UpdateBrokerJob.new(update_broker_request.guid, broker.guid, previous_state, user_audit_info:)
end

context 'when broker is in SYNCHRONIZING state' do
before do
broker.update(state: ServiceBrokerStateEnum::SYNCHRONIZING)
end

it 'sets the broker to SYNCHRONIZATION_FAILED' do
expect do
job.recover_from_failure
end.to change { broker.reload.state }.
from(ServiceBrokerStateEnum::SYNCHRONIZING).
to(ServiceBrokerStateEnum::SYNCHRONIZATION_FAILED)
end
end

shared_examples 'does not change the broker state' do |expected_state|
it 'leaves the state unchanged' do
broker.update(state: expected_state)
job.recover_from_failure

expect(broker.reload.state).to eq(expected_state)
end
end

context 'when broker is in a different state' do
include_examples 'does not change the broker state', ServiceBrokerStateEnum::AVAILABLE
include_examples 'does not change the broker state', ServiceBrokerStateEnum::DELETE_IN_PROGRESS
end

context 'when database error occurs during recovery' do
let(:dataset) { instance_double(Sequel::Dataset) }
let(:logger) { instance_double(Steno::Logger, error: nil) }

before do
broker.update(state: ServiceBrokerStateEnum::SYNCHRONIZING)
allow(ServiceBroker).to receive(:where).
with(guid: broker.guid, state: ServiceBrokerStateEnum::SYNCHRONIZING).
and_return(dataset)
allow(dataset).to receive(:update).and_raise(Sequel::DatabaseError.new(RuntimeError.new('connection lost')))
allow(Steno).to receive(:logger).with('cc.background').and_return(logger)
end

it 'logs the error and does not raise' do
expect { job.recover_from_failure }.not_to raise_error
expect(logger).to have_received(:error).with(/Failed to recover broker state/)
expect(broker.reload.state).to eq(ServiceBrokerStateEnum::SYNCHRONIZING)
end
end
end

def setup_broker_with_invalid_catalog
catalog = instance_double(Services::ServiceBrokers::V2::Catalog)

Expand Down
22 changes: 22 additions & 0 deletions spec/unit/jobs/wrapping_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,28 @@ module Jobs
end
end
end

describe '#recover_from_failure' do
context 'when the wrapped job has the recover_from_failure method defined' do
it 'delegates to the handler' do
handler = double('Job', recover_from_failure: nil)
job = WrappingJob.new(handler)

expect(handler).to receive(:recover_from_failure)
job.recover_from_failure
end
end

context 'when the wrapped job does not have the recover_from_failure method defined' do
it 'does not raise an exception' do
handler = Object.new
job = WrappingJob.new(handler)
expect do
job.recover_from_failure
end.not_to raise_error
end
end
end
end
end
end
Loading