Audio crackles with delay in series

I am learning to write audio plugins using VST3 sdk. I have an issue implementing 2 delays in serie, the audio starts crackling.

Here is the processing code:

tresult PLUGIN_API StereoProcessor::process (Vst::ProcessData& data)
{
    //--- First : Read inputs parameter changes-----------
    if (data.inputParameterChanges)
    {
        int32 numParamsChanged = data.inputParameterChanges->getParameterCount();
        for (int32 index = 0; index < numParamsChanged; index++)
        {
            IParamValueQueue* paramQueue = data.inputParameterChanges->getParameterData(index);
            if (paramQueue)
            {
                ParamValue value;
                int32 sampleOffset;
                int32 numPoints = paramQueue->getPointCount();
                switch (paramQueue->getParameterId())
                {
                case kLevelId:
                    if (paramQueue->getPoint(numPoints - 1, sampleOffset, value) == kResultTrue)
                        mLevel = value;
                    break;
                case kDelayId:
                    if (paramQueue->getPoint(numPoints - 1, sampleOffset, value) == kResultTrue)
                        mDelay = value;
                    break;
                case kBypassId:
                    if (paramQueue->getPoint(numPoints - 1, sampleOffset, value) == kResultTrue)
                    {
                        mBypass = (value > 0.5f);
                    }
                    break;
                }
            }
        }
    }

    if (data.numSamples > 0)
    {
        SpeakerArrangement arr;
        getBusArrangement(kOutput, 0, arr);
        int32 numChannels = SpeakerArr::getChannelCount(arr);

        // do something in Bypass or mono: copy input to output
        if (mBypass || numChannels == 1)
        {
            for (int32 channel = 0; channel < numChannels; channel++)
            {
                float* inputChannel = data.inputs[0].channelBuffers32[channel];
                float* outputChannel = data.outputs[0].channelBuffers32[channel];

                for (int32 sample = 0; sample < data.numSamples; sample++)
                    outputChannel[sample] = inputChannel[sample];
            }
            return kResultTrue;
        }

        // apply delay
        int32 wDelayInSamples = std::max<int32>(processSetup.sampleRate / 2'000, (int32)(mWidth * mWidthBufferSize));
        int32 dDelayInSamples = (int32)(mDelay * mDelayBufferSize);
        float delaySwitch = wDelayInSamples ? 1 : 0;
        for (int32 channel = 0; channel < numChannels; channel++)
        {
            float* inputChannel = data.inputs[0].channelBuffers32[channel];
            float* outputChannel = data.outputs[0].channelBuffers32[channel];
            float* wBuffer = mBuffer[channel];
            float* dBuffer = wBuffer + mWidthBufferSize;

            int32 tempWBufferPos = mWidthBufferPos;
            int32 tempDBufferPos = mDelayBufferPos;

            for (int32 sample = 0; sample < data.numSamples; sample++)
            {
                float tempSample = *inputChannel++;
                float tempWSample = wBuffer[tempWBufferPos];
                *outputChannel++ = tempSample + mLevel*tempWSample;
                wBuffer[tempWBufferPos] = tempSample + delaySwitch*dBuffer[tempDBufferPos];
                dBuffer[tempDBufferPos] = tempWSample;
                
                tempWBufferPos++;
                if (tempWBufferPos >= wDelayInSamples)
                    tempWBufferPos = 0;
                
                tempDBufferPos++;
                if (tempDBufferPos >= dDelayInSamples)
                    tempDBufferPos = 0;             
            }
        }
        mWidthBufferPos += data.numSamples;
        while (wDelayInSamples && mWidthBufferPos >= wDelayInSamples)
            mWidthBufferPos -= wDelayInSamples;

        mDelayBufferPos += data.numSamples;
        while (dDelayInSamples && mDelayBufferPos >= dDelayInSamples)
            mDelayBufferPos -= dDelayInSamples;
        }
    return kResultTrue;
}

The buffer and delay sizes are initialized here:

//------------------------------------------------------------------------
tresult PLUGIN_API StereoProcessor::setActive (TBool state)
{
    //--- called when the Plug-in is enable/disable (On/Off) -----
    SpeakerArrangement arr;
    if (getBusArrangement(kOutput, 0, arr) != kResultTrue)
        return kResultFalse;
    int32 numChannels = SpeakerArr::getChannelCount(arr);
    if (numChannels == 0)
        return kResultFalse;

    if (state)
    {
        mWidthBufferSize = processSetup.sampleRate / 10 + 0.5;
        mDelayBufferSize = processSetup.sampleRate + 0.5;
        int32 size = mWidthBufferSize + mDelayBufferSize;
        mBuffer = (float**)std::malloc(numChannels * sizeof(float*));
        for (int32 channel = 0; channel < numChannels; channel++)
        {
            mBuffer[channel] = (float*)std::calloc(size, sizeof(float));    // 1 second delay max
        }
        mWidthBufferPos = 0;
        mDelayBufferPos = 0;
    }
    else
    {
        if (mBuffer)
        {
            for (int32 channel = 0; channel < numChannels; channel++)
            {
                std::free(mBuffer[channel]);
            }
            std::free(mBuffer);
            mBuffer = nullptr;
        }
    }
    return AudioEffect::setActive(state);
}

If I remove the last loop at the end (and set the second delay switch to 0) then I don’t get any issue.

        //float delaySwitch = wDelayInSamples ? 1 : 0;
        float delaySwitch = 0;
        
        // ...
        
        /*
        mDelayBufferPos += data.numSamples;
        while (dDelayInSamples && mDelayBufferPos >= dDelayInSamples)
            mDelayBufferPos -= dDelayInSamples;
        }
        */

Is there anything wrong with my code?
How can I run two delay in series without audio issues?

Thanks

When you say “crackling,” is this clipping because you add values that go over the 1.0 FS?
Or is it that you’re adding values that read outside the buffer?
Or is it that it processes too slowly and doesn’t keep up?

For the first one, try reducing the gain before your effect, and see if the crackling goes away.
For the second one, add some kind of macro/assert that lets you capture if the code reads or writes outside the intended area of the buffer. (Also: seeing “0.5” when talking about integral numbers of samples makes me think things could go wrong there – I generally always round up, and generally aligns buffers to multiples of at least 4 samples, for best performance.)

And, generally, I will allocate buffers rounded up to a power of two, so that I can index them using a simple bitmask.