// VTrain/base/Framework/ModelProcess/ModelProcess.cpp
/*
* Copyright(C) 2020. Huawei Technologies Co.,Ltd. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef USE_DCMI_INTERFACE
#include <unistd.h>
#include <termios.h>
#endif
#include <mutex>
#include "ModelProcess.h"
#include "FileManager.h"
// Construct a ModelProcess bound to one device and named model.
// Uses a member-initializer list instead of assignment in the body.
ModelProcess::ModelProcess(const int deviceId, const std::string &modelName)
    : deviceId_(deviceId), modelName_(modelName)
{
}
// Default constructor: members keep their in-class default values.
ModelProcess::ModelProcess() = default;
// Tear down model resources unless DeInit() was already called explicitly.
ModelProcess::~ModelProcess()
{
    if (isDeInit_)
    {
        return;  // caller already released everything
    }
    DeInit();
}
/**
 * Release the aclDataBuffer wrappers and the aclmdlDataset object itself.
 * The underlying device buffers are intentionally NOT freed here: they are
 * owned by the caller (e.g. DVPP output memory or the buffers allocated by
 * Input/OutputBufferWithSizeMalloc).
 * @param dataset dataset to destroy; nullptr is a safe no-op
 */
void ModelProcess::DestroyDataset(const aclmdlDataset *dataset) const
{
    if (dataset == nullptr)
    {
        return;
    }
    // Hoist the count out of the loop condition; it does not change while
    // we iterate (no dead null-assignments of locals, they had no effect).
    const size_t bufferCount = aclmdlGetDatasetNumBuffers(dataset);
    for (size_t i = 0; i < bufferCount; i++)
    {
        aclDataBuffer *dataBuffer = aclmdlGetDatasetBuffer(dataset, i);
        if (dataBuffer != nullptr)
        {
            aclDestroyDataBuffer(dataBuffer);
        }
    }
    aclmdlDestroyDataset(dataset);
}
// Expose the raw model description pointer; ownership stays with modelDesc_.
aclmdlDesc *ModelProcess::GetModelDesc() const
{
    aclmdlDesc *desc = modelDesc_.get();
    return desc;
}
//从device侧获得推理数据
int ModelProcess::ModelInference_from_dvpp(void *buffer, uint32_t buffer_size)
{
std::vector<void *> inputBuffers{buffer};
std::vector<size_t> inputSizes{buffer_size};
//创建输入数据集
aclmdlDataset *input = nullptr;
input = CreateAndFillDataset(inputBuffers, inputSizes);
if (input == nullptr)
{
return APP_ERR_COMM_FAILURE;
}
APP_ERROR ret = 0;
//创建输出数据集
aclmdlDataset *output = nullptr;
output = CreateAndFillDataset(outputBuffers_, outputSizes_);
if (output == nullptr)
{
DestroyDataset(input);
input = nullptr;
return APP_ERR_COMM_FAILURE;
}
//对同一个modelId的模型由于与模型关联的资源例如stream、内存等唯一因此不能在多线程中并发使用否则可能导致业务异常。
//同一个id的可以加锁不同id的不用加锁
mtx_.lock();
ret = aclmdlExecute(modelId_, input, output);
mtx_.unlock();
if (ret != APP_ERR_OK)
{
LogError << "aclmdlExecute failed, ret[" << ret << "].";
//加上,防止内存泄露
DestroyDataset(input);
DestroyDataset(output);
return ret;
}
DestroyDataset(input);
DestroyDataset(output);
return APP_ERR_OK;
}
int ModelProcess::ModelInference(const std::vector<void *> &inputBufs, const std::vector<size_t> &inputSizes,
const std::vector<void *> &ouputBufs, const std::vector<size_t> &outputSizes, size_t dynamicBatchSize)
{
LogDebug << "ModelProcess:Begin to inference.";
aclmdlDataset *input = nullptr;
input = CreateAndFillDataset(inputBufs, inputSizes);
if (input == nullptr)
{
return APP_ERR_COMM_FAILURE;
}
APP_ERROR ret = 0;
if (dynamicBatchSize != 0)
{
size_t index;
ret = aclmdlGetInputIndexByName(modelDesc_.get(), ACL_DYNAMIC_TENSOR_NAME, &index);
if (ret != ACL_ERROR_NONE)
{
LogError << "aclmdlGetInputIndexByName failed, maybe static model";
return APP_ERR_COMM_CONNECTION_FAILURE;
}
ret = aclmdlSetDynamicBatchSize(modelId_, input, index, dynamicBatchSize);
if (ret != ACL_ERROR_NONE)
{
LogError << "dynamic batch set failed, modelId_=" << modelId_ << ", input=" << input << ", index=" << index
<< ", dynamicBatchSize=" << dynamicBatchSize;
return APP_ERR_COMM_CONNECTION_FAILURE;
}
LogDebug << "set dynamicBatchSize success, dynamicBatchSize=" << dynamicBatchSize;
}
aclmdlDataset *output = nullptr;
output = CreateAndFillDataset(ouputBufs, outputSizes);
if (output == nullptr)
{
DestroyDataset(input);
input = nullptr;
return APP_ERR_COMM_FAILURE;
}
mtx_.lock();
ret = aclmdlExecute(modelId_, input, output);
mtx_.unlock();
if (ret != APP_ERR_OK)
{
LogError << "aclmdlExecute failed, ret[" << ret << "].";
return ret;
}
DestroyDataset(input);
DestroyDataset(output);
return APP_ERR_OK;
}
int ModelProcess::ModelInferDynamicHW(const std::vector<void *> &inputBufs, const std::vector<size_t> &inputSizes,
const std::vector<void *> &ouputBufs, const std::vector<size_t> &outputSizes)
{
LogDebug << "ModelProcess:Begin to inference with dynamic width and height.";
aclmdlDataset *input = nullptr;
input = CreateAndFillDataset(inputBufs, inputSizes);
if (input == nullptr)
{
return APP_ERR_COMM_FAILURE;
}
size_t index;
APP_ERROR ret = aclmdlGetInputIndexByName(modelDesc_.get(), ACL_DYNAMIC_TENSOR_NAME, &index);
if (ret != ACL_ERROR_NONE)
{
LogError << "Failed to execute aclmdlGetInputIndexByName, maybe static model.";
return APP_ERR_COMM_CONNECTION_FAILURE;
}
ret = aclmdlSetDynamicHWSize(modelId_, input, index, modelHeight_, modelWidth_);
if (ret != ACL_ERROR_NONE)
{
LogError << "Failed to set dynamic HW, modelId_=" << modelId_ << ", input=" << input << ", index="
<< index << ", dynamicW=" << modelWidth_ << ", dynamicH=" << modelHeight_;
return APP_ERR_COMM_CONNECTION_FAILURE;
}
LogDebug << "Set dynamicHWSize success, dynamicHWSize=" << modelWidth_ << ", " << modelHeight_;
aclmdlDataset *output = nullptr;
output = CreateAndFillDataset(ouputBufs, outputSizes);
if (output == nullptr)
{
DestroyDataset(input);
input = nullptr;
return APP_ERR_COMM_FAILURE;
}
mtx_.lock();
ret = aclmdlExecute(modelId_, input, output);
mtx_.unlock();
if (ret != APP_ERR_OK)
{
LogError << "aclmdlExecute failed, ret[" << ret << "].";
return ret;
}
DestroyDataset(input);
DestroyDataset(output);
return APP_ERR_OK;
}
int ModelProcess::DeInit()
{
LogInfo << "Model[" << modelName_ << "][" << deviceId_ << "] deinit begin";
isDeInit_ = true;
//卸载模型
APP_ERROR ret = aclmdlUnload(modelId_);
if (ret != APP_ERR_OK)
{
LogError << "aclmdlUnload failed, ret[" << ret << "].";
return ret;
}
//释放工作内存
if (modelDevPtr_ != nullptr)
{
ret = aclrtFree(modelDevPtr_);
if (ret != APP_ERR_OK)
{
LogError << "aclrtFree failed, ret[" << ret << "].";
return ret;
}
modelDevPtr_ = nullptr;
}
//释放权值内存
if (weightDevPtr_ != nullptr)
{
ret = aclrtFree(weightDevPtr_);
if (ret != APP_ERR_OK)
{
LogError << "aclrtFree failed, ret[" << ret << "].";
return ret;
}
weightDevPtr_ = nullptr;
}
//释放输入内存
for (size_t i = 0; i < inputBuffers_.size(); i++)
{
if (inputBuffers_[i] != nullptr)
{
aclrtFree(inputBuffers_[i]);
inputBuffers_[i] = nullptr;
}
}
inputBuffers_.clear();
inputSizes_.clear();
//释放输出内存
for (size_t i = 0; i < outputBuffers_.size(); i++)
{
if (outputBuffers_[i] != nullptr)
{
aclrtFree(outputBuffers_[i]);
outputBuffers_[i] = nullptr;
}
}
outputBuffers_.clear();
outputSizes_.clear();
LogInfo << "Model[" << modelName_ << "][" << deviceId_ << "] deinit success";
return APP_ERR_OK;
}
/**
 * Load an offline model from host memory.
 * Queries the required working/weight memory sizes, allocates both on the
 * device, loads the model, caches its description and preallocates the
 * output buffers. Input memory is not allocated here (DVPP output is reused).
 * @param modelData raw model file content in host memory
 * @param modelSize size of modelData in bytes
 * @return APP_ERR_OK on success, an error code otherwise
 */
APP_ERROR ModelProcess::LoadModel(const std::shared_ptr<uint8_t> &modelData, int modelSize)
{
    APP_ERROR ret = aclmdlQuerySizeFromMem(modelData.get(), modelSize, &modelDevPtrSize_, &weightDevPtrSize_);
    if (ret != APP_ERR_OK)
    {
        LogError << "aclmdlQuerySizeFromMem failed, ret[" << ret << "].";
        return ret;
    }
    LogDebug << "modelDevPtrSize_[" << modelDevPtrSize_ << "], weightDevPtrSize_[" << weightDevPtrSize_ << "].";
    // Allocate device memory for the model's working buffers
    ret = aclrtMalloc(&modelDevPtr_, modelDevPtrSize_, ACL_MEM_MALLOC_HUGE_FIRST);
    if (ret != APP_ERR_OK)
    {
        LogError << "aclrtMalloc dev_ptr failed, ret[" << ret << "].";
        return ret;
    }
    // Allocate device memory for the model's weights
    // (on failure the already-allocated buffers are released in DeInit)
    ret = aclrtMalloc(&weightDevPtr_, weightDevPtrSize_, ACL_MEM_MALLOC_HUGE_FIRST);
    if (ret != APP_ERR_OK)
    {
        LogError << "aclrtMalloc weight_ptr failed, ret[" << ret << "] (" << GetAppErrCodeInfo(ret) << ").";
        return ret;
    }
    // Load the offline model from memory; the runtime memory is managed by
    // us. The returned modelId_ identifies this model in later ACL calls.
    ret = aclmdlLoadFromMemWithMem(modelData.get(), modelSize, &modelId_, modelDevPtr_, modelDevPtrSize_, weightDevPtr_, weightDevPtrSize_);
    if (ret != APP_ERR_OK)
    {
        LogError << "aclmdlLoadFromMemWithMem failed, ret[" << ret << "].";
        return ret;
    }
    // Resources tied to one modelId (stream, memory, ...) are unique, so the
    // same modelId must never be used concurrently from multiple threads.
    ret = aclrtGetCurrentContext(&contextModel_);
    if (ret != APP_ERR_OK)
    {
        // fix: this used to log the copy-pasted "aclrtMalloc weight_ptr" text
        LogError << "aclrtGetCurrentContext failed, ret[" << ret << "].";
        return ret;
    }
    // Fetch the model description to learn input and output sizes
    aclmdlDesc *modelDesc = aclmdlCreateDesc();
    if (modelDesc == nullptr)
    {
        LogError << "aclmdlCreateDesc failed.";
        return APP_ERR_ACL_FAILURE;
    }
    ret = aclmdlGetDesc(modelDesc, modelId_);
    if (ret != APP_ERR_OK)
    {
        LogError << "aclmdlGetDesc ret fail, ret:" << ret << ".";
        aclmdlDestroyDesc(modelDesc);  // fix: the description used to leak here
        return ret;
    }
    modelDesc_.reset(modelDesc, aclmdlDestroyDesc);
    // Input memory is not allocated: DVPP output memory is reused.
    // Allocate the output buffers and propagate any failure (the return
    // value used to be silently ignored).
    return OutputBufferWithSizeMalloc(ACL_MEM_MALLOC_NORMAL_ONLY);
}
/**
 * Initialize the instance from an (unencrypted) model file on disk:
 * read the file into host memory, then load it onto the device.
 * @param modelPath path of the offline model file
 * @return APP_ERR_OK on success, an error code otherwise
 */
APP_ERROR ModelProcess::Init(std::string modelPath)
{
    LogInfo << "ModelProcess:Begin to init instance.";
    std::shared_ptr<uint8_t> modelData = nullptr;
    int modelSize = 0;
    const APP_ERROR readRet = ReadBinaryFile(modelPath, modelData, modelSize);
    if (readRet != APP_ERR_OK)
    {
        LogError << "read model file failed, ret[" << readRet << "].";
        return readRet;
    }
    return LoadModel(modelData, modelSize);
}
#ifdef USE_DCMI_INTERFACE
/**
 * Enable or disable terminal echo on the given file descriptor (used to hide
 * password input).
 * @param fd terminal file descriptor (e.g. STDIN_FILENO)
 * @param option non-zero enables echo, zero disables it
 */
void ModelProcess::SetConsoleDispMode(int fd, int option)
{
    struct termios term;
    if (tcgetattr(fd, &term) == -1)
    {
        LogWarn << "Failed to get the attribution of the terminal, errno=" << errno << ".";
        return;
    }
    const tcflag_t echoFlags = (ECHO | ECHOE | ECHOK | ECHONL);
    if (option)
    {
        term.c_lflag |= echoFlags;
    }
    else
    {
        term.c_lflag &= ~echoFlags;
    }
    // POSIX tcsetattr returns -1 on failure, with the cause (e.g. EINTR) in
    // errno. The old code also compared the return value against EINTR,
    // which can never match and was dead.
    if (tcsetattr(fd, TCSAFLUSH, &term) == -1)
    {
        LogWarn << "Failed to set the attribution of the terminal, errno=" << errno << ".";
    }
}
/*
 * @description: Get id and password of secret key encrypted information
 * @param id: [out] index of the secret key record chosen by the operator
 * @param password: [out] buffer receiving the typed password
 *                  (std::cin.get reads at most MAX_ENCODE_LEN - 1 chars)
 * @param passwordLen: [out] length of the password actually read
 * @return: APP_ERR_OK success
 * @return: Other values failure
 * @attention: This function needs to be implemented by users.
 */
APP_ERROR ModelProcess::GetKeyIdPassword(unsigned int &id, unsigned char password[], unsigned int &passwordLen) const
{
    LogInfo << "This function should be implemented by users.";
    LogInfo << "Please input secret key encryped index:";
    // Re-prompt until std::cin parses an unsigned integer cleanly.
    while (1)
    {
        std::cin >> id;
        if (std::cin.rdstate() == std::ios::goodbit)
        {
            // Clear newline character
            std::cin.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
            break;
        }
        // Clear the cin state and buffer to receive the next input
        std::cin.clear();
        std::cin.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
        LogInfo << "Input error, please input secret key encryped index again:";
    }
    LogInfo << "Please input secret key encryped password:";
    // Disable the terminal display when entering the password
    SetConsoleDispMode(STDIN_FILENO, 0);
    std::cin.get(reinterpret_cast<char *>(password), MAX_ENCODE_LEN);
    // Enable the terminal display when entering the password
    SetConsoleDispMode(STDIN_FILENO, 1);
    // NOTE(review): the trailing newline stays in the stream after cin.get;
    // confirm no later read depends on a clean buffer.
    passwordLen = strlen(reinterpret_cast<char *>(password));
    return APP_ERR_OK;
}
/**
 * Initialize the model from an (optionally) encrypted model file.
 * When isEncrypted is false this forwards to Init(modelPath). Otherwise the
 * key id/password are collected from the operator, the key material is
 * fetched through the DCMI interface, and the model is loaded.
 * @param modelPath path of the model file (encrypted when isEncrypted is true)
 * @param isEncrypted whether the model file is encrypted
 * @param cardId DCMI card id used to look up the key material
 * @param deviceId DCMI device id used to look up the key material
 * @return APP_ERR_OK on success, an error code otherwise
 */
APP_ERROR ModelProcess::Init(const std::string &modelPath, bool isEncrypted, int cardId, int deviceId)
{
    if (!isEncrypted)
    {
        return Init(modelPath);
    }
    LogInfo << "ModelProcess:Begin to init instance.";
    int modelSize = 0;
    std::shared_ptr<uint8_t> modelData = nullptr;
    // modelPath should point to an encrypted model when isEncrypted is true
    APP_ERROR ret = ReadBinaryFile(modelPath, modelData, modelSize);
    if (ret != APP_ERR_OK)
    {
        LogError << "Failed to read model file, ret[" << ret << "].";
        return ret;
    }
    // Users need to implement this function as required
    ret = GetKeyIdPassword(encryptModelData_.id, encryptModelData_.password, encryptModelData_.password_len);
    if (ret != APP_ERR_OK)
    {
        return ret;
    }
    ret = dcmi_init();
    if (ret != APP_ERR_OK)
    {
        LogError << "Failed to initialize dcmi, ret = " << ret << ".";
        return ret;
    }
    // Read secret key from dcmi
    ret = dcmi_get_ai_model_info(cardId, deviceId, &encryptModelData_);
    if (ret != APP_ERR_OK)
    {
        LogError << "Failed to get model info from dcmi, ret[" << ret << "].";
        return ret;
    }
    // Clear password immediately after use
    aclrtMemset(encryptModelData_.password, sizeof(encryptModelData_.password), 0, sizeof(encryptModelData_.password));
    LogInfo << "Users need to decrypt model before the next operation.";
    // User should modify the decryptedModelData and encryptedModelSize according to the actual situation
    // NOTE(review): decryptedModelData currently aliases the raw modelData,
    // i.e. no decryption happens here — confirm the integration decrypts
    // the buffer before calling LoadModel.
    std::shared_ptr<uint8_t> decryptedModelData = modelData;
    int encryptedModelSize = modelSize;
    // Load decrypted model
    return LoadModel(decryptedModelData, encryptedModelSize);
}
#endif
/**
 * Build an aclmdlDataset wrapping the given buffers (no data is copied).
 * The caller keeps ownership of the underlying device buffers; on failure
 * every intermediate ACL object is released before returning nullptr.
 * @param bufs device buffers to wrap, one per tensor
 * @param sizes buffer sizes in bytes, parallel to bufs
 * @return the populated dataset, or nullptr on failure
 */
aclmdlDataset *ModelProcess::CreateAndFillDataset(const std::vector<void *> &bufs, const std::vector<size_t> &sizes)
    const
{
    // Guard against mismatched parallel vectors (would read past sizes).
    if (bufs.size() != sizes.size())
    {
        LogError << "CreateAndFillDataset: buffer/size count mismatch, " << bufs.size()
            << " vs " << sizes.size() << ".";
        return nullptr;
    }
    aclmdlDataset *dataset = aclmdlCreateDataset();
    if (dataset == nullptr)
    {
        LogError << "ACL_ModelInputCreate failed.";
        return nullptr;
    }
    for (size_t i = 0; i < bufs.size(); ++i)
    {
        aclDataBuffer *data = aclCreateDataBuffer(bufs[i], sizes[i]);
        if (data == nullptr)
        {
            DestroyDataset(dataset);
            LogError << "aclCreateDataBuffer failed.";
            return nullptr;
        }
        APP_ERROR ret = aclmdlAddDatasetBuffer(dataset, data);
        if (ret != APP_ERR_OK)
        {
            // fix: the buffer was not adopted by the dataset, so destroying
            // only the dataset used to leak it
            aclDestroyDataBuffer(data);
            DestroyDataset(dataset);
            LogError << "ACL_ModelInputDataAdd failed, ret[" << ret << "].";
            return nullptr;
        }
    }
    return dataset;
}
size_t ModelProcess::GetModelNumInputs() const
{
return aclmdlGetNumInputs(modelDesc_.get());
}
size_t ModelProcess::GetModelNumOutputs() const
{
return aclmdlGetNumOutputs(modelDesc_.get());
}
size_t ModelProcess::GetModelInputSizeByIndex(const size_t &i) const
{
return aclmdlGetInputSizeByIndex(modelDesc_.get(), i);
}
size_t ModelProcess::GetModelOutputSizeByIndex(const size_t &i) const
{
return aclmdlGetOutputSizeByIndex(modelDesc_.get(), i);
}
/**
 * Allocate one device buffer per model input, sized from the model
 * description, and append them to inputBuffers_/inputSizes_.
 * On any allocation failure everything allocated so far is released and the
 * tracking vectors are cleared so a retry starts from a clean state.
 * @param policy aclrtMalloc policy (huge-page first, normal only, ...)
 * @return APP_ERR_OK on success, the aclrtMalloc error code otherwise
 */
APP_ERROR ModelProcess::InputBufferWithSizeMalloc(aclrtMemMallocPolicy policy)
{
    size_t inputNum = aclmdlGetNumInputs(modelDesc_.get());
    LogDebug << modelName_ << "model inputNum is : " << inputNum << ".";
    for (size_t i = 0; i < inputNum; ++i)
    {
        void *buffer = nullptr;
        size_t size = aclmdlGetInputSizeByIndex(modelDesc_.get(), i);
        APP_ERROR ret = aclrtMalloc(&buffer, size, policy);
        if (ret != APP_ERR_OK)
        {
            LogFatal << modelName_ << "model input aclrtMalloc fail(ret=" << ret
                << "), buffer=" << buffer << ", size=" << size << ".";
            // Free the buffers malloced successfully before returning, and
            // fix: drop the now-stale vector entries too (ReleaseModelBuffer
            // only nulls the pointers, leaving nullptr/size entries behind).
            ReleaseModelBuffer(inputBuffers_);
            inputBuffers_.clear();
            inputSizes_.clear();
            return ret;
        }
        inputBuffers_.push_back(buffer);
        inputSizes_.push_back(size);
        LogDebug << modelName_ << "model inputBuffer i=" << i << ", size=" << size << ".";
    }
    return APP_ERR_OK;
}
/**
 * Allocate one device buffer per model output, sized from the model
 * description, and append them to outputBuffers_/outputSizes_.
 * On any allocation failure everything allocated so far is released and the
 * tracking vectors are cleared so a retry starts from a clean state.
 * @param policy aclrtMalloc policy (huge-page first, normal only, ...)
 * @return APP_ERR_OK on success, the aclrtMalloc error code otherwise
 */
APP_ERROR ModelProcess::OutputBufferWithSizeMalloc(aclrtMemMallocPolicy policy)
{
    size_t outputNum = aclmdlGetNumOutputs(modelDesc_.get());
    LogDebug << modelName_ << "model outputNum is : " << outputNum << ".";
    for (size_t i = 0; i < outputNum; ++i)
    {
        void *buffer = nullptr;
        size_t size = aclmdlGetOutputSizeByIndex(modelDesc_.get(), i);
        APP_ERROR ret = aclrtMalloc(&buffer, size, policy);
        if (ret != APP_ERR_OK)
        {
            LogFatal << modelName_ << "model output aclrtMalloc fail(ret=" << ret
                << "), buffer=" << buffer << ", size=" << size << ".";
            // Free the buffers malloced successfully before returning, and
            // fix: drop the now-stale vector entries too (ReleaseModelBuffer
            // only nulls the pointers, leaving nullptr/size entries behind).
            ReleaseModelBuffer(outputBuffers_);
            outputBuffers_.clear();
            outputSizes_.clear();
            return ret;
        }
        outputBuffers_.push_back(buffer);
        outputSizes_.push_back(size);
        LogDebug << modelName_ << "," << aclmdlGetOutputNameByIndex(modelDesc_.get(), i) << ",model outputBuffer i=" << i << ", size=" << size << ".";
    }
    return APP_ERR_OK;
}
// Free every device buffer in the list and reset each entry to nullptr.
// The vector keeps its size; callers clear it themselves when needed.
void ModelProcess::ReleaseModelBuffer(std::vector<void *> &modelBuffers) const
{
    for (auto &buf : modelBuffers)
    {
        if (buf == nullptr)
        {
            continue;
        }
        aclrtFree(buf);
        buf = nullptr;
    }
}
// Record the dynamic width/height later applied by ModelInferDynamicHW.
void ModelProcess::SetModelWH(uint32_t width, uint32_t height)
{
    modelHeight_ = height;
    modelWidth_ = width;
}