# ruff: noqa: N802, N803, N806, N815, N816
import os
import re

import numpy as np
from scipy import signal

# Simple utilities for displaying generated code in the notebook
from utils import cleanup, display_text

import archimedes as arc

# Pick the Archimedes theme from the ARCHIMEDES_THEME environment variable,
# falling back to "dark" when the variable is not set
THEME = os.environ.get("ARCHIMEDES_THEME", "dark")
arc.theme.set_theme(THEME)

Generating driver code

In the previous part of this tutorial, we learned how to automatically translate Python code to highly efficient C implementations. However, the C code generated by CasADi uses a generic interface that requires all arrays to be initialized externally. If we want to call the generated function from our own C code or use it in an embedded setting, this creates the overhead of manually determining and adjusting array sizes, names, initializations, etc.

In embedded systems, memory management is critical. The core algorithm code generated in Part 1 is extremely efficient, but it expects:

  1. Pre-allocated memory buffers for all inputs and outputs

  2. Properly configured pointers to these buffers

  3. Initialization of working memory needed by the algorithm

Manually handling these details would be tedious and error-prone - especially when algorithms change during development.

Archimedes handles this with a templated “driver code” system, where you can generate code that takes care of this boilerplate for you. On this page we will go through some of the details of this system, beginning with generating a simple plain C-code driver.

First, let’s recall the basic IIR filter implementation we were working with:

# Optionally give descriptive names for return values (these don't need
# to match the variable names)
@arc.compile(return_names=["u_hist", "y_hist"])
def iir_filter(u, b, a, u_prev, y_prev):
    """Advance the IIR filter by one sample and return the updated histories.

    Args:
        u: Newest input sample.
        b: Numerator coefficients, dotted with the whole input history.
        a: Denominator coefficients; ``a[0]`` normalizes the output and
            ``a[1:]`` weights the output history.
        u_prev: Input history, newest sample at index 0. It is shifted by
            one slot and ``u`` is written to index 0.
        y_prev: Output history, newest sample at index 0. Only its first
            ``len(a) - 1`` entries enter the computation; it is shifted by
            one slot and the new output is written to index 0.

    Returns:
        The updated ``(u_prev, y_prev)`` pair, named ``u_hist`` and
        ``y_hist`` via ``return_names``.
    """
    # Update input history
    u_prev[1:] = u_prev[:-1]
    u_prev[0] = u

    # Compute the output from the difference equation. Keeping separate
    # input and output histories like this is the direct form I structure;
    # dividing by a[0] handles coefficients that are not normalized.
    y = (np.dot(b, u_prev) - np.dot(a[1:], y_prev[: len(a) - 1])) / a[0]

    # Update output history
    y_prev[1:] = y_prev[:-1]
    y_prev[0] = y

    return u_prev, y_prev

The only modification from last time is that we will now use the driver argument to codegen. To begin with we will just give the specification “c”, which will generate a basic main.c file.

cleanup()  # Start from a clean slate: clean up any previously generated code

# Low-pass Butterworth design via SciPy
dt = 0.01  # Sampling time [seconds]
Wn = 10  # Cutoff frequency [Hz]
order = 4
b, a = signal.butter(order, Wn, btype="low", analog=False, fs=1 / dt)

# "Template" arguments: codegen uses these for type inference, and the
# generated driver initializes its arrays to these values
u = 1.0
u_prev = np.zeros(b.size)  # one input-history slot per numerator coefficient
y_prev = np.zeros(a.size - 1)  # one output-history slot per a[1:] coefficient
args = (u, b, a, u_prev, y_prev)

arc.codegen(iir_filter, "iir_filter.c", args, driver="c")

# Read back the main.c driver produced by the driver="c" option
with open("main.c") as driver_file:
    main_c = driver_file.read()

display_text(main_c)
// gcc main.c iir_filter.c

#include "iir_filter.h"

// PROTECTED-REGION-START: imports
// ... User-defined imports and includes
// PROTECTED-REGION-END

// Allocate memory for inputs and outputs
double u = 1.0;
double b[5] = {0.004824343357716228, 0.019297373430864913, 0.02894606014629737, 0.019297373430864913, 0.004824343357716228};
double a[5] = {1.0, -2.369513007182038, 2.313988414415881, -1.054665405878568, 0.18737949236818502};
double u_prev[5] = {0.0, 0.0, 0.0, 0.0, 0.0};
double y_prev[4] = {0.0, 0.0, 0.0, 0.0};

double u_hist[5] = {0};
double y_hist[4] = {0};

// Prepare pointers to inputs, outputs, and work arrays
const double* arg[iir_filter_SZ_ARG] = {0};
double* res[iir_filter_SZ_RES] = {0};
long long int iw[iir_filter_SZ_IW];
double w[iir_filter_SZ_W];

// PROTECTED-REGION-START: allocation
// ... User-defined memory allocation and function declaration
// PROTECTED-REGION-END

int main(int argc, char *argv[]) {
    // Set up input and output pointers
    arg[0] = &u;
    arg[1] = b;
    arg[2] = a;
    arg[3] = u_prev;
    arg[4] = y_prev;

    res[0] = u_hist;
    res[1] = y_hist;

    // PROTECTED-REGION-START: main
    // ... User-defined program body
    iir_filter(arg, res, iw, w, 0);
    // PROTECTED-REGION-END

    return 0;
}

If you are familiar with C, this code will be largely self-explanatory. It takes care of all array and pointer initialization, specifically initializing all arrays to the values of the “template arguments” we passed to codegen. For example, if we want a filter with a different order and cutoff frequency, we can regenerate the driver easily:

cleanup()  # Start from a clean slate: clean up any previous runs

# Low-pass Butterworth design via SciPy, now 2nd order with a 20 Hz cutoff
dt = 0.01  # Sampling time [seconds]
Wn = 20  # Cutoff frequency [Hz]
order = 2
b, a = signal.butter(order, Wn, btype="low", analog=False, fs=1 / dt)

# "Template" arguments: codegen uses these for type inference, and the
# generated driver initializes its arrays to these values
u = 1.0
u_prev = np.zeros(b.size)  # one input-history slot per numerator coefficient
y_prev = np.zeros(a.size - 1)  # one output-history slot per a[1:] coefficient
args = (u, b, a, u_prev, y_prev)

arc.codegen(iir_filter, "iir_filter.c", args, driver="c")

# Read back the regenerated main.c driver
with open("main.c") as driver_file:
    main_c = driver_file.read()

display_text(main_c)
// gcc main.c iir_filter.c

#include "iir_filter.h"

// PROTECTED-REGION-START: imports
// ... User-defined imports and includes
// PROTECTED-REGION-END

// Allocate memory for inputs and outputs
double u = 1.0;
double b[3] = {0.20657208382614792, 0.41314416765229584, 0.20657208382614792};
double a[3] = {1.0, -0.3695273773512414, 0.19581571265583306};
double u_prev[3] = {0.0, 0.0, 0.0};
double y_prev[2] = {0.0, 0.0};

double u_hist[3] = {0};
double y_hist[2] = {0};

// Prepare pointers to inputs, outputs, and work arrays
const double* arg[iir_filter_SZ_ARG] = {0};
double* res[iir_filter_SZ_RES] = {0};
long long int iw[iir_filter_SZ_IW];
double w[iir_filter_SZ_W];

// PROTECTED-REGION-START: allocation
// ... User-defined memory allocation and function declaration
// PROTECTED-REGION-END

int main(int argc, char *argv[]) {
    // Set up input and output pointers
    arg[0] = &u;
    arg[1] = b;
    arg[2] = a;
    arg[3] = u_prev;
    arg[4] = y_prev;

    res[0] = u_hist;
    res[1] = y_hist;

    // PROTECTED-REGION-START: main
    // ... User-defined program body
    iir_filter(arg, res, iw, w, 0);
    // PROTECTED-REGION-END

    return 0;
}

Protected regions

One unusual feature is the PROTECTED-REGION-START and PROTECTED-REGION-END comments. These are used by Archimedes to bracket regions where you can modify the autogenerated code and have your changes preserved when the code is regenerated.

Typical modifications you might make in protected regions include:

  1. Input/Output Handling: Adding code to read sensors or update actuators

  2. State Management: Copying outputs back to inputs for the next iteration

  3. Diagnostic Code: Adding debug output or performance measurements

  4. Interfacing with Other Systems: Communication code for UART, I2C, etc.

Here’s an example where we will insert some code to copy the output back to the input arrays, as you would in a typical filtering application. For this example we will insert this change programmatically using regex matching, but of course it will typically be easier to just make these changes by hand.

# New C code to insert
new_code = """
    // Call the IIR filter
    iir_filter(arg, res, iw, w, 0);

    // Copy the output to the input history arrays
    int n = sizeof(a) / sizeof(a[0]) - 1;
    for (int i = 0; i < n; i++) {
        y_prev[i] = y_hist[i];
        u_prev[i] = u_hist[i];
    }
    u_prev[n] = u_hist[n];
"""

with open("main.c", "r") as f:
    main_c = f.read()

# Capture the "main" protected region: group 1 is the START marker line,
# group 2 the current body (non-greedy; DOTALL lets it span lines), and
# group 3 the indented END marker.
pattern = r"(// PROTECTED-REGION-START: main\n)(.+?)(    // PROTECTED-REGION-END)"

# Replace the region content. A callable replacement inserts new_code
# literally; in a replacement *string*, any backslash in the C code (such as
# the "\n" of a printf format) would be processed as a regex escape.
main_c, n_replaced = re.subn(
    pattern,
    lambda m: f"{m.group(1)}{new_code}\n{m.group(3)}",
    main_c,
    flags=re.DOTALL,
)
if n_replaced == 0:
    # Fail loudly rather than silently writing back an unmodified driver
    raise RuntimeError("Protected region 'main' not found in main.c")

# Write updated content back to file
with open("main.c", "w") as f:
    f.write(main_c)

display_text(main_c)
// gcc main.c iir_filter.c

#include "iir_filter.h"

// PROTECTED-REGION-START: imports
// ... User-defined imports and includes
// PROTECTED-REGION-END

// Allocate memory for inputs and outputs
double u = 1.0;
double b[3] = {0.20657208382614792, 0.41314416765229584, 0.20657208382614792};
double a[3] = {1.0, -0.3695273773512414, 0.19581571265583306};
double u_prev[3] = {0.0, 0.0, 0.0};
double y_prev[2] = {0.0, 0.0};

double u_hist[3] = {0};
double y_hist[2] = {0};

// Prepare pointers to inputs, outputs, and work arrays
const double* arg[iir_filter_SZ_ARG] = {0};
double* res[iir_filter_SZ_RES] = {0};
long long int iw[iir_filter_SZ_IW];
double w[iir_filter_SZ_W];

// PROTECTED-REGION-START: allocation
// ... User-defined memory allocation and function declaration
// PROTECTED-REGION-END

int main(int argc, char *argv[]) {
    // Set up input and output pointers
    arg[0] = &u;
    arg[1] = b;
    arg[2] = a;
    arg[3] = u_prev;
    arg[4] = y_prev;

    res[0] = u_hist;
    res[1] = y_hist;

    // PROTECTED-REGION-START: main

    // Call the IIR filter
    iir_filter(arg, res, iw, w, 0);

    // Copy the output to the input history arrays
    int n = sizeof(a) / sizeof(a[0]) - 1;
    for (int i = 0; i < n; i++) {
        y_prev[i] = y_hist[i];
        u_prev[i] = u_hist[i];
    }
    u_prev[n] = u_hist[n];

    // PROTECTED-REGION-END

    return 0;
}

Now we can change the Python code and re-generate the C, but our protected code will be preserved. For example, let’s switch to a 3rd-order filter and return to the original 10 Hz cutoff frequency:

# No cleanup() here: the edited main.c has to stay on disk so that codegen
# can carry its protected regions over into the regenerated driver

# Low-pass Butterworth design via SciPy, now 3rd order with a 10 Hz cutoff
dt = 0.01  # Sampling time [seconds]
Wn = 10  # Cutoff frequency [Hz]
order = 3
b, a = signal.butter(order, Wn, btype="low", analog=False, fs=1 / dt)

# "Template" arguments: codegen uses these for type inference, and the
# generated driver initializes its arrays to these values
u = 1.0
u_prev = np.zeros(b.size)  # one input-history slot per numerator coefficient
y_prev = np.zeros(a.size - 1)  # one output-history slot per a[1:] coefficient
args = (u, b, a, u_prev, y_prev)

arc.codegen(iir_filter, "iir_filter.c", args, driver="c")

# Read back the regenerated main.c driver
with open("main.c") as driver_file:
    main_c = driver_file.read()

display_text(main_c)
// gcc main.c iir_filter.c

#include "iir_filter.h"

// PROTECTED-REGION-START: imports
// ... User-defined imports and includes
// PROTECTED-REGION-END

// Allocate memory for inputs and outputs
double u = 1.0;
double b[4] = {0.018098933007514428, 0.05429679902254328, 0.05429679902254328, 0.018098933007514428};
double a[4] = {1.0, -1.7600418803431688, 1.182893262037831, -0.27805991763454646};
double u_prev[4] = {0.0, 0.0, 0.0, 0.0};
double y_prev[3] = {0.0, 0.0, 0.0};

double u_hist[4] = {0};
double y_hist[3] = {0};

// Prepare pointers to inputs, outputs, and work arrays
const double* arg[iir_filter_SZ_ARG] = {0};
double* res[iir_filter_SZ_RES] = {0};
long long int iw[iir_filter_SZ_IW];
double w[iir_filter_SZ_W];

// PROTECTED-REGION-START: allocation
// ... User-defined memory allocation and function declaration
// PROTECTED-REGION-END

int main(int argc, char *argv[]) {
    // Set up input and output pointers
    arg[0] = &u;
    arg[1] = b;
    arg[2] = a;
    arg[3] = u_prev;
    arg[4] = y_prev;

    res[0] = u_hist;
    res[1] = y_hist;

    // PROTECTED-REGION-START: main
    // Call the IIR filter
    iir_filter(arg, res, iw, w, 0);

    // Copy the output to the input history arrays
    int n = sizeof(a) / sizeof(a[0]) - 1;
    for (int i = 0; i < n; i++) {
        y_prev[i] = y_hist[i];
        u_prev[i] = u_hist[i];
    }
    u_prev[n] = u_hist[n];
    // PROTECTED-REGION-END

    return 0;
}

The beauty of this approach is that you can regenerate the C code whenever your algorithm changes, without losing these customizations.

Working with templates

This combination of code generation and protected regions allows you to switch seamlessly between the high-level Python and low-level C. One limitation is that you can only edit within the protected regions defined in the template file, so if you want to make more extensive changes or define your own protected regions you may want to create your own template files.

The template files are in standard Jinja2 format; you can copy them and just point the code generation to your own custom template if you would like. For example, c_driver_custom.j2 is a lightly modified version of the default C template that calls the generated function inside of an infinite loop at approximately the sampling frequency (without accounting for the runtime of the filter itself).

cleanup()  # Clean up any previous runs before switching templates

# Settings for the driver generator: which template to render and where to
# write the result.
# NOTE(review): "sample_rate" receives the sampling *time* dt in seconds; the
# rendered driver reports 100 Hz / 10000 us for dt = 0.01 -- confirm the
# expected units against c_driver_custom.j2
driver_config = dict(
    sample_rate=dt,
    template_path="c_driver_custom.j2",
    output_path="main_custom.c",
)

arc.codegen(
    iir_filter,
    "iir_filter.c",
    args,
    driver="c",
    driver_config=driver_config,
)

# NOTE(review): the rendered driver loops with `while (true)` but includes
# only iir_filter.h and <unistd.h>; C compilers older than C23 need
# <stdbool.h> for `true` -- confirm the template or header provides it
with open(driver_config["output_path"]) as driver_file:
    main_c = driver_file.read()

display_text(main_c)
// gcc main_custom.c iir_filter.c

#include "iir_filter.h"
#include <unistd.h>

// PROTECTED-REGION-START: imports
// ... User-defined imports and includes
// PROTECTED-REGION-END

// Sampling rate: 100 Hz
const unsigned long SAMPLE_RATE_US = 10000;

// Allocate memory for inputs and outputs
double u = 1.0;
double b[4] = {0.018098933007514428, 0.05429679902254328, 0.05429679902254328, 0.018098933007514428};
double a[4] = {1.0, -1.7600418803431688, 1.182893262037831, -0.27805991763454646};
double u_prev[4] = {0.0, 0.0, 0.0, 0.0};
double y_prev[3] = {0.0, 0.0, 0.0};

double u_hist[4] = {0};
double y_hist[3] = {0};

// Prepare pointers to inputs, outputs, and work arrays
const double* arg[iir_filter_SZ_ARG] = {0};
double* res[iir_filter_SZ_RES] = {0};
long long int iw[iir_filter_SZ_IW];
double w[iir_filter_SZ_W];

// PROTECTED-REGION-START: allocation
// ... User-defined memory allocation and function declaration
// PROTECTED-REGION-END

int main(int argc, char *argv[]) {
    // Set up input and output pointers
    arg[0] = &u;
    arg[1] = b;
    arg[2] = a;
    arg[3] = u_prev;
    arg[4] = y_prev;

    res[0] = u_hist;
    res[1] = y_hist;

    while (true) {
        // PROTECTED-REGION-START: loop
        // ... User-defined program body
        iir_filter(arg, res, iw, w, 0);

        // Sleep for the specified sample rate
        usleep(SAMPLE_RATE_US);
        // PROTECTED-REGION-END
    }

    return 0;
}

Summary

In this part of the hardware deployment tutorial, we covered the basic mechanisms of creating “driver” code to call the auto-generated C code. The auto-generated driver code will likely need to be modified for your specific use case; there are two basic mechanisms for customizing the driver code while preserving the ability to modify your high-level Python algorithms:

  1. Protected regions: Everything between PROTECTED-REGION-* tags in the driver file will be preserved by codegen, meaning that you can manually edit these parts of the code, re-generate your C code, and your edits will be preserved.

  2. Custom templates: For more extensive or structural modifications, the Jinja2 templates themselves can be modified.

Combining these customizable driver templates with Python-to-C code generation provides a flexible tool kit for writing high-level logic in Python and then rapidly deploying to a C environment. However, hardware control applications typically require going beyond plain C code and targeting an embedded processor. In the final part of this tutorial we will bring it all together and see a simple example of auto-generating code for an Arduino development board.

cleanup()  # Final tidy-up: clean up the code generated by the cells above