Other Parts Discussed in Thread: CONTROLSUITE
Tool/software: TI C/C++ Compiler
Hi,
I have programmed a Kalman Filter using floating points. The filter is used in a controller context and implemented with a 3. order system. The problem is that the filter use around 27000 instructions pr. run, meaning my control loop is significantly decreased to around 2kHz (would prefer around 20kHz).
This is my Kalman Filter script:
/*
* Kalman.c
*
* Created on: Sep 10, 2018
* Author: simon
*/
#include "Kalman.h"
#include "matrix_calc.h"
#include "../IO/SCI.h"
#include <stdio.h>
void predictStep(kalman_t *kalman, input_vector in_old);
states updateStep(kalman_t *kalman, measurement_vector z_measure);
states state = {0,0,0};
float x[N][M] = {{10,3,4}, {3,1,2}, {4,2,6}};
float y[N][M] = {{1,2,1}, {2,1,2}, {1,2,6}};
float z[N][M];
char str2[30];
size_t n = N;
size_t m = M;
size_t p = P;
void kalman_init(kalman_t *kalman, input_vector init_uk, states init_x, float (*init_p)[N])
{
kalman->xk_old[0][0]= init_x.Torque;
kalman->xk_old[1][0]= init_x.Field;
kalman->xk_old[2][0]= init_x.Speed;
int i,j;
for(i=0;i<M;i++) {
for(j=0;j<N;j++) {
kalman->Pk_old[i][j] = init_p[i][j];
}
}
kalman->A[0][0] = 1;
kalman->A[0][1] = 0.1;
kalman->A[0][2] = 0.1;
kalman->A[1][0] = 0;
kalman->A[1][1] = 1;
kalman->A[1][2] = 1;
kalman->A[2][0] = 0;
kalman->A[2][1] = 1;
kalman->A[2][2] = 1;
kalman->B[0][0] = 1;
kalman->B[0][1] = 0.1;
kalman->B[1][0] = 0;
kalman->B[1][1] = 1;
kalman->B[2][0] = 0;
kalman->B[2][1] = 1;
kalman->Rk[0][0] = 1;
kalman->Rk[0][1] = 0;
kalman->Rk[0][2] = 0;
kalman->Rk[1][0] = 0;
kalman->Rk[1][1] = 1;
kalman->Rk[1][2] = 0;
kalman->Rk[2][0] = 0;
kalman->Rk[2][1] = 0;
kalman->Rk[2][2] = 1;
kalman->Qk[0][0] = 1;
kalman->Qk[0][1] = 0;
kalman->Qk[0][2] = 0;
kalman->Qk[1][0] = 0;
kalman->Qk[1][1] = 1;
kalman->Qk[1][2] = 0;
kalman->Qk[2][0] = 0;
kalman->Qk[2][1] = 0;
kalman->Qk[2][2] = 1;
kalman->H[0][0] = 1;
kalman->H[0][1] = 0.1;
kalman->H[0][2] = 0.1;
kalman->H[1][0] = 0;
kalman->H[1][1] = 1;
kalman->H[1][2] = 1;
kalman->H[2][0] = 0;
kalman->H[2][1] = 1;
kalman->H[2][2] = 1;
matrix_transpose(n, n, kalman->A,kalman->AT);
matrix_transpose(n, n, kalman->H,kalman->HT);
matrix_identity(n, kalman->I);
predictStep(kalman, init_uk);
}
states KalmanFilter(kalman_t *kalman, input_vector uk_old, measurement_vector zk, Uint8 event) {
switch(event) {
case NEW_OBSERVATION:
{
state = updateStep(kalman, zk);
break;
}
case NEW_SAMPLE:
{
predictStep(kalman, uk_old);
break;
}
}
return state;
}
void predictStep(kalman_t *kalman, input_vector in_old) {
kalman->uk_old[0][0] = in_old.vq;
kalman->uk_old[1][0] = in_old.vd;
//xk = A*xk_old + B*uk_old
matrix_multiply(n, n, 1, kalman->A, kalman->xk_old, kalman->Axk_old);
matrix_multiply(n, p, 1, kalman->B, kalman->uk_old, kalman->Buk_old);
matrix_addition(n, 1, kalman->Axk_old,kalman->Buk_old,kalman->xk);
//zk = H*x
matrix_multiply(n, n, 1, kalman->H, kalman->xk, kalman->zk);
//Pk = A*Pk_old*A^T + Qk
matrix_multiply(n, n, n, kalman->A, kalman->Pk_old, kalman->APk_old);
matrix_multiply(n, n, n, kalman->APk_old, kalman->AT, kalman->APk_oldAT);
matrix_addition(n, n, kalman->APk_oldAT, kalman->Qk, kalman->Pk);
}
states updateStep(kalman_t *kalman, measurement_vector zk) {
states state;
//Load input struct into Kalman parameter
kalman->zk_meas[0][0] = zk.id;
kalman->zk_meas[1][0] = zk.iq;
kalman->zk_meas[2][0] = zk.wt;
//Kk = Pk*H^T*[H*Pk*H^T+Rk]^-1
matrix_multiply(n, n, n, kalman->Pk, kalman->HT, kalman->PkHT);
matrix_multiply(n, n, n, kalman->H, kalman->PkHT, kalman->HPkHT);
matrix_addition(n, n, kalman->HPkHT, kalman->Qk, kalman->HPkHT_Rk);
matrix_inverse(n, kalman->HPkHT_Rk, kalman->I_HPkHT_Rk);
matrix_multiply(n, n, n, kalman->PkHT, kalman->I_HPkHT_Rk, kalman->Kk);
//xk = xk + Kk(zk_meas-zk)
matrix_subtraction(m, 1, kalman->zk_meas, kalman->zk, kalman->meas_error);
matrix_multiply(n, m, 1, kalman->Kk, kalman->meas_error, kalman->Kkmeas_error);
matrix_addition(n, 1, kalman->xk, kalman->Kkmeas_error, kalman->xk_old);
//Pk = (I-Kk*H)*Pk
matrix_multiply(n, n, n, kalman->Kk, kalman->H, kalman->KkH);
matrix_subtraction(n, n, kalman->I, kalman->KkH, kalman->I_nKkH);
matrix_multiply(n, n, n, kalman->I_nKkH, kalman->Pk, kalman->Pk_old);
//Load states into output struct
state.Torque= kalman->xk_old[0][0];
state.Field = kalman->xk_old[1][0];
state.Speed = kalman->xk_old[2][0];
return state;
}
and this is my matrix calculation file:
/*
* matrix_calc.c
*
* Created on: Sep 10, 2018
* Author: simon
*/
#include "matrix_calc.h"
void matrix_multiply(size_t n, size_t m, size_t p, float mat1[n][m], float mat2[m][p], float res[n][p])
{
int i, j, k;
for (i = 0; i < n; i++)
{
for (j = 0; j < p; j++)
{
res[i][j] = 0;
for (k = 0; k < m; k++)
res[i][j] += mat1[i][k]*mat2[k][j];
}
}
}
void matrix_transpose (size_t n, size_t m, float mat1[n][m], float res[m][n])
{
int i,j;
for(i=0;i<n;i++){
for(j=0;j<m;j++){
res[j][i] = mat1[i][j];
}
}
}
void matrix_addition (size_t n, size_t m, float mat1[n][m], float mat2[n][m], float res[n][m])
{
int i,j;
for(i=0;i<m;i++) {
for(j=0;j<n;j++) {
res[i][j] = mat1[i][j]+mat2[i][j];
}
}
}
void matrix_subtraction (size_t n, size_t m, float mat1[n][m], float mat2[n][m], float res[n][m])
{
int i,j;
for(i=0;i<m;i++) {
for(j=0;j<n;j++) {
res[i][j] = mat1[i][j]-mat2[i][j];
}
}
}
float matrix_determinant(size_t k, float mat[k][k])
{
float determinant = 0;
int i = 0;
for(i = 0; i < k; i++) {
determinant = determinant + (mat[0][i] * (mat[1][(i+1)%3] * mat[2][(i+2)%3] - mat[1][(i+2)%3] * mat[2][(i+1)%3]));
}
return determinant;
}
void matrix_inverse(size_t k, float mat[k][k], float res[k][k]) {
int i,j;
float determinant;
determinant = matrix_determinant(k,mat);
for(i = 0; i < k; i++){
for(j = 0; j < k; j++) {
res[i][j]=((mat[(j+1)%3][(i+1)%3] * mat[(j+2)%3][(i+2)%3]) - (mat[(j+1)%3][(i+2)%3] * mat[(j+2)%3][(i+1)%3]))/ determinant;
}
}
}
void matrix_identity(size_t k, float res[k][k]) {
int i,j;
for(i = 0; i < 3; i++){
for(j = 0; j < 3; j++) {
if (j == i) {
res[i][j] = 1;
}
else if (j != i) {
res[i][j] = 0;
}
}
}
}
My Compiler flags:
-v28 -ml -mt --float_support=fpu32 --vcu_support=vcu0 --opt_for_speed=5 --fp_reassoc=off --fp_mode=strict --include_path="/home/simon/workspace_v8/Scarab_PCB_test" --include_path="/home/simon/controlSUITE/libs/dsp/FPU/v131/include" --include_path="/home/simon/controlSUITE/libs/app_libs/motor_control/math_blocks/v4.3" --include_path="/home/simon/controlSUITE/device_support/f2806x/v151/F2806x_common/include" --include_path="/home/simon/controlSUITE/device_support/f2806x/v151/F2806x_headers/include" --include_path="/home/simon/controlSUITE/libs/math/IQmath/v160/include" --include_path="/home/simon/controlSUITE/libs/math/FPUfastRTS/V100/include" --include_path="/home/simon/controlSUITE/device_support/f2806x/v151/MWare" --include_path="/opt/TI/ti-cgt-c2000_6.4.2/include" --advice:performance=all -g --diag_wrap=off --diag_warning=225 --display_error_number
And my Linker flags:
-v28 -ml -mt --float_support=fpu32 --vcu_support=vcu0 --opt_for_speed=5 --fp_reassoc=off --fp_mode=strict --advice:performance=all -g --diag_wrap=off --diag_warning=225 --display_error_number -z -m"Scarab_PCB_test.map" --stack_size=0x200 --warn_sections -i"/opt/TI/ti-cgt-c2000_6.4.2/lib" -i"/home/simon/controlSUITE/device_support/f2806x/v151/F2806x_headers/cmd" -i"/home/simon/controlSUITE/device_support/f2806x/v151/MWare/lib" -i"/home/simon/controlSUITE/device_support/f2806x/v151/MWare/driverlib/Debug" -i"/home/simon/controlSUITE/device_support/f2806x/v151/F2806x_common/lib" -i"/home/simon/controlSUITE/libs/math/IQmath/v160/lib" -i"/home/simon/controlSUITE/libs/math/FPUfastRTS/V100/lib" -i"/opt/TI/ti-cgt-c2000_6.4.2/include" --priority --reread_libs --diag_wrap=off --display_error_number --xml_link_info="Scarab_PCB_test_linkInfo.xml" --rom_model
Of course, the inverse matrix calculation is the worst one, but all the matrix functions basically use too many instrunctions.
Do anybody have suggestions for optimizing the code or the project configuration that could lead to a significant improve of the overall matrix calculation speed?
Br,
Simon