Program Listing for File Sim3Tracker.cpp¶
↰ Return to documentation for file (lib/Tracking/Sim3Tracker.cpp)
#include "Sim3Tracker.h"
#include <opencv2/highgui/highgui.hpp>
#include "DataStructures/Frame.h"
#include "Tracking/TrackingReference.h"
#include "util/globalFuncs.h"
#include "IOWrapper/ImageDisplay.h"
#include "Tracking/LGSX.h"
namespace lsd_slam
{
#if defined(ENABLE_NEON)
#define callOptimized(function, arguments) function##NEON arguments
#else
#if defined(ENABLE_SSE)
#define callOptimized(function, arguments) (USESSE ? function##SSE arguments : function arguments)
#else
#define callOptimized(function, arguments) function arguments
#endif
#endif
Sim3Tracker::Sim3Tracker( const ImageSize &sz )
: settings(),
_imgSize( sz ),
_debugImages( sz )
{
int area = _imgSize.area();
buf_warped_residual = new float[area];
buf_warped_weights = new float[area];
buf_warped_dx = new float[area];
buf_warped_dy = new float[area];
buf_warped_x = new float[area];
buf_warped_y = new float[area];
buf_warped_z = new float[area];
buf_d = new float[area];
buf_residual_d = new float[area];
buf_idepthVar = new float[area];
buf_warped_idepthVar = new float[area];
buf_weight_p = new float[area];
buf_weight_d = new float[area];
buf_weight_Huber = new float[area];
buf_weight_VarP = new float[area];
buf_weight_VarD = new float[area];
buf_warped_size = 0;
lastResidual = 0;
iterationNumber = 0;
lastDepthResidual = lastPhotometricResidual = lastDepthResidualUnweighted = lastPhotometricResidualUnweighted = lastResidualUnweighted = 0;
pointUsage = 0;
}
Sim3Tracker::~Sim3Tracker()
{
delete[] buf_warped_residual;
delete[] buf_warped_weights;
delete[] buf_warped_dx;
delete[] buf_warped_dy;
delete[] buf_warped_x;
delete[] buf_warped_y;
delete[] buf_warped_z;
delete[] buf_d;
delete[] buf_residual_d;
delete[] buf_idepthVar;
delete[] buf_warped_idepthVar;
delete[] buf_weight_p;
delete[] buf_weight_d;
delete[] buf_weight_Huber;
delete[] buf_weight_VarP;
delete[] buf_weight_VarD;
}
Sim3 Sim3Tracker::trackFrameSim3(
const std::shared_ptr<TrackingReference> &reference,
const std::shared_ptr<Frame> &frame,
const Sim3& frameToReference_initialEstimate,
int startLevel, int finalLevel)
{
boost::shared_lock<boost::shared_mutex> lock = frame->getActiveLock();
diverged = false;
affineEstimation_a = 1; affineEstimation_b = 0;
// ============ track frame ============
Sim3 referenceToFrame = frameToReference_initialEstimate.inverse();
LGS7 ls7;
int numCalcResidualCalls[PYRAMID_LEVELS];
int numCalcWarpUpdateCalls[PYRAMID_LEVELS];
Sim3ResidualStruct finalResidual;
bool warp_update_up_to_date = false;
for(int lvl=startLevel;lvl >= finalLevel;lvl--)
{
numCalcResidualCalls[lvl] = 0;
numCalcWarpUpdateCalls[lvl] = 0;
if(settings.maxItsPerLvl[lvl] == 0)
continue;
reference->makePointCloud(lvl);
// evaluate baseline-residual.
callOptimized(calcSim3Buffers, (reference, frame, referenceToFrame, lvl));
if(buf_warped_size < 0.5 * MIN_GOODPERALL_PIXEL_ABSMIN * (_imgSize.width>>lvl)*(_imgSize.height>>lvl) || buf_warped_size < 10)
{
diverged = true;
return Sim3();
}
Sim3ResidualStruct lastErr = callOptimized(calcSim3WeightsAndResidual,(referenceToFrame));
if(plotSim3TrackingIterationInfo) callOptimized(calcSim3Buffers,(reference, frame, referenceToFrame, lvl, true));
numCalcResidualCalls[lvl]++;
if(useAffineLightningEstimation)
{
affineEstimation_a = affineEstimation_a_lastIt;
affineEstimation_b = affineEstimation_b_lastIt;
}
float LM_lambda = settings.lambdaInitial[lvl];
warp_update_up_to_date = false;
for(int iteration=0; iteration < settings.maxItsPerLvl[lvl]; iteration++)
{
// calculate LS System, result is saved in ls.
callOptimized(calcSim3LGS,(ls7));
warp_update_up_to_date = true;
numCalcWarpUpdateCalls[lvl]++;
iterationNumber = iteration;
int incTry=0;
while(true)
{
// solve LS system with current lambda
Vector7 b = - ls7.b / ls7.num_constraints;
Matrix7x7 A = ls7.A / ls7.num_constraints;
for(int i=0;i<7;i++) A(i,i) *= 1+LM_lambda;
Vector7 inc = A.ldlt().solve(b);
incTry++;
float absInc = inc.dot(inc);
if(!(absInc >= 0 && absInc < 1))
{
// ERROR tracking diverged.
lastSim3Hessian.setZero();
return Sim3();
}
// apply increment. pretty sure this way round is correct, but hard to test.
Sim3 new_referenceToFrame =Sim3::exp(inc.cast<sophusType>()) * referenceToFrame;
//Sim3 new_referenceToFrame = referenceToFrame * Sim3::exp((inc));
// re-evaluate residual
callOptimized(calcSim3Buffers,(reference, frame, new_referenceToFrame, lvl));
if(buf_warped_size < 0.5 * MIN_GOODPERALL_PIXEL_ABSMIN * (_imgSize.width>>lvl)*(_imgSize.height>>lvl) || buf_warped_size < 10)
{
diverged = true;
return Sim3();
}
Sim3ResidualStruct error = callOptimized(calcSim3WeightsAndResidual,(new_referenceToFrame));
if(plotSim3TrackingIterationInfo) callOptimized(calcSim3Buffers,(reference, frame, new_referenceToFrame, lvl, true));
numCalcResidualCalls[lvl]++;
// accept inc?
if(error.mean < lastErr.mean)
{
// accept inc
referenceToFrame = new_referenceToFrame;
warp_update_up_to_date = false;
if(useAffineLightningEstimation)
{
affineEstimation_a = affineEstimation_a_lastIt;
affineEstimation_b = affineEstimation_b_lastIt;
}
// debug output
LOGF_IF(DEBUG, Conf().print.trackingIterationInfo, "(%d-%d): ACCEPTED increment of %f with lambda %.1f, residual: %f -> %f\n",
lvl,iteration, sqrt(inc.dot(inc)), LM_lambda, lastErr.mean, error.mean);
LOGF_IF(DEBUG, Conf().print.trackingIterationInfo, " p=%.4f %.4f %.4f %.4f %.4f %.4f %.4f\n",
referenceToFrame.log()[0],referenceToFrame.log()[1],referenceToFrame.log()[2],
referenceToFrame.log()[3],referenceToFrame.log()[4],referenceToFrame.log()[5],
referenceToFrame.log()[6]);
// converged?
if(error.mean / lastErr.mean > settings.convergenceEps[lvl])
{
LOGF_IF(DEBUG, Conf().print.trackingIterationInfo,"(%d-%d): FINISHED pyramid level (last residual reduction too small).\n",
lvl,iteration);
iteration = settings.maxItsPerLvl[lvl];
}
finalResidual = lastErr = error;
if(LM_lambda <= 0.2)
LM_lambda = 0;
else
LM_lambda *= settings.lambdaSuccessFac;
break;
}
else
{
LOGF_IF(DEBUG,Conf().print.trackingIterationInfo,"(%d-%d): REJECTED increment of %f with lambda %.1f, (residual: %f -> %f)\n",
lvl,iteration, sqrt(inc.dot(inc)), LM_lambda, lastErr.mean, error.mean);
if(!(inc.dot(inc) > settings.stepSizeMin[lvl]))
{
LOGF_IF(DEBUG,Conf().print.trackingIterationInfo,"(%d-%d): FINISHED pyramid level (stepsize too small).\n",
lvl,iteration);
iteration = settings.maxItsPerLvl[lvl];
break;
}
if(LM_lambda == 0)
LM_lambda = 0.2;
else
LM_lambda *= std::pow(settings.lambdaFailFac, incTry);
}
}
}
}
if(Conf().print.trackingIterationInfo)
{
printf("Tracking: ");
for(int lvl=PYRAMID_LEVELS-1;lvl >= 0;lvl--)
{
printf("lvl %d: %d (%d); ",
lvl,
numCalcResidualCalls[lvl],
numCalcWarpUpdateCalls[lvl]);
}
printf("\n");
printf("pOld = %.5f %.5f %.5f %.5f %.5f %.5f %.5f\n",
frameToReference_initialEstimate.inverse().log()[0],frameToReference_initialEstimate.inverse().log()[1],frameToReference_initialEstimate.inverse().log()[2],
frameToReference_initialEstimate.inverse().log()[3],frameToReference_initialEstimate.inverse().log()[4],frameToReference_initialEstimate.inverse().log()[5],
frameToReference_initialEstimate.inverse().log()[6]);
printf("pNew = %.5f %.5f %.5f %.5f %.5f %.5f %.5f\n",
referenceToFrame.log()[0],referenceToFrame.log()[1],referenceToFrame.log()[2],
referenceToFrame.log()[3],referenceToFrame.log()[4],referenceToFrame.log()[5],
referenceToFrame.log()[6]);
printf("final res mean: %f meanD %f, meanP %f\n", finalResidual.mean, finalResidual.meanD, finalResidual.meanP);
}
// Make sure that there is a warp update at the final position to get the correct information matrix
if (!warp_update_up_to_date)
{
reference->makePointCloud(finalLevel);
callOptimized(calcSim3Buffers,(reference, frame, referenceToFrame, finalLevel));
finalResidual = callOptimized(calcSim3WeightsAndResidual,(referenceToFrame));
callOptimized(calcSim3LGS,(ls7));
}
lastSim3Hessian = ls7.A;
if(referenceToFrame.scale() <= 0 )
{
diverged = true;
return Sim3();
}
lastResidual = finalResidual.mean;
lastDepthResidual = finalResidual.meanD;
lastPhotometricResidual = finalResidual.meanP;
return referenceToFrame.inverse();
}
#if defined(ENABLE_SSE)
void Sim3Tracker::calcSim3BuffersSSE(
const std::shared_ptr<TrackingReference> &reference,
const std::shared_ptr<Frame> &frame,
const Sim3& referenceToFrame,
int level, bool plotWeights)
{
calcSim3Buffers(
reference,
frame,
referenceToFrame,
level, plotWeights);
}
#endif
#if defined(ENABLE_NEON)
void Sim3Tracker::calcSim3BuffersNEON(
const std::shared_ptr<TrackingReference> &reference,
const std::shared_ptr<Frame> &frame,
const Sim3& referenceToFrame,
int level, bool plotWeights)
{
calcSim3Buffers(
reference,
frame,
referenceToFrame,
level, plotWeights);
}
#endif
void Sim3Tracker::calcSim3Buffers(
const std::shared_ptr<TrackingReference> &reference,
const std::shared_ptr<Frame> &frame,
const Sim3& referenceToFrame,
int level, bool plotWeights)
{
if(plotSim3TrackingIterationInfo)
{
cv::Vec3b col = cv::Vec3b(255,170,168);
fillCvMat(&_debugImages.debugImageResiduals,col);
fillCvMat(&_debugImages.debugImageOldImageSource,col);
fillCvMat(&_debugImages.debugImageOldImageWarped,col);
fillCvMat(&_debugImages.debugImageDepthResiduals,col);
}
if(plotWeights && plotSim3TrackingIterationInfo)
{
cv::Vec3b col = cv::Vec3b(255,170,168);
fillCvMat(&_debugImages.debugImageHuberWeight,col);
fillCvMat(&_debugImages.debugImageWeightD,col);
fillCvMat(&_debugImages.debugImageWeightP,col);
fillCvMat(&_debugImages.debugImageWeightedResP,col);
fillCvMat(&_debugImages.debugImageWeightedResD,col);
}
// get static values
int w = frame->width(level);
int h = frame->height(level);
Eigen::Matrix3f KLvl = frame->K(level);
float fx_l = KLvl(0,0);
float fy_l = KLvl(1,1);
float cx_l = KLvl(0,2);
float cy_l = KLvl(1,2);
Eigen::Matrix3f rotMat = referenceToFrame.rxso3().matrix().cast<float>();
Eigen::Matrix3f rotMatUnscaled = referenceToFrame.rotationMatrix().cast<float>();
Eigen::Vector3f transVec = referenceToFrame.translation().cast<float>();
// Calculate rotation around optical axis for rotating source frame gradients
Eigen::Vector3f forwardVector(0, 0, -1);
Eigen::Vector3f rotatedForwardVector = rotMatUnscaled * forwardVector;
Eigen::Quaternionf shortestBackRotation;
shortestBackRotation.setFromTwoVectors(rotatedForwardVector, forwardVector);
Eigen::Matrix3f rollMat = shortestBackRotation.toRotationMatrix() * rotMatUnscaled;
float xRoll0 = rollMat(0, 0);
float xRoll1 = rollMat(0, 1);
float yRoll0 = rollMat(1, 0);
float yRoll1 = rollMat(1, 1);
const Eigen::Vector3f* refPoint_max = reference->posData[level] + reference->numData[level];
const Eigen::Vector3f* refPoint = reference->posData[level];
const Eigen::Vector2f* refColVar = reference->colorAndVarData[level];
const Eigen::Vector2f* refGrad = reference->gradData[level];
const float* frame_idepth = frame->idepth(level);
const float* frame_idepthVar = frame->idepthVar(level);
const Eigen::Vector4f* frame_intensityAndGradients = frame->gradients(level);
float sxx=0,syy=0,sx=0,sy=0,sw=0;
float usageCount = 0;
int idx=0;
for(;refPoint<refPoint_max; refPoint++, refGrad++, refColVar++)
{
Eigen::Vector3f Wxp = rotMat * (*refPoint) + transVec;
float u_new = (Wxp[0]/Wxp[2])*fx_l + cx_l;
float v_new = (Wxp[1]/Wxp[2])*fy_l + cy_l;
// step 1a: coordinates have to be in image:
// (inverse test to exclude NANs)
if(!(u_new > 1 && v_new > 1 && u_new < w-2 && v_new < h-2))
continue;
*(buf_warped_x+idx) = Wxp(0);
*(buf_warped_y+idx) = Wxp(1);
*(buf_warped_z+idx) = Wxp(2);
Eigen::Vector3f resInterp = getInterpolatedElement43(frame_intensityAndGradients, u_new, v_new, w);
// save values
#if USE_ESM_TRACKING == 1
// get rotated gradient of point
float rotatedGradX = xRoll0 * (*refGrad)[0] + xRoll1 * (*refGrad)[1];
float rotatedGradY = yRoll0 * (*refGrad)[0] + yRoll1 * (*refGrad)[1];
*(buf_warped_dx+idx) = fx_l * 0.5f * (resInterp[0] + rotatedGradX);
*(buf_warped_dy+idx) = fy_l * 0.5f * (resInterp[1] + rotatedGradY);
#else
*(buf_warped_dx+idx) = fx_l * resInterp[0];
*(buf_warped_dy+idx) = fy_l * resInterp[1];
#endif
float c1 = affineEstimation_a * (*refColVar)[0] + affineEstimation_b;
float c2 = resInterp[2];
float residual_p = c1 - c2;
float weight = fabsf(residual_p) < 2.0f ? 1 : 2.0f / fabsf(residual_p);
sxx += c1*c1*weight;
syy += c2*c2*weight;
sx += c1*weight;
sy += c2*weight;
sw += weight;
*(buf_warped_residual+idx) = residual_p;
*(buf_idepthVar+idx) = (*refColVar)[1];
// new (only for Sim3):
int idx_rounded = (int)(u_new+0.5f) + w*(int)(v_new+0.5f);
float var_frameDepth = frame_idepthVar[idx_rounded];
float ref_idepth = 1.0f / Wxp[2];
*(buf_d+idx) = 1.0f / (*refPoint)[2];
if(var_frameDepth > 0)
{
float residual_d = ref_idepth - frame_idepth[idx_rounded];
*(buf_residual_d+idx) = residual_d;
*(buf_warped_idepthVar+idx) = var_frameDepth;
}
else
{
*(buf_residual_d+idx) = -1;
*(buf_warped_idepthVar+idx) = -1;
}
// DEBUG STUFF
if(plotSim3TrackingIterationInfo)
{
// for debug plot only: find x,y again.
// horribly inefficient, but who cares at this point...
int width = _imgSize.width;
Eigen::Vector3f point = KLvl * (*refPoint);
int x = point[0] / point[2] + 0.5f;
int y = point[1] / point[2] + 0.5f;
setPixelInCvMat(&_debugImages.debugImageOldImageSource,getGrayCvPixel((float)resInterp[2]),u_new+0.5,v_new+0.5,(width/w));
setPixelInCvMat(&_debugImages.debugImageOldImageWarped,getGrayCvPixel((float)resInterp[2]),x,y,(width/w));
setPixelInCvMat(&_debugImages.debugImageResiduals,getGrayCvPixel(residual_p+128),x,y,(width/w));
if(*(buf_warped_idepthVar+idx) >= 0)
{
setPixelInCvMat(&_debugImages.debugImageDepthResiduals,getGrayCvPixel(128 + 800 * *(buf_residual_d+idx)),x,y,(width/w));
if(plotWeights)
{
setPixelInCvMat(&_debugImages.debugImageWeightD,getGrayCvPixel(255 * (1/60.0f) * sqrtf(*(buf_weight_VarD+idx))),x,y,(width/w));
setPixelInCvMat(&_debugImages.debugImageWeightedResD,getGrayCvPixel(128 + (128/5.0f) * sqrtf(*(buf_weight_VarD+idx)) * *(buf_residual_d+idx)),x,y,(width/w));
}
}
if(plotWeights)
{
setPixelInCvMat(&_debugImages.debugImageWeightP,getGrayCvPixel(255 * 4 * sqrtf(*(buf_weight_VarP+idx))),x,y,(width/w));
setPixelInCvMat(&_debugImages.debugImageHuberWeight,getGrayCvPixel(255 * *(buf_weight_Huber+idx)),x,y,(width/w));
setPixelInCvMat(&_debugImages.debugImageWeightedResP,getGrayCvPixel(128 + (128/5.0f) * sqrtf(*(buf_weight_VarP+idx)) * *(buf_warped_residual+idx)),x,y,(width/w));
}
}
idx++;
float depthChange = (*refPoint)[2] / Wxp[2];
usageCount += depthChange < 1 ? depthChange : 1;
}
buf_warped_size = idx;
pointUsage = usageCount / (float)reference->numData[level];
affineEstimation_a_lastIt = sqrtf((syy - sy*sy/sw) / (sxx - sx*sx/sw));
affineEstimation_b_lastIt = (sy - affineEstimation_a_lastIt*sx)/sw;
if(plotSim3TrackingIterationInfo)
{
Util::displayImage( "P Residuals", _debugImages.debugImageResiduals );
Util::displayImage( "D Residuals", _debugImages.debugImageDepthResiduals );
if(plotWeights)
{
Util::displayImage( "Huber Weights", _debugImages.debugImageHuberWeight );
Util::displayImage( "DV Weights", _debugImages.debugImageWeightD );
Util::displayImage( "IV Weights", _debugImages.debugImageWeightP );
Util::displayImage( "WP Res", _debugImages.debugImageWeightedResP );
Util::displayImage( "WD Res", _debugImages.debugImageWeightedResD );
}
}
}
#if defined(ENABLE_SSE)
Sim3ResidualStruct Sim3Tracker::calcSim3WeightsAndResidualSSE(
const Sim3& referenceToFrame)
{
const __m128 txs = _mm_set1_ps((float)(referenceToFrame.translation()[0]));
const __m128 tys = _mm_set1_ps((float)(referenceToFrame.translation()[1]));
const __m128 tzs = _mm_set1_ps((float)(referenceToFrame.translation()[2]));
const __m128 zeros = _mm_set1_ps(0.0f);
const __m128 ones = _mm_set1_ps(1.0f);
const __m128 depthVarFacs = _mm_set1_ps((float)settings.var_weight);// float depthVarFac = var_weight; // the depth var is over-confident. this is a constant multiplier to remedy that.... HACK
const __m128 sigma_i2s = _mm_set1_ps((float)cameraPixelNoise2);
const __m128 huber_ress = _mm_set1_ps((float)(settings.huber_d));
__m128 sumResP = zeros;
__m128 sumResD = zeros;
__m128 numTermsD = zeros;
Sim3ResidualStruct sumRes;
memset(&sumRes, 0, sizeof(Sim3ResidualStruct));
for(int i=0;i<buf_warped_size-3;i+=4)
{
// calc dw/dd:
//float g0 = (tx * pz - tz * px) / (pz*pz*d);
__m128 pzs = _mm_load_ps(buf_warped_z+i); // z'
__m128 pz2ds = _mm_rcp_ps(_mm_mul_ps(_mm_mul_ps(pzs, pzs), _mm_load_ps(buf_d+i))); // 1 / (z' * z' * d)
__m128 g0s = _mm_sub_ps(_mm_mul_ps(pzs, txs), _mm_mul_ps(_mm_load_ps(buf_warped_x+i), tzs));
g0s = _mm_mul_ps(g0s,pz2ds);
//float g1 = (ty * pz - tz * py) / (pz*pz*d);
__m128 g1s = _mm_sub_ps(_mm_mul_ps(pzs, tys), _mm_mul_ps(_mm_load_ps(buf_warped_y+i), tzs));
g1s = _mm_mul_ps(g1s,pz2ds);
//float g2 = (pz - tz) / (pz*pz*d);
__m128 g2s = _mm_sub_ps(pzs, tzs);
g2s = _mm_mul_ps(g2s,pz2ds);
// calc w_p
// float drpdd = gx * g0 + gy * g1; // ommitting the minus
__m128 drpdds = _mm_add_ps(
_mm_mul_ps(g0s, _mm_load_ps(buf_warped_dx+i)),
_mm_mul_ps(g1s, _mm_load_ps(buf_warped_dy+i)));
//float w_p = 1.0f / (sigma_i2 + s * drpdd * drpdd);
__m128 w_ps = _mm_rcp_ps(_mm_add_ps(sigma_i2s,
_mm_mul_ps(drpdds,
_mm_mul_ps(drpdds,
_mm_mul_ps(depthVarFacs,
_mm_load_ps(buf_idepthVar+i))))));
//float w_d = 1.0f / (sv + g2*g2*s);
__m128 w_ds = _mm_rcp_ps(_mm_add_ps(_mm_load_ps(buf_warped_idepthVar+i),
_mm_mul_ps(g2s,
_mm_mul_ps(g2s,
_mm_mul_ps(depthVarFacs,
_mm_load_ps(buf_idepthVar+i))))));
//float weighted_rp = fabs(rp*sqrtf(w_p));
__m128 weighted_rps = _mm_mul_ps(_mm_load_ps(buf_warped_residual+i),
_mm_sqrt_ps(w_ps));
weighted_rps = _mm_max_ps(weighted_rps, _mm_sub_ps(zeros,weighted_rps));
//float weighted_rd = fabs(rd*sqrtf(w_d));
__m128 weighted_rds = _mm_mul_ps(_mm_load_ps(buf_residual_d+i),
_mm_sqrt_ps(w_ds));
weighted_rds = _mm_max_ps(weighted_rds, _mm_sub_ps(zeros,weighted_rds));
// depthValid = sv > 0
__m128 depthValid = _mm_cmplt_ps(zeros, _mm_load_ps(buf_warped_idepthVar+i)); // bitmask 0xFFFFFFFF for idepth valid, 0x000000 otherwise
// float weighted_abs_res = sv > 0 ? weighted_rd+weighted_rp : weighted_rp;
__m128 weighted_abs_ress = _mm_add_ps(_mm_and_ps(weighted_rds,depthValid), weighted_rps);
//float wh = fabs(weighted_abs_res < huber_res ? 1 : huber_res / weighted_abs_res);
__m128 whs = _mm_cmplt_ps(weighted_abs_ress, huber_ress); // bitmask 0xFFFFFFFF for 1, 0x000000 for huber_res_ponly / weighted_rp
whs = _mm_or_ps(
_mm_and_ps(whs, ones),
_mm_andnot_ps(whs, _mm_mul_ps(huber_ress, _mm_rcp_ps(weighted_abs_ress))));
if(i+3 < buf_warped_size)
{
//if(sv > 0) sumRes.numTermsD++;
numTermsD = _mm_add_ps(numTermsD,
_mm_and_ps(depthValid, ones));
//if(sv > 0) sumRes.sumResD += wh * w_d * rd*rd;
sumResD = _mm_add_ps(sumResD,
_mm_and_ps(depthValid, _mm_mul_ps(whs, _mm_mul_ps(weighted_rds, weighted_rds))));
// sumRes.sumResP += wh * w_p * rp*rp;
sumResP = _mm_add_ps(sumResP,
_mm_mul_ps(whs, _mm_mul_ps(weighted_rps, weighted_rps)));
}
//*(buf_weight_p+i) = wh * w_p;
_mm_store_ps(buf_weight_p+i, _mm_mul_ps(whs, w_ps) );
//if(sv > 0) *(buf_weight_d+i) = wh * w_d; else *(buf_weight_d+i) = 0;
_mm_store_ps(buf_weight_d+i, _mm_and_ps(depthValid, _mm_mul_ps(whs, w_ds)) );
}
sumRes.sumResP = SSEE(sumResP,0) + SSEE(sumResP,1) + SSEE(sumResP,2) + SSEE(sumResP,3);
sumRes.numTermsP = (buf_warped_size >> 2) << 2;
sumRes.sumResD = SSEE(sumResD,0) + SSEE(sumResD,1) + SSEE(sumResD,2) + SSEE(sumResD,3);
sumRes.numTermsD = SSEE(numTermsD,0) + SSEE(numTermsD,1) + SSEE(numTermsD,2) + SSEE(numTermsD,3);
sumRes.mean = (sumRes.sumResD + sumRes.sumResP) / (sumRes.numTermsD + sumRes.numTermsP);
sumRes.meanD = (sumRes.sumResD) / (sumRes.numTermsD);
sumRes.meanP = (sumRes.sumResP) / (sumRes.numTermsP);
return sumRes;
}
#endif
#if defined(ENABLE_NEON)
Sim3ResidualStruct Sim3Tracker::calcSim3WeightsAndResidualNEON(
const Sim3& referenceToFrame)
{
return calcSim3WeightsAndResidual(
referenceToFrame);
}
#endif
Sim3ResidualStruct Sim3Tracker::calcSim3WeightsAndResidual(
const Sim3& referenceToFrame)
{
float tx = referenceToFrame.translation()[0];
float ty = referenceToFrame.translation()[1];
float tz = referenceToFrame.translation()[2];
Sim3ResidualStruct sumRes;
memset(&sumRes, 0, sizeof(Sim3ResidualStruct));
float sum_rd=0, sum_rp=0, sum_wrd=0, sum_wrp=0, sum_wp=0, sum_wd=0, sum_num_d=0, sum_num_p=0;
for(int i=0;i<buf_warped_size;i++)
{
float px = *(buf_warped_x+i); // x'
float py = *(buf_warped_y+i); // y'
float pz = *(buf_warped_z+i); // z'
float d = *(buf_d+i); // d
float rp = *(buf_warped_residual+i); // r_p
float rd = *(buf_residual_d+i); // r_d
float gx = *(buf_warped_dx+i); // \delta_x I
float gy = *(buf_warped_dy+i); // \delta_y I
float s = settings.var_weight * *(buf_idepthVar+i); // \sigma_d^2
float sv = settings.var_weight * *(buf_warped_idepthVar+i); // \sigma_d^2'
// calc dw/dd (first 2 components):
float g0 = (tx * pz - tz * px) / (pz*pz*d);
float g1 = (ty * pz - tz * py) / (pz*pz*d);
float g2 = (pz - tz) / (pz*pz*d);
// calc w_p
float drpdd = gx * g0 + gy * g1; // ommitting the minus
float w_p = 1.0f / (cameraPixelNoise2 + s * drpdd * drpdd);
float w_d = 1.0f / (sv + g2*g2*s);
float weighted_rd = fabs(rd*sqrtf(w_d));
float weighted_rp = fabs(rp*sqrtf(w_p));
float weighted_abs_res = sv > 0 ? weighted_rd+weighted_rp : weighted_rp;
float wh = fabs(weighted_abs_res < settings.huber_d ? 1 : settings.huber_d / weighted_abs_res);
if(sv > 0)
{
sumRes.sumResD += wh * w_d * rd*rd;
sumRes.numTermsD++;
}
sumRes.sumResP += wh * w_p * rp*rp;
sumRes.numTermsP++;
if(plotSim3TrackingIterationInfo)
{
// for debug
*(buf_weight_Huber+i) = wh;
*(buf_weight_VarP+i) = w_p;
*(buf_weight_VarD+i) = sv > 0 ? w_d : 0;
sum_rp += fabs(rp);
sum_wrp += fabs(weighted_rp);
sum_wp += sqrtf(w_p);
sum_num_p++;
if(sv > 0)
{
sum_rd += fabs(weighted_rd);
sum_wrd += fabs(rd);
sum_wd += sqrtf(w_d);
sum_num_d++;
}
}
*(buf_weight_p+i) = wh * w_p;
if(sv > 0)
*(buf_weight_d+i) = wh * w_d;
else
*(buf_weight_d+i) = 0;
}
sumRes.mean = (sumRes.sumResD + sumRes.sumResP) / (sumRes.numTermsD + sumRes.numTermsP);
sumRes.meanD = (sumRes.sumResD) / (sumRes.numTermsD);
sumRes.meanP = (sumRes.sumResP) / (sumRes.numTermsP);
if(plotSim3TrackingIterationInfo)
{
printf("rd %f, rp %f, wrd %f, wrp %f, wd %f, wp %f\n ",
sum_rd/sum_num_d,
sum_rp/sum_num_p,
sum_wrd/sum_num_d,
sum_wrp/sum_num_p,
sum_wd/sum_num_d,
sum_wp/sum_num_p);
}
return sumRes;
}
#if defined(ENABLE_SSE)
void Sim3Tracker::calcSim3LGSSSE(LGS7 &ls7)
{
LGS4 ls4;
LGS6 ls6;
ls6.initialize( _imgSize.area() );
ls4.initialize( _imgSize.area() );
const __m128 zeros = _mm_set1_ps(0.0f);
for(int i=0;i<buf_warped_size-3;i+=4)
{
__m128 val1, val2, val3, val4;
__m128 J41, J42, J43, J44;
__m128 J61, J62, J63, J64, J65, J66;
// redefine pz
__m128 pz = _mm_load_ps(buf_warped_z+i);
pz = _mm_rcp_ps(pz); // pz := 1/z
//v4[3] = z;
J44 = pz;
__m128 gx = _mm_load_ps(buf_warped_dx+i);
val1 = _mm_mul_ps(pz, gx); // gx / z => SET [0]
//v[0] = z*gx;
J61 = val1;
__m128 gy = _mm_load_ps(buf_warped_dy+i);
val1 = _mm_mul_ps(pz, gy); // gy / z => SET [1]
//v[1] = z*gy;
J62 = val1;
__m128 px = _mm_load_ps(buf_warped_x+i);
val1 = _mm_mul_ps(px, gy);
val1 = _mm_mul_ps(val1, pz); // px * gy * z
__m128 py = _mm_load_ps(buf_warped_y+i);
val2 = _mm_mul_ps(py, gx);
val2 = _mm_mul_ps(val2, pz); // py * gx * z
val1 = _mm_sub_ps(val1, val2); // px * gy * z - py * gx * z => SET [5]
//v[5] = -py * z * gx + px * z * gy;
J66 = val1;
// redefine pz
pz = _mm_mul_ps(pz,pz); // pz := 1/(z*z)
//v4[0] = z_sqr;
J41 = pz;
//v4[1] = z_sqr * py;
__m128 pypz = _mm_mul_ps(pz, py);
J42 = pypz;
//v4[2] = -z_sqr * px;
__m128 mpxpz = _mm_sub_ps(zeros,_mm_mul_ps(pz, px));
J43 = mpxpz;
// will use these for the following calculations a lot.
val1 = _mm_mul_ps(px, gx);
val1 = _mm_mul_ps(val1, pz); // px * z_sqr * gx
val2 = _mm_mul_ps(py, gy);
val2 = _mm_mul_ps(val2, pz); // py * z_sqr * gy
val3 = _mm_add_ps(val1, val2);
val3 = _mm_sub_ps(zeros,val3); //-px * z_sqr * gx -py * z_sqr * gy
//v[2] = -px * z_sqr * gx -py * z_sqr * gy; => SET [2]
J63 = val3;
val3 = _mm_mul_ps(val1, py); // px * z_sqr * gx * py
val4 = _mm_add_ps(gy, val3); // gy + px * z_sqr * gx * py
val3 = _mm_mul_ps(val2, py); // py * py * z_sqr * gy
val4 = _mm_add_ps(val3, val4); // gy + px * z_sqr * gx * py + py * py * z_sqr * gy
val4 = _mm_sub_ps(zeros,val4); //val4 = -val4.
//v[3] = -px * py * z_sqr * gx +
// -py * py * z_sqr * gy +
// -gy; => SET [3]
J64 = val4;
val3 = _mm_mul_ps(val1, px); // px * px * z_sqr * gx
val4 = _mm_add_ps(gx, val3); // gx + px * px * z_sqr * gx
val3 = _mm_mul_ps(val2, px); // px * py * z_sqr * gy
val4 = _mm_add_ps(val4, val3); // gx + px * px * z_sqr * gx + px * py * z_sqr * gy
//v[4] = px * px * z_sqr * gx +
// px * py * z_sqr * gy +
// gx; => SET [4]
J65 = val4;
if(i+3<buf_warped_size)
{
ls4.updateSSE(J41, J42, J43, J44, _mm_load_ps(buf_residual_d+i), _mm_load_ps(buf_weight_d+i));
ls6.updateSSE(J61, J62, J63, J64, J65, J66, _mm_load_ps(buf_warped_residual+i), _mm_load_ps(buf_weight_p+i));
}
else
{
for(int k=0;i+k<buf_warped_size;k++)
{
Vector6 v6;
v6 << SSEE(J61,k),SSEE(J62,k),SSEE(J63,k),SSEE(J64,k),SSEE(J65,k),SSEE(J66,k);
Vector4 v4;
v4 << SSEE(J41,k),SSEE(J42,k),SSEE(J43,k),SSEE(J44,k);
ls4.update(v4, *(buf_residual_d+i+k), *(buf_weight_d+i+k));
ls6.update(v6, *(buf_warped_residual+i+k), *(buf_weight_p+i+k));
}
}
}
ls4.finishNoDivide();
ls6.finishNoDivide();
ls7.initializeFrom(ls6, ls4);
}
#endif
#if defined(ENABLE_NEON)
void Sim3Tracker::calcSim3LGSNEON(LGS7 &ls7)
{
calcSim3LGS(ls7);
}
#endif
void Sim3Tracker::calcSim3LGS(LGS7 &ls7)
{
LGS4 ls4;
LGS6 ls6;
ls6.initialize(_imgSize.area());
ls4.initialize(_imgSize.area());
for(int i=0;i<buf_warped_size;i++)
{
float px = *(buf_warped_x+i); // x'
float py = *(buf_warped_y+i); // y'
float pz = *(buf_warped_z+i); // z'
float wp = *(buf_weight_p+i); // wr/wp
float wd = *(buf_weight_d+i); // wr/wd
float rp = *(buf_warped_residual+i); // r_p
float rd = *(buf_residual_d+i); // r_d
float gx = *(buf_warped_dx+i); // \delta_x I
float gy = *(buf_warped_dy+i); // \delta_y I
float z = 1.0f / pz;
float z_sqr = 1.0f / (pz*pz);
Vector6 v;
Vector4 v4;
v[0] = z*gx + 0;
v[1] = 0 + z*gy;
v[2] = (-px * z_sqr) * gx +
(-py * z_sqr) * gy;
v[3] = (-px * py * z_sqr) * gx +
(-(1.0 + py * py * z_sqr)) * gy;
v[4] = (1.0 + px * px * z_sqr) * gx +
(px * py * z_sqr) * gy;
v[5] = (-py * z) * gx +
(px * z) * gy;
// new:
v4[0] = z_sqr;
v4[1] = z_sqr * py;
v4[2] = -z_sqr * px;
v4[3] = z;
ls6.update(v, rp, wp); // Jac = - v
ls4.update(v4, rd, wd); // Jac = v4
}
ls4.finishNoDivide();
ls6.finishNoDivide();
ls7.initializeFrom(ls6, ls4);
}
void Sim3Tracker::calcResidualAndBuffers_debugStart()
{
if(plotTrackingIterationInfo || saveAllTrackingStagesInternal)
{
int other = saveAllTrackingStagesInternal ? 255 : 0;
fillCvMat(&_debugImages.debugImageResiduals,cv::Vec3b(other,other,255));
fillCvMat(&_debugImages.debugImageExternalWeights,cv::Vec3b(other,other,255));
fillCvMat(&_debugImages.debugImageWeights,cv::Vec3b(other,other,255));
fillCvMat(&_debugImages.debugImageOldImageSource,cv::Vec3b(other,other,255));
fillCvMat(&_debugImages.debugImageOldImageWarped,cv::Vec3b(other,other,255));
fillCvMat(&_debugImages.debugImageScaleEstimation,cv::Vec3b(255,other,other));
fillCvMat(&_debugImages.debugImageDepthResiduals,cv::Vec3b(other,other,255));
}
}
void Sim3Tracker::calcResidualAndBuffers_debugFinish(int w)
{
if(plotTrackingIterationInfo)
{
Util::displayImage( "Weights", _debugImages.debugImageWeights );
Util::displayImage( "second_frame", _debugImages.debugImageSecondFrame );
Util::displayImage( "Intensities of second_frame at transformed positions", _debugImages.debugImageOldImageSource );
Util::displayImage( "Intensities of second_frame at pointcloud in first_frame", _debugImages.debugImageOldImageWarped );
Util::displayImage( "Residuals", _debugImages.debugImageResiduals );
Util::displayImage( "DepthVar Weights", _debugImages.debugImageExternalWeights );
Util::displayImage( "Depth Residuals", _debugImages.debugImageDepthResiduals );
// AMM: Disable this functionality for now.
// wait for key and handle it
// bool looping = true;
// while(looping)
// {
// int k = Util::waitKey(1);
// if(k == -1)
// {
// if(autoRunWithinFrame)
// break;
// else
// continue;
// }
//
// char key = k;
// if(key == ' ')
// looping = false;
// else
// handleKey(k);
// }
}
if(saveAllTrackingStagesInternal)
{
char charbuf[500];
snprintf(charbuf,500,"save/%sresidual-%d-%d.png",packagePath.c_str(),w,iterationNumber);
cv::imwrite(charbuf,_debugImages.debugImageResiduals);
snprintf(charbuf,500,"save/%swarped-%d-%d.png",packagePath.c_str(),w,iterationNumber);
cv::imwrite(charbuf,_debugImages.debugImageOldImageWarped);
snprintf(charbuf,500,"save/%sweights-%d-%d.png",packagePath.c_str(),w,iterationNumber);
cv::imwrite(charbuf,_debugImages.debugImageWeights);
printf("saved three images for lvl %d, iteration %d\n",w,iterationNumber);
}
}
//=== Sim3TrackerDebugImages ==
Sim3TrackerDebugImages::Sim3TrackerDebugImages( const ImageSize &imgSize )
: debugImageWeights(imgSize.cvSize(),CV_8UC3),
debugImageResiduals(imgSize.cvSize(),CV_8UC3),
debugImageSecondFrame(imgSize.cvSize(),CV_8UC3),
debugImageOldImageWarped(imgSize.cvSize(),CV_8UC3),
debugImageOldImageSource(imgSize.cvSize(),CV_8UC3),
debugImageExternalWeights(imgSize.cvSize(),CV_8UC3),
debugImageDepthResiduals(imgSize.cvSize(),CV_8UC3),
debugImageScaleEstimation(imgSize.cvSize(),CV_8UC3),
debugImageHuberWeight(imgSize.cvSize(),CV_8UC3),
debugImageWeightD(imgSize.cvSize(),CV_8UC3),
debugImageWeightP(imgSize.cvSize(),CV_8UC3),
debugImageWeightedResP(imgSize.cvSize(),CV_8UC3),
debugImageWeightedResD(imgSize.cvSize(),CV_8UC3)
{
}
}