注:本文来自计算机视觉life独家课程 视觉三维重建:原理剖析+逐行代码详解 中的课件及注释代码。
Stereo Processing by Semi-global Matching and Mutual Information
SURE: Photogrammetric Surface Reconstruction from Imagery
(左右滑动可以查看完整公式)
SGM能量方程如下公式:
这是一个二维优化问题,为了提高优化效率,SGM将问题转化为使用一维路径聚合的方式来近似二维最优,提高效率的同时也保证了效果。
(左右滑动可以查看完整公式)
式中,$C(\mathbf{p}, D_\mathbf{p})$ 为匹配代价,第二项和第三项是平滑项,我们期望的是视差是连续的。所以如果当前像素与邻域像素视差相差比较小(1 个像素),我们会给一个比较小的惩罚 $P_1$;如果大于一个像素,则给一个大的惩罚 $P_2$。但是实际场景中肯定会有一些视差不连续、相差比较大的区域(比如:前景和背景),如图示:
为了处理这种情况,我们对惩罚项 $P_2$ 进行调整:
(左右滑动可以查看完整公式)
思考:为什么这样调整第二个惩罚项?
答案:如果像素和它的邻域像素亮度差很大,那么该像素很可能是位于视差非连续区域,则一定程度上允许其和邻域像素的视差差值超过1个像素,对于超过1个像素的惩罚力度就适当减小一点。
具体求解过程中,SGM路径代价聚合的思路:
将像素所有视差下的匹配代价进行像素周围所有路径上的一维聚合得到路径下的路径代价值,然后将所有路径代价值相加得到该像素聚合后的匹配代价值。
一维聚合路径示意图(图中各个箭头方向就是聚合各个路径):
某一维路径代价计算公式如下:
(左右滑动可以查看完整公式)
式中,$L_r(\mathbf{p},d)$ 表示沿方向 $r$ 聚合到像素 $\mathbf{p}$、视差为 $d$ 时的路径代价;末尾减去 $\min_k L_r(\mathbf{p}-r,k)$ 是为了防止路径代价随聚合不断增大而溢出,且该项对同一像素的所有视差都相同,不会改变最优视差的选取。
则最终代价值(所有路径之和):
代价聚合后, 每个像素直接找聚合代价最小对应的视差值就是我们所要求的视差值。
每个像素的视差范围计算方法:
如果当前像素的视差值无效,则搜索窗口大小 $W=31$,反之 $W=7$。以当前像素为中心,在 $W\times W$ 窗口内搜索所有有效视差并存入集合 $D$ 中;记 $D$ 的大小为 $|D|$,其中值为 $d_{med}$,最大值为 $d_{max}$,最小值为 $d_{min}$:
调整如下:
当 $d < d_{min}(\mathbf{x}_b - r_i)$ 时(即当前视差低于路径上前一像素 $\mathbf{x}_b - r_i$ 的搜索范围下界),取前一像素范围下界处的路径代价参与聚合;同理,当 $d > d_{max}(\mathbf{x}_b - r_i)$ 时取上界处的路径代价。(注:原公式在排版转换中损坏,此处按 tSGM 论文 2.2.2 节语义重建,请对照原文确认。)
代码框架
部分代码实现
(左右滑动可以查看完整代码)
// References: SGM : "Stereo Processing by Semiglobal Matching and Mutual Information"
//             tSGM: "SURE: Photogrammetric surface reconstruction from imagery"
// This implements both classic SGM and tSGM; the main difference is the disparity search
// range used during cost aggregation (hence the aggregated costs differ): classic SGM uses
// one global disparity range shared by all pixels, while tSGM estimates a per-pixel range,
// which greatly reduces the amount of cost-aggregation work.
//
// Estimate disparity-maps between image idxImage and each of its best-scoring neighbor
// views (at most numNeighbors pairs); each result is exported to per-pair files
// (.dimap / .png / .ply). When minResolution is non-zero a coarse-to-fine pyramid is
// used and the tSGM variant is selected.
void SemiGlobalMatcher::Match(const Scene& scene, IIndex idxImage, IIndex numNeighbors, unsigned minResolution)
{
const Image& leftImage = scene.images[idxImage];
// minimum acceptable neighbor score, relative to the best neighbor's score
const float fMinScore(MAXF(leftImage.neighbors.front().score* (OPTDENSE::fViewMinScoreRatio*0.1f), OPTDENSE::fViewMinScore));
FOREACH(idxNeighbor, leftImage.neighbors) {
const ViewScore& neighbor = leftImage.neighbors[idxNeighbor];
// exclude neighbors that over the limit or too small score
// (neighbors are assumed sorted by score, so the loop can stop at the first rejection)
ASSERT(scene.images[neighbor.idx.ID].IsValid());
if ((numNeighbors && idxNeighbor >= numNeighbors) ||
(neighbor.score < fMinScore))
break;
// check if the disparity-map was already estimated for the same image pairs
// (in either left-right or right-left order); if so, skip this pair
const Image& rightImage = scene.images[neighbor.idx.ID];
const String pairName(MAKE_PATH(String::FormatString("%04u_%04u", leftImage.ID, rightImage.ID)));
if (File::isPresent((pairName+".dimap").c_str()) || File::isPresent(MAKE_PATH(String::FormatString("%04u_%04u.dimap", rightImage.ID, leftImage.ID))))
continue;
TD_TIMER_STARTD();
IndexArr points;
Matrix3x3 H; Matrix4x4 Q;
ViewData leftData, rightData;
MaskMap leftMaskMap, rightMaskMap; {
// fetch pairs of corresponding image points
//TODO: use precomputed points from SelectViews()
//!!! collect the sparse 3D points visible in both the left and the right image;
// this could be precomputed once to avoid redundant work per pair
Point3fArr leftPoints, rightPoints;
FOREACH(idxPoint, scene.pointcloud.points) {
const PointCloud::ViewArr& views = scene.pointcloud.pointViews[idxPoint];
if (views.FindFirst(idxImage) != PointCloud::ViewArr::NO_INDEX) {
points.push_back((uint32_t)idxPoint);
if (views.FindFirst(neighbor.idx.ID) != PointCloud::ViewArr::NO_INDEX) {
const Point3 X(scene.pointcloud.points[idxPoint]);
leftPoints.emplace_back(leftImage.camera.TransformPointW2I3(X));
rightPoints.emplace_back(rightImage.camera.TransformPointW2I3(X));
}
}
}
// stereo-rectify image pair
// Step 3_2_2_1 epipolar rectification: H maps pixel coordinates from the original image
// to the rectified image; Q maps [x' y' disparity 1] on the rectified image back to
// [x*z y*z z 1]*w in the original image frame
if (!Image::StereoRectifyImages(leftImage, rightImage, leftPoints, rightPoints, leftData.imageColor, rightData.imageColor, leftMaskMap, rightMaskMap, H, Q))
continue;
ASSERT(leftData.imageColor.size() == rightData.imageColor.size());
}
#ifdef _USE_FILTER_DEMO
// run openCV implementation
const LPCSTR argv[] = {"disparityFiltering", "-algorithm=sgbm", "-max_disparity=160", "-no-downscale"};
disparityFiltering(leftData.imageColor, rightData.imageColor, (int)SizeOfArray(argv), argv);
#endif
// color to gray conversion
// NOTE(review): the extra toGray flags differ for the Census branch — presumably gray
// normalization is disabled for Census; confirm against Image::toGray
#if SGM_SIMILARITY == SGM_SIMILARITY_CENSUS
leftData.imageColor.toGray(leftData.imageGray, cv::COLOR_BGR2GRAY, false, true);
rightData.imageColor.toGray(rightData.imageGray, cv::COLOR_BGR2GRAY, false, true);
#else
leftData.imageColor.toGray(leftData.imageGray, cv::COLOR_BGR2GRAY, true, true);
rightData.imageColor.toGray(rightData.imageGray, cv::COLOR_BGR2GRAY, true, true);
#endif
// compute scale used for the disparity estimation
REAL scale(1);
// presumably computeMaxResolution lowers resolutionLevel so the coarsest level keeps at
// least minResolution pixels — confirm in Image8U::computeMaxResolution
if (minResolution) {
unsigned resolutionLevel(8);
Image8U::computeMaxResolution(leftData.imageGray.width(), leftData.imageGray.height(), resolutionLevel, minResolution);
scale = REAL(1)/MAXF(2,POWI(2,resolutionLevel));
}
// if scale != 1 the pyramidal tSGM variant is used (see the papers referenced above)
const bool tSGM(!ISEQUAL(scale, REAL(1)));
DisparityMap leftDisparityMap, rightDisparityMap; AccumCostMap costMap;
do {
#if 0
// export the intermediate disparity-maps
if (!leftDisparityMap.empty()) {
const REAL _scale(scale*0.5);
Matrix3x3 _H(H); Matrix4x4 _Q(Q);
Image::ScaleStereoRectification(_H, _Q, _scale);
ExportDisparityDataRawFull(String::FormatString("%s_%d.dimap", pairName.c_str(), log2i(ROUND2INT(REAL(1)/_scale))), leftDisparityMap, costMap, Image8U::computeResize(leftImage.GetSize(), _scale), H, _Q, 1);
}
#endif
// initialize
// Step 3_2_2_2 disparity-map initialization for the current pyramid level
const ViewData leftDataLevel(leftData.GetImage(scale));
const ViewData rightDataLevel(rightData.GetImage(scale));
const cv::Size size(leftDataLevel.imageGray.size());
const cv::Size sizeValid(size.width-2*halfWindowSizeX, size.height-2*halfWindowSizeY);
const bool bFirstLevel(leftDisparityMap.empty()); // true only on the first (coarsest) level
if (bFirstLevel) {
// initialize the disparity-map with a rough estimate based on the sparse point-cloud
//TODO: remove DepthData::ViewData dependency
// the depth-map is seeded from the sparse point cloud, same as the PatchMatch initialization
Image leftImageLevel(leftImage.GetImage(scene.platforms, scale*0.5, false));
DepthData::ViewData image;
image.pImageData = &leftImageLevel; // used only for avgDepth
image.image.create(leftImageLevel.GetSize());
image.camera = leftImageLevel.camera;
DepthMap depthMap;
Depth dMin, dMax;
TriangulatePoints2DepthMap(image, scene.pointcloud, points, depthMap, dMin, dMax);
points.Release();
Matrix3x3 H2(H); Matrix4x4 Q2(Q);
Image::ScaleStereoRectification(H2, Q2, scale*0.5);
const cv::Size sizeHalf(Image8U::computeResize(size, 0.5));
// the valid area excludes a border of half the matching window on each side
const cv::Size sizeValidHalf(sizeHalf.width-2*halfWindowSizeX, sizeHalf.height-2*halfWindowSizeY);
leftDisparityMap.create(sizeValidHalf);
// convert depth to disparity; H and Q are described at StereoRectifyImages above
Depth2DisparityMap(depthMap, H2.inv(), Q2.inv(), 1, leftDisparityMap);
// resize masks
cv::resize(leftMaskMap, leftMaskMap, size, 0, 0, cv::INTER_NEAREST);
cv::resize(rightMaskMap, rightMaskMap, size, 0, 0, cv::INTER_NEAREST);
const cv::Rect ROI(halfWindowSizeX,halfWindowSizeY, sizeValid.width,sizeValid.height);
leftMaskMap(ROI).copyTo(leftMaskMap);
rightMaskMap(ROI).copyTo(rightMaskMap);
} else {
// upscale masks
UpscaleMask(leftMaskMap, sizeValid);
UpscaleMask(rightMaskMap, sizeValid);
}
// Step 3_2_2_3 estimate the right-left disparity-map
Index numCosts;
if (tSGM) {
// upscale the disparity-map from the previous level
// and flip the left-right disparities into right-left ones
FlipDirection(leftDisparityMap, rightDisparityMap);
// compute the per-pixel disparity search range
// (see "SURE: Photogrammetric Surface Reconstruction from Imagery", section 2.2.2)
numCosts = Disparity2RangeMap(rightDisparityMap, rightMaskMap, bFirstLevel?11:5, bFirstLevel?33:7);
} else {
// extract global min and max disparities
// classic SGM: every pixel shares the same disparity range, so one global min/max suffices
Range range{std::numeric_limits<Disparity>::max(), std::numeric_limits<Disparity>::min()};
ASSERT(leftDisparityMap.isContinuous());
const Disparity* pd = leftDisparityMap.ptr<const Disparity>();
const Disparity* const pde = pd+leftDisparityMap.area();
do {
const Disparity d(*pd);
if (range.minDisp > d)
range.minDisp = d;
if (range.maxDisp < d)
range.maxDisp = d;
} while (++pd < pde);
// set disparity search range to the global min/max range
//? NOTE(review): the range is roughly doubled and padded by 16 — presumably a safety
// margin around the coarse estimate; confirm the rationale
const Disparity numDisp(range.numDisp()+16);
const Disparity disp(range.minDisp+range.maxDisp);
range.minDisp = disp-numDisp; // = 2*minDisp-16
range.maxDisp = disp+numDisp; // = 2*maxDisp+16
maxNumDisp = range.numDisp();
numCosts = 0;//(size.width-2*halfWindowSizeX, size.height-2*halfWindowSizeY)*(range.maxDisp-range.minDisp)
imagePixels.resize(sizeValid.area());
for (PixelData& pixel: imagePixels) {
pixel.range = range;
pixel.idx = numCosts;
numCosts += maxNumDisp;
}
}
imageCosts.resize(numCosts);
imageAccumCosts.resize(numCosts);
// build the right-left disparity-map
Match(rightDataLevel, leftDataLevel, rightDisparityMap, costMap);
// estimate left-right disparity-map
if (tSGM) {
numCosts = Disparity2RangeMap(leftDisparityMap, leftMaskMap, bFirstLevel?11:5, bFirstLevel?33:7);
imageCosts.resize(numCosts);
imageAccumCosts.resize(numCosts);
} else {
// the two images swap roles, so negate the disparities and swap the range bounds
for (PixelData& pixel: imagePixels) {
const Disparity maxDisp(-pixel.range.minDisp);
pixel.range.minDisp = -pixel.range.maxDisp;
pixel.range.maxDisp = maxDisp;
}
}
// build the left-right disparity-map
Match(leftDataLevel, rightDataLevel, leftDisparityMap, costMap);
// Step 3_2_2_4 check disparity-map cross-consistency
#if 0
if (ISEQUAL(scale, REAL(1))) {
cv::Ptr<cv::ximgproc::DisparityWLSFilter> filter = cv::ximgproc::createDisparityWLSFilterGeneric(true);
const cv::Rect rcValid(halfWindowSizeX,halfWindowSizeY, sizeValid.width,sizeValid.height);
Image32F leftDisparityMap32F, rightDisparityMap32F, filtered32F;
leftDisparityMap.convertTo(leftDisparityMap32F, CV_32F, 16);
rightDisparityMap.convertTo(rightDisparityMap32F, CV_32F, 16);
filter->filter(leftDisparityMap32F, leftData.imageColor(rcValid), filtered32F, rightDisparityMap32F);
filtered32F.convertTo(leftDisparityMap, CV_16S, 1.0/16, 0.5);
} else
#endif
if (bFirstLevel) {
// perform a rigorous filtering of the estimated disparity maps in order to
// estimate the common region or interest and set the validity masks
// Outliers are rejected by the left-right consistency check: the two images swap roles
// and a second disparity-map is computed. Since each disparity value encodes a
// pixel-to-pixel correspondence, the uniqueness constraint requires that a pixel's
// disparity and the disparity of its matching pixel in the other image agree; if the
// two values differ by more than a threshold (typically 1 pixel), the disparity is
// rejected, otherwise it is kept
ConsistencyCrossCheck(leftDisparityMap, rightDisparityMap);
ConsistencyCrossCheck(rightDisparityMap, leftDisparityMap);
// post-process: remove small disparity speckles
cv::filterSpeckles(leftDisparityMap, NO_DISP, OPTDENSE::nSpeckleSize, 5);
cv::filterSpeckles(rightDisparityMap, NO_DISP, OPTDENSE::nSpeckleSize, 5);
ExtractMask(leftDisparityMap, leftMaskMap);
ExtractMask(rightDisparityMap, rightMaskMap);
} else {
// simply run a left-right consistency check
ConsistencyCrossCheck(leftDisparityMap, rightDisparityMap);
}
} while ((scale*=2) < REAL(1)+ZEROTOLERANCE<REAL>());
#if 0
// remove speckles
if (OPTDENSE::nSpeckleSize > 0)
cv::filterSpeckles(leftDisparityMap, NO_DISP, OPTDENSE::nSpeckleSize, 5);
#endif
// sub-pixel disparity-map estimation
// Step 3_2_2_5 sub-pixel refinement: fit a quadratic curve through neighboring costs
RefineDisparityMap(leftDisparityMap);
#if 1
// export disparity-map for the left image
DEBUG_EXTRA("Disparity-map for images %3u and %3u: %dx%d (%s)", leftImage.ID, rightImage.ID,
leftImage.width, leftImage.height, TD_TIMER_GET_FMT().c_str());
ExportPointCloud(pairName+".ply", leftImage, leftDisparityMap, Q, subpixelSteps);
ExportDisparityMap(pairName+".png", leftDisparityMap);
ExportDisparityDataRawFull(pairName+".dimap", leftDisparityMap, costMap, leftImage.GetSize(), H, Q, subpixelSteps);
#else
// convert disparity-map to final depth-map for the left image
DepthMap depthMap(leftImage.image.size()); ConfidenceMap confMap;
Disparity2DepthMap(leftDisparityMap, costMap, H, Q, subpixelSteps, depthMap, confMap);
DEBUG_EXTRA("Depth-map for images %3u and %3u: %dx%d (%s)", leftImage.ID, rightImage.ID,
depthMap.width(), depthMap.height(), TD_TIMER_GET_FMT().c_str());
ExportDepthMap(pairName+".png", depthMap);
MVS::ExportPointCloud(pairName+".ply", leftImage, depthMap, NormalMap());
ExportDepthDataRaw(pairName+".dmap", leftImage.name, IIndexArr{leftImage.ID,rightImage.ID}, leftImage.GetSize(), leftImage.camera.K, leftImage.camera.R, leftImage.camera.C, 0, FLT_MAX, depthMap, NormalMap(), confMap);
#endif
}
}
注:本文来自计算机视觉life独家课程 视觉三维重建:原理剖析+逐行代码详解 中的课件及注释代码。