From f06248bb53d966f4bc05497840db8896beba98c3 Mon Sep 17 00:00:00 2001
From: muehlenb <muehlenb@uni-bremen.de>
Date: Wed, 28 Apr 2021 14:21:43 +0200
Subject: [PATCH] Some minor performance optimization (which were made in our
 development software for the paper and are now transfered into this UE4
 project).

Further changes will follow.
---
 .../Private/GridDetector.cpp                  | 122 +++++++++++++++++-
 .../MultiplePointCloudsRegistrator.cpp        |  91 ++++++++++++-
 2 files changed, 209 insertions(+), 4 deletions(-)

diff --git a/VRKinectReg/Plugins/VRKinectRegistration/Source/VRKinectRegistration/Private/GridDetector.cpp b/VRKinectReg/Plugins/VRKinectRegistration/Source/VRKinectRegistration/Private/GridDetector.cpp
index 6f4f268b..ed4c6d1b 100644
--- a/VRKinectReg/Plugins/VRKinectRegistration/Source/VRKinectRegistration/Private/GridDetector.cpp
+++ b/VRKinectReg/Plugins/VRKinectRegistration/Source/VRKinectRegistration/Private/GridDetector.cpp
@@ -122,6 +122,7 @@
 #include <algorithm>
 
 #include <unordered_set>
+#include <chrono>
 
 THIRD_PARTY_INCLUDES_START
 #include <pcl/io/pcd_io.h>
@@ -335,6 +336,22 @@ void mergeNeighbours(float* pcData, std::vector<std::shared_ptr<Scanline3D>>& in
 			float* bStartPtr = &pcData[b->startIdx * 3];
 			float* bEndPtr = &pcData[b->endIdx * 3];
 
+			/* Faster implementation by doing it directly in this loop and avoiding function calls: */
+			float aMeanX = (aStartPtr[0] + aEndPtr[0]) / 2;
+			float aMeanY = (aStartPtr[1] + aEndPtr[1]) / 2;
+			float aMeanZ = (aStartPtr[2] + aEndPtr[2]) / 2;
+
+			float bMeanX = (bStartPtr[0] + bEndPtr[0]) / 2;
+			float bMeanY = (bStartPtr[1] + bEndPtr[1]) / 2;
+			float bMeanZ = (bStartPtr[2] + bEndPtr[2]) / 2;
+
+			float dX = (aMeanX - bMeanX);
+			float dY = (aMeanY - bMeanY);
+			float dZ = (aMeanZ - bMeanZ);
+
+			float distance = dX * dX + dY * dY + dZ * dZ;
+
+			/* Previous Implementation, slower:
 			FVector aStart(aStartPtr[0], aStartPtr[1], aStartPtr[2]);
 			FVector aEnd(aEndPtr[0], aEndPtr[1], aEndPtr[2]);
 			FVector bStart(bStartPtr[0], bStartPtr[1], bStartPtr[2]);
@@ -344,6 +361,7 @@ void mergeNeighbours(float* pcData, std::vector<std::shared_ptr<Scanline3D>>& in
 			FVector bMean = (bStart + bEnd) / 2;
 
 			float distance = (aMean - bMean).SizeSquared();
+			*/
 
 			// ALSO A PARAMETER OF THE ALGORITHM:
 			if (distance < SCANLINE_MERGING_MAX_SQR_MIDPOINT_DISTANCE)
@@ -510,6 +528,77 @@ void filterAllPointsInRadius(
 	unsigned int pcSize, 
 	boost::shared_ptr<pcl::PointCloud<pcl::PointXYZ>> points
 ) {
+
+	/* 
+	 * NEW IMPLEMENTATION, HIGHLY INCREASES PERFORMANCE (BY FIRST CALCULATING A ROUGH BOUNDING SPHERE 
+	 * TO AVOID TESTING SO MUCH POINTS AGAINST ALL POINTS OF THE POINT CLOUD) 
+	 */
+
+	// Calculate rough bounding sphere:
+	pcl::PointXYZ min(100000, 100000, 100000);
+	pcl::PointXYZ max(-100000, -100000, -100000);
+
+	for (pcl::PointXYZ p : *points) {
+		min.x = std::min(p.x, min.x);
+		min.y = std::min(p.y, min.y);
+		min.z = std::min(p.z, min.z);
+
+		max.x = std::max(p.x, max.x);
+		max.y = std::max(p.y, max.y);
+		max.z = std::max(p.z, max.z);
+	}
+
+	float sphereMidX = (max.x + min.x) / 2;
+	float sphereMidY = (max.y + min.y) / 2;
+	float sphereMidZ = (max.z + min.z) / 2;
+	float sphereSqrRad = 0;
+
+	for (pcl::PointXYZ p : *points) {
+		float dX = sphereMidX - p.x;
+		float dY = sphereMidY - p.y;
+		float dZ = sphereMidZ - p.z;
+
+		float sqrDistance = dX * dX + dY * dY + dZ * dZ;
+		sphereSqrRad = std::max(sphereSqrRad, sqrDistance);
+	}
+
+	// add max distance to sphereSqrRad:
+	sphereSqrRad = std::sqrt(FILTER_PC_MAX_SQR_RADIUS) + std::sqrt(sphereSqrRad);
+	sphereSqrRad = sphereSqrRad * sphereSqrRad;
+
+	float maxDistancePTPSqr = FILTER_PC_MAX_SQR_RADIUS;
+
+	// Now, for every point in the point cloud, first check against the rough boundin sphere, then against all other points:
+	for (unsigned int i = 0; i < pcSize; ++i) {
+		// First check against bounding sphere:
+		float* pointPtr = &pcData[i * 3];
+
+		float dX = sphereMidX - pointPtr[0];
+		float dY = sphereMidY - pointPtr[1];
+		float dZ = sphereMidZ - pointPtr[2];
+
+		float distance = dX * dX + dY * dY + dZ * dZ;
+
+		// If point is not in the bounding sphere (incl. additional margin), skip;
+		if (distance > sphereSqrRad)
+			continue;
+
+		for (pcl::PointXYZ p : *points) {
+			float d2X = p.x - pointPtr[0];
+			float d2Y = p.y - pointPtr[1];
+			float d2Z = p.z - pointPtr[2];
+
+			float distance2 = d2X * d2X + d2Y * d2Y + d2Z * d2Z;
+
+			if (distance2 <= maxDistancePTPSqr) {
+				out_allInRadius->push_back(pcl::PointXYZ(pointPtr[0], pointPtr[1], pointPtr[2]));
+				out_allInRadiusIndices.push_back(i);
+				break;
+			}
+		}
+	}
+
+	/* Old implementation, uses no rough sphere bounding box. 
 	auto sqrEuclDistance = [](pcl::PointXYZ a, pcl::PointXYZ b) {
 		pcl::PointXYZ v(a.x - b.x, a.y - b.y, a.z - b.z);
 
@@ -534,6 +623,7 @@ void filterAllPointsInRadius(
 			out_allInRadiusIndices.push_back(i);
 		}
 	}
+	*/
 }
 
 /*
@@ -961,6 +1051,19 @@ std::vector<int> filterPointsAround(float* pcData, unsigned int pcSize, FVector
 	for (unsigned int i = 0; i < pcSize; ++i) {
 		float* pc = &pcData[i*3];
 
+		float pX = pc[0] - gridCenter.X;
+		float pY = pc[1] - gridCenter.Y;
+		float pZ = pc[2] - gridCenter.Z;
+
+		if (isnan(pX) || isnan(pY) || isnan(pZ))
+			continue;
+
+		/* Faster implementation (because this is executed for all points of the original point cloud): */
+		float axis1Distance = abs(pX * axis1.X + pY * axis1.Y + pZ * axis1.Z);
+		float axis2Distance = abs(pX * axis2.X + pY * axis2.Y + pZ * axis2.Z);
+		float normalDistance = abs(pX * normal.X + pY * normal.Y + pZ * normal.Z);
+
+		/* Old implementation, slower:
 		FVector v(pc[0], pc[1], pc[2]);
 
 		FVector p = v - gridCenter;
@@ -971,6 +1074,7 @@ std::vector<int> filterPointsAround(float* pcData, unsigned int pcSize, FVector
 		float axis1Distance = abs(FVector::DotProduct(p, axis1));
 		float axis2Distance = abs(FVector::DotProduct(p, axis2));
 		float normalDistance = abs(FVector::DotProduct(p, normal));
+		*/
 
 		if (normalDistance > maxNormalDist)
 			continue;
@@ -992,6 +1096,8 @@ std::vector<int> filterPointsAround(float* pcData, unsigned int pcSize, FVector
 */
 
 GridDetectionResult GridDetector::detectGrid(PointCloudImage pointCloud) {
+	auto startTimeMeasure = std::chrono::system_clock::now();
+
 	// Filter connected scanlines:
 	std::vector<std::shared_ptr<Scanline3D>> scanlines = filterConnectedScanlines(pointCloud.data, pointCloud.sourceImageWidth, pointCloud.sourceImageHeight);
 	
@@ -1075,6 +1181,10 @@ GridDetectionResult GridDetector::detectGrid(PointCloudImage pointCloud) {
 		// Filter all points which are not connected to another point over minimal 1 good angle:
 		gridMidPoints = filterByTwoMajorClusters(clusters, gridMidPoints, 1);
 
+		unsigned int MIN_GRID_POINTS = 16; // Should also be a parameter.
+		if (gridMidPoints.size() < MIN_GRID_POINTS)
+			continue;
+
 		// Calculate weighted:
 		FVector gridMidWeighted = mean(gridMidPoints);
 
@@ -1126,16 +1236,15 @@ GridDetectionResult GridDetector::detectGrid(PointCloudImage pointCloud) {
 
 		float MAX_EDGE_LENGTH_OF_OBB = 0.36;
 		float MIN_EDGE_LENGTH_OF_OBB = 0.28f;
-		unsigned int MIN_GRID_POINTS = 16;
 		float MAX_GRID_MID_TO_WEIGHTED_DISTNACE = 0.03f;
 
 		// Only continue, if the minimal bounding box has the size of a grid:
-		if (gridMidPoints.size() >= MIN_GRID_POINTS && FVector::Distance(gridMidWeighted, probableGridMid) < MAX_GRID_MID_TO_WEIGHTED_DISTNACE
+		if (FVector::Distance(gridMidWeighted, probableGridMid) < MAX_GRID_MID_TO_WEIGHTED_DISTNACE
 			&& axis1Length > MIN_EDGE_LENGTH_OF_OBB && axis2Length > MIN_EDGE_LENGTH_OF_OBB
 			&& axis1Length < MAX_EDGE_LENGTH_OF_OBB && axis2Length < MAX_EDGE_LENGTH_OF_OBB)
 		{
 			// Filter points AROUND the grid for orientation estimation (these points are normally hands and arms which hold the grid):
-			std::vector<int> pointsAroundGridIndices = filterPointsAround(pointCloud.data, pointCloud.sourceImageWidth * pointCloud.sourceImageHeight, probableGridMid, axis1, axis2, planeN, 0.25f, 0.6f, 0.10f);
+			std::vector<int> pointsAroundGridIndices = filterPointsAround(pointCloud.data, pointCloud.sourceImageWidth * pointCloud.sourceImageHeight, probableGridMid, axis1, axis2, planeN, 0.25f, 0.4f, 0.10f);
 
 			FVector midPointOfPointsAroundGrid(0);
 
@@ -1185,10 +1294,17 @@ GridDetectionResult GridDetector::detectGrid(PointCloudImage pointCloud) {
 			for (FVector holePoints : gridMidPoints) {
 				result.centersOfFoundHoles.Add(holePoints);
 			}
+
+
+			float totalRuntime = float((std::chrono::duration<double>(std::chrono::system_clock::now() - startTimeMeasure)).count() * 1000.f);
+			UE_LOG(LogTemp, Warning, TEXT("Time Needed for Grid detection: %f"), totalRuntime)
 			return result;
 		}
 	}
 
+
+	float totalRuntime = float((std::chrono::duration<double>(std::chrono::system_clock::now() - startTimeMeasure)).count() * 1000.f);
+	UE_LOG(LogTemp, Warning, TEXT("Time Needed for Grid detection: %f"), totalRuntime)
 	return GridDetectionResult();
 }
 
diff --git a/VRKinectReg/Plugins/VRKinectRegistration/Source/VRKinectRegistration/Private/MultiplePointCloudsRegistrator.cpp b/VRKinectReg/Plugins/VRKinectRegistration/Source/VRKinectRegistration/Private/MultiplePointCloudsRegistrator.cpp
index b9511360..01ff695b 100644
--- a/VRKinectReg/Plugins/VRKinectRegistration/Source/VRKinectRegistration/Private/MultiplePointCloudsRegistrator.cpp
+++ b/VRKinectReg/Plugins/VRKinectRegistration/Source/VRKinectRegistration/Private/MultiplePointCloudsRegistrator.cpp
@@ -64,6 +64,95 @@ FTransform pclMatrixToFTransform(pcl::registration::TransformationEstimationSVD<
 	return FTransform(x, y, z, trl);
 }
 
+TArray<FTransform> MultiplePointCloudsRegistrator::registrateInto(int pointCloudID) {
+	if (pointCloudCount < 2) {
+		UE_LOG(LogTemp, Warning, TEXT("The number of specified point clouds has to be 2 at minimum! Was %i"), pointCloudCount);
+		return TArray<FTransform>();
+	}
+
+	if (pointCloudID < 0 || pointCloudID >= correspondences.Num()) {
+		UE_LOG(LogTemp, Warning, TEXT("The given point cloud id doesn't exists. Given point cloud id: %i. Size of point clouds: %i."), pointCloudID, pointCloudCount);
+		return TArray<FTransform>();
+	}
+
+	if (correspondences[pointCloudID].Num() < 4) {
+		UE_LOG(LogTemp, Warning, TEXT("There have to be at least 4 reference points for the specified point cloud id! There were just %i"), correspondences[pointCloudID].Num());
+		return TArray<FTransform>();
+	}
+
+	TArray<FTransform> result;
+
+	for (int i = 0; i < pointCloudCount; ++i) {
+		if (i == pointCloudID) {
+			result.Add(FTransform());
+			continue;
+		}
+
+		// Create point clouds (expected struct by pcl for correspondence points):
+		boost::shared_ptr<pcl::PointCloud<pcl::PointXYZ>> sourcePC(new pcl::PointCloud<pcl::PointXYZ>());
+		boost::shared_ptr<pcl::PointCloud<pcl::PointXYZ>> targetPC(new pcl::PointCloud<pcl::PointXYZ>());
+
+		boost::shared_ptr<pcl::PointCloud<pcl::PointXYZ>> sourcePCFiltered(new pcl::PointCloud<pcl::PointXYZ>()); // Result of Correspondence Rejection
+		boost::shared_ptr<pcl::PointCloud<pcl::PointXYZ>> targetPCFiltered(new pcl::PointCloud<pcl::PointXYZ>()); // Result of Correspondence Rejection
+
+		// Copy all points into sourcePC and targetPC, where both vectors contains NO NAN value:
+		for (int h = 0; h < correspondences[pointCloudID].Num(); ++h) {
+			FVector srcPoint = correspondences[i][h];
+			FVector tgtPoint = correspondences[pointCloudID][h];
+
+			if (!srcPoint.ContainsNaN() && !tgtPoint.ContainsNaN()) {
+				sourcePC->push_back(pcl::PointXYZ(srcPoint.X * 100, srcPoint.Y * 100, srcPoint.Z * 100));
+				targetPC->push_back(pcl::PointXYZ(tgtPoint.X * 100, tgtPoint.Y * 100, tgtPoint.Z * 100));
+				//UE_LOG(LogTemp, Warning, TEXT("Added P1: %f, %f, %f AND P2: %f, %f, %f"), srcPoint.X, srcPoint.Y, srcPoint.Z, tgtPoint.X, tgtPoint.Y, tgtPoint.Z)
+			}
+		}
+
+		// Correspondence rejection:
+		{
+			pcl::Correspondences filteredCorresp;
+			pcl::Correspondences corresp;
+
+			for (int h = 0; h < sourcePC->size(); ++h) {
+				corresp.push_back(pcl::Correspondence(h, h, std::numeric_limits<float>::max()));
+			}
+
+			boost::shared_ptr<pcl::Correspondences> corPtr(new pcl::Correspondences(corresp));
+
+			pcl::registration::CorrespondenceRejectorSampleConsensus<pcl::PointXYZ> crsac;
+			crsac.setInputSource(sourcePC);
+			crsac.setInputTarget(targetPC);
+			crsac.setInputCorrespondences(corPtr);
+			crsac.setInlierThreshold(4.00f);
+			crsac.setMaximumIterations(1000);
+			crsac.getRemainingCorrespondences(corresp, filteredCorresp);
+
+			for (pcl::Correspondence correspondence : filteredCorresp) {
+				sourcePCFiltered->push_back((*sourcePC)[correspondence.index_query]);
+				targetPCFiltered->push_back((*targetPC)[correspondence.index_match]);
+			}
+		}
+
+		// If there are not enough points, add simply identity:
+		if (sourcePCFiltered->size() < 4 || targetPCFiltered->size() < 4) {
+			result.Add(FTransform());
+			continue;
+		}
+
+		pcl::registration::TransformationEstimationSVD<pcl::PointXYZ, pcl::PointXYZ> estimator;
+		pcl::registration::TransformationEstimationSVD<pcl::PointXYZ, pcl::PointXYZ>::Matrix4 mat;
+		estimator.estimateRigidTransformation(*sourcePCFiltered, *targetPCFiltered, mat);
+
+		FTransform transform = pclMatrixToFTransform(mat);
+		FVector translation = transform.GetTranslation();
+
+		result.Add(transform);
+	}
+
+	return result;
+}
+
+
+/*
 TArray<FTransform> MultiplePointCloudsRegistrator::registrateInto(int pointCloudID) {
 	if (pointCloudCount < 2) {
 		UE_LOG(LogTemp, Warning, TEXT("The number of specified point clouds has to be 2 at minimum! Was %i"), pointCloudCount);
@@ -149,4 +238,4 @@ TArray<FTransform> MultiplePointCloudsRegistrator::registrateInto(int pointCloud
 	}
 
 	return result;
-}
\ No newline at end of file
+}*/
\ No newline at end of file
-- 
GitLab