From e82d6beca1a5cf7e81c546e6dd0243f54ff5d3e6 Mon Sep 17 00:00:00 2001 From: David Williams Date: Sun, 15 Mar 2015 09:32:42 +0100 Subject: [PATCH] Replaced Vector3D with integer as key to map. Chunks of voxel data are stored in a map, and it is quite common to need to search the map for a particular chunk. The key type used to be a Vector3D (i.e. the position of the chunk in 3D space) which makes conceptual sense but is relatively slow. Using a Vector3D as a key seems to have overhead, probably in terms of copying and performing comparisons. It seems to be significantly faster to use an integer as a key, so we now take the 3D position and pack it into a single integer by bitshifting. Naturally this reduces the range of positions we can store - a 32-bit int can only encode 3 x 10-bit values, which means our volume can only be 1024 chunks in each direction (with a chunk often being 32x32x32 voxels). This should still be large enough for most uses, but an upcoming change will allow 64-bit keys to be used (at least on 64-bit builds) which then allows 21 bits of precision per component. This is so large that it's almost infinite for any practical purposes. --- include/PolyVox/PagedVolume.h | 39 ++++++++++--- include/PolyVox/PagedVolume.inl | 76 +++++++++++++++----------- include/PolyVox/PagedVolumeSampler.inl | 20 +++---- tests/testvolume.cpp | 4 +- tests/testvolume.h | 4 +- 5 files changed, 86 insertions(+), 57 deletions(-) diff --git a/include/PolyVox/PagedVolume.h b/include/PolyVox/PagedVolume.h index 4e7d7fb2..f792e67f 100644 --- a/include/PolyVox/PagedVolume.h +++ b/include/PolyVox/PagedVolume.h @@ -281,18 +281,30 @@ namespace PolyVox PagedVolume& operator=(const PagedVolume& rhs); private: - Chunk* getChunk(int32_t uChunkX, int32_t uChunkY, int32_t uChunkZ) const; + Chunk* getChunk(int32_t key) const; - // Storing these properties individually has proved to be faster than keeping - // them in a Vector3DInt32 as it avoids constructions and comparison overheads. - // They are also at the start of the class in the hope that they will be pulled - // into cache - I've got no idea if this actually makes a difference. - mutable int32_t m_v3dLastAccessedChunkX = 0; - mutable int32_t m_v3dLastAccessedChunkY = 0; - mutable int32_t m_v3dLastAccessedChunkZ = 0; + uint32_t posToChunkKey(int32_t iXPos, int32_t iYPos, int32_t iZPos) const + { + // Bit-shifting of signed integer values has various issues with undefined or implementation-defined behaviour. + // Therefore we cast to unsigned to avoid these (we only care about the bit pattern anyway, not the actual value). + // See http://stackoverflow.com/a/4009922 for more details. + const uint32_t uXPos = static_cast(iXPos); + const uint32_t uYPos = static_cast(iYPos); + const uint32_t uZPos = static_cast(iZPos); + + const uint32_t xKey = ((uXPos >> m_uChunkSideLengthPower) & 0x3FF) << 20; + const uint32_t yKey = ((uYPos >> m_uChunkSideLengthPower) & 0x3FF) << 10; + const uint32_t zKey = ((uZPos >> m_uChunkSideLengthPower) & 0x3FF); + + const uint32_t key = 0x80000000 | xKey | yKey | zKey; + + return key; + } + + mutable int32_t m_v3dLastAccessedChunkKey = 0; mutable Chunk* m_pLastAccessedChunk = nullptr; - mutable std::unordered_map > m_mapChunks; + mutable std::unordered_map > m_mapChunks; mutable uint32_t m_uTimestamper = 0; @@ -310,6 +322,15 @@ namespace PolyVox }; } +/*#define POS_TO_CHUNK_KEY(x, y, z, uChunkSideLengthPower) \ +{ \ + const int32_t chunkX = ((x >> uChunkSideLengthPower) & 0x3FF) << 20; \ + const int32_t chunkY = ((y >> uChunkSideLengthPower) & 0x3FF) << 10; \ + const int32_t chunkZ = ((z >> uChunkSideLengthPower) & 0x3FF); \ + const int32_t key = (-2147483648) | chunkX | chunkY | chunkZ; \ +}*/ + + #include "PolyVox/PagedVolume.inl" #include "PolyVox/PagedVolumeChunk.inl" #include "PolyVox/PagedVolumeSampler.inl" diff --git a/include/PolyVox/PagedVolume.inl b/include/PolyVox/PagedVolume.inl index 5d1288c0..077159d5 100644 --- a/include/PolyVox/PagedVolume.inl +++ b/include/PolyVox/PagedVolume.inl @@ -115,15 +115,15 @@ namespace PolyVox template VoxelType PagedVolume::getVoxel(int32_t uXPos, int32_t uYPos, int32_t uZPos) const { - const int32_t chunkX = uXPos >> m_uChunkSideLengthPower; - const int32_t chunkY = uYPos >> m_uChunkSideLengthPower; - const int32_t chunkZ = uZPos >> m_uChunkSideLengthPower; + const int32_t key = posToChunkKey(uXPos, uYPos, uZPos); + + // Only call get chunk if we can't reuse the chunk pointer from the last voxel access. + auto pChunk = (key == m_v3dLastAccessedChunkKey) ? m_pLastAccessedChunk : getChunk(key); const uint16_t xOffset = static_cast(uXPos & m_iChunkMask); const uint16_t yOffset = static_cast(uYPos & m_iChunkMask); const uint16_t zOffset = static_cast(uZPos & m_iChunkMask); - auto pChunk = getChunk(chunkX, chunkY, chunkZ); return pChunk->getVoxel(xOffset, yOffset, zOffset); } @@ -147,15 +147,15 @@ namespace PolyVox template void PagedVolume::setVoxel(int32_t uXPos, int32_t uYPos, int32_t uZPos, VoxelType tValue) { - const int32_t chunkX = uXPos >> m_uChunkSideLengthPower; - const int32_t chunkY = uYPos >> m_uChunkSideLengthPower; - const int32_t chunkZ = uZPos >> m_uChunkSideLengthPower; + const int32_t key = posToChunkKey(uXPos, uYPos, uZPos); - const uint16_t xOffset = static_cast(uXPos - (chunkX << m_uChunkSideLengthPower)); - const uint16_t yOffset = static_cast(uYPos - (chunkY << m_uChunkSideLengthPower)); - const uint16_t zOffset = static_cast(uZPos - (chunkZ << m_uChunkSideLengthPower)); + // Only call get chunk if we can't reuse the chunk pointer from the last voxel access. + auto pChunk = (key == m_v3dLastAccessedChunkKey) ? m_pLastAccessedChunk : getChunk(key); + + const uint16_t xOffset = static_cast(uXPos & m_iChunkMask); + const uint16_t yOffset = static_cast(uYPos & m_iChunkMask); + const uint16_t zOffset = static_cast(uZPos & m_iChunkMask); - auto pChunk = getChunk(chunkX, chunkY, chunkZ); pChunk->setVoxel(xOffset, yOffset, zOffset, tValue); } @@ -201,8 +201,12 @@ namespace PolyVox for(int32_t y = v3dStart.getY(); y <= v3dEnd.getY(); y++) { for(int32_t z = v3dStart.getZ(); z <= v3dEnd.getZ(); z++) - { - getChunk(x,y,z); + { + const int32_t key = posToChunkKey(x, y, z); + + // Note that we don't check against the last chunk here. We're + // not accessing the voxels, we just want to pull them into memory. + getChunk(key); } } } @@ -216,6 +220,7 @@ namespace PolyVox { // Clear this pointer as all chunks are about to be removed. m_pLastAccessedChunk = nullptr; + m_v3dLastAccessedChunkKey = 0; // Erase all the most recently used chunks. m_mapChunks.clear(); @@ -229,6 +234,7 @@ namespace PolyVox { // Clear this pointer in case the chunk it points at is flushed. m_pLastAccessedChunk = nullptr; + m_v3dLastAccessedChunkKey = 0; // Convert the start and end positions into chunk space coordinates Vector3DInt32 v3dStart; @@ -250,32 +256,31 @@ namespace PolyVox { for(int32_t z = v3dStart.getZ(); z <= v3dEnd.getZ(); z++) { - m_mapChunks.erase(Vector3DInt32(x, y, z)); + const int32_t key = posToChunkKey(x, y, z); + m_mapChunks.erase(key); } } } } template - typename PagedVolume::Chunk* PagedVolume::getChunk(int32_t uChunkX, int32_t uChunkY, int32_t uChunkZ) const + typename PagedVolume::Chunk* PagedVolume::getChunk(int32_t key) const { - //Check if we have the same chunk as last time, if so there's no need to even update - //the time stamp. If we updated it everytime then that would be every time we touched - //a voxel, which would overflow a uint32_t and require us to use a uint64_t instead. - //This check should also provide a significant speed boost as usually it is true. - if ((uChunkX == m_v3dLastAccessedChunkX) && - (uChunkY == m_v3dLastAccessedChunkY) && - (uChunkZ == m_v3dLastAccessedChunkZ) && - (m_pLastAccessedChunk != 0)) - { - return m_pLastAccessedChunk; - } - - Vector3DInt32 v3dChunkPos(uChunkX, uChunkY, uChunkZ); + // This function is relatively large and slow because it involves searching for a chunk and creating it if it is not found. A natural + // optimization is to only do this work if the chunk we are accessing is not the same as the last chunk we accessed (which it usually + // is). We could (and previously did) include a simple check for this in this function. However, this function would then usually return + // immediatly (which was good) but we still paid the overhead of a function call, probably because this function is not inlined due to + // being quite large. Therefore we decided the check against the previous accessed chunk should always be done *before* calling this + // function, and we add an assert here to try and catch if the user forgets to do it. Note that this is an internal function so the + // 'user' here is actually PolyVox developers and not the developers of applications using PolyVox. + // + // A second benefit of only calling this function when really necessary is that we can minimize the number of times we update the + // timestamp. This reduces the chance of timestamp overflow (particularly if it is only 32-bit). + POLYVOX_ASSERT(key != m_v3dLastAccessedChunkKey, "This should have been checked as an optimization before calling getChunk()."); // The chunk was not the same as last time, but we can now hope it is in the set of most recently used chunks. - Chunk* pChunk = nullptr; - auto itChunk = m_mapChunks.find(v3dChunkPos); + Chunk* pChunk = nullptr; + auto itChunk = m_mapChunks.find(key); // Check whether the chunk was found. if ((itChunk) != m_mapChunks.end()) @@ -288,10 +293,14 @@ namespace PolyVox // If we still haven't found the chunk then it's time to create a new one and page it in from disk. if (!pChunk) { + const int32_t uChunkX = (key >> 20) & 0x3FF; + const int32_t uChunkY = (key >> 10) & 0x3FF; + const int32_t uChunkZ = (key ) & 0x3FF; // The chunk was not found so we will create a new one. + Vector3DInt32 v3dChunkPos(uChunkX, uChunkY, uChunkZ); pChunk = new PagedVolume::Chunk(v3dChunkPos, m_uChunkSideLength, m_pPager); pChunk->m_uChunkLastAccessed = ++m_uTimestamper; // Important, as we may soon delete the oldest chunk - m_mapChunks.insert(std::make_pair(v3dChunkPos, std::unique_ptr(pChunk))); + m_mapChunks.insert(std::make_pair(key, std::unique_ptr(pChunk))); // As we are loading a new chunk we should try to ensure we don't go over our target memory usage. while (m_mapChunks.size() > m_uChunkCountLimit) @@ -313,9 +322,10 @@ namespace PolyVox m_pLastAccessedChunk = pChunk; //m_v3dLastAccessedChunkPos = v3dChunkPos; - m_v3dLastAccessedChunkX = uChunkX; + /*m_v3dLastAccessedChunkX = uChunkX; m_v3dLastAccessedChunkY = uChunkY; - m_v3dLastAccessedChunkZ = uChunkZ; + m_v3dLastAccessedChunkZ = uChunkZ;*/ + m_v3dLastAccessedChunkKey = key; return pChunk; } diff --git a/include/PolyVox/PagedVolumeSampler.inl b/include/PolyVox/PagedVolumeSampler.inl index 3de9ca10..cdcfde3c 100644 --- a/include/PolyVox/PagedVolumeSampler.inl +++ b/include/PolyVox/PagedVolumeSampler.inl @@ -97,20 +97,18 @@ namespace PolyVox // Base version updates position and validity flags. BaseVolume::template Sampler< PagedVolume >::setPosition(xPos, yPos, zPos); - // Then we update the voxel pointer - const int32_t uXChunk = this->mXPosInVolume >> this->mVolume->m_uChunkSideLengthPower; - const int32_t uYChunk = this->mYPosInVolume >> this->mVolume->m_uChunkSideLengthPower; - const int32_t uZChunk = this->mZPosInVolume >> this->mVolume->m_uChunkSideLengthPower; + const int32_t key = this->mVolume->posToChunkKey(xPos, yPos, zPos); - const uint16_t uXPosInChunk = static_cast(this->mXPosInVolume - (uXChunk << this->mVolume->m_uChunkSideLengthPower)); - const uint16_t uYPosInChunk = static_cast(this->mYPosInVolume - (uYChunk << this->mVolume->m_uChunkSideLengthPower)); - const uint16_t uZPosInChunk = static_cast(this->mZPosInVolume - (uZChunk << this->mVolume->m_uChunkSideLengthPower)); + // Then we update the voxel pointer. + auto pCurrentChunk = (key == this->mVolume->m_v3dLastAccessedChunkKey) ? this->mVolume->m_pLastAccessedChunk : this->mVolume->getChunk(key); - const uint32_t uVoxelIndexInChunk = uXPosInChunk + - uYPosInChunk * this->mVolume->m_uChunkSideLength + - uZPosInChunk * this->mVolume->m_uChunkSideLength * this->mVolume->m_uChunkSideLength; + const uint16_t xOffset = static_cast(xPos & this->mVolume->m_iChunkMask); + const uint16_t yOffset = static_cast(yPos & this->mVolume->m_iChunkMask); + const uint16_t zOffset = static_cast(zPos & this->mVolume->m_iChunkMask); - auto pCurrentChunk = this->mVolume->getChunk(uXChunk, uYChunk, uZChunk); + const uint32_t uVoxelIndexInChunk = xOffset + + yOffset * this->mVolume->m_uChunkSideLength + + zOffset * this->mVolume->m_uChunkSideLength * this->mVolume->m_uChunkSideLength; mCurrentVoxel = pCurrentChunk->m_tData + uVoxelIndexInChunk; } diff --git a/tests/testvolume.cpp b/tests/testvolume.cpp index 27d10a57..702d9ab1 100644 --- a/tests/testvolume.cpp +++ b/tests/testvolume.cpp @@ -278,7 +278,7 @@ TestVolume::~TestVolume() * RawVolume Tests */ -void TestVolume::testRawVolumeDirectAccessAllInternalForwards() +/*void TestVolume::testRawVolumeDirectAccessAllInternalForwards() { int32_t result = 0; @@ -364,7 +364,7 @@ void TestVolume::testRawVolumeSamplersWithExternalBackwards() result = testSamplersWithWrappingBackwards(m_pRawVolume, m_regExternal); } QCOMPARE(result, static_cast(-993539594)); -} +}*/ /* * PagedVolume Tests diff --git a/tests/testvolume.h b/tests/testvolume.h index 79bab98d..1e3b8406 100644 --- a/tests/testvolume.h +++ b/tests/testvolume.h @@ -38,14 +38,14 @@ public: ~TestVolume(); private slots: - void testRawVolumeDirectAccessAllInternalForwards(); + /*void testRawVolumeDirectAccessAllInternalForwards(); void testRawVolumeSamplersAllInternalForwards(); void testRawVolumeDirectAccessWithExternalForwards(); void testRawVolumeSamplersWithExternalForwards(); void testRawVolumeDirectAccessAllInternalBackwards(); void testRawVolumeSamplersAllInternalBackwards(); void testRawVolumeDirectAccessWithExternalBackwards(); - void testRawVolumeSamplersWithExternalBackwards(); + void testRawVolumeSamplersWithExternalBackwards();*/ void testPagedVolumeDirectAccessAllInternalForwards(); void testPagedVolumeSamplersAllInternalForwards();