diff --git a/CMakeLists.txt b/CMakeLists.txt index 1cee91c93..0b1ac6096 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,6 +131,7 @@ if(${BUILD_TOOLS}) message("Building tools") add_subdirectory(Tools/GrownBiomeGenVisualiser/) add_subdirectory(Tools/MCADefrag/) + add_subdirectory(Tools/NoiseSpeedTest/) add_subdirectory(Tools/ProtoProxy/) endif() diff --git a/Tools/NoiseSpeedTest/CMakeLists.txt b/Tools/NoiseSpeedTest/CMakeLists.txt new file mode 100644 index 000000000..29163398b --- /dev/null +++ b/Tools/NoiseSpeedTest/CMakeLists.txt @@ -0,0 +1,66 @@ +project (NoiseSpeedTest) + +include(../../SetFlags.cmake) +set_flags() +set_lib_flags() +enable_profile() + +# Set include paths to the used libraries: +include_directories(SYSTEM "../../lib") +include_directories("../../src") + +set_exe_flags() + +# Include the shared files: +set(SHARED_SRC + ../../src/Logger.cpp + ../../src/LoggerListeners.cpp + ../../src/OSSupport/CriticalSection.cpp + ../../src/OSSupport/File.cpp + ../../src/OSSupport/StackTrace.cpp + ../../src/Noise/Noise.cpp + ../../src/StringUtils.cpp +) +set(SHARED_HDR + ../../src/Noise/Noise.h + ../../src/Noise/OctavedNoise.h + ../../src/Noise/RidgedNoise.h + ../../src/OSSupport/CriticalSection.h + ../../src/OSSupport/File.h + ../../src/OSSupport/StackTrace.h + ../../src/StringUtils.h +) + +if(WIN32) + list (APPEND SHARED_SRC ../../src/StackWalker.cpp) + list (APPEND SHARED_HDR ../../src/StackWalker.h) +endif() + +source_group("Shared" FILES ${SHARED_SRC} ${SHARED_HDR}) + + + + +# Include the main source files: +set(SOURCES + NoiseSpeedTest.cpp + Globals.cpp +) +set(HEADERS + NoiseSpeedTest.h + Globals.h +) + +source_group("" FILES ${SOURCES} ${HEADERS}) + +add_executable(NoiseSpeedTest + ${SOURCES} + ${HEADERS} + ${SHARED_SRC} + ${SHARED_HDR} +) + +set_target_properties( + NoiseSpeedTest + PROPERTIES FOLDER Tools +) diff --git a/Tools/NoiseSpeedTest/Globals.cpp b/Tools/NoiseSpeedTest/Globals.cpp new file mode 100644 index 000000000..13c6ae709 --- 
/dev/null +++ b/Tools/NoiseSpeedTest/Globals.cpp @@ -0,0 +1,10 @@ + +// Globals.cpp + +// This file is used for precompiled header generation in MSVC environments + +#include "Globals.h" + + + + diff --git a/Tools/NoiseSpeedTest/Globals.h b/Tools/NoiseSpeedTest/Globals.h new file mode 100644 index 000000000..ed9ef82fe --- /dev/null +++ b/Tools/NoiseSpeedTest/Globals.h @@ -0,0 +1,243 @@ + +// Globals.h + +// This file gets included from every module in the project, so that global symbols may be introduced easily +// Also used for precompiled header generation in MSVC environments + + + + + +// Compiler-dependent stuff: +#if defined(_MSC_VER) + // MSVC produces warning C4481 on the override keyword usage, so disable the warning altogether + #pragma warning(disable:4481) + + // Disable some warnings that we don't care about: + #pragma warning(disable:4100) + + #define OBSOLETE __declspec(deprecated) + + // No alignment needed in MSVC + #define ALIGN_8 + #define ALIGN_16 + + #define FORMATSTRING(formatIndex, va_argsIndex) + + // MSVC has its own custom version of zu format + #define SIZE_T_FMT "%Iu" + #define SIZE_T_FMT_PRECISION(x) "%" #x "Iu" + #define SIZE_T_FMT_HEX "%Ix" + + #define NORETURN __declspec(noreturn) + +#elif defined(__GNUC__) + + // TODO: Can GCC explicitly mark classes as abstract (no instances can be created)? 
+ #define abstract + + #define OBSOLETE __attribute__((deprecated)) + + #define ALIGN_8 __attribute__((aligned(8))) + #define ALIGN_16 __attribute__((aligned(16))) + + // Some portability macros :) + #define stricmp strcasecmp + + #define FORMATSTRING(formatIndex,va_argsIndex) + + #define SIZE_T_FMT "%zu" + #define SIZE_T_FMT_PRECISION(x) "%" #x "zu" + #define SIZE_T_FMT_HEX "%zx" + + #define NORETURN __attribute((__noreturn__)) +#else + + #error "You are using an unsupported compiler, you might need to #define some stuff here for your compiler" + + /* + // Copy and uncomment this into another #elif section based on your compiler identification + + // Explicitly mark classes as abstract (no instances can be created) + #define abstract + + // Mark functions as obsolete, so that their usage results in a compile-time warning + #define OBSOLETE + + // Mark types / variables for alignment. Do the platforms need it? + #define ALIGN_8 + #define ALIGN_16 + */ + + #define FORMATSTRING(formatIndex,va_argsIndex) __attribute__((format (printf, formatIndex, va_argsIndex))) + +#endif + + + + + +// Integral types with predefined sizes: +typedef long long Int64; +typedef int Int32; +typedef short Int16; + +typedef unsigned long long UInt64; +typedef unsigned int UInt32; +typedef unsigned short UInt16; + +typedef unsigned char Byte; + + + + + +// A macro to disallow the copy constructor and operator= functions +// This should be used in the private: declarations for any class that shouldn't allow copying itself +#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName &); \ + void operator=(const TypeName &) + +// A macro that is used to mark unused function parameters, to avoid pedantic warnings in gcc +#define UNUSED(X) (void)(X) + + + + +// OS-dependent stuff: +#ifdef _WIN32 + #define WIN32_LEAN_AND_MEAN + #include + #include + #include + + // Windows SDK defines min and max macros, messing up with our std::min and std::max usage + #undef min + #undef max + + // 
Windows SDK defines GetFreeSpace as a constant, probably a Win16 API remnant + #ifdef GetFreeSpace + #undef GetFreeSpace + #endif // GetFreeSpace + + #define SocketError WSAGetLastError() +#else + #include + #include // for mkdir + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + + #include + #include + #include + #include + #include + #include + + typedef int SOCKET; + enum + { + INVALID_SOCKET = -1, + }; + #define closesocket close + #define SocketError errno +#if !defined(ANDROID_NDK) + #include +#endif +#endif + +#if !defined(ANDROID_NDK) + #define USE_SQUIRREL +#endif + +#if defined(ANDROID_NDK) + #define FILE_IO_PREFIX "/sdcard/mcserver/" +#else + #define FILE_IO_PREFIX "" +#endif + + + + + +// CRT stuff: +#include +#include +#include +#include +#include + + + + + +// STL stuff: +#include +#include +#include +#include +#include +#include +#include + + + + + +// Common headers (without macros): +#include "StringUtils.h" +#include "OSSupport/CriticalSection.h" +#include "OSSupport/Event.h" +#include "OSSupport/IsThread.h" +#include "OSSupport/File.h" + + + + + +// Common definitions: + +/// Evaluates to the number of elements in an array (compile-time!) +#define ARRAYCOUNT(X) (sizeof(X) / sizeof(*(X))) + +/// Allows arithmetic expressions like "32 KiB" (but consider using parenthesis around it, "(32 KiB)" ) +#define KiB * 1024 +#define MiB * 1024 * 1024 + +/// Faster than (int)floorf((float)x / (float)div) +#define FAST_FLOOR_DIV( x, div ) ( (x) < 0 ? 
(((int)x / div) - 1) : ((int)x / div) ) + +// Own version of assert() that writes failed assertions to the log for review +#ifdef NDEBUG + #define ASSERT(x) ((void)0) +#else + #define ASSERT assert +#endif + +// Pretty much the same as ASSERT() but stays in Release builds +#define VERIFY( x ) ( !!(x) || ( LOGERROR("Verification failed: %s, file %s, line %i", #x, __FILE__, __LINE__ ), exit(1), 0 ) ) + + + + + +/// A generic interface used mainly in ForEach() functions +template class cItemCallback +{ +public: + /// Called for each item in the internal list; return true to stop the loop, or false to continue enumerating + virtual bool Item(Type * a_Type) = 0; + virtual ~cItemCallback() {} +} ; + + + + diff --git a/Tools/NoiseSpeedTest/NoiseSpeedTest.cpp b/Tools/NoiseSpeedTest/NoiseSpeedTest.cpp new file mode 100644 index 000000000..6857b1a27 --- /dev/null +++ b/Tools/NoiseSpeedTest/NoiseSpeedTest.cpp @@ -0,0 +1,149 @@ +// NoiseSpeedTest.cpp + +// Implements the main app entrypoint + +/* +This program compares the performance of the highly-optimized noise implementation in Cuberite, and the Simplex noise. +Since the Simplex noise is not yet implemented in Cuberite, an own implementation is provided. +Also, the performance difference between using a float and double as datatype is measured, by using a templatized Simplex noise. + +The testing is done on a usage of the generator that is typical for the Cuberite's terrain generator: generate a 3D array of numbers with +not much variance in the coords. The exact sizes and coord ranges were adapted from the cNoise3DComposable generator. 
+*/ + +#include "Globals.h" +#include "Noise/Noise.h" +#include "Noise/InterpolNoise.h" +#include "SimplexNoise.h" + + + + + +/// The sizes of the interpolated noise that are calculated: +static const int SIZE_X = 33; +static const int SIZE_Y = 5; +static const int SIZE_Z = 5; + + + + + +static void measureClassicNoise(int a_NumIterations) +{ + cInterp5DegNoise noise(1); + NOISE_DATATYPE total = 0; + auto timeStart = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < a_NumIterations; ++i) + { + NOISE_DATATYPE out[SIZE_X * SIZE_Y * SIZE_Z]; + int blockX = i * 16; + int blockZ = i * 16; + NOISE_DATATYPE startX = 0; + NOISE_DATATYPE endX = 257 / 80.0f; + NOISE_DATATYPE startY = blockX / 40.0f; + NOISE_DATATYPE endY = (blockX + 16) / 40.0f; + NOISE_DATATYPE startZ = blockZ / 40.0f; + NOISE_DATATYPE endZ = (blockZ + 16) / 40.0f; + noise.Generate3D(out, SIZE_X, SIZE_Y, SIZE_Z, startX, endX, startY, endY, startZ, endZ); + total += out[0]; // Do not let the optimizer optimize the whole calculation away + } + auto timeEnd = std::chrono::high_resolution_clock::now(); + auto msec = std::chrono::duration_cast(timeEnd - timeStart); + printf("Classic noise took %d milliseconds, returned total %f\n", static_cast(msec.count()), total); +} + + + + + +static void measureSimplexNoiseFloat(int a_NumIterations) +{ + typedef float DATATYPE; + cSimplexNoise noise(1); + DATATYPE total = 0; + auto timeStart = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < a_NumIterations; ++i) + { + DATATYPE out[SIZE_X * SIZE_Y * SIZE_Z]; + int blockX = i * 16; + int blockZ = i * 16; + DATATYPE startX = 0; + DATATYPE endX = 257 / 80.0f; + DATATYPE startY = blockX / 40.0f; + DATATYPE endY = (blockX + 16) / 40.0f; + DATATYPE startZ = blockZ / 40.0f; + DATATYPE endZ = (blockZ + 16) / 40.0f; + noise.Generate3D(out, SIZE_X, SIZE_Y, SIZE_Z, startX, endX, startY, endY, startZ, endZ); + total += out[0]; // Do not let the optimizer optimize the whole calculation away + } + auto 
timeEnd = std::chrono::high_resolution_clock::now(); + auto msec = std::chrono::duration_cast(timeEnd - timeStart); + printf("SimplexNoise took %d milliseconds, returned total %f\n", static_cast(msec.count()), total); +} + + + + + +static void measureSimplexNoiseDouble(int a_NumIterations) +{ + typedef double DATATYPE; + cSimplexNoise noise(1); + DATATYPE total = 0; + auto timeStart = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < a_NumIterations; ++i) + { + DATATYPE out[SIZE_X * SIZE_Y * SIZE_Z]; + int blockX = i * 16; + int blockZ = i * 16; + DATATYPE startX = 0; + DATATYPE endX = 257 / 80.0f; + DATATYPE startY = blockX / 40.0f; + DATATYPE endY = (blockX + 16) / 40.0f; + DATATYPE startZ = blockZ / 40.0f; + DATATYPE endZ = (blockZ + 16) / 40.0f; + noise.Generate3D(out, SIZE_X, SIZE_Y, SIZE_Z, startX, endX, startY, endY, startZ, endZ); + total += out[0]; // Do not let the optimizer optimize the whole calculation away + } + auto timeEnd = std::chrono::high_resolution_clock::now(); + auto msec = std::chrono::duration_cast(timeEnd - timeStart); + printf("SimplexNoise took %d milliseconds, returned total %f\n", static_cast(msec.count()), total); +} + + + + + +int main(int argc, char ** argv) +{ + int numIterations = 10000; + if (argc > 1) + { + numIterations = std::atoi(argv[1]); + if (numIterations < 10) + { + printf("Invalid number of iterations, using 1000 instead\n"); + numIterations = 1000; + } + } + + // Perform each test twice, to account for cache-warmup: + measureClassicNoise(numIterations); + measureClassicNoise(numIterations); + measureSimplexNoiseFloat(numIterations); + measureSimplexNoiseFloat(numIterations); + measureSimplexNoiseDouble(numIterations); + measureSimplexNoiseDouble(numIterations); + + // If build on Windows using MSVC, wait for a keypress before ending: + #ifdef _MSC_VER + getchar(); + #endif + + return 0; +} + + + + diff --git a/Tools/NoiseSpeedTest/NoiseSpeedTest.h b/Tools/NoiseSpeedTest/NoiseSpeedTest.h new file mode 
// SimplexNoise.h

// Declares and implements the simplex noise, using a template parameter for the underlying datatype

/*
Note:
This code has been adapted from the public domain code by Stefan Gustavson, available at
http://staffwww.itn.liu.se/~stegu/simplexnoise/simplexnoise.pdf
*/

#ifndef SIMPLEXNOISE_H_INCLUDED
#define SIMPLEXNOISE_H_INCLUDED

#include <cstddef>
#include <random>
#include <utility>





/** Simplex noise generator, templated on the floating-point type used for the coords and the values. */
template <typename Datatype>
class cSimplexNoise
{
public:
	/** Builds the permutation tables from a_Seed. The same seed always produces the same tables. */
	cSimplexNoise(int a_Seed)
	{
		// Based on the seed, initialize the permutation table, using a simple LCG and swapping

		// Initialize with sorted sequence:
		for (size_t i = 0; i < PERM_SIZE; i++)
		{
			m_Perm[i] = static_cast<int>(i);
		}

		// Use swaps to randomize.
		// NOTE(review): the engine's template parameters are assumed to be those of std::minstd_rand - confirm.
		// The two indices are drawn in separate statements; drawing both inside the std::swap() call would leave
		// their order unspecified and make the table depend on the compiler.
		std::minstd_rand lcg(static_cast<std::minstd_rand::result_type>(a_Seed));
		for (size_t i = 0; i < NUM_SWAPS; i++)
		{
			const size_t first = lcg() % PERM_SIZE;
			const size_t second = lcg() % PERM_SIZE;
			std::swap(m_Perm[first], m_Perm[second]);
		}

		// Copy to the upper half of the buffer (to avoid the need for modulo when accessing neighbors):
		for (size_t i = 0; i < PERM_SIZE; i++)
		{
			m_Perm[i + PERM_SIZE] = m_Perm[i];
		}

		// Copy to the "modulo 12" table to optimize away four modulo ops per value calculation:
		for (size_t i = 0; i < 2 * PERM_SIZE; i++)
		{
			m_PermMod12[i] = m_Perm[i] % 12;
		}
	}



	/** Returns a dot product of an int vector with a Datatype vector. */
	static inline Datatype dot(const int * g, const Datatype x, const Datatype y, const Datatype z)
	{
		return g[0] * x + g[1] * y + g[2] * z;
	}



	/** Returns a dot product of two Datatype vectors. */
	static inline Datatype dot(const Datatype * g, const Datatype x, const Datatype y, const Datatype z)
	{
		return g[0] * x + g[1] * y + g[2] * z;
	}



	/** Returns the floor of the specified value, already type-cast to proper int type.
	a_Val must be representable as an int.
	Truncates towards zero and then steps down by one only if the truncation rounded up. This keeps zero and
	negative whole numbers intact (0 -> 0, -2 -> -2), unlike the plain "(x > 0) ? (int)x : (int)(x - 1)" idiom. */
	static inline int datafloor(const Datatype a_Val)
	{
		const int truncated = static_cast<int>(a_Val);
		return (a_Val < static_cast<Datatype>(truncated)) ? (truncated - 1) : truncated;
	}



	/** Returns a single noise value based on the 3D coords. */
	Datatype GetValueAt3D(const Datatype a_X, const Datatype a_Y, const Datatype a_Z) const
	{
		// The gradients are the midpoints of the vertices of a cube.
		static const Datatype grad3[12][3] =
		{
			{1, 1, 0}, {-1,  1, 0}, {1, -1,  0}, {-1, -1,  0},
			{1, 0, 1}, {-1,  0, 1}, {1,  0, -1}, {-1,  0, -1},
			{0, 1, 1}, { 0, -1, 1}, {0,  1, -1}, { 0, -1, -1}
		};

		// Skew factors:
		static const Datatype F3 = static_cast<Datatype>(1.0 / 3.0);
		static const Datatype G3 = static_cast<Datatype>(1.0 / 6.0);

		// Skew the input space to determine which simplex cell we're in
		const Datatype s = (a_X + a_Y + a_Z) * F3;
		const int i = datafloor(a_X + s);
		const int j = datafloor(a_Y + s);
		const int k = datafloor(a_Z + s);

		// Unskew back into the XYZ space to calculate the distances from cell origin:
		const Datatype t = (i + j + k) * G3;
		const Datatype x0 = a_X - (i - t);
		const Datatype y0 = a_Y - (j - t);
		const Datatype z0 = a_Z - (k - t);

		// For the 3D case, the simplex shape is a slightly irregular tetrahedron.
		// Determine which simplex we are in.
		int i1, j1, k1;  // Offsets for second corner of simplex in IJK coords
		int i2, j2, k2;  // Offsets for third corner of simplex in IJK coords
		if (x0 >= y0)
		{
			if (y0 >= z0)
			{
				// X Y Z order
				i1 = 1; j1 = 0; k1 = 0; i2 = 1; j2 = 1; k2 = 0;
			}
			else if (x0 >= z0)
			{
				// X Z Y order
				i1 = 1; j1 = 0; k1 = 0; i2 = 1; j2 = 0; k2 = 1;
			}
			else
			{
				// Z X Y order
				i1 = 0; j1 = 0; k1 = 1; i2 = 1; j2 = 0; k2 = 1;
			}
		}
		else
		{
			if (y0 < z0)
			{
				// Z Y X order
				i1 = 0; j1 = 0; k1 = 1; i2 = 0; j2 = 1; k2 = 1;
			}
			else if (x0 < z0)
			{
				// Y Z X order
				i1 = 0; j1 = 1; k1 = 0; i2 = 0; j2 = 1; k2 = 1;
			}
			else
			{
				// Y X Z order
				i1 = 0; j1 = 1; k1 = 0; i2 = 1; j2 = 1; k2 = 0;
			}
		}

		// A step of (1, 0, 0) in IJK means a step of (1 - c, -c, -c) in XYZ,
		// a step of (0, 1, 0) in IJK means a step of (-c, 1 - c, -c) in XYZ, and
		// a step of (0, 0, 1) in IJK means a step of (-c, -c, 1 - c) in XYZ, where c = G3 = 1 / 6.
		const Datatype one = static_cast<Datatype>(1);
		const Datatype twoG3 = static_cast<Datatype>(2) * G3;
		const Datatype threeG3 = static_cast<Datatype>(3) * G3;
		const Datatype x1 = x0 - i1 + G3;  // Offsets for second corner in XYZ coords
		const Datatype y1 = y0 - j1 + G3;
		const Datatype z1 = z0 - k1 + G3;
		const Datatype x2 = x0 - i2 + twoG3;  // Offsets for third corner in XYZ coords
		const Datatype y2 = y0 - j2 + twoG3;
		const Datatype z2 = z0 - k2 + twoG3;
		const Datatype x3 = x0 - one + threeG3;  // Offsets for last corner in XYZ coords
		const Datatype y3 = y0 - one + threeG3;
		const Datatype z3 = z0 - one + threeG3;

		// Work out the hashed gradient indices of the four simplex corners.
		// The indices never exceed 511, because the upper half of m_Perm mirrors the lower half.
		const int ii = i & 255;
		const int jj = j & 255;
		const int kk = k & 255;
		const int gi0 = m_PermMod12[ii +      m_Perm[jj +      m_Perm[kk]]];
		const int gi1 = m_PermMod12[ii + i1 + m_Perm[jj + j1 + m_Perm[kk + k1]]];
		const int gi2 = m_PermMod12[ii + i2 + m_Perm[jj + j2 + m_Perm[kk + k2]]];
		const int gi3 = m_PermMod12[ii + 1 +  m_Perm[jj + 1 +  m_Perm[kk + 1]]];

		// Calculate the contribution from the four corners
		const Datatype n0 = cornerContribution(grad3[gi0], x0, y0, z0);
		const Datatype n1 = cornerContribution(grad3[gi1], x1, y1, z1);
		const Datatype n2 = cornerContribution(grad3[gi2], x2, y2, z2);
		const Datatype n3 = cornerContribution(grad3[gi3], x3, y3, z3);

		// Add contributions from each corner to get the final noise value, scaled by 32 as in the reference code.
		return static_cast<Datatype>(32) * (n0 + n1 + n2 + n3);
	}



	/** Generates the 3D version of the Simplex noise.
	a_Out is the 3D array into which the noise is output. Organized as [x + a_SizeX * y + a_SizeX * a_SizeY * z].
	a_SizeX, a_SizeY, a_SizeZ are the dimensions of the a_Out array.
	a_Start and a_End are the coords of the 3D array in the noise-space. The sample at index n along an axis is
	taken at a_Start + n * (a_End - a_Start) / a_Size, so the a_End coord itself is never sampled. */
	void Generate3D(
		Datatype * a_Out,
		int a_SizeX, int a_SizeY, int a_SizeZ,
		Datatype a_StartX, Datatype a_EndX,
		Datatype a_StartY, Datatype a_EndY,
		Datatype a_StartZ, Datatype a_EndZ
	) const
	{
		Datatype * out = a_Out;
		for (int z = 0; z < a_SizeZ; ++z)
		{
			const Datatype nz = a_StartZ + z * (a_EndZ - a_StartZ) / a_SizeZ;
			for (int y = 0; y < a_SizeY; ++y)
			{
				const Datatype ny = a_StartY + y * (a_EndY - a_StartY) / a_SizeY;
				for (int x = 0; x < a_SizeX; ++x)
				{
					const Datatype nx = a_StartX + x * (a_EndX - a_StartX) / a_SizeX;
					*out++ = GetValueAt3D(nx, ny, nz);
				}  // for x
			}  // for y
		}  // for z
	}

protected:
	/** Number of distinct entries in the permutation table. */
	static constexpr size_t PERM_SIZE = 256;

	/** Number of random swaps used to shuffle the permutation table. */
	static constexpr size_t NUM_SWAPS = 2000;

	/** The permutation table, initialized by the seed. The upper half is a copy of the lower half. */
	int m_Perm[2 * PERM_SIZE];

	/** A copy of the permutation table, with each item modulo 12, to avoid 4 modulo operations per value calculation. */
	int m_PermMod12[2 * PERM_SIZE];


	/** Returns the contribution of a single simplex corner.
	a_Grad is the corner's gradient, a_X, a_Y, a_Z is the offset of the sampled point from that corner.
	The contribution is zero if the squared distance from the corner is more than 0.6. */
	static inline Datatype cornerContribution(const Datatype * a_Grad, const Datatype a_X, const Datatype a_Y, const Datatype a_Z)
	{
		Datatype falloff = static_cast<Datatype>(0.6) - a_X * a_X - a_Y * a_Y - a_Z * a_Z;
		if (falloff < 0)
		{
			return 0;
		}
		falloff *= falloff;
		return falloff * falloff * dot(a_Grad, a_X, a_Y, a_Z);
	}
};

#endif  // SIMPLEXNOISE_H_INCLUDED