|
[Aqsis-commits] [SCM] Aqsis Renderer branch, master, updated. Release_1.6.0_Phase2-218-g72efd5f
From: Chris Foster <c42f@us...> - 2010-06-27 13:52
|
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Aqsis Renderer".
The branch, master, has been updated
via 72efd5f1fb0e2326f1978f0b83a41877e7d2ec47 (commit)
via 264151b3aedf21dd4b41ab35fc29007337b90f80 (commit)
from 10c6c8d5ac05855fbc98d3511da6a3f57cdf99a5 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
- Log -----------------------------------------------------------------
commit 72efd5f1fb0e2326f1978f0b83a41877e7d2ec47
Author: Chris Foster <chris42f@...>
Date: Sun Jun 27 23:47:15 2010 +1000
Fractal rendering test using boost.thread
This change adds a version of the fractal renderer which is parallelized
using boost.thread. A class abstracts the rendering order, so arbitrary
tile orders should be possible.
Also add wrappers for boost::interprocess::detail::atomic_{inc,dec}32
for convenience.
diff --git a/prototypes/newcore/experiments/fractal_threading.cpp b/prototypes/newcore/experiments/fractal_threading.cpp
index f14db1d..940ea3c 100644
--- a/prototypes/newcore/experiments/fractal_threading.cpp
+++ b/prototypes/newcore/experiments/fractal_threading.cpp
@@ -379,17 +379,18 @@ void renderImageTiled(TIFF* tif, int width, int superSamp, const float* filter,
struct SampTile : public RefCounted
{
private:
- int m_x;
- int m_y;
+ Imath::V2i m_pos;
boost::scoped_array<float> m_samps;
public:
SampTile(int x, int y, int width)
- : m_x(x), m_y(y),
+ : m_pos(x,y),
m_samps(new float[3*width*width])
{ }
float* samps() { return m_samps.get(); }
const float* samps() const { return m_samps.get(); }
+
+ const Imath::V2i& pos() const { return m_pos; }
};
typedef boost::intrusive_ptr<SampTile> SampTilePtr;
@@ -431,7 +432,6 @@ void renderImageParallelOmp(TIFF* tif, int width, int superSamp,
int ntiles = nftiles + 1;
int sampTileWidth = superSamp*tileWidth;
- int tileSize = sampTileWidth*sampTileWidth*3;
typedef boost::unordered_map<Imath::V2i, TileFilterBlock> FilterBlockContainer;
FilterBlockContainer waitingBlocks(2*nftiles);
@@ -514,6 +514,211 @@ void renderImageParallelOmp(TIFF* tif, int width, int superSamp,
}
+//------------------------------------------------------------------------------
+
+/// Collate sample tiles and filter them when ready
+class TileCollator
+{
+ public:
+ class SharedData
+ {
+ private:
+ typedef boost::unordered_map<Imath::V2i, TileFilterBlock> BlockContainer;
+ BlockContainer waitingBlocks;
+ boost::mutex waitingBlocksMutex;
+ int nfilterTiles;
+ int tileWidth;
+ int superSamp;
+ // Filter stuff
+ const float* filter;
+ int filterWidth;
+ // Output
+ TIFF* outFile;
+ boost::mutex tifMutex;
+ friend class TileCollator;
+ public:
+ SharedData(int ntiles, int tileWidth, int superSamp,
+ const float* filter, int filterWidth, TIFF* outFile)
+ : waitingBlocks(2*ntiles),
+ nfilterTiles(ntiles-1),
+ tileWidth(tileWidth),
+ superSamp(superSamp),
+ filter(filter),
+ filterWidth(filterWidth),
+ outFile(outFile)
+ { }
+ };
+
+ private:
+ SharedData& m_;
+ std::vector<uint8> m_colFilt;
+
+ public:
+ TileCollator(SharedData& sharedData)
+ : m_(sharedData),
+ m_colFilt(3*m_.tileWidth*m_.tileWidth)
+ { }
+
+ void push(const SampTilePtr& tile)
+ {
+ // The _sample_ tile with coordinates (tx,ty) overlaps the four
+ // _filtering_ tiles with coordinates:
+ //
+ // (tx-1, ty-1) (tx,ty-1)
+ // (tx-1, ty) (tx,ty)
+ //
+ // we insert the sample tile into each of these filtering tiles,
+ // ignoring filtering tile coordinates which lie outside the image
+ // boundaries.
+ for(int j = 0; j < 2; ++j)
+ for(int i = 0; i < 2; ++i)
+ {
+ // Position of filter tile
+ Imath::V2i p = tile->pos() + Imath::V2i(i-1,j-1);
+ // Ignore filtering tiles outside image boundaries
+ if(p.x >= 0 && p.y >= 0 && p.x < m_.nfilterTiles &&
+ p.y < m_.nfilterTiles)
+ {
+ TileFilterBlock block;
+ {
+ boost::lock_guard<boost::mutex> lock(m_.waitingBlocksMutex);
+ SharedData::BlockContainer::iterator blockIt =
+ m_.waitingBlocks.find(p);
+ if(blockIt == m_.waitingBlocks.end())
+ {
+ // Create blank block if it didn't exist
+ std::pair<SharedData::BlockContainer::iterator, bool> insRes =
+ m_.waitingBlocks.insert(std::make_pair(p, TileFilterBlock()));
+ assert(insRes.second);
+ blockIt = insRes.first;
+ }
+ blockIt->second.tiles[1-j][1-i] = tile;
+ if(blockIt->second.readyForFilter())
+ {
+ block = blockIt->second;
+ m_.waitingBlocks.erase(blockIt);
+ }
+ }
+ if(block.readyForFilter())
+ {
+ // Filter, quantize & save result. This can be done in parallel.
+ const float* toFilter[2][2] = {
+ {block.tiles[0][0]->samps(), block.tiles[0][1]->samps()},
+ {block.tiles[1][0]->samps(), block.tiles[1][1]->samps()},
+ };
+ uint8* output = &m_colFilt[0];
+ filterAndQuantizeTile(output, toFilter, m_.tileWidth,
+ m_.superSamp, m_.filter, m_.filterWidth);
+ boost::lock_guard<boost::mutex> lock(m_.tifMutex);
+ TIFFWriteTile(m_.outFile, output, p.x*m_.tileWidth,
+ p.y*m_.tileWidth, 0, 0);
+ }
+ }
+ }
+ }
+};
+
+/// Define the tile ordering for sample tile rendering
+class TileScheduler
+{
+ private:
+ boost::mutex m_mutex;
+ int m_ntiles;
+ int m_tx;
+ int m_ty;
+
+ public:
+ TileScheduler(int ntiles)
+ : m_ntiles(ntiles),
+ m_tx(0),
+ m_ty(0)
+ {}
+
+ bool nextTile(int& tx, int& ty)
+ {
+ boost::lock_guard<boost::mutex> lock(m_mutex);
+ if(m_ty >= m_ntiles)
+ return false;
+ tx = m_tx;
+ ty = m_ty;
+ ++m_tx;
+ if(m_tx >= m_ntiles)
+ {
+ ++m_ty;
+ m_tx = 0;
+ }
+ return true;
+ }
+};
+
+
+/// Render thread functor for use with boost.thread
+class RenderThreadFunc
+{
+ private:
+ TileScheduler& m_scheduler;
+ TileCollator::SharedData& m_collatorShared;
+ int m_sampTileWidth;
+ Transform m_trans;
+ int m_maxIter;
+
+ public:
+ RenderThreadFunc(TileScheduler& scheduler,
+ TileCollator::SharedData& collatorShared,
+ int sampTileWidth, const Transform& trans,
+ int maxIter)
+ : m_scheduler(scheduler),
+ m_collatorShared(collatorShared),
+ m_sampTileWidth(sampTileWidth),
+ m_trans(trans),
+ m_maxIter(maxIter)
+ { }
+
+ void operator()()
+ {
+ int tx = 0, ty = 0;
+ TileCollator collator(m_collatorShared);
+ while(m_scheduler.nextTile(tx, ty))
+ {
+ SampTilePtr tile = new SampTile(tx, ty, m_sampTileWidth);
+ renderTile(tile->samps(), m_sampTileWidth,
+ m_trans.offset(m_sampTileWidth*tx,
+ m_sampTileWidth*ty), m_maxIter);
+ collator.push(tile);
+ }
+ }
+};
+
+
+/// Parallel tiled method using boost.thread
+noinline
+void renderImageParallel(TIFF* tif, int width, int superSamp, const float* filter,
+ int filterWidth, const Transform& trans, int maxIter)
+{
+ const int tileWidth = 16;
+ writeHeader(tif, width, width, tileWidth);
+ // number of sample tiles over width of image
+ int ntiles = ceildiv(width, tileWidth) + 1;
+
+ TileScheduler scheduler(ntiles);
+ TileCollator::SharedData collatorShared(ntiles, tileWidth, superSamp,
+ filter, filterWidth, tif);
+ RenderThreadFunc threadFunc(scheduler, collatorShared,
+ superSamp*tileWidth, trans, maxIter);
+
+ int nthreads = boost::thread::hardware_concurrency();
+ if(nthreads > 1)
+ {
+ boost::thread_group threads;
+ for(int i = 0; i < nthreads; ++i)
+ threads.create_thread(threadFunc);
+ threads.join_all();
+ }
+ else
+ threadFunc();
+}
+
+
//------------------------------------------------------------------------------
int main()
@@ -540,8 +745,10 @@ int main()
// trans, maxIter);
// renderImageTiled(tif, width, superSamp, &filter[0], filterWidth,
// trans, maxIter);
- renderImageParallelOmp(tif, width, superSamp, &filter[0], filterWidth,
- trans, maxIter);
+// renderImageParallelOmp(tif, width, superSamp, &filter[0], filterWidth,
+// trans, maxIter);
+ renderImageParallel(tif, width, superSamp, &filter[0], filterWidth,
+ trans, maxIter);
TIFFClose(tif);
diff --git a/prototypes/newcore/refcount.h b/prototypes/newcore/refcount.h
index d1de93a..60f4cc3 100644
--- a/prototypes/newcore/refcount.h
+++ b/prototypes/newcore/refcount.h
@@ -26,6 +26,15 @@
#include <boost/type_traits/is_base_of.hpp>
#include <boost/utility/enable_if.hpp>
+inline boost::uint32_t atomic_inc32(volatile boost::uint32_t* i)
+{
+ return boost::interprocess::detail::atomic_inc32(i);
+}
+inline boost::uint32_t atomic_dec32(volatile boost::uint32_t* i)
+{
+ return boost::interprocess::detail::atomic_dec32(i);
+}
+
//------------------------------------------------------------------------------
/// Reference counted base mixin for use with boost::intrusive_ptr.
///
@@ -44,11 +53,11 @@ class RefCounted
// int decRef() const { return --m_refCount; }
int incRef() const
{
- return boost::interprocess::detail::atomic_inc32(&m_refCount) + 1;
+ return atomic_inc32(&m_refCount) + 1;
}
int decRef() const
{
- return boost::interprocess::detail::atomic_dec32(&m_refCount) - 1;
+ return atomic_dec32(&m_refCount) - 1;
}
protected:
@@ -57,7 +66,8 @@ class RefCounted
private:
// mutable int m_refCount;
- mutable boost::uint32_t m_refCount;
+ // todo: is volatile needed here?
+ mutable volatile boost::uint32_t m_refCount;
};
commit 264151b3aedf21dd4b41ab35fc29007337b90f80
Author: Chris Foster <chris42f@...>
Date: Sun Jun 27 13:21:20 2010 +1000
Use Transform struct for clarity
This is a minor change.
diff --git a/prototypes/newcore/experiments/fractal_threading.cpp b/prototypes/newcore/experiments/fractal_threading.cpp
index 9a3060e..f14db1d 100644
--- a/prototypes/newcore/experiments/fractal_threading.cpp
+++ b/prototypes/newcore/experiments/fractal_threading.cpp
@@ -112,16 +112,33 @@ void mandelColor(float* rgb, double x, double y, int maxIter)
colorMap(rgb, t, maxIter);
}
+struct Transform
+{
+ double dx;
+ double x0;
+ double dy;
+ double y0;
+
+ Transform offset(int x, int y) const
+ {
+ Transform t = *this;
+ t.x0 += dx*x;
+ t.y0 += dy*y;
+ return t;
+ }
+};
+
+
noinline
-void renderTile(float* rgb, int w, double x0, double y0, double dx, double dy,
+void renderTile(float* rgb, int w, const Transform& trans,
int maxIter)
{
for(int j = 0; j < w; ++j)
{
- double y = y0 + j*dy;
+ double y = trans.y0 + j*trans.dy;
for(int i = 0; i < w; ++i)
{
- double x = x0 + i*dx;
+ double x = trans.x0 + i*trans.dx;
mandelColor(&rgb[3*(w*j + i)], x, y, maxIter);
}
}
@@ -262,13 +279,12 @@ void writeHeader(TIFF* tif, int width, int height, int tileWidth = -1)
/// Render a big buffer of samples, then filter those samples.
noinline
void renderImageSimple(TIFF* tif, int width, int superSamp, const float* filter,
- int filterWidth, double xoff, double xmult, double yoff,
- double ymult, int maxIter)
+ int filterWidth, const Transform& trans, int maxIter)
{
writeHeader(tif, width, width);
int fullWidth = superSamp*(width + filterWidth-1);
std::vector<float> colVals(3*fullWidth*fullWidth, -1);
- renderTile(&colVals[0], fullWidth, xoff, yoff, xmult, ymult, maxIter);
+ renderTile(&colVals[0], fullWidth, trans, maxIter);
std::vector<uint8> colFilt(width*3, 0);
@@ -312,8 +328,7 @@ void renderImageSimple(TIFF* tif, int width, int superSamp, const float* filter,
/// renderImageSimple().
noinline
void renderImageTiled(TIFF* tif, int width, int superSamp, const float* filter,
- int filterWidth, double xoff, double xmult, double yoff,
- double ymult, int maxIter)
+ int filterWidth, const Transform& trans, int maxIter)
{
const int tileWidth = 16;
writeHeader(tif, width, width, tileWidth);
@@ -337,8 +352,8 @@ void renderImageTiled(TIFF* tif, int width, int superSamp, const float* filter,
for(int tx = 0; tx < ntiles; ++tx, ++poolPos)
{
float* stor = tilePool[poolPos % poolSize];
- renderTile(stor, sampTileWidth, xoff + xmult*sampTileWidth*tx,
- yoff + ymult*sampTileWidth*ty, xmult, ymult, maxIter);
+ renderTile(stor, sampTileWidth, trans.offset(sampTileWidth*tx,
+ sampTileWidth*ty), maxIter);
if(ty > 0 && tx > 0)
{
// When four adjacent tiles are rendered, filter the interior
@@ -404,9 +419,9 @@ std::size_t hash_value(Imath::V2i const& p)
/// Parallel tiled method using OpenMP.
noinline
-void renderImageParallelOmp(TIFF* tif, int width, int superSamp, const float* filter,
- int filterWidth, double xoff, double xmult, double yoff,
- double ymult, int maxIter)
+void renderImageParallelOmp(TIFF* tif, int width, int superSamp,
+ const float* filter, int filterWidth,
+ const Transform& trans, int maxIter)
{
const int tileWidth = 16;
writeHeader(tif, width, width, tileWidth);
@@ -440,8 +455,8 @@ void renderImageParallelOmp(TIFF* tif, int width, int superSamp, const float* fi
{
SampTilePtr tile = new SampTile(tx, ty, sampTileWidth);
float* stor = tile->samps();
- renderTile(stor, sampTileWidth, xoff + xmult*sampTileWidth*tx,
- yoff + ymult*sampTileWidth*ty, xmult, ymult, maxIter);
+ renderTile(stor, sampTileWidth, trans.offset(sampTileWidth*tx,
+ sampTileWidth*ty), maxIter);
// The sample tile with coordinates (tx,ty) overlaps the
// four filtering tiles with coordinates:
//
@@ -485,7 +500,7 @@ void renderImageParallelOmp(TIFF* tif, int width, int superSamp, const float* fi
{block.tiles[1][0]->samps(), block.tiles[1][1]->samps()},
};
filterAndQuantizeTile(&colFilt[0], toFilter, tileWidth,
- superSamp, &filter[0], filterWidth);
+ superSamp, filter, filterWidth);
# pragma omp critical
TIFFWriteTile(tif, &colFilt[0], p.x*tileWidth,
p.y*tileWidth, 0, 0);
@@ -509,10 +524,11 @@ int main()
const int superSamp = 3;
double scale = 0.01;
- double xmult = scale/superSamp * 2.0/width;
- double xoff = -0.79 + scale * -1.0;
- double ymult = -scale/superSamp * 2.0/width;
- double yoff = 0.15 + scale * 1.0;
+ Transform trans;
+ trans.dx = scale/superSamp * 2.0/width;
+ trans.x0 = -0.79 + scale * -1.0;
+ trans.dy = -scale/superSamp * 2.0/width;
+ trans.y0 = 0.15 + scale * 1.0;
const int filterWidth = 3;
std::vector<float> filter;
@@ -521,11 +537,11 @@ int main()
TIFF* tif = TIFFOpen("mandel.tif", "w");
// renderImageSimple(tif, width, superSamp, &filter[0], filterWidth,
-// xoff, xmult, yoff, ymult, maxIter);
+// trans, maxIter);
// renderImageTiled(tif, width, superSamp, &filter[0], filterWidth,
-// xoff, xmult, yoff, ymult, maxIter);
+// trans, maxIter);
renderImageParallelOmp(tif, width, superSamp, &filter[0], filterWidth,
- xoff, xmult, yoff, ymult, maxIter);
+ trans, maxIter);
TIFFClose(tif);
-----------------------------------------------------------------------
Summary of changes:
.../newcore/experiments/fractal_threading.cpp | 279 ++++++++++++++++++--
prototypes/newcore/refcount.h | 16 +-
2 files changed, 264 insertions(+), 31 deletions(-)
hooks/post-receive
--
Aqsis Renderer
|
| Thread | Author | Date |
|---|---|---|
| [Aqsis-commits] [SCM] Aqsis Renderer branch, master, updated. Release_1.6.0_Phase2-218-g72efd5f | Chris Foster <c42f@us...> | 2010-06-27 13:52 |