You can subscribe to this list here.
2006 |
Jan
|
Feb
|
Mar
|
Apr
(102) |
May
(78) |
Jun
(70) |
Jul
(46) |
Aug
|
Sep
(2) |
Oct
(59) |
Nov
(84) |
Dec
(41) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2007 |
Jan
(401) |
Feb
(151) |
Mar
(38) |
Apr
(43) |
May
(77) |
Jun
(52) |
Jul
(65) |
Aug
(33) |
Sep
(15) |
Oct
(14) |
Nov
(9) |
Dec
(4) |
2008 |
Jan
|
Feb
(15) |
Mar
(7) |
Apr
(41) |
May
(16) |
Jun
|
Jul
(8) |
Aug
(43) |
Sep
(111) |
Oct
(58) |
Nov
(93) |
Dec
(185) |
2009 |
Jan
(221) |
Feb
(150) |
Mar
(76) |
Apr
(250) |
May
(242) |
Jun
(182) |
Jul
(232) |
Aug
(101) |
Sep
(121) |
Oct
(78) |
Nov
(110) |
Dec
(155) |
2010 |
Jan
(67) |
Feb
(57) |
Mar
(72) |
Apr
(140) |
May
(54) |
Jun
(35) |
Jul
(39) |
Aug
(30) |
Sep
(35) |
Oct
(46) |
Nov
(34) |
Dec
(29) |
2011 |
Jan
(15) |
Feb
(22) |
Mar
(23) |
Apr
(37) |
May
(21) |
Jun
(29) |
Jul
(23) |
Aug
(19) |
Sep
(9) |
Oct
(18) |
Nov
(17) |
Dec
(5) |
2012 |
Jan
(34) |
Feb
(18) |
Mar
(37) |
Apr
(34) |
May
(24) |
Jun
(10) |
Jul
(42) |
Aug
(55) |
Sep
(9) |
Oct
(9) |
Nov
(5) |
Dec
(34) |
2013 |
Jan
(41) |
Feb
(53) |
Mar
(12) |
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(8) |
Oct
(34) |
Nov
(107) |
Dec
(28) |
2014 |
Jan
(15) |
Feb
(33) |
Mar
(28) |
Apr
(8) |
May
(3) |
Jun
(1) |
Jul
(2) |
Aug
(3) |
Sep
(6) |
Oct
|
Nov
(4) |
Dec
(2) |
2015 |
Jan
(29) |
Feb
(17) |
Mar
(44) |
Apr
(28) |
May
(16) |
Jun
(18) |
Jul
(18) |
Aug
(23) |
Sep
(39) |
Oct
(25) |
Nov
(5) |
Dec
(2) |
2016 |
Jan
(13) |
Feb
(33) |
Mar
(58) |
Apr
(12) |
May
(5) |
Jun
(32) |
Jul
(43) |
Aug
(33) |
Sep
(10) |
Oct
(4) |
Nov
(10) |
Dec
(1) |
From: <ze...@us...> - 2016-12-02 00:27:19
|
Revision: 5577 http://sourceforge.net/p/desmume/code/5577 Author: zeromus Date: 2016-12-02 00:27:18 +0000 (Fri, 02 Dec 2016) Log Message: ----------- we moved to github https://github.com/TASVideos/desmume/ Removed Paths: ------------- branches/ docs/ tags/ trunk/ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-11-23 20:41:09
|
Revision: 5576 http://sourceforge.net/p/desmume/code/5576 Author: rogerman Date: 2016-11-23 20:41:07 +0000 (Wed, 23 Nov 2016) Log Message: ----------- Texture Handler: - Finish refactoring and cleaning up TexCache (now renamed to “TextureCache”) and TexCacheItem (now renamed to “TextureStore”). - TextureCache items are now evicted based on age and usage instead of arbitrarily. Modified Paths: -------------- trunk/desmume/src/OGLRender.cpp trunk/desmume/src/OGLRender.h trunk/desmume/src/OGLRender_3_2.cpp trunk/desmume/src/rasterize.cpp trunk/desmume/src/rasterize.h trunk/desmume/src/texcache.cpp trunk/desmume/src/texcache.h Modified: trunk/desmume/src/OGLRender.cpp =================================================================== --- trunk/desmume/src/OGLRender.cpp 2016-11-11 02:34:02 UTC (rev 5575) +++ trunk/desmume/src/OGLRender.cpp 2016-11-23 20:41:07 UTC (rev 5576) @@ -636,12 +636,44 @@ } } -void OGLTextureDeleteCallback(TexCacheItem *texItem, void *param1, void *param2) +OpenGLTexture::OpenGLTexture() { - OpenGLRenderer *oglRenderer = (OpenGLRenderer *)param1; - oglRenderer->DeleteTexture(texItem); + _cacheSize = GetUnpackSizeUsingFormat(TexFormat_32bpp); + _invSizeS = 0.0f; + _invSizeT = 0.0f; + + glGenTextures(1, &_texID); } +OpenGLTexture::OpenGLTexture(u32 texAttributes, u32 palAttributes) : TextureStore(texAttributes, palAttributes) +{ + _cacheSize = GetUnpackSizeUsingFormat(TexFormat_32bpp); + _invSizeS = 1.0f / (float)_sizeS; + _invSizeT = 1.0f / (float)_sizeT; + + glGenTextures(1, &_texID); +} + +OpenGLTexture::~OpenGLTexture() +{ + glDeleteTextures(1, &this->_texID); +} + +GLuint OpenGLTexture::GetID() const +{ + return this->_texID; +} + +GLfloat OpenGLTexture::GetInvWidth() const +{ + return this->_invSizeS; +} + +GLfloat OpenGLTexture::GetInvHeight() const +{ + return this->_invSizeT; +} + template<bool require_profile, bool enable_3_2> static Render3D* OpenGLRendererCreate() { @@ -1199,13 
+1231,6 @@ // Kill the texture cache now before all of our texture IDs disappear. texCache.Reset(); - while(!ref->freeTextureIDs.empty()) - { - GLuint temp = ref->freeTextureIDs.front(); - ref->freeTextureIDs.pop(); - glDeleteTextures(1, &temp); - } - glFinish(); } @@ -1352,7 +1377,6 @@ INFO("OpenGL: Multisampled FBOs are unsupported. Multisample antialiasing will be disabled.\n"); } - this->InitTextures(); this->InitFinalRenderStates(&oglExtensionSet); // This must be done last return OGLERROR_NOERR; @@ -2063,13 +2087,6 @@ return OGLERROR_NOERR; } -Render3DError OpenGLRenderer_1_2::InitTextures() -{ - this->ExpandFreeTextures(); - - return OGLERROR_NOERR; -} - Render3DError OpenGLRenderer_1_2::InitTables() { static bool needTableInit = true; @@ -2228,20 +2245,6 @@ } } -Render3DError OpenGLRenderer_1_2::ExpandFreeTextures() -{ - static const GLsizei kInitTextures = 128; - GLuint oglTempTextureID[kInitTextures]; - glGenTextures(kInitTextures, oglTempTextureID); - - for(GLsizei i = 0; i < kInitTextures; i++) - { - this->ref->freeTextureIDs.push(oglTempTextureID[i]); - } - - return OGLERROR_NOERR; -} - Render3DError OpenGLRenderer_1_2::EnableVertexAttributes() { OGLRenderRef &OGLRef = *this->ref; @@ -2416,14 +2419,6 @@ return OGLERROR_NOERR; } -Render3DError OpenGLRenderer_1_2::DeleteTexture(const TexCacheItem *item) -{ - this->ref->freeTextureIDs.push((GLuint)item->texid); - texCache.cache_size -= item->unpackSize; - - return OGLERROR_NOERR; -} - Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine) { OGLRenderRef &OGLRef = *this->ref; @@ -2692,7 +2687,7 @@ Render3DError OpenGLRenderer_1_2::EndRender(const u64 frameCount) { //needs to happen before endgl because it could free some textureids for expired cache items - texCache.Evict(TEXCACHE_MAX_SIZE); + texCache.Evict(); this->ReadBackPixels(); @@ -2953,54 +2948,44 @@ return OGLERROR_NOERR; } + + OpenGLTexture *theTexture = (OpenGLTexture *)texCache.GetTexture(thePoly.texParam, thePoly.texPalette); + 
const bool isNewTexture = (theTexture == NULL); - TexCacheItem *theTexture = texCache.GetTexture(thePoly.texParam, thePoly.texPalette); + if (isNewTexture) + { + theTexture = new OpenGLTexture(thePoly.texParam, thePoly.texPalette); + texCache.Add(theTexture); + } + const NDSTextureFormat packFormat = theTexture->GetPackFormat(); + // Enable textures if they weren't already enabled if (this->isShaderSupported) { glUniform1i(OGLRef.uniformPolyEnableTexture, GL_TRUE); - glUniform1i(OGLRef.uniformTexSingleBitAlpha, (theTexture->packFormat != TEXMODE_A3I5 && theTexture->packFormat != TEXMODE_A5I3) ? GL_TRUE : GL_FALSE); - glUniform2f(OGLRef.uniformPolyTexScale, theTexture->invSizeX, theTexture->invSizeY); + glUniform1i(OGLRef.uniformTexSingleBitAlpha, (packFormat != TEXMODE_A3I5 && packFormat != TEXMODE_A5I3) ? GL_TRUE : GL_FALSE); + glUniform2f(OGLRef.uniformPolyTexScale, theTexture->GetInvWidth(), theTexture->GetInvHeight()); } else { glEnable(GL_TEXTURE_2D); glMatrixMode(GL_TEXTURE); glLoadIdentity(); - glScalef(theTexture->invSizeX, theTexture->invSizeY, 1.0f); + glScalef(theTexture->GetInvWidth(), theTexture->GetInvHeight(), 1.0f); } - if (theTexture->unpackFormat != TexFormat_32bpp) + glBindTexture(GL_TEXTURE_2D, theTexture->GetID()); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? OGLRef.stateTexMirroredRepeat : GL_REPEAT) : GL_CLAMP_TO_EDGE)); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? OGLRef.stateTexMirroredRepeat : GL_REPEAT) : GL_CLAMP_TO_EDGE)); + + if (theTexture->IsLoadNeeded()) { - //has the ogl renderer initialized the texture? 
- const bool isNewTexture = (theTexture->GetDeleteCallback() == NULL); - if (isNewTexture) - { - theTexture->SetDeleteCallback(&OGLTextureDeleteCallback, this, NULL); - - if (OGLRef.freeTextureIDs.empty()) - { - this->ExpandFreeTextures(); - } - - theTexture->texid = (u32)OGLRef.freeTextureIDs.front(); - OGLRef.freeTextureIDs.pop(); - - theTexture->unpackSize = theTexture->GetUnpackSizeUsingFormat(TexFormat_32bpp); - texCache.cache_size += theTexture->unpackSize; - } - theTexture->Unpack<TexFormat_32bpp>((u32 *)this->_workingTextureUnpackBuffer); - glBindTexture(GL_TEXTURE_2D, (GLuint)theTexture->texid); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? OGLRef.stateTexMirroredRepeat : GL_REPEAT) : GL_CLAMP_TO_EDGE)); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? OGLRef.stateTexMirroredRepeat : GL_REPEAT) : GL_CLAMP_TO_EDGE)); - - const NDSTextureFormat texFormat = theTexture->GetTextureFormat(); const u32 *textureSrc = (u32 *)this->_workingTextureUnpackBuffer; - size_t texWidth = theTexture->sizeX; - size_t texHeight = theTexture->sizeY; + size_t texWidth = theTexture->GetWidth(); + size_t texHeight = theTexture->GetHeight(); if (this->_textureDeposterizeDstSurface.Surface != NULL) { @@ -3031,17 +3016,17 @@ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); - this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); + this->TextureUpscale<2>(packFormat, textureSrc, texWidth, texHeight); if (isNewTexture) { glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, theTexture->sizeX, theTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, theTexture->GetWidth(), theTexture->GetHeight(), 0, GL_RGBA, 
GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } else { glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, theTexture->sizeX, theTexture->sizeY, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, theTexture->GetWidth(), theTexture->GetHeight(), GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } break; } @@ -3051,29 +3036,29 @@ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 2); - this->TextureUpscale<4>(texFormat, textureSrc, texWidth, texHeight); + this->TextureUpscale<4>(packFormat, textureSrc, texWidth, texHeight); if (isNewTexture) { glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - texWidth = theTexture->sizeX; - texHeight = theTexture->sizeY; - this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); + texWidth = theTexture->GetWidth(); + texHeight = theTexture->GetHeight(); + this->TextureUpscale<2>(packFormat, textureSrc, texWidth, texHeight); glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - glTexImage2D(GL_TEXTURE_2D, 2, GL_RGBA, theTexture->sizeX, theTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + glTexImage2D(GL_TEXTURE_2D, 2, GL_RGBA, theTexture->GetWidth(), theTexture->GetHeight(), 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } else { glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - texWidth = theTexture->sizeX; - texHeight = theTexture->sizeY; - this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); + texWidth = theTexture->GetWidth(); + texHeight = theTexture->GetHeight(); + this->TextureUpscale<2>(packFormat, textureSrc, 
texWidth, texHeight); glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - glTexSubImage2D(GL_TEXTURE_2D, 2, 0, 0, theTexture->sizeX, theTexture->sizeY, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + glTexSubImage2D(GL_TEXTURE_2D, 2, 0, 0, theTexture->GetWidth(), theTexture->GetHeight(), GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } break; } @@ -3095,12 +3080,10 @@ glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.0f); } } - else - { - //otherwise, just bind it - glBindTexture(GL_TEXTURE_2D, (GLuint)theTexture->texid); - } + theTexture->ResetCacheAge(); + theTexture->IncreaseCacheUsageCount(1); + return OGLERROR_NOERR; } @@ -3867,7 +3850,6 @@ INFO("OpenGL: Multisampled FBOs are unsupported. Multisample antialiasing will be disabled.\n"); } - this->InitTextures(); this->InitFinalRenderStates(&oglExtensionSet); // This must be done last return OGLERROR_NOERR; @@ -4644,42 +4626,32 @@ return OGLERROR_NOERR; } - TexCacheItem *theTexture = texCache.GetTexture(thePoly.texParam, thePoly.texPalette); + OpenGLTexture *theTexture = (OpenGLTexture *)texCache.GetTexture(thePoly.texParam, thePoly.texPalette); + const bool isNewTexture = (theTexture == NULL); + if (isNewTexture) + { + theTexture = new OpenGLTexture(thePoly.texParam, thePoly.texPalette); + texCache.Add(theTexture); + } + + const NDSTextureFormat packFormat = theTexture->GetPackFormat(); + glUniform1i(OGLRef.uniformPolyEnableTexture, GL_TRUE); - glUniform1i(OGLRef.uniformTexSingleBitAlpha, (theTexture->packFormat != TEXMODE_A3I5 && theTexture->packFormat != TEXMODE_A5I3) ? GL_TRUE : GL_FALSE); - glUniform2f(OGLRef.uniformPolyTexScale, theTexture->invSizeX, theTexture->invSizeY); + glUniform1i(OGLRef.uniformTexSingleBitAlpha, (packFormat != TEXMODE_A3I5 && packFormat != TEXMODE_A5I3) ? 
GL_TRUE : GL_FALSE); + glUniform2f(OGLRef.uniformPolyTexScale, theTexture->GetInvWidth(), theTexture->GetInvHeight()); - if (theTexture->unpackFormat != TexFormat_32bpp) + glBindTexture(GL_TEXTURE_2D, theTexture->GetID()); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); + + if (theTexture->IsLoadNeeded()) { - //has the ogl renderer initialized the texture? - const bool isNewTexture = (theTexture->GetDeleteCallback() == NULL); - if (isNewTexture) - { - theTexture->SetDeleteCallback(&OGLTextureDeleteCallback, this, NULL); - - if (OGLRef.freeTextureIDs.empty()) - { - this->ExpandFreeTextures(); - } - - theTexture->texid = (u32)OGLRef.freeTextureIDs.front(); - OGLRef.freeTextureIDs.pop(); - - theTexture->unpackSize = theTexture->GetUnpackSizeUsingFormat(TexFormat_32bpp); - texCache.cache_size += theTexture->unpackSize; - } - theTexture->Unpack<TexFormat_32bpp>((u32 *)this->_workingTextureUnpackBuffer); - glBindTexture(GL_TEXTURE_2D, (GLuint)theTexture->texid); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? 
GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); - - const NDSTextureFormat texFormat = theTexture->GetTextureFormat(); const u32 *textureSrc = (u32 *)this->_workingTextureUnpackBuffer; - size_t texWidth = theTexture->sizeX; - size_t texHeight = theTexture->sizeY; + size_t texWidth = theTexture->GetWidth(); + size_t texHeight = theTexture->GetHeight(); if (this->_textureDeposterizeDstSurface.Surface != NULL) { @@ -4710,15 +4682,17 @@ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); + this->TextureUpscale<2>(packFormat, textureSrc, texWidth, texHeight); + if (isNewTexture) { glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, theTexture->sizeX, theTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, theTexture->GetWidth(), theTexture->GetHeight(), 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } else { glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, theTexture->sizeX, theTexture->sizeY, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, theTexture->GetWidth(), theTexture->GetHeight(), GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } break; } @@ -4728,29 +4702,29 @@ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 2); - this->TextureUpscale<4>(texFormat, textureSrc, texWidth, texHeight); + this->TextureUpscale<4>(packFormat, textureSrc, texWidth, texHeight); if (isNewTexture) { glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - texWidth = theTexture->sizeX; - texHeight = theTexture->sizeY; - 
this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); + texWidth = theTexture->GetWidth(); + texHeight = theTexture->GetHeight(); + this->TextureUpscale<2>(packFormat, textureSrc, texWidth, texHeight); glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - glTexImage2D(GL_TEXTURE_2D, 2, GL_RGBA, theTexture->sizeX, theTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + glTexImage2D(GL_TEXTURE_2D, 2, GL_RGBA, theTexture->GetWidth(), theTexture->GetHeight(), 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } else { glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - texWidth = theTexture->sizeX; - texHeight = theTexture->sizeY; - this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); + texWidth = theTexture->GetWidth(); + texHeight = theTexture->GetHeight(); + this->TextureUpscale<2>(packFormat, textureSrc, texWidth, texHeight); glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - glTexSubImage2D(GL_TEXTURE_2D, 2, 0, 0, theTexture->sizeX, theTexture->sizeY, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + glTexSubImage2D(GL_TEXTURE_2D, 2, 0, 0, theTexture->GetWidth(), theTexture->GetHeight(), GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } break; } @@ -4772,12 +4746,10 @@ glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.0f); } } - else - { - //otherwise, just bind it - glBindTexture(GL_TEXTURE_2D, (GLuint)theTexture->texid); - } + theTexture->ResetCacheAge(); + theTexture->IncreaseCacheUsageCount(1); + return OGLERROR_NOERR; } Modified: trunk/desmume/src/OGLRender.h =================================================================== --- trunk/desmume/src/OGLRender.h 2016-11-11 02:34:02 UTC (rev 5575) +++ trunk/desmume/src/OGLRender.h 2016-11-23 20:41:07 UTC 
(rev 5576) @@ -24,6 +24,7 @@ #include <set> #include <string> #include "render3D.h" +#include "texcache.h" #include "types.h" #ifndef OGLRENDER_3_2_H @@ -491,9 +492,6 @@ GLuint vaoGeometryStatesID; GLuint vaoPostprocessStatesID; - // Textures - std::queue<GLuint> freeTextureIDs; - // Client-side Buffers GLfloat *color4fBuffer; GLushort *vertIndexBuffer; @@ -526,8 +524,6 @@ extern const GLfloat PostprocessVtxBuffer[16]; extern const GLubyte PostprocessElementBuffer[6]; -extern void OGLTextureDeleteCallback(TexCacheItem *texItem, void *param1, void *param2); - //This is called by OGLRender whenever it initializes. //Platforms, please be sure to set this up. //return true if you successfully init. @@ -559,6 +555,23 @@ bool IsVersionSupported(unsigned int checkVersionMajor, unsigned int checkVersionMinor, unsigned int checkVersionRevision); +class OpenGLTexture : public TextureStore +{ +private: + GLuint _texID; + GLfloat _invSizeS; + GLfloat _invSizeT; + +public: + OpenGLTexture(); + OpenGLTexture(u32 texAttributes, u32 palAttributes); + virtual ~OpenGLTexture(); + + GLuint GetID() const; + GLfloat GetInvWidth() const; + GLfloat GetInvHeight() const; +}; + #if defined(ENABLE_SSE2) class OpenGLRenderer : public Render3D_SSE2 #else @@ -609,7 +622,6 @@ virtual void DestroyGeometryProgram() = 0; virtual Render3DError CreateVAOs() = 0; virtual void DestroyVAOs() = 0; - virtual Render3DError InitTextures() = 0; virtual Render3DError InitFinalRenderStates(const std::set<std::string> *oglExtensionSet) = 0; virtual Render3DError InitTables() = 0; virtual Render3DError InitPostprocessingPrograms(const std::string &edgeMarkVtxShader, @@ -635,7 +647,6 @@ virtual Render3DError UploadClearImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer) = 0; virtual void GetExtensionSet(std::set<std::string> *oglExtensionSet) = 0; - virtual Render3DError ExpandFreeTextures() = 0; virtual Render3DError 
EnableVertexAttributes() = 0; virtual Render3DError DisableVertexAttributes() = 0; virtual Render3DError DownsampleFBO() = 0; @@ -648,7 +659,6 @@ virtual ~OpenGLRenderer(); virtual Render3DError InitExtensions() = 0; - virtual Render3DError DeleteTexture(const TexCacheItem *item) = 0; bool IsExtensionPresent(const std::set<std::string> *oglExtensionSet, const std::string extensionName) const; bool ValidateShaderCompile(GLuint theShader) const; @@ -673,7 +683,6 @@ virtual void DestroyMultisampledFBO(); virtual Render3DError CreateVAOs(); virtual void DestroyVAOs(); - virtual Render3DError InitTextures(); virtual Render3DError InitFinalRenderStates(const std::set<std::string> *oglExtensionSet); virtual Render3DError InitTables(); @@ -702,7 +711,6 @@ virtual Render3DError UploadClearImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer); virtual void GetExtensionSet(std::set<std::string> *oglExtensionSet); - virtual Render3DError ExpandFreeTextures(); virtual Render3DError EnableVertexAttributes(); virtual Render3DError DisableVertexAttributes(); virtual Render3DError DownsampleFBO(); @@ -729,8 +737,6 @@ virtual Render3DError Reset(); virtual Render3DError RenderFinish(); virtual Render3DError SetFramebufferSize(size_t w, size_t h); - - virtual Render3DError DeleteTexture(const TexCacheItem *item); }; class OpenGLRenderer_1_3 : public OpenGLRenderer_1_2 Modified: trunk/desmume/src/OGLRender_3_2.cpp =================================================================== --- trunk/desmume/src/OGLRender_3_2.cpp 2016-11-11 02:34:02 UTC (rev 5575) +++ trunk/desmume/src/OGLRender_3_2.cpp 2016-11-23 20:41:07 UTC (rev 5576) @@ -609,7 +609,6 @@ } } - this->InitTextures(); this->InitFinalRenderStates(&oglExtensionSet); // This must be done last return OGLERROR_NOERR; @@ -1686,7 +1685,6 @@ Render3DError OpenGLRenderer_3_2::SetupTexture(const POLY &thePoly, bool enableTexturing) { - OGLRenderRef 
&OGLRef = *this->ref; const PolygonTexParams params = thePoly.getTexParams(); // Check if we need to use textures @@ -1695,37 +1693,27 @@ return OGLERROR_NOERR; } - TexCacheItem *theTexture = texCache.GetTexture(thePoly.texParam, thePoly.texPalette); - if (theTexture->unpackFormat != TexFormat_32bpp) + OpenGLTexture *theTexture = (OpenGLTexture *)texCache.GetTexture(thePoly.texParam, thePoly.texPalette); + const bool isNewTexture = (theTexture == NULL); + + if (isNewTexture) { - //has the ogl renderer initialized the texture? - const bool isNewTexture = (theTexture->GetDeleteCallback() == NULL); - if (isNewTexture) - { - theTexture->SetDeleteCallback(&OGLTextureDeleteCallback, this, NULL); - - if (OGLRef.freeTextureIDs.empty()) - { - this->ExpandFreeTextures(); - } - - theTexture->texid = (u32)OGLRef.freeTextureIDs.front(); - OGLRef.freeTextureIDs.pop(); - - theTexture->unpackSize = theTexture->GetUnpackSizeUsingFormat(TexFormat_32bpp); - texCache.cache_size += theTexture->unpackSize; - } - + theTexture = new OpenGLTexture(thePoly.texParam, thePoly.texPalette); + texCache.Add(theTexture); + } + + glBindTexture(GL_TEXTURE_2D, theTexture->GetID()); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); + + if (theTexture->IsLoadNeeded()) + { theTexture->Unpack<TexFormat_32bpp>((u32 *)this->_workingTextureUnpackBuffer); - glBindTexture(GL_TEXTURE_2D, (GLuint)theTexture->texid); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? 
GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); - - const NDSTextureFormat texFormat = theTexture->GetTextureFormat(); const u32 *textureSrc = (u32 *)this->_workingTextureUnpackBuffer; - size_t texWidth = theTexture->sizeX; - size_t texHeight = theTexture->sizeY; + const NDSTextureFormat packFormat = theTexture->GetPackFormat(); + size_t texWidth = theTexture->GetWidth(); + size_t texHeight = theTexture->GetHeight(); if (this->_textureDeposterizeDstSurface.Surface != NULL) { @@ -1756,17 +1744,17 @@ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); - this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); + this->TextureUpscale<2>(packFormat, textureSrc, texWidth, texHeight); if (isNewTexture) { glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, theTexture->sizeX, theTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, theTexture->GetWidth(), theTexture->GetHeight(), 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } else { glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, theTexture->sizeX, theTexture->sizeY, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, theTexture->GetWidth(), theTexture->GetHeight(), GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } break; } @@ -1776,29 +1764,29 @@ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 2); - this->TextureUpscale<4>(texFormat, textureSrc, texWidth, texHeight); + this->TextureUpscale<4>(packFormat, textureSrc, texWidth, texHeight); if (isNewTexture) { glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, 
GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - texWidth = theTexture->sizeX; - texHeight = theTexture->sizeY; - this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); + texWidth = theTexture->GetWidth(); + texHeight = theTexture->GetHeight(); + this->TextureUpscale<2>(packFormat, textureSrc, texWidth, texHeight); glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - glTexImage2D(GL_TEXTURE_2D, 2, GL_RGBA, theTexture->sizeX, theTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + glTexImage2D(GL_TEXTURE_2D, 2, GL_RGBA, theTexture->GetWidth(), theTexture->GetHeight(), 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } else { glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - texWidth = theTexture->sizeX; - texHeight = theTexture->sizeY; - this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); + texWidth = theTexture->GetWidth(); + texHeight = theTexture->GetHeight(); + this->TextureUpscale<2>(packFormat, textureSrc, texWidth, texHeight); glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - glTexSubImage2D(GL_TEXTURE_2D, 2, 0, 0, theTexture->sizeX, theTexture->sizeY, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + glTexSubImage2D(GL_TEXTURE_2D, 2, 0, 0, theTexture->GetWidth(), theTexture->GetHeight(), GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } break; } @@ -1820,12 +1808,10 @@ glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.0f); } } - else - { - //otherwise, just bind it - glBindTexture(GL_TEXTURE_2D, (GLuint)theTexture->texid); - } + theTexture->ResetCacheAge(); + theTexture->IncreaseCacheUsageCount(1); + return OGLERROR_NOERR; } Modified: trunk/desmume/src/rasterize.cpp =================================================================== 
--- trunk/desmume/src/rasterize.cpp 2016-11-11 02:34:02 UTC (rev 5575) +++ trunk/desmume/src/rasterize.cpp 2016-11-23 20:41:07 UTC (rev 5576) @@ -54,7 +54,6 @@ #include "matrix.h" #include "render3D.h" #include "gfx3d.h" -#include "texcache.h" #include "MMU.h" #include "NDSSystem.h" #include "utils/task.h" @@ -331,7 +330,7 @@ { protected: SoftRasterizerRenderer *_softRender; - TexCacheItem *lastTexKey; + SoftRasterizerTexture *lastTexKey; VERT* verts[MAX_CLIPPED_VERTS]; int polynum; @@ -351,19 +350,16 @@ int width, height; s32 wmask, hmask; int wrap; - int wshift; - int texFormat; - void setup(u32 texParam) + void setup(SoftRasterizerTexture *theTexture, u32 texParam) { - texFormat = (texParam>>26)&7; - wshift = ((texParam>>20)&0x07) + 3; - width=(1 << wshift); - height=(8 << ((texParam>>23)&0x07)); - wmask = width-1; - hmask = height-1; + width = theTexture->GetRenderWidth(); + height = theTexture->GetRenderHeight(); + wmask = theTexture->GetRenderWidthMask(); + hmask = theTexture->GetRenderHeightMask(); + wrap = (texParam>>16)&0xF; - enabled = gfx3d.renderState.enableTexturing && (texFormat!=0); + enabled = gfx3d.renderState.enableTexturing && (theTexture->GetPackFormat() != TEXMODE_NONE); } FORCEINLINE void clamp(s32 &val, const int size, const s32 sizemask) @@ -461,7 +457,10 @@ sampler.dowrap(iu, iv); FragmentColor color; - color.color = lastTexKey->unpackData[(iv<<sampler.wshift)+iu]; + const u32 *textureData = lastTexKey->GetUnpackData(); + + color.color = textureData[( iv << lastTexKey->GetRenderWidthShift() ) + iu]; + return color; } @@ -1006,15 +1005,15 @@ const size_t dstWidth = this->_softRender->GetFramebufferWidth(); const size_t dstHeight = this->_softRender->GetFramebufferHeight(); - lastTexKey = NULL; - const GFX3D_Clipper::TClippedPoly &firstClippedPoly = this->_softRender->clippedPolys[0]; const POLY &firstPoly = *firstClippedPoly.poly; PolygonAttributes polyAttr = firstPoly.getAttributes(); u32 lastPolyAttr = firstPoly.polyAttr; u32 lastTexParams 
= firstPoly.texParam; u32 lastTexPalette = firstPoly.texPalette; - sampler.setup(firstPoly.texParam); + + lastTexKey = this->_softRender->polyTexKeys[0]; + sampler.setup(lastTexKey, firstPoly.texParam); //iterate over polys for (size_t i = 0; i < polyCount; i++) @@ -1035,13 +1034,15 @@ if (lastTexParams != thePoly.texParam || lastTexPalette != thePoly.texPalette) { - sampler.setup(thePoly.texParam); lastTexParams = thePoly.texParam; lastTexPalette = thePoly.texPalette; + + lastTexKey = this->_softRender->polyTexKeys[i]; + sampler.setup(lastTexKey, thePoly.texParam); + lastTexKey->ResetCacheAge(); + lastTexKey->IncreaseCacheUsageCount(1); } - lastTexKey = this->_softRender->polyTexKeys[i]; - for (int j = 0; j < type; j++) this->verts[j] = &clippedPoly.clipVerts[j]; for (int j = type; j < MAX_CLIPPED_VERTS; j++) @@ -1147,12 +1148,60 @@ } } -void SoftRasterizerTextureDeleteCallback(TexCacheItem *texItem, void *param1, void *param2) +SoftRasterizerTexture::SoftRasterizerTexture(u32 texAttributes, u32 palAttributes) : TextureStore(texAttributes, palAttributes) { - free_aligned(texItem->unpackData); - texCache.cache_size -= texItem->unpackSize; + _cacheSize = GetUnpackSizeUsingFormat(TexFormat_15bpp); + _unpackData = (u32 *)malloc_alignedCacheLine(_cacheSize); + _renderWidth = _sizeS; + _renderHeight = _sizeT; + _renderWidthMask = _renderWidth - 1; + _renderHeightMask = _renderHeight - 1; + + _renderWidthShift = 0; + + u32 tempWidth = _renderWidth; + while ( (tempWidth & 1) == 0) + { + tempWidth >>= 1; + _renderWidthShift++; + } } +SoftRasterizerTexture::~SoftRasterizerTexture() +{ + free_aligned(this->_unpackData); +} + +u32* SoftRasterizerTexture::GetUnpackData() +{ + return this->_unpackData; +} + +u32 SoftRasterizerTexture::GetRenderWidth() const +{ + return this->_renderWidth; +} + +u32 SoftRasterizerTexture::GetRenderHeight() const +{ + return this->_renderHeight; +} + +u32 SoftRasterizerTexture::GetRenderWidthMask() const +{ + return this->_renderWidthMask; +} + 
+u32 SoftRasterizerTexture::GetRenderHeightMask() const +{ + return this->_renderHeightMask; +} + +u32 SoftRasterizerTexture::GetRenderWidthShift() const +{ + return this->_renderWidthShift; +} + GPU3DInterface gpu3DRasterize = { "SoftRasterizer", SoftRasterizerRendererCreate, @@ -1380,21 +1429,18 @@ u32 lastTexParams = firstPoly.texParam; u32 lastTexPalette = firstPoly.texPalette; - TexCacheItem *lastTexItem = texCache.GetTexture(firstPoly.texParam, firstPoly.texPalette); - if (lastTexItem->unpackFormat != TexFormat_15bpp) + SoftRasterizerTexture *lastTexItem = (SoftRasterizerTexture *)texCache.GetTexture(firstPoly.texParam, firstPoly.texPalette); + if (lastTexItem == NULL) { - const bool isNewTexture = (lastTexItem->GetDeleteCallback() == NULL); - if (isNewTexture) - { - lastTexItem->SetDeleteCallback(&SoftRasterizerTextureDeleteCallback, this, NULL); - lastTexItem->unpackSize = lastTexItem->GetUnpackSizeUsingFormat(TexFormat_15bpp); - lastTexItem->unpackData = (u32 *)malloc_alignedCacheLine(lastTexItem->unpackSize); - texCache.cache_size += lastTexItem->unpackSize; - } - - lastTexItem->Unpack<TexFormat_15bpp>(lastTexItem->unpackData); + lastTexItem = new SoftRasterizerTexture(firstPoly.texParam, firstPoly.texPalette); + texCache.Add(lastTexItem); } + if (lastTexItem->IsLoadNeeded()) + { + lastTexItem->Unpack<TexFormat_15bpp>(lastTexItem->GetUnpackData()); + } + for (size_t i = 0; i < this->_clippedPolyCount; i++) { const GFX3D_Clipper::TClippedPoly &clippedPoly = clippedPolys[i]; @@ -1406,21 +1452,18 @@ //and then it won't be safe. 
if (lastTexParams != thePoly.texParam || lastTexPalette != thePoly.texPalette) { - lastTexItem = texCache.GetTexture(thePoly.texParam, thePoly.texPalette); - if (lastTexItem->unpackFormat != TexFormat_15bpp) + lastTexItem = (SoftRasterizerTexture *)texCache.GetTexture(thePoly.texParam, thePoly.texPalette); + if (lastTexItem == NULL) { - const bool isNewTexture = (lastTexItem->GetDeleteCallback() == NULL); - if (isNewTexture) - { - lastTexItem->SetDeleteCallback(&SoftRasterizerTextureDeleteCallback, this, NULL); - lastTexItem->unpackSize = lastTexItem->GetUnpackSizeUsingFormat(TexFormat_15bpp); - lastTexItem->unpackData = (u32 *)malloc_alignedCacheLine(lastTexItem->unpackSize); - texCache.cache_size += lastTexItem->unpackSize; - } - - lastTexItem->Unpack<TexFormat_15bpp>(lastTexItem->unpackData); + lastTexItem = new SoftRasterizerTexture(thePoly.texParam, thePoly.texPalette); + texCache.Add(lastTexItem); } + if (lastTexItem->IsLoadNeeded()) + { + lastTexItem->Unpack<TexFormat_15bpp>(lastTexItem->GetUnpackData()); + } + lastTexParams = thePoly.texParam; lastTexPalette = thePoly.texPalette; } @@ -1571,7 +1614,7 @@ { rasterizerUnit[0].mainLoop<false>(); this->_renderGeometryNeedsFinish = false; - texCache.Evict(TEXCACHE_MAX_SIZE); // Since we're finishing geometry rendering here and now, also check the texture cache now. + texCache.Evict(); // Since we're finishing geometry rendering here and now, also check the texture cache now. } // printf("rendered %d of %d polys after backface culling\n",gfx3d.polylist->count-culled,gfx3d.polylist->count); @@ -1981,7 +2024,7 @@ } // Now that geometry rendering is finished on all threads, check the texture cache. - texCache.Evict(TEXCACHE_MAX_SIZE); + texCache.Evict(); // Do multithreaded post-processing. 
if (this->currentRenderState->enableEdgeMarking || this->currentRenderState->enableFog) Modified: trunk/desmume/src/rasterize.h =================================================================== --- trunk/desmume/src/rasterize.h 2016-11-11 02:34:02 UTC (rev 5575) +++ trunk/desmume/src/rasterize.h 2016-11-23 20:41:07 UTC (rev 5576) @@ -20,6 +20,7 @@ #include "render3D.h" #include "gfx3d.h" +#include "texcache.h" #define SOFTRASTERIZER_DEPTH_EQUAL_TEST_TOLERANCE 0x200 @@ -39,6 +40,28 @@ bool fogAlphaOnly; }; +class SoftRasterizerTexture : public TextureStore +{ +protected: + u32 *_unpackData; + u32 _renderWidth; + u32 _renderHeight; + u32 _renderWidthMask; + u32 _renderHeightMask; + u32 _renderWidthShift; + +public: + SoftRasterizerTexture(u32 texAttributes, u32 palAttributes); + virtual ~SoftRasterizerTexture(); + + u32* GetUnpackData(); + u32 GetRenderWidth() const; + u32 GetRenderHeight() const; + u32 GetRenderWidthMask() const; + u32 GetRenderHeightMask() const; + u32 GetRenderWidthShift() const; +}; + #if defined(ENABLE_SSE2) class SoftRasterizerRenderer : public Render3D_SSE2 #else @@ -75,7 +98,7 @@ FragmentColor toonColor32LUT[32]; GFX3D_Clipper::TClippedPoly *clippedPolys; FragmentAttributesBuffer *_framebufferAttributes; - TexCacheItem *polyTexKeys[POLYLIST_SIZE]; + SoftRasterizerTexture *polyTexKeys[POLYLIST_SIZE]; bool polyVisible[POLYLIST_SIZE]; bool polyBackfacing[POLYLIST_SIZE]; GFX3D_State *currentRenderState; Modified: trunk/desmume/src/texcache.cpp =================================================================== --- trunk/desmume/src/texcache.cpp 2016-11-11 02:34:02 UTC (rev 5575) +++ trunk/desmume/src/texcache.cpp 2016-11-23 20:41:07 UTC (rev 5576) @@ -195,241 +195,210 @@ return ret; } -TexCache texCache; +static bool TextureLRUCompare(TextureStore *tex1, TextureStore *tex2) +{ + const size_t cacheAge1 = tex1->GetCacheAge(); + const size_t cacheAge2 = tex2->GetCacheAge(); + + if (cacheAge1 == cacheAge2) + { + return ( tex1->GetCacheUseCount() > 
tex2->GetCacheUseCount() ); + } + + return (cacheAge1 < cacheAge2); +} -TexCache::TexCache() +TextureCache texCache; + +TextureCache::TextureCache() { - cacheTable.clear(); - cache_size = 0; - memset(paletteDump, 0, sizeof(paletteDump)); + _texCacheMap.clear(); + _texCacheList.reserve(4096); + _actualCacheSize = 0; + _cacheSizeThreshold = TEXCACHE_DEFAULT_THRESHOLD; + memset(_paletteDump, 0, sizeof(_paletteDump)); } -void TexCache::Invalidate() +size_t TextureCache::GetActualCacheSize() const { + return this->_actualCacheSize; +} + +size_t TextureCache::GetCacheSizeThreshold() const +{ + return this->_cacheSizeThreshold; +} + +void TextureCache::SetCacheSizeThreshold(size_t newThreshold) +{ + this->_cacheSizeThreshold = newThreshold; +} + +void TextureCache::Invalidate() +{ //check whether the palette memory changed //TODO - we should handle this instead by setting dirty flags in the vram memory mapping and noting whether palette memory was dirty. //but this will work for now MemSpan mspal = MemSpan_TexPalette(0, PALETTE_DUMP_SIZE, true); - bool paletteDirty = mspal.memcmp(this->paletteDump); + const bool paletteDirty = mspal.memcmp(this->_paletteDump); if (paletteDirty) { - mspal.dump(this->paletteDump); + mspal.dump(this->_paletteDump); } - for (TexCacheTable::iterator it(this->cacheTable.begin()); it != this->cacheTable.end(); ++it) + for (TextureCacheMap::iterator it(this->_texCacheMap.begin()); it != this->_texCacheMap.end(); ++it) { - it->second->suspectedInvalid = true; + it->second->SetSuspectedInvalid(); //when the palette changes, we assume all 4x4 textures are dirty. 
//this is because each 4x4 item doesnt carry along with it a copy of the entire palette, for verification //instead, we just use the one paletteDump for verifying of all 4x4 textures; and if paletteDirty is set, verification has failed - if( (it->second->GetTextureFormat() == TEXMODE_4X4) && paletteDirty ) + if( (it->second->GetPackFormat() == TEXMODE_4X4) && paletteDirty ) { - it->second->assumedInvalid = true; + it->second->SetAssumedInvalid(); } } } -void TexCache::Evict(size_t target) +void TextureCache::Evict() { //debug print //printf("%d %d/%d\n",index.size(),cache_size/1024,target/1024); //dont do anything unless we're over the target - if (cache_size < target) return; + if (this->_actualCacheSize <= this->_cacheSizeThreshold) + { + for (size_t i = 0; i < this->_texCacheList.size(); i++) + { + this->_texCacheList[i]->IncreaseCacheAge(1); + } + + return; + } //aim at cutting the cache to half of the max size - target /= 2; + size_t targetCacheSize = this->_cacheSizeThreshold / 2; - //evicts items in an arbitrary order until it is less than the max cache size - //TODO - do this based on age and not arbitrarily - while (this->cache_size > target) + // Sort the textures in cache by age and usage count. Textures that we want to keep in + // cache are placed in the front of the list, while textures we want to evict are sorted + // to the back of the list. + std::sort(this->_texCacheList.begin(), this->_texCacheList.end(), &TextureLRUCompare); + + while (this->_actualCacheSize > targetCacheSize) { - if (this->cacheTable.size() == 0) break; //just in case.. doesnt seem possible, cache_size wouldve been 0 + if (this->_texCacheMap.size() == 0) break; //just in case.. 
doesnt seem possible, cache_size wouldve been 0 - TexCacheItem *item = this->cacheTable.begin()->second; - const TexCacheKey key = TexCache::GenerateKey(item->textureAttributes, item->paletteAttributes); - this->cacheTable.erase(key); + TextureStore *item = this->_texCacheList.back(); + this->Remove(item); + this->_texCacheList.pop_back(); //printf("evicting! totalsize:%d\n",cache_size); delete item; } + + for (size_t i = 0; i < this->_texCacheList.size(); i++) + { + this->_texCacheList[i]->IncreaseCacheAge(1); + } } -void TexCache::Reset() +void TextureCache::Reset() { - for (TexCacheTable::iterator it(this->cacheTable.begin()); it != this->cacheTable.end(); ++it) + for (size_t i = 0; i < this->_texCacheList.size(); i++) { - TexCacheItem *item = it->second; - delete item; + delete this->_texCacheList[i]; } - this->cacheTable.clear(); - this->cache_size = 0; - memset(this->paletteDump, 0, sizeof(paletteDump)); + this->_texCacheMap.clear(); + this->_texCacheList.clear(); + this->_actualCacheSize = 0; + memset(this->_paletteDump, 0, sizeof(this->_paletteDump)); } -TexCacheItem* TexCache::GetTexture(u32 texAttributes, u32 palAttributes) +TextureStore* TextureCache::GetTexture(u32 texAttributes, u32 palAttributes) { - TexCacheItem *theTexture = NULL; - bool didCreateNewTexture = false; - bool needLoadTexData = false; - bool needLoadPalette = false; + TextureStore *theTexture = NULL; + const TextureCacheKey key = TextureCache::GenerateKey(texAttributes, palAttributes); + const TextureCacheMap::iterator cachedTexture = this->_texCacheMap.find(key); - //conditions where we reject matches: - //when the teximage or texpal params dont match - //(this is our key for identifying textures in the cache) - const TexCacheKey key = TexCache::GenerateKey(texAttributes, palAttributes); - const TexCacheTable::iterator cachedTexture = this->cacheTable.find(key); - - if (cachedTexture == this->cacheTable.end()) + if (cachedTexture == this->_texCacheMap.end()) { - theTexture = new 
TexCacheItem(texAttributes, palAttributes); - didCreateNewTexture = true; - needLoadTexData = true; - needLoadPalette = true; + return theTexture; } else { theTexture = cachedTexture->second; - //if the texture is assumed invalid, reject it - if (theTexture->assumedInvalid) + if (theTexture->IsAssumedInvalid()) { - needLoadTexData = true; - needLoadPalette = true; + theTexture->Update(); } - - //the texture matches params, but isnt suspected invalid. accept it. - if (!theTexture->suspectedInvalid) + else if (theTexture->IsSuspectedInvalid()) { - return theTexture; + theTexture->VRAMCompareAndUpdate(); } } - //we suspect the texture may be invalid. we need to do a byte-for-byte comparison to re-establish that it is valid: - - //dump the palette to a temp buffer, so that we don't have to worry about memory mapping. - //this isnt such a problem with texture memory, because we read sequentially from it. - //however, we read randomly from palette memory, so the mapping is more costly. - MemSpan currentPaletteMS = MemSpan_TexPalette(theTexture->paletteAddress, theTexture->paletteSize, false); - - CACHE_ALIGN u16 currentPalette[256]; -#ifdef WORDS_BIGENDIAN - currentPaletteMS.dump16(currentPalette); -#else - currentPaletteMS.dump(currentPalette); -#endif - - //when the palettes dont match: - //note that we are considering 4x4 textures to have a palette size of 0. 
- //they really have a potentially HUGE palette, too big for us to handle like a normal palette, - //so they go through a different system - if ( !didCreateNewTexture && (theTexture->paletteSize > 0) && memcmp(theTexture->paletteColorTable, currentPalette, theTexture->paletteSize) ) - { - needLoadPalette = true; - } - - //analyze the texture memory mapping and the specifications of this texture - MemSpan currentPackedTexDataMS = MemSpan_TexMem(theTexture->packAddress, theTexture->packSize); - - //when the texture data doesn't match - if ( !didCreateNewTexture && (theTexture->packSize > 0) && currentPackedTexDataMS.memcmp(theTexture->packData, theTexture->packSize) ) - { - needLoadTexData = true; - } - - //if the texture is 4x4 then the index data must match - MemSpan currentPackedTexIndexMS; - if (theTexture->packFormat == TEXMODE_4X4) - { - //determine the location for 4x4 index data - currentPackedTexIndexMS = MemSpan_TexMem(theTexture->packIndexAddress, theTexture->packIndexSize); - - if ( !didCreateNewTexture && (theTexture->packIndexSize > 0) && currentPackedTexIndexMS.memcmp(theTexture->packIndexData, theTexture->packIndexSize) ) - { - needLoadTexData = true; - needLoadPalette = true; - } - } - - if (!needLoadTexData && !needLoadPalette) - { - //we found a match. 
just return it - theTexture->suspectedInvalid = false; - return theTexture; - } - - if (needLoadTexData) - { - theTexture->SetTextureData(currentPackedTexDataMS, currentPackedTexIndexMS); - theTexture->unpackFormat = TexFormat_None; - } - - if (needLoadPalette) - { - theTexture->SetTexturePalette(currentPalette); - theTexture->unpackFormat = TexFormat_None; - } - - if (didCreateNewTexture) - { - this->cacheTable[key] = theTexture; - //printf("allocating: up to %d with %d items\n",cache_size,index.size()); - } - - theTexture->assumedInvalid = false; - theTexture->suspectedInvalid = false; return theTexture; } -TexCacheKey TexCache::GenerateKey(const u32 texAttributes, const u32 palAttributes) +void TextureCache::Add(TextureStore *texItem) { + const TextureCacheKey key = texItem->GetCacheKey(); + this->_texCacheMap[key] = texItem; + this->_texCacheList.push_back(texItem); + this->_actualCacheSize += texItem->GetCacheSize(); + //printf("allocating: up to %d with %d items\n", this->cache_size, this->cacheTable.size()); +} + +void TextureCache::Remove(TextureStore *texItem) +{ + const TextureCacheKey key = texItem->GetCacheKey(); + this->_texCacheMap.erase(key); + this->_actualCacheSize -= texItem->GetCacheSize(); +} + +TextureCacheKey TextureCache::GenerateKey(const u32 texAttributes, const u32 palAttributes) +{ // Since the repeat, flip, and coordinate transformation modes are render settings // and not data settings, we can mask out those bits to help reduce duplicate entries. 
- return (TexCacheKey)( ((u64)palAttributes << 32) | (u64)(texAttributes & 0x3FF0FFFF) ); + return (TextureCacheKey)( ((u64)palAttributes << 32) | (u64)(texAttributes & 0x3FF0FFFF) ); } -TexCacheItem::TexCacheItem() +TextureStore::TextureStore() { - _deleteCallback = NULL; - _deleteCallbackParam1 = NULL; - _deleteCallbackParam2 = NULL; + _textureAttributes = 0; + _paletteAttributes = 0; + _cacheKey = 0; - textureAttributes = 0; - paletteAttributes = 0; + _sizeS = 0; + _sizeT = 0; + _isPalZeroTransparent = false; - sizeX = 0; - sizeY = 0; - invSizeX = 0.0f; - invSizeY = 0.0f; - isPalZeroTransparent = false; + _packFormat = TEXMODE_NONE; + _packAddress = 0; + _packSize = 0; + _packData = NULL; - suspectedInvalid = false; - assumedInvalid = false; + _paletteAddress = 0; + _paletteSize = 0; + _paletteColorTable = NULL; - packFormat = TEXMODE_NONE; - packAddress = 0; - packSize = 0; - packData = NULL; + _packIndexAddress = 0; + _packIndexSize = 0; + _packIndexData = NULL; + _packSizeFirstSlot = 0; - paletteAddress = 0; - paletteSize = 0; - paletteColorTable = NULL; + _suspectedInvalid = false; + _assumedInvalid = false; + _isLoadNeeded = false; - unpackFormat = TexFormat_None; - unpackSize = 0; - unpackData = NULL; - - packIndexAddress = 0; - packIndexSize = 0; - packIndexData = NULL; - packSizeFirstSlot = 0; - - texid = 0; + _cacheSize = 0; + _cacheAge = 0; + _cacheUsageCount = 0; } -TexCacheItem::TexCacheItem(const u32 texAttributes, const u32 palAttributes) +TextureStore::TextureStore(const u32 texAttributes, const u32 palAttributes) { //for each texformat, multiplier from numtexels to numbytes (fixed point 30.2) static const u32 texSizes[] = {0, 4, 1, 2, 4, 1, 4, 8}; @@ -437,158 +406,242 @@ //for each texformat, number of palette entries static const u32 paletteSizeList[] = {0, 32, 4, 16, 256, 0, 8, 0}; - _deleteCallback = NULL; - _deleteCallbackParam1 = NULL; - _deleteCallbackParam2 = NULL; + _textureAttributes = texAttributes; + _paletteAttributes = palAttributes; 
+ _cacheKey = TextureCache::GenerateKey(texAttributes, palAttributes); - texid = 0; + _sizeS = (8 << ((texAttributes >> 20) & 0x07)); + _sizeT = (8 << ((texAttributes >> 23) & 0x07)); - textureAttributes = texAttributes; - paletteAttributes = palAttributes; + _packFormat = (NDSTextureFormat)((texAttributes >> 26) & 0x07); + _packAddress = (texAttributes & 0xFFFF) << 3; + _packSize = (_sizeS * _sizeT * texSizes[_packFormat]) >> 2; //shifted because the texSizes multiplier is fixed point - sizeX = (8 << ((texAttributes >> 20) & 0x07)); - sizeY = (8 << ((texAttributes >> 23) & 0x07)); - invSizeX = 1.0f / (float)sizeX; - invSizeY = 1.0f / (float)sizeY; - - packFormat = (NDSTextureFormat)((texAttributes >> 26) & 0x07); - packAddress = (texAttributes & 0xFFFF) << 3; - packSize = (sizeX*sizeY*texSizes[packFormat]) >> 2; //shifted because the texSizes multiplier is fixed point - packData = (u8 *)malloc_alignedCacheLine(packSize); - - if ( (packFormat == TEXMODE_I2) || (packFormat == TEXMODE_I4) || (packFormat == TEXMODE_I8) ) + if ( (_packFormat == TEXMODE_I2) || (_packFormat == TEXMODE_I4) || (_packFormat == TEXMODE_I8) ) { - isPalZeroTransparent = ( ((texAttributes >> 29) & 1) != 0 ); + _isPalZeroTransparent = ( ((texAttributes >> 29) & 1) != 0 ); } else { - isPalZeroTransparent = false; + _isPalZeroTransparent = false; } - paletteAddress = (packFormat == TEXMODE_I2) ? palAttributes << 3 : palAttributes << 4; - paletteSize = paletteSizeList[packFormat] * sizeof(u16); - paletteColorTable = (paletteSize > 0) ? (u16 *)malloc_alignedCacheLine(paletteSize) : NULL; + _paletteAddress = (_packFormat == TEXMODE_I2) ? palAttributes << 3 : palAttributes << 4; + _paletteSize = paletteSizeList[_packFormat] * sizeof(u16); - unpackFormat = TexFormat_None; - unpackSize = 0; - unpackData = NULL; - - if (packFormat == TEXMODE_4X4) + if (_packFormat == TEXMODE_4X4) { const u32 indexBase = ((texAttributes & 0xC000) == 0x8000) ? 
0x30000 : 0x20000; const u32 indexOffset = (texAttributes & 0x3FFF) << 2; - packIndexAddress = indexBase + indexOffset; - packIndexSize = (sizeX * sizeY) >> 3; - packIndexData = (u8 *)malloc_alignedCacheLine(packIndexSize); - packSizeFirstSlot = 0; + _packIndexAddress = indexBase + indexOffset; + _packIndexSize = (_sizeS * _sizeT) >> 3; + + _packData = (u8 *)malloc_alignedCacheLine(_packSize + _packIndexSize + _paletteSize); + _packIndexData = _packData + _packSize; + _paletteColorTable = (u16 *)(_packData + _packSize + _packIndexSize); + + MemSpan currentPackedTexIndexMS = MemSpan_TexMem(_packIndexAddress, _packIndexSize); + currentPackedTexIndexMS.dump(_packIndexData, _packIndexSize); } else { - packIndexAddress = 0; - packIndexSize = 0; - packIndexData = NULL; - packSizeFirstSlot = 0; + _packIndexAddress = 0; + _packIndexSize = 0; + _packIndexData = NULL; + + _packData = (u8 *)malloc_alignedCacheLine(_packSize + _paletteSize); + _packIndexData = NULL; + _paletteColorTable = (u16 *)(_packData + _packSize); } - suspectedInvalid = true; - assumedInvalid = true; + if (_paletteSize > 0) + { + MemSpan currentPaletteMS = MemSpan_TexPalette(_paletteAddress, _paletteSize, false); + +#ifdef WORDS_BIGENDIAN + currentPaletteMS.dump16(_paletteColorTable); +#else + currentPaletteMS.dump(_paletteColorTable); +#endif + } + else + { + _paletteColorTable = NULL; + } + + MemSpan currentPackedTexDataMS = MemSpan_TexMem(_packAddress, _packSize); + currentPackedTexDataMS.dump(_packData); + _packSizeFirstSlot = currentPackedTexDataMS.items[0].len; + + _suspectedInvalid = false; + _assumedInvalid = false; + _isLoadNeeded = true; + + _cacheSize = _packSize + _paletteSize + _packIndexSize; + _cacheAge = 0; + _cacheUsageCount = 0; } -TexCacheItem::~TexCacheItem() +TextureStore::~TextureStore() { - free_aligned(this->packData); - free_aligned(this->paletteColorTable); - free_aligned(this->packIndexData); - if (this->_deleteCallback != NULL) this->_deleteCallback(this, 
this->_deleteCallbackParam1, this->_deleteCallbackParam2); + free_aligned(this->_packData); } -TexCacheItemDeleteCallback TexCacheItem::GetDeleteCallback() const +u32 TextureStore::GetTextureAttributes() const { - return this->_deleteCallback; + return this->_textureAttributes; } -void TexCacheItem::SetDeleteCallback(TexCacheItemDeleteCallback callbackFunc, void *inParam1, void *inParam2) +u32 TextureStore::GetPaletteAttributes() const { - this->_deleteCallback = callbackFunc; - this->_deleteCallbackParam1 = inParam1; - this->_deleteCallbackParam2 = inParam2; + return this->_paletteAttributes; } -NDSTextureFormat TexCacheItem::GetTextureFormat() const +u32 TextureStore::GetWidth() const { - return this->packFormat; + return this->_sizeS; } -void TexCacheItem::SetTextureData(const MemSpan &packedData, const MemSpan &packedIndexData) +u32 TextureStore::GetHeight() const { + return this->_sizeT; +} + +bool TextureStore::IsPalZeroTransparent() const +{ + return this->_isPalZeroTransparent; +} + +NDSTextureFormat TextureStore::GetPackFormat() const +{ + return this->_packFormat; +} + +u32 TextureStore::GetPackAddress() const +{ + return this->_packAddress; +} + +u32 TextureStore::GetPackSize() const +{ + return this->_packSize; +} + +u8* TextureStore::GetPackData() +{ + return this->_packData; +} + +u32 TextureStore::GetPaletteAddress() const +{ + return this->_paletteAddress; +} + +u32 TextureStore::GetPaletteSize() const +{ + return this->_paletteSize; +} + +u16* TextureStore::GetPaletteColorTable() const +{ + return this->_paletteColorTable; +} + +u32 TextureStore::GetPackIndexAddress() const +{ + return this->_packIndexAddress; +} + +u32 TextureStore::GetPackIndexSize() const +{ + return this->_packIndexSize; +} + +u8* TextureStore::GetPackIndexData() +{ + return this->_packIndexData; +} + +void TextureStore::SetTextureData(const MemSpan &packedData, const MemSpan &packedIndexData) +{ //dump texture and 4x4 index data for cache keying - this->packSizeFirstSlot = 
packedData.items[0].len; + this->_packSizeFirstSlot = packedData.items[0].len; - packedData.dump(this->packData); + packedData.dump(this->_packData); - if (this->packFormat == TEXMODE_4X4) + if (this->_packFormat == TEXMODE_4X4) { - packedIndexData.dump(this->packIndexData, this->packIndexSize); + packedIndexData.dump(this->_packIndexData, this->_packIndexSize); } } -void TexCacheItem::SetTexturePalette(const u16 *paletteBuffer) +void TextureStore::SetTexturePalette(const MemSpan &packedPalette) { - if (this->paletteSize > 0) + if (this->_paletteSize > 0) { - memcpy(this->paletteColorTable, paletteBuffer, this->paletteSize); +#ifdef WORDS_BIGENDIAN + packedPalette.dump16(this->_paletteColorTable); +#else + packedPalette.dump(this->_paletteColorTable); +#endif } } -size_t TexCacheItem::GetUnpackSizeUsingFormat(const TexCache_TexFormat texCacheFormat) const +void TextureStore::SetTexturePalette(const u16 *paletteBuffer) { - return (this->sizeX * this->sizeY * sizeof(u32)); + if (this->_paletteSize > 0) + { + memcpy(this->_paletteColorTable, paletteBuffer, this->_paletteSize); + } } -template <TexCache_TexFormat TEXCACHEFORMAT> -void TexCacheItem::Unpack(u32 *unpackBuffer) +size_t TextureStore::GetUnpackSizeUsingFormat(const TextureStoreUnpackFormat texCacheFormat) const { - this->unpackFormat = TEXCACHEFORMAT; - + return (this->_sizeS * this->_sizeT * sizeof(u32)); +} + +template <TextureStoreUnpackFormat TEXCACHEFORMAT> +void TextureStore::Unpack(u32 *unpackBuffer) +{ // Whenever a 1-bit alpha or no-alpha texture is unpacked (this means any texture // format that is not A3I5 or A5I3), set all transparent pixels to 0 so that 3D // renderers can assume that the transparent color is 0 during texture sampling. 
- switch (this->packFormat) + switch (this->_packFormat) { case TEXMODE_A3I5: - NDSTextureUnpackA3I5<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, unpackBuffer); + NDSTextureUnpackA3I5<TEXCACHEFORMAT>(this->_packSize, this->_packData, this->_paletteColorTable, unpackBuffer); break; case TEXMODE_I2: - NDSTextureUnpackI2<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, this->isPalZeroTransparent, unpackBuffer); + NDSTextureUnpackI2<TEXCACHEFORMAT>(this->_packSize, this->_packData, this->_paletteColorTable, this->_isPalZeroTransparent, unpackBuffer); break; case TEXMODE_I4: - NDSTextureUnpackI4<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, this->isPalZeroTransparent, unpackBuffer); + NDSTextureUnpackI4<TEXCACHEFORMAT>(this->_packSize, this->_packData, this->_paletteColorTable, this->_isPalZeroTransparent, unpackBuffer); break; case TEXMODE_I8: - NDSTextureUnpackI8<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, this->isPalZeroTransparent, unpackBuffer); + NDSTextureUnpackI8<TEXCACHEFORMAT>(this->_packSize, this->_packData, this->_paletteColorTable, this->_isPalZeroTransparent, unpackBuffer); break; case TEXMODE_4X4: { - if (this->packSize > this->packSizeFirstSlot) + if (this->_packSize > this->_packSizeFirstSlot) { PROGINFO("Your 4x4 texture has overrun its texture slot.\n"); } - NDSTextureUnpack4x4<TEXCACHEFORMAT>(this->packSizeFirstSlot, (u32 *)this->packData, (u16 *)this->packIndexData, this->paletteAddress, this->textureAttributes, this->sizeX, this->sizeY, unpackBuffer); + NDSTextureUnpack4x4<TEXCACHEFORMAT>(this->_packSizeFirstSlot, (u32 *)this->_packData, (u16 *)this->_packIndexData, this->_paletteAddress, this->_textureAttributes, this->_sizeS, this->_sizeT, unpackBuffer); break; } case TEXMODE_A5I3: - NDSTextureUnpackA5I3<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, unpackBuffer); + 
NDSTextureUnpackA5I3<TEXCACHEFORMAT>(this->_packSize, this->_packData, this->_paletteColorTable, unpackBuffer); break; case TEXMODE_16BPP: - NDSTextureUnpackDirect16Bit<TEXCACHEFORMAT>(this->packSize, (u16 *)this->packData, unpackBuffer); + NDSTextureUnpackDirect16Bit<TEXCACHEFORMAT>(this->_packSize, (u16 *)this->_packData, unpackBuffer); break; default: @@ -598,10 +651,172 @@ #ifdef DO_DEBUG_DUMP_TEXTURE this->DebugDump(); #endif + + this->_isLoadNeeded = false; } +bool TextureStore::IsSuspectedInvalid() const +{ + return this->_suspectedInvalid; +} + +void TextureStore::SetSuspectedInvalid() +{ + this->_suspectedInvalid = true; +} + +bool TextureStore::IsAssumedInvalid() const +{ + return this->_assumedInvalid; +} + +void TextureStore::SetAssumedInvalid() +{ + this->_assumedInvalid = true; +} + +void TextureStore::SetLoadNeeded() +{ + this->_isLoadNeeded = true; +}... [truncated message content] |
From: Trek <tr...@in...> - 2016-11-15 20:49:53
|
Searching for apps using libosmesa, I found this project, but it seems to me that revision 4905 removed the OSMesa support; in osmesa_3Demu.cpp, init_osmesa_3Demu: - ctx = OSMesaCreateContext(OSMESA_RGBA, NULL); On my Debian system, with version 0.9.8, I can run with --3d-engine=2: DeSmuME 0.9.8 svnr0 dev+ Shaders aren't supported on your system, using fixed pipeline (Shaders aren't supported by OSMesa.) Successfully created OpenGL Framebuffer object (FBO) Instead, with version 0.9.10, I get: DeSmuME 0.9.10 svn0 dev+ x64-JIT OSMesaCreateContext failed! Failed to initialise openGL 3D emulation; removing 3D support c-ya! |
From: <ze...@us...> - 2016-11-11 02:34:04
|
Revision: 5575 http://sourceforge.net/p/desmume/code/5575 Author: zeromus Date: 2016-11-11 02:34:02 +0000 (Fri, 11 Nov 2016) Log Message: ----------- oops, remove unneeded printf Modified Paths: -------------- trunk/desmume/src/utils/vfat.cpp Modified: trunk/desmume/src/utils/vfat.cpp =================================================================== --- trunk/desmume/src/utils/vfat.cpp 2016-11-11 02:33:33 UTC (rev 5574) +++ trunk/desmume/src/utils/vfat.cpp 2016-11-11 02:34:02 UTC (rev 5575) @@ -197,8 +197,6 @@ if(dataSectors<36*1024*1024/512) dataSectors = 36*1024*1024/512; - printf("dataSectors: %lld\n",dataSectors); - if(dataSectors>=(0x80000000>>9)) { printf("error allocating memory for fat (%d KBytes)\n",(dataSectors*512)/1024); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ze...@us...> - 2016-11-11 02:33:36
|
Revision: 5574 http://sourceforge.net/p/desmume/code/5574 Author: zeromus Date: 2016-11-11 02:33:33 +0000 (Fri, 11 Nov 2016) Log Message: ----------- fix vfat (broken in r5438). wouldnt count size of files in subdirs and then would try adding them, eventually running out of space since it had been under-counted. Revision Links: -------------- http://sourceforge.net/p/desmume/code/5438 Modified Paths: -------------- trunk/desmume/src/utils/vfat.cpp Modified: trunk/desmume/src/utils/vfat.cpp =================================================================== --- trunk/desmume/src/utils/vfat.cpp 2016-11-10 07:02:54 UTC (rev 5573) +++ trunk/desmume/src/utils/vfat.cpp 2016-11-11 02:33:33 UTC (rev 5574) @@ -43,6 +43,9 @@ typedef void (*ListCallback)(RDIR* rdir, EListCallbackArg); // List all files and subdirectories recursively +//TODO: clunky architecture. we've combined the callbacks into one handler. +//we could merge the callback and list_files function, or refactor the callback into one for each enum which receives a unit of work after +//the more detailed recursing logic (caused by libretro-common integration) is handled in the lister static void list_files(const char *filepath, ListCallback list_callback) { void * hFind; @@ -64,6 +67,7 @@ const char* fname = retro_dirent_get_name(rdir); list_callback(rdir,EListCallbackArg_Item); + printf("cflash added %s\n",fname); if(retro_dirent_is_dir(rdir) && (strcmp(fname, ".")) && (strcmp(fname, ".."))) { @@ -76,28 +80,24 @@ retro_closedir(rdir); } -static u64 dataSectors = 0; -void count_ListCallback(RDIR* rdir, EListCallbackArg arg) +enum eCallbackType { - if(arg == EListCallbackArg_Pop) return; - u32 sectors = 1; - if(retro_dirent_is_dir(rdir)) - { - } - else - { - //allocate sectors for file - int32_t fileSize = path_get_size(retro_dirent_get_name(rdir)); - sectors += (fileSize+511)/512 + 1; - } - dataSectors += sectors; -} + eCallbackType_Count, eCallbackType_Build +}; +static eCallbackType callbackType; + +//for 
eCallbackType_Count: +static bool count_failed = false; +static u64 dataSectors = 0; + +//recursing related.. really ought to be merged with list_files functionality static std::string currPath; static std::stack<std::string> pathStack; static std::stack<std::string> virtPathStack; static std::string currVirtPath; -void build_ListCallback(RDIR* rdir, EListCallbackArg arg) + +static void DirectoryListCallback(RDIR* rdir, EListCallbackArg arg) { const char* fname = retro_dirent_get_name(rdir); @@ -119,11 +119,19 @@ virtPathStack.push(currVirtPath); currVirtPath = currVirtPath + "/" + fname; - bool ok = LIBFAT::MkDir(currVirtPath.c_str()); - if(!ok) - printf("ERROR adding dir %s via libfat\n",currVirtPath.c_str()); + if(callbackType == eCallbackType_Build) + { + bool ok = LIBFAT::MkDir(currVirtPath.c_str()); + if(!ok) + printf("ERROR adding dir %s via libfat\n",currVirtPath.c_str()); + } + else + { + dataSectors++; //directories take one sector + } + currPath = currPath + path_default_slash() + fname; return; } @@ -131,38 +139,56 @@ { std::string path = currPath + path_default_slash() + fname; - FILE* inf = fopen(path.c_str(),"rb"); - if(inf) + if(callbackType == eCallbackType_Build) { - fseek(inf,0,SEEK_END); - long len = ftell(inf); - fseek(inf,0,SEEK_SET); - u8 *buf = new u8[len]; - fread(buf,1,len,inf); - fclose(inf); + FILE* inf = fopen(path.c_str(),"rb"); + if(inf) + { + fseek(inf,0,SEEK_END); + long len = ftell(inf); + fseek(inf,0,SEEK_SET); + u8 *buf = new u8[len]; + fread(buf,1,len,inf); + fclose(inf); - std::string path = currVirtPath + "/" + fname; - printf("FAT + (%10.2f KB) %s \n",len/1024.f,path.c_str()); - bool ok = LIBFAT::WriteFile(path.c_str(),buf,len); - if(!ok) - printf("ERROR adding file to fat\n"); - delete[] buf; - } else printf("ERROR opening file for fat\n"); + std::string path = currVirtPath + "/" + fname; + printf("FAT + (%10.2f KB) %s \n",len/1024.f,path.c_str()); + bool ok = LIBFAT::WriteFile(path.c_str(),buf,len); + if(!ok) + 
printf("ERROR adding file to fat\n"); + delete[] buf; + } else printf("ERROR opening file for fat\n"); + } + else + { + //allocate sectors for file + int32_t fileSize = path_get_size(path.c_str()); + if(fileSize == -1) { count_failed = true; dataSectors = 0; } + else dataSectors += (fileSize+511)/512 + 1; + } } } - - bool VFAT::build(const char* path, int extra_MB) { dataSectors = 0; currVirtPath = ""; currPath = path; - list_files(path, count_ListCallback); + count_failed = false; + callbackType = eCallbackType_Count; + list_files(path, DirectoryListCallback); + + if(count_failed) + { + printf("FAILED enumerating files for fat\n"); + return false; + } + dataSectors += 8; //a few for reserved sectors, etc. + dataSectors += extra_MB*1024*1024/512; //add extra write space //dataSectors += 16*1024*1024/512; //add 16MB worth of write space. this is probably enough for anyone, but maybe it should be configurable. //we could always suggest to users to add a big file to their directory to overwrite (that would cause the image to get padded) @@ -171,6 +197,8 @@ if(dataSectors<36*1024*1024/512) dataSectors = 36*1024*1024/512; + printf("dataSectors: %lld\n",dataSectors); + if(dataSectors>=(0x80000000>>9)) { printf("error allocating memory for fat (%d KBytes)\n",(dataSectors*512)/1024); @@ -207,7 +235,8 @@ //setup libfat and write all the files through it LIBFAT::Init(memf->buf(),memf->size()); - list_files(path, build_ListCallback); + callbackType = eCallbackType_Build; + list_files(path, DirectoryListCallback); LIBFAT::Shutdown(); return true; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ze...@us...> - 2016-11-10 07:02:56
|
Revision: 5573 http://sourceforge.net/p/desmume/code/5573 Author: zeromus Date: 2016-11-10 07:02:54 +0000 (Thu, 10 Nov 2016) Log Message: ----------- fix retriggering sounds in AC:WW Modified Paths: -------------- trunk/desmume/src/SPU.cpp Modified: trunk/desmume/src/SPU.cpp =================================================================== --- trunk/desmume/src/SPU.cpp 2016-11-03 00:39:02 UTC (rev 5572) +++ trunk/desmume/src/SPU.cpp 2016-11-10 07:02:54 UTC (rev 5573) @@ -795,8 +795,11 @@ regs.ctl_ch1bypass = (val >> 4) & 1; regs.ctl_ch3bypass = (val >> 5) & 1; regs.masteren = (val >> 7) & 1; - for(u8 i=0; i<16; i++) - KeyProbe(i); + //from r4925 - after changing 'masteren', we retrigger any sounds? doubtful. + //maybe we STOP sounds here, but we don't enable them (this would retrigger any previous sounds that had finished; glitched AC:WW) + //(probably broken in r3299) + //after commenting this out, I checked bug #1356. seems unrelated. + //for(int i=0; i<16; i++) KeyProbe(i); break; //SOUNDBIAS This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-11-03 00:39:04
|
Revision: 5572 http://sourceforge.net/p/desmume/code/5572 Author: rogerman Date: 2016-11-03 00:39:02 +0000 (Thu, 03 Nov 2016) Log Message: ----------- Texture Handler: - The 3D renderers are now responsible for managing the texture unpack buffers instead of relying on the TexCacheItem itself to do it. - The OpenGL 3D renderer now uses a fixed 4MB buffer for unpacking textures, instead of maintaining extra copies of each unpacked texture in main memory even after they've been uploaded to the GPU. Modified Paths: -------------- trunk/desmume/src/OGLRender.cpp trunk/desmume/src/OGLRender.h trunk/desmume/src/OGLRender_3_2.cpp trunk/desmume/src/rasterize.cpp trunk/desmume/src/texcache.cpp trunk/desmume/src/texcache.h Modified: trunk/desmume/src/OGLRender.cpp =================================================================== --- trunk/desmume/src/OGLRender.cpp 2016-11-02 18:49:36 UTC (rev 5571) +++ trunk/desmume/src/OGLRender.cpp 2016-11-03 00:39:02 UTC (rev 5572) @@ -636,7 +636,7 @@ } } -void texDeleteCallback(TexCacheItem *texItem, void *param1, void *param2) +void OGLTextureDeleteCallback(TexCacheItem *texItem, void *param1, void *param2) { OpenGLRenderer *oglRenderer = (OpenGLRenderer *)param1; oglRenderer->DeleteTexture(texItem); @@ -872,6 +872,7 @@ ref->selectedRenderingFBO = 0; _mappedFramebuffer = NULL; + _workingTextureUnpackBuffer = (FragmentColor *)malloc_alignedCacheLine(1024 * 1024 * sizeof(FragmentColor)); _pixelReadNeedsFinish = false; _currentPolyIndex = 0; _shadowPolyID.reserve(POLYLIST_SIZE); @@ -880,6 +881,7 @@ OpenGLRenderer::~OpenGLRenderer() { free_aligned(_framebufferColor); + free_aligned(_workingTextureUnpackBuffer); // Destroy OpenGL rendering states delete ref; @@ -2417,6 +2419,7 @@ Render3DError OpenGLRenderer_1_2::DeleteTexture(const TexCacheItem *item) { this->ref->freeTextureIDs.push((GLuint)item->texid); + texCache.cache_size -= item->unpackSize; return OGLERROR_NOERR; } @@ -2970,13 +2973,11 @@ if (theTexture->unpackFormat 
!= TexFormat_32bpp) { - theTexture->Unpack<TexFormat_32bpp>(); - //has the ogl renderer initialized the texture? const bool isNewTexture = (theTexture->GetDeleteCallback() == NULL); if (isNewTexture) { - theTexture->SetDeleteCallback(&texDeleteCallback, this, NULL); + theTexture->SetDeleteCallback(&OGLTextureDeleteCallback, this, NULL); if (OGLRef.freeTextureIDs.empty()) { @@ -2985,14 +2986,19 @@ theTexture->texid = (u32)OGLRef.freeTextureIDs.front(); OGLRef.freeTextureIDs.pop(); + + theTexture->unpackSize = theTexture->GetUnpackSizeUsingFormat(TexFormat_32bpp); + texCache.cache_size += theTexture->unpackSize; } + theTexture->Unpack<TexFormat_32bpp>((u32 *)this->_workingTextureUnpackBuffer); + glBindTexture(GL_TEXTURE_2D, (GLuint)theTexture->texid); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? OGLRef.stateTexMirroredRepeat : GL_REPEAT) : GL_CLAMP_TO_EDGE)); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? OGLRef.stateTexMirroredRepeat : GL_REPEAT) : GL_CLAMP_TO_EDGE)); const NDSTextureFormat texFormat = theTexture->GetTextureFormat(); - const u32 *textureSrc = theTexture->unpackData; + const u32 *textureSrc = (u32 *)this->_workingTextureUnpackBuffer; size_t texWidth = theTexture->sizeX; size_t texHeight = theTexture->sizeY; @@ -4646,13 +4652,11 @@ if (theTexture->unpackFormat != TexFormat_32bpp) { - theTexture->Unpack<TexFormat_32bpp>(); - //has the ogl renderer initialized the texture? 
const bool isNewTexture = (theTexture->GetDeleteCallback() == NULL); if (isNewTexture) { - theTexture->SetDeleteCallback(&texDeleteCallback, this, NULL); + theTexture->SetDeleteCallback(&OGLTextureDeleteCallback, this, NULL); if (OGLRef.freeTextureIDs.empty()) { @@ -4661,14 +4665,19 @@ theTexture->texid = (u32)OGLRef.freeTextureIDs.front(); OGLRef.freeTextureIDs.pop(); + + theTexture->unpackSize = theTexture->GetUnpackSizeUsingFormat(TexFormat_32bpp); + texCache.cache_size += theTexture->unpackSize; } + theTexture->Unpack<TexFormat_32bpp>((u32 *)this->_workingTextureUnpackBuffer); + glBindTexture(GL_TEXTURE_2D, (GLuint)theTexture->texid); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); const NDSTextureFormat texFormat = theTexture->GetTextureFormat(); - const u32 *textureSrc = theTexture->unpackData; + const u32 *textureSrc = (u32 *)this->_workingTextureUnpackBuffer; size_t texWidth = theTexture->sizeX; size_t texHeight = theTexture->sizeY; Modified: trunk/desmume/src/OGLRender.h =================================================================== --- trunk/desmume/src/OGLRender.h 2016-11-02 18:49:36 UTC (rev 5571) +++ trunk/desmume/src/OGLRender.h 2016-11-03 00:39:02 UTC (rev 5572) @@ -526,7 +526,7 @@ extern const GLfloat PostprocessVtxBuffer[16]; extern const GLubyte PostprocessElementBuffer[6]; -extern void texDeleteCallback(TexCacheItem *texItem, void *param1, void *param2); +extern void OGLTextureDeleteCallback(TexCacheItem *texItem, void *param1, void *param2); //This is called by OGLRender whenever it initializes. //Platforms, please be sure to set this up. 
@@ -589,6 +589,7 @@ bool willConvertFramebufferOnGPU; FragmentColor *_mappedFramebuffer; + FragmentColor *_workingTextureUnpackBuffer; bool _pixelReadNeedsFinish; size_t _currentPolyIndex; std::vector<u8> _shadowPolyID; Modified: trunk/desmume/src/OGLRender_3_2.cpp =================================================================== --- trunk/desmume/src/OGLRender_3_2.cpp 2016-11-02 18:49:36 UTC (rev 5571) +++ trunk/desmume/src/OGLRender_3_2.cpp 2016-11-03 00:39:02 UTC (rev 5572) @@ -1698,13 +1698,11 @@ TexCacheItem *theTexture = texCache.GetTexture(thePoly.texParam, thePoly.texPalette); if (theTexture->unpackFormat != TexFormat_32bpp) { - theTexture->Unpack<TexFormat_32bpp>(); - //has the ogl renderer initialized the texture? const bool isNewTexture = (theTexture->GetDeleteCallback() == NULL); if (isNewTexture) { - theTexture->SetDeleteCallback(&texDeleteCallback, this, NULL); + theTexture->SetDeleteCallback(&OGLTextureDeleteCallback, this, NULL); if (OGLRef.freeTextureIDs.empty()) { @@ -1713,14 +1711,19 @@ theTexture->texid = (u32)OGLRef.freeTextureIDs.front(); OGLRef.freeTextureIDs.pop(); + + theTexture->unpackSize = theTexture->GetUnpackSizeUsingFormat(TexFormat_32bpp); + texCache.cache_size += theTexture->unpackSize; } + theTexture->Unpack<TexFormat_32bpp>((u32 *)this->_workingTextureUnpackBuffer); + glBindTexture(GL_TEXTURE_2D, (GLuint)theTexture->texid); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? 
GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); const NDSTextureFormat texFormat = theTexture->GetTextureFormat(); - const u32 *textureSrc = theTexture->unpackData; + const u32 *textureSrc = (u32 *)this->_workingTextureUnpackBuffer; size_t texWidth = theTexture->sizeX; size_t texHeight = theTexture->sizeY; Modified: trunk/desmume/src/rasterize.cpp =================================================================== --- trunk/desmume/src/rasterize.cpp 2016-11-02 18:49:36 UTC (rev 5571) +++ trunk/desmume/src/rasterize.cpp 2016-11-03 00:39:02 UTC (rev 5572) @@ -1147,6 +1147,12 @@ } } +void SoftRasterizerTextureDeleteCallback(TexCacheItem *texItem, void *param1, void *param2) +{ + free_aligned(texItem->unpackData); + texCache.cache_size -= texItem->unpackSize; +} + GPU3DInterface gpu3DRasterize = { "SoftRasterizer", SoftRasterizerRendererCreate, @@ -1377,7 +1383,16 @@ TexCacheItem *lastTexItem = texCache.GetTexture(firstPoly.texParam, firstPoly.texPalette); if (lastTexItem->unpackFormat != TexFormat_15bpp) { - lastTexItem->Unpack<TexFormat_15bpp>(); + const bool isNewTexture = (lastTexItem->GetDeleteCallback() == NULL); + if (isNewTexture) + { + lastTexItem->SetDeleteCallback(&SoftRasterizerTextureDeleteCallback, this, NULL); + lastTexItem->unpackSize = lastTexItem->GetUnpackSizeUsingFormat(TexFormat_15bpp); + lastTexItem->unpackData = (u32 *)malloc_alignedCacheLine(lastTexItem->unpackSize); + texCache.cache_size += lastTexItem->unpackSize; + } + + lastTexItem->Unpack<TexFormat_15bpp>(lastTexItem->unpackData); } for (size_t i = 0; i < this->_clippedPolyCount; i++) @@ -1394,7 +1409,16 @@ lastTexItem = texCache.GetTexture(thePoly.texParam, thePoly.texPalette); if (lastTexItem->unpackFormat != TexFormat_15bpp) { - lastTexItem->Unpack<TexFormat_15bpp>(); + const bool isNewTexture = (lastTexItem->GetDeleteCallback() == NULL); + if (isNewTexture) + { + lastTexItem->SetDeleteCallback(&SoftRasterizerTextureDeleteCallback, this, NULL); + lastTexItem->unpackSize = 
lastTexItem->GetUnpackSizeUsingFormat(TexFormat_15bpp); + lastTexItem->unpackData = (u32 *)malloc_alignedCacheLine(lastTexItem->unpackSize); + texCache.cache_size += lastTexItem->unpackSize; + } + + lastTexItem->Unpack<TexFormat_15bpp>(lastTexItem->unpackData); } lastTexParams = thePoly.texParam; Modified: trunk/desmume/src/texcache.cpp =================================================================== --- trunk/desmume/src/texcache.cpp 2016-11-02 18:49:36 UTC (rev 5571) +++ trunk/desmume/src/texcache.cpp 2016-11-03 00:39:02 UTC (rev 5572) @@ -204,20 +204,6 @@ memset(paletteDump, 0, sizeof(paletteDump)); } -void TexCache::list_remove(TexCacheItem *item) -{ - const TexCacheKey key = TexCache::GenerateKey(item->textureAttributes, item->paletteAttributes); - this->cacheTable.erase(key); - this->cache_size -= item->unpackSize; -} - -void TexCache::list_push_front(TexCacheItem *item) -{ - const TexCacheKey key = TexCache::GenerateKey(item->textureAttributes, item->paletteAttributes); - this->cacheTable[key] = item; - this->cache_size += item->unpackSize; -} - void TexCache::Invalidate() { //check whether the palette memory changed @@ -244,7 +230,7 @@ } } -void TexCache::Evict(u32 target) +void TexCache::Evict(size_t target) { //debug print //printf("%d %d/%d\n",index.size(),cache_size/1024,target/1024); @@ -262,7 +248,9 @@ if (this->cacheTable.size() == 0) break; //just in case.. doesnt seem possible, cache_size wouldve been 0 TexCacheItem *item = this->cacheTable.begin()->second; - this->list_remove(item); + const TexCacheKey key = TexCache::GenerateKey(item->textureAttributes, item->paletteAttributes); + this->cacheTable.erase(key); + //printf("evicting! 
totalsize:%d\n",cache_size); delete item; } @@ -270,7 +258,15 @@ void TexCache::Reset() { - this->Evict(0); + for (TexCacheTable::iterator it(this->cacheTable.begin()); it != this->cacheTable.end(); ++it) + { + TexCacheItem *item = it->second; + delete item; + } + + this->cacheTable.clear(); + this->cache_size = 0; + memset(this->paletteDump, 0, sizeof(paletteDump)); } TexCacheItem* TexCache::GetTexture(u32 texAttributes, u32 palAttributes) @@ -378,7 +374,7 @@ if (didCreateNewTexture) { - this->list_push_front(theTexture); + this->cacheTable[key] = theTexture; //printf("allocating: up to %d with %d items\n",cache_size,index.size()); } @@ -501,7 +497,6 @@ TexCacheItem::~TexCacheItem() { free_aligned(this->packData); - free_aligned(this->unpackData); free_aligned(this->paletteColorTable); free_aligned(this->packIndexData); if (this->_deleteCallback != NULL) this->_deleteCallback(this, this->_deleteCallbackParam1, this->_deleteCallbackParam2); @@ -535,15 +530,6 @@ { packedIndexData.dump(this->packIndexData, this->packIndexSize); } - - const u32 currentUnpackSize = this->sizeX * this->sizeY * sizeof(u32); - if (this->unpackSize != currentUnpackSize) - { - u32 *oldUnpackData = this->unpackData; - this->unpackSize = currentUnpackSize; - this->unpackData = (u32 *)malloc_alignedCacheLine(currentUnpackSize); - free_aligned(oldUnpackData); - } } void TexCacheItem::SetTexturePalette(const u16 *paletteBuffer) @@ -554,8 +540,13 @@ } } +size_t TexCacheItem::GetUnpackSizeUsingFormat(const TexCache_TexFormat texCacheFormat) const +{ + return (this->sizeX * this->sizeY * sizeof(u32)); +} + template <TexCache_TexFormat TEXCACHEFORMAT> -void TexCacheItem::Unpack() +void TexCacheItem::Unpack(u32 *unpackBuffer) { this->unpackFormat = TEXCACHEFORMAT; @@ -566,19 +557,19 @@ switch (this->packFormat) { case TEXMODE_A3I5: - NDSTextureUnpackA3I5<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, this->unpackData); + NDSTextureUnpackA3I5<TEXCACHEFORMAT>(this->packSize, 
this->packData, this->paletteColorTable, unpackBuffer); break; case TEXMODE_I2: - NDSTextureUnpackI2<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, this->isPalZeroTransparent, this->unpackData); + NDSTextureUnpackI2<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, this->isPalZeroTransparent, unpackBuffer); break; case TEXMODE_I4: - NDSTextureUnpackI4<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, this->isPalZeroTransparent, this->unpackData); + NDSTextureUnpackI4<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, this->isPalZeroTransparent, unpackBuffer); break; case TEXMODE_I8: - NDSTextureUnpackI8<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, this->isPalZeroTransparent, this->unpackData); + NDSTextureUnpackI8<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, this->isPalZeroTransparent, unpackBuffer); break; case TEXMODE_4X4: @@ -588,16 +579,16 @@ PROGINFO("Your 4x4 texture has overrun its texture slot.\n"); } - NDSTextureUnpack4x4<TEXCACHEFORMAT>(this->packSizeFirstSlot, (u32 *)this->packData, (u16 *)this->packIndexData, this->paletteAddress, this->textureAttributes, this->sizeX, this->sizeY, this->unpackData); + NDSTextureUnpack4x4<TEXCACHEFORMAT>(this->packSizeFirstSlot, (u32 *)this->packData, (u16 *)this->packIndexData, this->paletteAddress, this->textureAttributes, this->sizeX, this->sizeY, unpackBuffer); break; } case TEXMODE_A5I3: - NDSTextureUnpackA5I3<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, this->unpackData); + NDSTextureUnpackA5I3<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, unpackBuffer); break; case TEXMODE_16BPP: - NDSTextureUnpackDirect16Bit<TEXCACHEFORMAT>(this->packSize, (u16 *)this->packData, this->unpackData); + NDSTextureUnpackDirect16Bit<TEXCACHEFORMAT>(this->packSize, (u16 *)this->packData, unpackBuffer); break; default: @@ -1122,5 
+1113,5 @@ } } -template void TexCacheItem::Unpack<TexFormat_15bpp>(); -template void TexCacheItem::Unpack<TexFormat_32bpp>(); +template void TexCacheItem::Unpack<TexFormat_15bpp>(u32 *unpackBuffer); +template void TexCacheItem::Unpack<TexFormat_32bpp>(u32 *unpackBuffer); Modified: trunk/desmume/src/texcache.h =================================================================== --- trunk/desmume/src/texcache.h 2016-11-02 18:49:36 UTC (rev 5571) +++ trunk/desmume/src/texcache.h 2016-11-03 00:39:02 UTC (rev 5572) @@ -59,14 +59,11 @@ TexCache(); TexCacheTable cacheTable; - u32 cache_size; //this is not really precise, it is off by a constant factor + size_t cache_size; //this is not really precise, it is off by a constant factor u8 paletteDump[PALETTE_DUMP_SIZE]; - - void list_remove(TexCacheItem *item); - void list_push_front(TexCacheItem *item); - + void Invalidate(); - void Evict(u32 target); + void Evict(size_t target); void Reset(); TexCacheItem* GetTexture(u32 texAttributes, u32 palAttributes); @@ -127,7 +124,8 @@ void SetTextureData(const MemSpan &packedData, const MemSpan &packedIndexData); void SetTexturePalette(const u16 *paletteBuffer); - template<TexCache_TexFormat TEXCACHEFORMAT> void Unpack(); + size_t GetUnpackSizeUsingFormat(const TexCache_TexFormat texCacheFormat) const; + template<TexCache_TexFormat TEXCACHEFORMAT> void Unpack(u32 *unpackBuffer); void DebugDump(); }; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-11-02 18:49:38
|
Revision: 5571 http://sourceforge.net/p/desmume/code/5571 Author: rogerman Date: 2016-11-02 18:49:36 +0000 (Wed, 02 Nov 2016) Log Message: ----------- Texture Handler: - Increase TEXCACHE_MAX_SIZE to 32MB. Fixes severe performance problems with continuously evicting/reloading the texture cache in Umihara Kawase Shun. Modified Paths: -------------- trunk/desmume/src/texcache.h Modified: trunk/desmume/src/texcache.h =================================================================== --- trunk/desmume/src/texcache.h 2016-11-02 09:37:59 UTC (rev 5570) +++ trunk/desmume/src/texcache.h 2016-11-02 18:49:36 UTC (rev 5571) @@ -27,11 +27,16 @@ #include "gfx3d.h" //this ought to be enough for anyone -//#define TEXCACHE_MAX_SIZE (64*1024*1024); +//#define TEXCACHE_MAX_SIZE (64*1024*1024) + //changed by zeromus on 15-dec. I couldnt find any games that were getting anywhere NEAR 64 //metal slug burns through sprites so fast, it can test it pretty quickly though -#define TEXCACHE_MAX_SIZE (16*1024*1024) +//#define TEXCACHE_MAX_SIZE (16*1024*1024) +// rogerman, 2016-11-02: Increase this to 32MB for games that use many large textures, such +// as Umihara Kawase Shun, which can cache over 20MB in the first level. +#define TEXCACHE_MAX_SIZE (32*1024*1024) + #define PALETTE_DUMP_SIZE ((64+16+16)*1024) enum TexCache_TexFormat This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-11-02 09:38:02
|
Revision: 5570 http://sourceforge.net/p/desmume/code/5570 Author: rogerman Date: 2016-11-02 09:37:59 +0000 (Wed, 02 Nov 2016) Log Message: ----------- Texture Handler: - Fix a bug where 4x4 formatted textures were being read incorrectly. (Regression from r5569.) Revision Links: -------------- http://sourceforge.net/p/desmume/code/5569 Modified Paths: -------------- trunk/desmume/src/texcache.cpp Modified: trunk/desmume/src/texcache.cpp =================================================================== --- trunk/desmume/src/texcache.cpp 2016-11-02 07:25:11 UTC (rev 5569) +++ trunk/desmume/src/texcache.cpp 2016-11-02 09:37:59 UTC (rev 5570) @@ -469,17 +469,9 @@ isPalZeroTransparent = false; } + paletteAddress = (packFormat == TEXMODE_I2) ? palAttributes << 3 : palAttributes << 4; paletteSize = paletteSizeList[packFormat] * sizeof(u16); - if (paletteSize > 0) - { - paletteAddress = (packFormat == TEXMODE_I2) ? palAttributes << 3 : palAttributes << 4; - paletteColorTable = (u16 *)malloc_alignedCacheLine(paletteSize); - } - else - { - paletteAddress = 0; - paletteColorTable = NULL; - } + paletteColorTable = (paletteSize > 0) ? (u16 *)malloc_alignedCacheLine(paletteSize) : NULL; unpackFormat = TexFormat_None; unpackSize = 0; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-11-02 07:25:14
|
Revision: 5569 http://sourceforge.net/p/desmume/code/5569 Author: rogerman Date: 2016-11-02 07:25:11 +0000 (Wed, 02 Nov 2016) Log Message: ----------- Texture Handler: - Rework TexCacheItem::GetTexture() so that instantiating a new object, dumping the packed data, and dumping the palette are performed as separate operations. - Invalid OpenGL textures are now updated instead of being completely replaced. - NDSTextureUnpack4x4() now uses the srcIndex pointer parameter instead of recalculating the palette address. - Delete the now obsolete MemSpan-based texture unpacking functions. Modified Paths: -------------- trunk/desmume/src/OGLRender.cpp trunk/desmume/src/OGLRender.h trunk/desmume/src/OGLRender_3_2.cpp trunk/desmume/src/texcache.cpp trunk/desmume/src/texcache.h Modified: trunk/desmume/src/OGLRender.cpp =================================================================== --- trunk/desmume/src/OGLRender.cpp 2016-11-01 21:07:17 UTC (rev 5568) +++ trunk/desmume/src/OGLRender.cpp 2016-11-02 07:25:11 UTC (rev 5569) @@ -871,7 +871,6 @@ ref->fboPostprocessID = 0; ref->selectedRenderingFBO = 0; - currTexture = NULL; _mappedFramebuffer = NULL; _pixelReadNeedsFinish = false; _currentPolyIndex = 0; @@ -2418,10 +2417,6 @@ Render3DError OpenGLRenderer_1_2::DeleteTexture(const TexCacheItem *item) { this->ref->freeTextureIDs.push((GLuint)item->texid); - if(this->currTexture == item) - { - this->currTexture = NULL; - } return OGLERROR_NOERR; } @@ -2956,125 +2951,149 @@ return OGLERROR_NOERR; } + TexCacheItem *theTexture = texCache.GetTexture(thePoly.texParam, thePoly.texPalette); + // Enable textures if they weren't already enabled if (this->isShaderSupported) { glUniform1i(OGLRef.uniformPolyEnableTexture, GL_TRUE); - glUniform1i(OGLRef.uniformTexSingleBitAlpha, (params.texFormat != TEXMODE_A3I5 && params.texFormat != TEXMODE_A5I3) ? 
GL_TRUE : GL_FALSE); + glUniform1i(OGLRef.uniformTexSingleBitAlpha, (theTexture->packFormat != TEXMODE_A3I5 && theTexture->packFormat != TEXMODE_A5I3) ? GL_TRUE : GL_FALSE); + glUniform2f(OGLRef.uniformPolyTexScale, theTexture->invSizeX, theTexture->invSizeY); } else { glEnable(GL_TEXTURE_2D); + glMatrixMode(GL_TEXTURE); + glLoadIdentity(); + glScalef(theTexture->invSizeX, theTexture->invSizeY, 1.0f); } - TexCacheItem *newTexture = texCache.GetTexture(thePoly.texParam, thePoly.texPalette); - if (newTexture->unpackFormat != TexFormat_32bpp) + if (theTexture->unpackFormat != TexFormat_32bpp) { - newTexture->Unpack<TexFormat_32bpp>(); - } - - if (newTexture != this->currTexture) - { - this->currTexture = newTexture; + theTexture->Unpack<TexFormat_32bpp>(); + //has the ogl renderer initialized the texture? - if (this->currTexture->GetDeleteCallback() == NULL) + const bool isNewTexture = (theTexture->GetDeleteCallback() == NULL); + if (isNewTexture) { - this->currTexture->SetDeleteCallback(&texDeleteCallback, this, NULL); + theTexture->SetDeleteCallback(&texDeleteCallback, this, NULL); if (OGLRef.freeTextureIDs.empty()) { this->ExpandFreeTextures(); } - this->currTexture->texid = (u32)OGLRef.freeTextureIDs.front(); + theTexture->texid = (u32)OGLRef.freeTextureIDs.front(); OGLRef.freeTextureIDs.pop(); - - glBindTexture(GL_TEXTURE_2D, (GLuint)this->currTexture->texid); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? OGLRef.stateTexMirroredRepeat : GL_REPEAT) : GL_CLAMP_TO_EDGE)); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? 
OGLRef.stateTexMirroredRepeat : GL_REPEAT) : GL_CLAMP_TO_EDGE)); - - const NDSTextureFormat texFormat = this->currTexture->GetTextureFormat(); - const u32 *textureSrc = this->currTexture->unpackData; - size_t texWidth = this->currTexture->sizeX; - size_t texHeight = this->currTexture->sizeY; - - if (this->_textureDeposterizeDstSurface.Surface != NULL) + } + + glBindTexture(GL_TEXTURE_2D, (GLuint)theTexture->texid); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? OGLRef.stateTexMirroredRepeat : GL_REPEAT) : GL_CLAMP_TO_EDGE)); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? OGLRef.stateTexMirroredRepeat : GL_REPEAT) : GL_CLAMP_TO_EDGE)); + + const NDSTextureFormat texFormat = theTexture->GetTextureFormat(); + const u32 *textureSrc = theTexture->unpackData; + size_t texWidth = theTexture->sizeX; + size_t texHeight = theTexture->sizeY; + + if (this->_textureDeposterizeDstSurface.Surface != NULL) + { + this->TextureDeposterize(textureSrc, texWidth, texHeight); + textureSrc = (u32 *)this->_textureDeposterizeDstSurface.Surface; + } + + switch (this->_textureScalingFactor) + { + case 1: { - this->TextureDeposterize(textureSrc, texWidth, texHeight); - textureSrc = (u32 *)this->_textureDeposterizeDstSurface.Surface; + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + + if (isNewTexture) + { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + else + { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + break; } - - switch (this->_textureScalingFactor) + + case 2: { - case 1: - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 
0, GL_RGBA, GL_UNSIGNED_BYTE, textureSrc); - break; - - case 2: + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); + + this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); + + if (isNewTexture) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, theTexture->sizeX, theTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + else + { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, theTexture->sizeX, theTexture->sizeY, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + break; + } + + case 4: + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 2); + + this->TextureUpscale<4>(texFormat, textureSrc, texWidth, texHeight); + + if (isNewTexture) + { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + texWidth = theTexture->sizeX; + texHeight = theTexture->sizeY; this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, this->_textureUpscaleBuffer); + glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, this->currTexture->sizeX, this->currTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_BYTE, textureSrc); - break; + glTexImage2D(GL_TEXTURE_2D, 2, GL_RGBA, theTexture->sizeX, theTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); 
} - - case 4: + else { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 2); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - this->TextureUpscale<4>(texFormat, textureSrc, texWidth, texHeight); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, this->_textureUpscaleBuffer); - - texWidth = this->currTexture->sizeX; - texHeight = this->currTexture->sizeY; + texWidth = theTexture->sizeX; + texHeight = theTexture->sizeY; this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); - glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, this->_textureUpscaleBuffer); + glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - glTexImage2D(GL_TEXTURE_2D, 2, GL_RGBA, this->currTexture->sizeX, this->currTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_BYTE, textureSrc); - break; + glTexSubImage2D(GL_TEXTURE_2D, 2, 0, 0, theTexture->sizeX, theTexture->sizeY, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } - - default: - break; + break; } - - if (this->_textureSmooth) - { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (this->_textureScalingFactor > 1) ? 
GL_LINEAR_MIPMAP_LINEAR : GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, this->_deviceInfo.maxAnisotropy); - } - else - { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.0f); - } + + default: + break; } - else - { - //otherwise, just bind it - glBindTexture(GL_TEXTURE_2D, (GLuint)this->currTexture->texid); - } - if (this->isShaderSupported) + if (this->_textureSmooth) { - glUniform2f(OGLRef.uniformPolyTexScale, this->currTexture->invSizeX, this->currTexture->invSizeY); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (this->_textureScalingFactor > 1) ? GL_LINEAR_MIPMAP_LINEAR : GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, this->_deviceInfo.maxAnisotropy); } else { - glMatrixMode(GL_TEXTURE); - glLoadIdentity(); - glScalef(this->currTexture->invSizeX, this->currTexture->invSizeY, 1.0f); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.0f); } } + else + { + //otherwise, just bind it + glBindTexture(GL_TEXTURE_2D, (GLuint)theTexture->texid); + } return OGLERROR_NOERR; } @@ -3126,7 +3145,6 @@ memset(OGLRef.vertIndexBuffer, 0, OGLRENDER_VERT_INDEX_BUFFER_COUNT * sizeof(GLushort)); } - this->currTexture = NULL; this->_currentPolyIndex = 0; OGLRef.vtxPtrPosition = (GLvoid *)offsetof(VERT, coord); @@ -4620,108 +4638,136 @@ return OGLERROR_NOERR; } + TexCacheItem *theTexture = texCache.GetTexture(thePoly.texParam, thePoly.texPalette); + glUniform1i(OGLRef.uniformPolyEnableTexture, GL_TRUE); - glUniform1i(OGLRef.uniformTexSingleBitAlpha, (params.texFormat != TEXMODE_A3I5 
&& params.texFormat != TEXMODE_A5I3) ? GL_TRUE : GL_FALSE); + glUniform1i(OGLRef.uniformTexSingleBitAlpha, (theTexture->packFormat != TEXMODE_A3I5 && theTexture->packFormat != TEXMODE_A5I3) ? GL_TRUE : GL_FALSE); + glUniform2f(OGLRef.uniformPolyTexScale, theTexture->invSizeX, theTexture->invSizeY); - TexCacheItem *newTexture = texCache.GetTexture(thePoly.texParam, thePoly.texPalette); - if (newTexture->unpackFormat != TexFormat_32bpp) + if (theTexture->unpackFormat != TexFormat_32bpp) { - newTexture->Unpack<TexFormat_32bpp>(); - } - - if (newTexture != this->currTexture) - { - this->currTexture = newTexture; + theTexture->Unpack<TexFormat_32bpp>(); + //has the ogl renderer initialized the texture? - if (this->currTexture->GetDeleteCallback() == NULL) + const bool isNewTexture = (theTexture->GetDeleteCallback() == NULL); + if (isNewTexture) { - this->currTexture->SetDeleteCallback(&texDeleteCallback, this, NULL); + theTexture->SetDeleteCallback(&texDeleteCallback, this, NULL); if (OGLRef.freeTextureIDs.empty()) { this->ExpandFreeTextures(); } - this->currTexture->texid = (u32)OGLRef.freeTextureIDs.front(); + theTexture->texid = (u32)OGLRef.freeTextureIDs.front(); OGLRef.freeTextureIDs.pop(); - - glBindTexture(GL_TEXTURE_2D, (GLuint)this->currTexture->texid); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? 
GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); - - const NDSTextureFormat texFormat = this->currTexture->GetTextureFormat(); - const u32 *textureSrc = this->currTexture->unpackData; - size_t texWidth = this->currTexture->sizeX; - size_t texHeight = this->currTexture->sizeY; - - if (this->_textureDeposterizeDstSurface.Surface != NULL) + } + + glBindTexture(GL_TEXTURE_2D, (GLuint)theTexture->texid); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); + + const NDSTextureFormat texFormat = theTexture->GetTextureFormat(); + const u32 *textureSrc = theTexture->unpackData; + size_t texWidth = theTexture->sizeX; + size_t texHeight = theTexture->sizeY; + + if (this->_textureDeposterizeDstSurface.Surface != NULL) + { + this->TextureDeposterize(textureSrc, texWidth, texHeight); + textureSrc = (u32 *)this->_textureDeposterizeDstSurface.Surface; + } + + switch (this->_textureScalingFactor) + { + case 1: { - this->TextureDeposterize(textureSrc, texWidth, texHeight); - textureSrc = (u32 *)this->_textureDeposterizeDstSurface.Surface; + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + + if (isNewTexture) + { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + else + { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + break; } - - switch (this->_textureScalingFactor) + + case 2: { - case 1: - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, 
textureSrc); - break; - - case 2: + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); + + if (isNewTexture) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, theTexture->sizeX, theTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + else + { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, theTexture->sizeX, theTexture->sizeY, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + break; + } + + case 4: + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 2); + + this->TextureUpscale<4>(texFormat, textureSrc, texWidth, texHeight); + + if (isNewTexture) + { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + texWidth = theTexture->sizeX; + texHeight = theTexture->sizeY; this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, this->_textureUpscaleBuffer); + glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, this->currTexture->sizeX, this->currTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_BYTE, textureSrc); - break; + glTexImage2D(GL_TEXTURE_2D, 2, GL_RGBA, theTexture->sizeX, theTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } - - case 4: + else { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - 
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 2); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - this->TextureUpscale<4>(texFormat, textureSrc, texWidth, texHeight); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, this->_textureUpscaleBuffer); - - texWidth = this->currTexture->sizeX; - texHeight = this->currTexture->sizeY; + texWidth = theTexture->sizeX; + texHeight = theTexture->sizeY; this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); - glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, this->_textureUpscaleBuffer); + glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - glTexImage2D(GL_TEXTURE_2D, 2, GL_RGBA, this->currTexture->sizeX, this->currTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_BYTE, textureSrc); - break; + glTexSubImage2D(GL_TEXTURE_2D, 2, 0, 0, theTexture->sizeX, theTexture->sizeY, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } - - default: - break; + break; } - - if (this->_textureSmooth) - { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (this->_textureScalingFactor > 1) ? GL_LINEAR_MIPMAP_LINEAR : GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, this->_deviceInfo.maxAnisotropy); - } - else - { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.0f); - } + + default: + break; } + + if (this->_textureSmooth) + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (this->_textureScalingFactor > 1) ? 
GL_LINEAR_MIPMAP_LINEAR : GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, this->_deviceInfo.maxAnisotropy); + } else { - //otherwise, just bind it - glBindTexture(GL_TEXTURE_2D, (GLuint)this->currTexture->texid); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.0f); } - - glUniform2f(OGLRef.uniformPolyTexScale, this->currTexture->invSizeX, this->currTexture->invSizeY); } + else + { + //otherwise, just bind it + glBindTexture(GL_TEXTURE_2D, (GLuint)theTexture->texid); + } return OGLERROR_NOERR; } Modified: trunk/desmume/src/OGLRender.h =================================================================== --- trunk/desmume/src/OGLRender.h 2016-11-01 21:07:17 UTC (rev 5568) +++ trunk/desmume/src/OGLRender.h 2016-11-02 07:25:11 UTC (rev 5569) @@ -587,10 +587,7 @@ bool isVAOSupported; bool willFlipFramebufferOnGPU; bool willConvertFramebufferOnGPU; - - // Textures - TexCacheItem *currTexture; - + FragmentColor *_mappedFramebuffer; bool _pixelReadNeedsFinish; size_t _currentPolyIndex; Modified: trunk/desmume/src/OGLRender_3_2.cpp =================================================================== --- trunk/desmume/src/OGLRender_3_2.cpp 2016-11-01 21:07:17 UTC (rev 5568) +++ trunk/desmume/src/OGLRender_3_2.cpp 2016-11-02 07:25:11 UTC (rev 5569) @@ -1695,103 +1695,133 @@ return OGLERROR_NOERR; } - TexCacheItem *newTexture = texCache.GetTexture(thePoly.texParam, thePoly.texPalette); - if (newTexture->unpackFormat != TexFormat_32bpp) + TexCacheItem *theTexture = texCache.GetTexture(thePoly.texParam, thePoly.texPalette); + if (theTexture->unpackFormat != TexFormat_32bpp) { - newTexture->Unpack<TexFormat_32bpp>(); - } - - if (newTexture != this->currTexture) - { - this->currTexture = newTexture; + 
theTexture->Unpack<TexFormat_32bpp>(); + //has the ogl renderer initialized the texture? - if (this->currTexture->GetDeleteCallback() == NULL) + const bool isNewTexture = (theTexture->GetDeleteCallback() == NULL); + if (isNewTexture) { - this->currTexture->SetDeleteCallback(&texDeleteCallback, this, NULL); + theTexture->SetDeleteCallback(&texDeleteCallback, this, NULL); if (OGLRef.freeTextureIDs.empty()) { this->ExpandFreeTextures(); } - this->currTexture->texid = (u32)OGLRef.freeTextureIDs.front(); + theTexture->texid = (u32)OGLRef.freeTextureIDs.front(); OGLRef.freeTextureIDs.pop(); - - glBindTexture(GL_TEXTURE_2D, (GLuint)this->currTexture->texid); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); - - const NDSTextureFormat texFormat = this->currTexture->GetTextureFormat(); - const u32 *textureSrc = this->currTexture->unpackData; - size_t texWidth = this->currTexture->sizeX; - size_t texHeight = this->currTexture->sizeY; - - if (this->_textureDeposterizeDstSurface.Surface != NULL) + } + + glBindTexture(GL_TEXTURE_2D, (GLuint)theTexture->texid); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (params.enableRepeatS ? (params.enableMirroredRepeatS ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? 
GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); + + const NDSTextureFormat texFormat = theTexture->GetTextureFormat(); + const u32 *textureSrc = theTexture->unpackData; + size_t texWidth = theTexture->sizeX; + size_t texHeight = theTexture->sizeY; + + if (this->_textureDeposterizeDstSurface.Surface != NULL) + { + this->TextureDeposterize(textureSrc, texWidth, texHeight); + textureSrc = (u32 *)this->_textureDeposterizeDstSurface.Surface; + } + + switch (this->_textureScalingFactor) + { + case 1: { - this->TextureDeposterize(textureSrc, texWidth, texHeight); - textureSrc = (u32 *)this->_textureDeposterizeDstSurface.Surface; + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + + if (isNewTexture) + { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + else + { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + break; } - - switch (this->_textureScalingFactor) + + case 2: { - case 1: - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, textureSrc); - break; - - case 2: + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); + + this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); + + if (isNewTexture) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, theTexture->sizeX, theTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + else + { + 
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, theTexture->sizeX, theTexture->sizeY, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); + } + break; + } + + case 4: + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 2); + + this->TextureUpscale<4>(texFormat, textureSrc, texWidth, texHeight); + + if (isNewTexture) + { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); + texWidth = theTexture->sizeX; + texHeight = theTexture->sizeY; this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, this->_textureUpscaleBuffer); + glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, this->currTexture->sizeX, this->currTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_BYTE, textureSrc); - break; + glTexImage2D(GL_TEXTURE_2D, 2, GL_RGBA, theTexture->sizeX, theTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } - - case 4: + else { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 2); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - this->TextureUpscale<4>(texFormat, textureSrc, texWidth, texHeight); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, this->_textureUpscaleBuffer); - - texWidth = this->currTexture->sizeX; - texHeight = this->currTexture->sizeY; + texWidth = theTexture->sizeX; + texHeight = theTexture->sizeY; this->TextureUpscale<2>(texFormat, textureSrc, texWidth, texHeight); - 
glTexImage2D(GL_TEXTURE_2D, 1, GL_RGBA, texWidth, texHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, this->_textureUpscaleBuffer); + glTexSubImage2D(GL_TEXTURE_2D, 1, 0, 0, texWidth, texHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, this->_textureUpscaleBuffer); - glTexImage2D(GL_TEXTURE_2D, 2, GL_RGBA, this->currTexture->sizeX, this->currTexture->sizeY, 0, GL_RGBA, GL_UNSIGNED_BYTE, textureSrc); - break; + glTexSubImage2D(GL_TEXTURE_2D, 2, 0, 0, theTexture->sizeX, theTexture->sizeY, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, textureSrc); } - - default: - break; + break; } - - if (this->_textureSmooth) - { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (this->_textureScalingFactor > 1) ? GL_LINEAR_MIPMAP_LINEAR : GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, this->_deviceInfo.maxAnisotropy); - } - else - { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.0f); - } + + default: + break; } + + if (this->_textureSmooth) + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (this->_textureScalingFactor > 1) ? 
GL_LINEAR_MIPMAP_LINEAR : GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, this->_deviceInfo.maxAnisotropy); + } else { - //otherwise, just bind it - glBindTexture(GL_TEXTURE_2D, (GLuint)this->currTexture->texid); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.0f); } } + else + { + //otherwise, just bind it + glBindTexture(GL_TEXTURE_2D, (GLuint)theTexture->texid); + } return OGLERROR_NOERR; } Modified: trunk/desmume/src/texcache.cpp =================================================================== --- trunk/desmume/src/texcache.cpp 2016-11-01 21:07:17 UTC (rev 5568) +++ trunk/desmume/src/texcache.cpp 2016-11-02 07:25:11 UTC (rev 5569) @@ -195,9 +195,6 @@ return ret; } -//for each texformat, number of palette entries -static const u32 paletteSizeList[] = {0, 32, 4, 16, 256, 0, 8, 0}; - TexCache texCache; TexCache::TexCache() @@ -278,21 +275,21 @@ TexCacheItem* TexCache::GetTexture(u32 texAttributes, u32 palAttributes) { - //for each texformat, multiplier from numtexels to numbytes (fixed point 30.2) - static const int texSizes[] = {0, 4, 1, 2, 4, 1, 4, 8}; - + TexCacheItem *theTexture = NULL; + bool didCreateNewTexture = false; bool needLoadTexData = false; bool needLoadPalette = false; //conditions where we reject matches: //when the teximage or texpal params dont match //(this is our key for identifying textures in the cache) - TexCacheItem *theTexture = NULL; const TexCacheKey key = TexCache::GenerateKey(texAttributes, palAttributes); const TexCacheTable::iterator cachedTexture = this->cacheTable.find(key); if (cachedTexture == this->cacheTable.end()) { + theTexture = new TexCacheItem(texAttributes, palAttributes); + didCreateNewTexture = true; needLoadTexData = true; needLoadPalette = true; } @@ -315,36 +312,12 
@@ } //we suspect the texture may be invalid. we need to do a byte-for-byte comparison to re-establish that it is valid: - const NDSTextureFormat texPackFormat = (NDSTextureFormat)((texAttributes>>26)&0x07); - const u32 sizeX = (8 << ((texAttributes>>20)&0x07)); - const u32 sizeY = (8 << ((texAttributes>>23)&0x07)); - const u32 imageSize = sizeX*sizeY; //dump the palette to a temp buffer, so that we don't have to worry about memory mapping. //this isnt such a problem with texture memory, because we read sequentially from it. //however, we read randomly from palette memory, so the mapping is more costly. - const u32 palSize = paletteSizeList[texPackFormat] * sizeof(u16); - u32 palAddress; + MemSpan currentPaletteMS = MemSpan_TexPalette(theTexture->paletteAddress, theTexture->paletteSize, false); - switch (texPackFormat) - { - case TEXMODE_I2: - palAddress = palAttributes << 3; - break; - - case TEXMODE_A3I5: - case TEXMODE_I4: - case TEXMODE_I8: - case TEXMODE_A5I3: - case TEXMODE_16BPP: - case TEXMODE_4X4: - default: - palAddress = palAttributes << 4; - break; - } - - MemSpan currentPaletteMS = MemSpan_TexPalette(palAddress, palSize, false); - CACHE_ALIGN u16 currentPalette[256]; #ifdef WORDS_BIGENDIAN currentPaletteMS.dump16(currentPalette); @@ -356,84 +329,61 @@ //note that we are considering 4x4 textures to have a palette size of 0. 
//they really have a potentially HUGE palette, too big for us to handle like a normal palette, //so they go through a different system - if (theTexture != NULL) + if ( !didCreateNewTexture && (theTexture->paletteSize > 0) && memcmp(theTexture->paletteColorTable, currentPalette, theTexture->paletteSize) ) { - if ( (palSize > 0) && memcmp(theTexture->paletteColorTable, currentPalette, palSize) ) - { - needLoadPalette = true; - } + needLoadPalette = true; } //analyze the texture memory mapping and the specifications of this texture - const u32 texSize = (imageSize*texSizes[texPackFormat]) >> 2; //shifted because the texSizes multiplier is fixed point - MemSpan currentPackedTexDataMS = MemSpan_TexMem((texAttributes&0xFFFF)<<3, texSize); + MemSpan currentPackedTexDataMS = MemSpan_TexMem(theTexture->packAddress, theTexture->packSize); //when the texture data doesn't match - if (theTexture != NULL) + if ( !didCreateNewTexture && (theTexture->packSize > 0) && currentPackedTexDataMS.memcmp(theTexture->packData, theTexture->packSize) ) { - if (currentPackedTexDataMS.memcmp(theTexture->packData, theTexture->packSize)) - { - needLoadTexData = true; - } + needLoadTexData = true; } //if the texture is 4x4 then the index data must match MemSpan currentPackedTexIndexMS; - if (texPackFormat == TEXMODE_4X4) + if (theTexture->packFormat == TEXMODE_4X4) { //determine the location for 4x4 index data - const u32 indexBase = ((texAttributes & 0xc000) == 0x8000) ? 
0x30000 : 0x20000; - const u32 indexOffset = (texAttributes & 0x3FFF) << 2; - const int indexSize = imageSize >> 3; + currentPackedTexIndexMS = MemSpan_TexMem(theTexture->packIndexAddress, theTexture->packIndexSize); - currentPackedTexIndexMS = MemSpan_TexMem(indexOffset+indexBase, indexSize); - - if (theTexture != NULL) + if ( !didCreateNewTexture && (theTexture->packIndexSize > 0) && currentPackedTexIndexMS.memcmp(theTexture->packIndexData, theTexture->packIndexSize) ) { - if (currentPackedTexIndexMS.memcmp(theTexture->packIndexData, theTexture->packIndexSize)) - { - needLoadTexData = true; - needLoadPalette = true; - } + needLoadTexData = true; + needLoadPalette = true; } } - if (needLoadTexData || needLoadPalette) + if (!needLoadTexData && !needLoadPalette) { - if (theTexture != NULL) - { - //we found a cached item for the current address, but the data is stale. - //for a variety of complicated reasons, we need to throw it out right this instant. - this->list_remove(theTexture); - delete theTexture; - theTexture = NULL; - } - - //item was not found. recruit an existing one (the oldest), or create a new one - //evict(); //reduce the size of the cache if necessary - //TODO - as a peculiarity of the texcache, eviction must happen after the entire 3d frame runs - //to support separate cache and read passes - TexCacheItem *newTexture = new TexCacheItem(); - newTexture->SetTextureData(texAttributes, currentPackedTexDataMS, currentPackedTexIndexMS); - newTexture->SetTexturePalette(palAttributes, currentPalette); - - this->list_push_front(newTexture); - //printf("allocating: up to %d with %d items\n",cache_size,index.size()); - - theTexture = newTexture; + //we found a match. just return it + theTexture->suspectedInvalid = false; + return theTexture; } - else + + if (needLoadTexData) { - if (theTexture != NULL) - { - //we found a match. 
just return it - //REMINDER to make it primary/newest when we have smarter code - //list_remove(curr); - //list_push_front(curr); - theTexture->suspectedInvalid = false; - } + theTexture->SetTextureData(currentPackedTexDataMS, currentPackedTexIndexMS); + theTexture->unpackFormat = TexFormat_None; } + if (needLoadPalette) + { + theTexture->SetTexturePalette(currentPalette); + theTexture->unpackFormat = TexFormat_None; + } + + if (didCreateNewTexture) + { + this->list_push_front(theTexture); + //printf("allocating: up to %d with %d items\n",cache_size,index.size()); + } + + theTexture->assumedInvalid = false; + theTexture->suspectedInvalid = false; return theTexture; } @@ -450,35 +400,112 @@ _deleteCallbackParam1 = NULL; _deleteCallbackParam2 = NULL; + textureAttributes = 0; + paletteAttributes = 0; + + sizeX = 0; + sizeY = 0; + invSizeX = 0.0f; + invSizeY = 0.0f; + isPalZeroTransparent = false; + + suspectedInvalid = false; + assumedInvalid = false; + packFormat = TEXMODE_NONE; + packAddress = 0; packSize = 0; packData = NULL; + + paletteAddress = 0; + paletteSize = 0; paletteColorTable = NULL; - isPalZeroTransparent = false; unpackFormat = TexFormat_None; unpackSize = 0; unpackData = NULL; - suspectedInvalid = false; - assumedInvalid = false; - - textureAttributes = 0; - paletteAttributes = 0; - paletteAddress = 0; - paletteSize = 0; - sizeX = 0; - sizeY = 0; - invSizeX = 0.0f; - invSizeY = 0.0f; - + packIndexAddress = 0; + packIndexSize = 0; packIndexData = NULL; packSizeFirstSlot = 0; - packIndexSize = 0; texid = 0; } +TexCacheItem::TexCacheItem(const u32 texAttributes, const u32 palAttributes) +{ + //for each texformat, multiplier from numtexels to numbytes (fixed point 30.2) + static const u32 texSizes[] = {0, 4, 1, 2, 4, 1, 4, 8}; + + //for each texformat, number of palette entries + static const u32 paletteSizeList[] = {0, 32, 4, 16, 256, 0, 8, 0}; + + _deleteCallback = NULL; + _deleteCallbackParam1 = NULL; + _deleteCallbackParam2 = NULL; + + texid = 0; + + 
textureAttributes = texAttributes; + paletteAttributes = palAttributes; + + sizeX = (8 << ((texAttributes >> 20) & 0x07)); + sizeY = (8 << ((texAttributes >> 23) & 0x07)); + invSizeX = 1.0f / (float)sizeX; + invSizeY = 1.0f / (float)sizeY; + + packFormat = (NDSTextureFormat)((texAttributes >> 26) & 0x07); + packAddress = (texAttributes & 0xFFFF) << 3; + packSize = (sizeX*sizeY*texSizes[packFormat]) >> 2; //shifted because the texSizes multiplier is fixed point + packData = (u8 *)malloc_alignedCacheLine(packSize); + + if ( (packFormat == TEXMODE_I2) || (packFormat == TEXMODE_I4) || (packFormat == TEXMODE_I8) ) + { + isPalZeroTransparent = ( ((texAttributes >> 29) & 1) != 0 ); + } + else + { + isPalZeroTransparent = false; + } + + paletteSize = paletteSizeList[packFormat] * sizeof(u16); + if (paletteSize > 0) + { + paletteAddress = (packFormat == TEXMODE_I2) ? palAttributes << 3 : palAttributes << 4; + paletteColorTable = (u16 *)malloc_alignedCacheLine(paletteSize); + } + else + { + paletteAddress = 0; + paletteColorTable = NULL; + } + + unpackFormat = TexFormat_None; + unpackSize = 0; + unpackData = NULL; + + if (packFormat == TEXMODE_4X4) + { + const u32 indexBase = ((texAttributes & 0xC000) == 0x8000) ? 
0x30000 : 0x20000; + const u32 indexOffset = (texAttributes & 0x3FFF) << 2; + packIndexAddress = indexBase + indexOffset; + packIndexSize = (sizeX * sizeY) >> 3; + packIndexData = (u8 *)malloc_alignedCacheLine(packIndexSize); + packSizeFirstSlot = 0; + } + else + { + packIndexAddress = 0; + packIndexSize = 0; + packIndexData = NULL; + packSizeFirstSlot = 0; + } + + suspectedInvalid = true; + assumedInvalid = true; +} + TexCacheItem::~TexCacheItem() { free_aligned(this->packData); @@ -505,55 +532,19 @@ return this->packFormat; } -void TexCacheItem::SetTextureData(const u32 attr, const MemSpan &packedData, const MemSpan &packedIndexData) +void TexCacheItem::SetTextureData(const MemSpan &packedData, const MemSpan &packedIndexData) { - const u32 w = (8 << ((attr >> 20) & 0x07)); - const u32 h = (8 << ((attr >> 23) & 0x07)); - - this->textureAttributes = attr; - this->packFormat = (NDSTextureFormat)((attr >> 26) & 0x07); - - this->sizeX = w; - this->sizeY = h; - this->invSizeX = 1.0f / (float)w; - this->invSizeY = 1.0f / (float)h; - //dump texture and 4x4 index data for cache keying this->packSizeFirstSlot = packedData.items[0].len; - if (this->packSize != packedData.size) - { - u8 *oldPackData = this->packData; - this->packSize = packedData.size; - this->packData = (u8 *)malloc_alignedCacheLine(this->packSize); - free_aligned(oldPackData); - } - packedData.dump(this->packData); - if ( (this->packFormat == TEXMODE_I2) || (this->packFormat == TEXMODE_I4) || (this->packFormat == TEXMODE_I8) ) + if (this->packFormat == TEXMODE_4X4) { - this->isPalZeroTransparent = ( ((attr >> 29) & 1) != 0 ); + packedIndexData.dump(this->packIndexData, this->packIndexSize); } - else - { - this->isPalZeroTransparent = false; - - if (this->packFormat == TEXMODE_4X4) - { - if (this->packIndexSize != packedIndexData.size) - { - u8 *oldPackIndexData = this->packIndexData; - this->packIndexSize = packedIndexData.size; - this->packIndexData = (u8 *)malloc_alignedCacheLine(this->packIndexSize); - 
free_aligned(oldPackIndexData); - } - - packedIndexData.dump(this->packIndexData, this->packIndexSize); - } - } - const u32 currentUnpackSize = w * h * sizeof(u32); + const u32 currentUnpackSize = this->sizeX * this->sizeY * sizeof(u32); if (this->unpackSize != currentUnpackSize) { u32 *oldUnpackData = this->unpackData; @@ -563,31 +554,12 @@ } } -void TexCacheItem::SetTexturePalette(const u32 attr, const u16 *paletteBuffer) +void TexCacheItem::SetTexturePalette(const u16 *paletteBuffer) { - const u32 newPaletteSize = paletteSizeList[this->packFormat] * sizeof(u16); - - this->paletteAttributes = attr; - this->paletteAddress = (this->packFormat == TEXMODE_I2) ? attr << 3 : attr << 4; - - if (newPaletteSize > 0) + if (this->paletteSize > 0) { - if (this->paletteSize != newPaletteSize) - { - u16 *oldPaletteColorTable = this->paletteColorTable; - this->paletteSize = newPaletteSize; - this->paletteColorTable = (u16 *)malloc_alignedCacheLine(newPaletteSize); - free_aligned(oldPaletteColorTable); - } - - memcpy(this->paletteColorTable, paletteBuffer, newPaletteSize); + memcpy(this->paletteColorTable, paletteBuffer, this->paletteSize); } - else - { - free_aligned(this->paletteColorTable); - this->paletteSize = 0; - this->paletteColorTable = NULL; - } } template <TexCache_TexFormat TEXCACHEFORMAT> @@ -624,7 +596,7 @@ PROGINFO("Your 4x4 texture has overrun its texture slot.\n"); } - NDSTextureUnpack4x4<TEXCACHEFORMAT>(this->packSizeFirstSlot, this->packData, this->packIndexData, this->paletteAddress, this->textureAttributes, this->sizeX, this->sizeY, this->unpackData); + NDSTextureUnpack4x4<TEXCACHEFORMAT>(this->packSizeFirstSlot, (u32 *)this->packData, (u16 *)this->packIndexData, this->paletteAddress, this->textureAttributes, this->sizeX, this->sizeY, this->unpackData); break; } @@ -633,7 +605,7 @@ break; case TEXMODE_16BPP: - NDSTextureUnpackDirect16Bit<TEXCACHEFORMAT>(this->packSize, this->packData, this->unpackData); + 
NDSTextureUnpackDirect16Bit<TEXCACHEFORMAT>(this->packSize, (u16 *)this->packData, this->unpackData); break; default: @@ -657,545 +629,7 @@ } #endif -// TODO: Delete these MemSpan based functions after testing confirms that using the dumped texture data works properly. template <TexCache_TexFormat TEXCACHEFORMAT> -void NDSTextureUnpackI2(const MemSpan &ms, const u16 *pal, const bool isPalZeroTransparent, u32 *dstBuffer) -{ - u8 *adr; - -#ifdef ENABLE_SSSE3 - const __m128i pal_vec128 = _mm_loadl_epi64((__m128i *)pal); -#endif - if (isPalZeroTransparent) - { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; -#ifdef ENABLE_SSSE3 - for (size_t x = 0; x < ms.items[j].len; x+=4, adr+=4, dstBuffer+=16) - { - __m128i idx = _mm_set_epi32(0, 0, 0, *(u32 *)adr); - idx = _mm_unpacklo_epi8(idx, idx); - idx = _mm_unpacklo_epi8(idx, idx); - idx = _mm_or_si128( _mm_or_si128( _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi32(0x00000003)), _mm_and_si128(_mm_srli_epi32(idx, 2), _mm_set1_epi32(0x00000300)) ), _mm_and_si128(_mm_srli_epi32(idx, 4), _mm_set1_epi32(0x00030000)) ), _mm_and_si128(_mm_srli_epi32(idx, 6), _mm_set1_epi32(0x03000000)) ); - idx = _mm_slli_epi16(idx, 1); - - __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - - const __m128i palColor0 = _mm_shuffle_epi8(pal_vec128, idx0); - const __m128i palColor1 = _mm_shuffle_epi8(pal_vec128, idx1); - - __m128i convertedColor[4]; - - if (TEXCACHEFORMAT == TexFormat_15bpp) - { - ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); - } - else - { - ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); - } - - // Set 
converted colors to 0 if the palette index is 0. - idx0 = _mm_cmpeq_epi16(idx0, _mm_set1_epi16(0x0100)); - idx1 = _mm_cmpeq_epi16(idx1, _mm_set1_epi16(0x0100)); - convertedColor[0] = _mm_andnot_si128(_mm_unpacklo_epi16(idx0, idx0), convertedColor[0]); - convertedColor[1] = _mm_andnot_si128(_mm_unpackhi_epi16(idx0, idx0), convertedColor[1]); - convertedColor[2] = _mm_andnot_si128(_mm_unpacklo_epi16(idx1, idx1), convertedColor[2]); - convertedColor[3] = _mm_andnot_si128(_mm_unpackhi_epi16(idx1, idx1), convertedColor[3]); - - _mm_store_si128((__m128i *)(dstBuffer + 0), convertedColor[0]); - _mm_store_si128((__m128i *)(dstBuffer + 4), convertedColor[1]); - _mm_store_si128((__m128i *)(dstBuffer + 8), convertedColor[2]); - _mm_store_si128((__m128i *)(dstBuffer + 12), convertedColor[3]); - } -#else - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - u8 idx; - - idx = *adr & 0x03; - *dstBuffer++ = (idx == 0) ? 0 : CONVERT(pal[idx] & 0x7FFF); - - idx = (*adr >> 2) & 0x03; - *dstBuffer++ = (idx == 0) ? 0 : CONVERT(pal[idx] & 0x7FFF); - - idx = (*adr >> 4) & 0x03; - *dstBuffer++ = (idx == 0) ? 0 : CONVERT(pal[idx] & 0x7FFF); - - idx = (*adr >> 6) & 0x03; - *dstBuffer++ = (idx == 0) ? 
0 : CONVERT(pal[idx] & 0x7FFF); - } -#endif - } - } - else - { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; -#ifdef ENABLE_SSSE3 - for (size_t x = 0; x < ms.items[j].len; x+=4, adr+=4, dstBuffer+=16) - { - __m128i idx = _mm_set_epi32(0, 0, 0, *(u32 *)adr); - idx = _mm_unpacklo_epi8(idx, idx); - idx = _mm_unpacklo_epi8(idx, idx); - idx = _mm_or_si128( _mm_or_si128( _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi32(0x00000003)), _mm_and_si128(_mm_srli_epi32(idx, 2), _mm_set1_epi32(0x00000300)) ), _mm_and_si128(_mm_srli_epi32(idx, 4), _mm_set1_epi32(0x00030000)) ), _mm_and_si128(_mm_srli_epi32(idx, 6), _mm_set1_epi32(0x03000000)) ); - idx = _mm_slli_epi16(idx, 1); - - const __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - const __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - - const __m128i palColor0 = _mm_shuffle_epi8(pal_vec128, idx0); - const __m128i palColor1 = _mm_shuffle_epi8(pal_vec128, idx1); - - __m128i convertedColor[4]; - - if (TEXCACHEFORMAT == TexFormat_15bpp) - { - ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); - } - else - { - ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); - } - - _mm_store_si128((__m128i *)(dstBuffer + 0), convertedColor[0]); - _mm_store_si128((__m128i *)(dstBuffer + 4), convertedColor[1]); - _mm_store_si128((__m128i *)(dstBuffer + 8), convertedColor[2]); - _mm_store_si128((__m128i *)(dstBuffer + 12), convertedColor[3]); - } -#else - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - *dstBuffer++ = CONVERT(pal[ *adr & 0x03] & 0x7FFF); - *dstBuffer++ = CONVERT(pal[(*adr >> 2) & 0x03] & 0x7FFF); - *dstBuffer++ = CONVERT(pal[(*adr >> 4) & 0x03] & 
0x7FFF); - *dstBuffer++ = CONVERT(pal[(*adr >> 6) & 0x03] & 0x7FFF); - } -#endif - } - } -} - -template <TexCache_TexFormat TEXCACHEFORMAT> -void NDSTextureUnpackI4(const MemSpan &ms, const u16 *pal, const bool isPalZeroTransparent, u32 *dstBuffer) -{ - u8 *adr; - -#ifdef ENABLE_SSSE3 - const __m128i palLo = _mm_load_si128((__m128i *)pal + 0); - const __m128i palHi = _mm_load_si128((__m128i *)pal + 1); -#endif - if (isPalZeroTransparent) - { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; -#ifdef ENABLE_SSSE3 - for (size_t x = 0; x < ms.items[j].len; x+=8, adr+=8, dstBuffer+=16) - { - __m128i idx = _mm_loadl_epi64((__m128i *)adr); - idx = _mm_unpacklo_epi8(idx, idx); - idx = _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi16(0x000F)), _mm_and_si128(_mm_srli_epi16(idx, 4), _mm_set1_epi16(0x0F00)) ); - idx = _mm_slli_epi16(idx, 1); - - __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - - const __m128i palMask = _mm_cmpeq_epi8( _mm_and_si128(idx, _mm_set1_epi8(0x10)), _mm_setzero_si128() ); - const __m128i palColor0A = _mm_shuffle_epi8(palLo, idx0); - const __m128i palColor0B = _mm_shuffle_epi8(palHi, idx0); - const __m128i palColor1A = _mm_shuffle_epi8(palLo, idx1); - const __m128i palColor1B = _mm_shuffle_epi8(palHi, idx1); - - const __m128i palColor0 = _mm_blendv_epi8( palColor0B, palColor0A, _mm_unpacklo_epi8(palMask, palMask) ); - const __m128i palColor1 = _mm_blendv_epi8( palColor1B, palColor1A, _mm_unpackhi_epi8(palMask, palMask) ); - - __m128i convertedColor[4]; - - if (TEXCACHEFORMAT == TexFormat_15bpp) - { - ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); - } - else - { - ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); - 
ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); - } - - // Set converted colors to 0 if the palette index is 0. - idx0 = _mm_cmpeq_epi16(idx0, _mm_set1_epi16(0x0100)); - idx1 = _mm_cmpeq_epi16(idx1, _mm_set1_epi16(0x0100)); - convertedColor[0] = _mm_andnot_si128(_mm_unpacklo_epi16(idx0, idx0), convertedColor[0]); - convertedColor[1] = _mm_andnot_si128(_mm_unpackhi_epi16(idx0, idx0), convertedColor[1]); - convertedColor[2] = _mm_andnot_si128(_mm_unpacklo_epi16(idx1, idx1), convertedColor[2]); - convertedColor[3] = _mm_andnot_si128(_mm_unpackhi_epi16(idx1, idx1), convertedColor[3]); - - _mm_store_si128((__m128i *)(dstBuffer + 0), convertedColor[0]); - _mm_store_si128((__m128i *)(dstBuffer + 4), convertedColor[1]); - _mm_store_si128((__m128i *)(dstBuffer + 8), convertedColor[2]); - _mm_store_si128((__m128i *)(dstBuffer + 12), convertedColor[3]); - } -#else - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - u8 idx; - - idx = *adr & 0xF; - *dstBuffer++ = (idx == 0) ? 0 : CONVERT(pal[idx] & 0x7FFF); - - idx = *adr >> 4; - *dstBuffer++ = (idx == 0) ? 
0 : CONVERT(pal[idx] & 0x7FFF); - } -#endif - } - } - else - { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; -#ifdef ENABLE_SSSE3 - for (size_t x = 0; x < ms.items[j].len; x+=8, adr+=8, dstBuffer+=16) - { - __m128i idx = _mm_loadl_epi64((__m128i *)adr); - idx = _mm_unpacklo_epi8(idx, idx); - idx = _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi16(0x000F)), _mm_and_si128(_mm_srli_epi16(idx, 4), _mm_set1_epi16(0x0F00)) ); - idx = _mm_slli_epi16(idx, 1); - - const __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - const __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - - const __m128i palMask = _mm_cmpeq_epi8( _mm_and_si128(idx, _mm_set1_epi8(0x10)), _mm_setzero_si128() ); - const __m128i palColor0A = _mm_shuffle_epi8(palLo, idx0); - const __m128i palColor0B = _mm_shuffle_epi8(palHi, idx0); - const __m128i palColor1A = _mm_shuffle_epi8(palLo, idx1); - const __m128i palColor1B = _mm_shuffle_epi8(palHi, idx1); - - const __m128i palColor0 = _mm_blendv_epi8( palColor0B, palColor0A, _mm_unpacklo_epi8(palMask, palMask) ); - const __m128i palColor1 = _mm_blendv_epi8( palColor1B, palColor1A, _mm_unpackhi_epi8(palMask, palMask) ); - - __m128i convertedColor[4]; - - if (TEXCACHEFORMAT == TexFormat_15bpp) - { - ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); - } - else - { - ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); - } - - _mm_store_si128((__m128i *)(dstBuffer + 0), convertedColor[0]); - _mm_store_si128((__m128i *)(dstBuffer + 4), convertedColor[1]); - _mm_store_si128((__m128i *)(dstBuffer + 8), convertedColor[2]); - _mm_store_si128((__m128i *)(dstBuffer + 12), convertedColor[3]); - } -#else - 
for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - *dstBuffer++ = CONVERT(pal[*adr & 0x0F] & 0x7FFF); - *dstBuffer++ = CONVERT(pal[*adr >> 4] & 0x7FFF); - } -#endif - } - } -} - -template <TexCache_TexFormat TEXCACHEFORMAT> -void NDSTextureUnpackI8(const MemSpan &ms, const u16 *srcPal, const bool isPalZeroTransparent, u32 *dstBuffer) -{ - u8 *adr; - - if (isPalZeroTransparent) - { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - *dstBuffer++ = (*adr == 0) ? 0 : CONVERT(srcPal[*adr] & 0x7FFF); - } - } - } - else - { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - *dstBuffer++ = CONVERT(srcPal[*adr] & 0x7FFF); - } - } - } -} - -template <TexCache_TexFormat TEXCACHEFORMAT> -void NDSTextureUnpackA3I5(const MemSpan &ms, const u16 *pal, u32 *dstBuffer) -{ - u8 *adr; - - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - const u16 c = pal[*adr & 0x1F] & 0x7FFF; - const u8 alpha = *adr >> 5; - *dstBuffer++ = (TEXCACHEFORMAT == TexFormat_15bpp) ? 
COLOR555TO6665(c, material_3bit_to_5bit[alpha]) : COLOR555TO8888(c, material_3bit_to_8bit[alpha]); - } - } -} - -template <TexCache_TexFormat TEXCACHEFORMAT> -void NDSTextureUnpackA5I3(const MemSpan &ms, const u16 *pal, u32 *dstBuffer) -{ - u8 *adr; - -#ifdef ENABLE_SSSE3 - const __m128i pal_vec128 = _mm_load_si128((__m128i *)pal); -#endif - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; -#ifdef ENABLE_SSSE3 - for (size_t x = 0; x < ms.items[j].len; x+=16, adr+=16, dstBuffer+=16) - { - const __m128i bits = _mm_loadu_si128((__m128i *)adr); - - const __m128i idx = _mm_slli_epi16( _mm_and_si128(bits, _mm_set1_epi8(0x07)), 1 ); - const __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - const __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - - const __m128i palColor0 = _mm_shuffle_epi8(pal_vec128, idx0); - const __m128i palColor1 = _mm_shuffle_epi8(pal_vec128, idx1); - - __m128i tmpAlpha[2]; - __m128i convertedColor[4]; - - if (TEXCACHEFORMAT == TexFormat_15bpp) - { - const __m128i alpha = _mm_srli_epi16( _mm_and_si128(bits, _mm_set1_epi8(0xF8)), 3 ); - const __m128i alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha); - const __m128i alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha); - - tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo); - tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo); - ColorspaceConvert555To6665_SSE2<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]); - - tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi); - tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi); - ColorspaceConvert555To6665_SSE2<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]); - } - else - { - const __m128i alpha = _mm_or_si128( _mm_and_si128(bits, _mm_set1_epi8(0xF8)), _mm_srli_epi16(_mm_and_si128(bits, _mm_set1_epi8(0xE0)), 5) ); - const __m128i alphaLo = 
_mm_unpacklo_epi8(_mm_setzero_si128(), alpha); - const __m128i alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha); - - tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo); - tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo); - ColorspaceConvert555To8888_SSE2<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]); - - tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi); - tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi); - ColorspaceConvert555To8888_SSE2<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]); - } - - _mm_store_si128((__m128i *)(dstBuffer + 0), convertedColor[0]); - _mm_store_si128((__m128i *)(dstBuffer + 4), convertedColor[1]); - _mm_store_si128((__m128i *)(dstBuffer + 8), convertedColor[2]); - _mm_store_si128((__m128i *)(dstBuffer + 12), convertedColor[3]); - } -#else - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - const u16 c = pal[*adr&0x07] & 0x7FFF; - const u8 alpha = (*adr>>3); - *dstBuffer++ = (TEXCACHEFORMAT == TexFormat_15bpp) ? 
COLOR555TO6665(c, alpha) : COLOR555TO8888(c, material_5bit_to_8bit[alpha]); - } -#endif - } -} - -#define PAL4X4(offset) ( LE_TO_LOCAL_16( *(u16*)( MMU.texInfo.texPalSlot[((palAddress + (offset)*2)>>14)&0x7] + ((palAddress + (offset)*2)&0x3FFF) ) ) & 0x7FFF ) - -template <TexCache_TexFormat TEXCACHEFORMAT> -void NDSTextureUnpack4x4(const MemSpan &ms, const u32 palAddress, const u32 texAttributes, const u32 sizeX, const u32 sizeY, u32 *dstBuffer) -{ - if (ms.numItems != 1) - { - PROGINFO("Your 4x4 texture has overrun its texture slot.\n"); - } - //this check isnt necessary since the addressing is tied to the texture data which will also run out: - //if(msIndex.numItems != 1) PROGINFO("Your 4x4 texture index has overrun its slot.\n"); - - u16* slot1; - u32* map = (u32*)ms.items[0].ptr; - u32 limit = ms.items[0].len<<2; - u32 d = 0; - if ( (texAttributes & 0xc000) == 0x8000) - // texel are in slot 2 - slot1=(u16*)&MMU.texInfo.textureSlotAddr[1][((texAttributes & 0x3FFF)<<2)+0x010000]; - else - slot1=(u16*)&MMU.texInfo.textureSlotAddr[1][(texAttributes & 0x3FFF)<<2]; - - u16 yTmpSize = (sizeY>>2); - u16 xTmpSize = (sizeX>>2); - - //this is flagged whenever a 4x4 overruns its slot. - //i am guessing we just generate black in that case - bool dead = false; - - for (size_t y = 0; y < yTmpSize; y++) - { - u32 tmpPos[4]={(y<<2)*sizeX,((y<<2)+1)*sizeX, - ((y<<2)+2)*sizeX,((y<<2)+3)*sizeX}; - for (size_t x = 0; x < xTmpSize; x++, d++) - { - if (d >= limit) - dead = true; - - if (dead) - { - for (int sy = 0; sy < 4; sy++) - { - const u32 currentPos = (x<<2) + tmpPos[sy]; - dstBuffer[currentPos] = dstBuffer[currentPos+1] = dstBuffer[currentPos+2] = dstBuffer[currentPos+3] = 0; - } - continue; - } - - const u32 currBlock = LE_TO_LOCAL_32(map[d]); - const u16 pal1 = LE_TO_LOCAL_16(slot1[d]); - const u16 pal1offset = (pal1 & 0x3FFF)<<1; - const u8 mode = pal1>>14; - u32 tmp_col[4]; - - tmp_col[0] = COLOR555TO8888_OPAQU... [truncated message content] |
From: <rog...@us...> - 2016-11-01 21:07:19
|
Revision: 5568 http://sourceforge.net/p/desmume/code/5568 Author: rogerman Date: 2016-11-01 21:07:17 +0000 (Tue, 01 Nov 2016) Log Message: ----------- Texture Handler: - Texture items in cache are now searched using std::map instead of std::multimap. - Texture item search keys now ignore the render-specific bits of the texture attributes (repeat mode, flip mode, and coordinate transformation mode bits are ignored). This is to help reduce the number of duplicate textures in the cache. - Searching a texture and unpacking a texture are now performed as separate operations. - Texture unpacking functions now use restrict pointers instead of normal pointers. Modified Paths: -------------- trunk/desmume/src/OGLRender.cpp trunk/desmume/src/OGLRender_3_2.cpp trunk/desmume/src/rasterize.cpp trunk/desmume/src/texcache.cpp trunk/desmume/src/texcache.h Modified: trunk/desmume/src/OGLRender.cpp =================================================================== --- trunk/desmume/src/OGLRender.cpp 2016-10-30 23:16:49 UTC (rev 5567) +++ trunk/desmume/src/OGLRender.cpp 2016-11-01 21:07:17 UTC (rev 5568) @@ -1197,7 +1197,6 @@ // Kill the texture cache now before all of our texture IDs disappear. texCache.Reset(); - texCache.Reset(); while(!ref->freeTextureIDs.empty()) { @@ -2968,7 +2967,12 @@ glEnable(GL_TEXTURE_2D); } - TexCacheItem *newTexture = texCache.GetTexture(TexFormat_32bpp, thePoly.texParam, thePoly.texPalette); + TexCacheItem *newTexture = texCache.GetTexture(thePoly.texParam, thePoly.texPalette); + if (newTexture->unpackFormat != TexFormat_32bpp) + { + newTexture->Unpack<TexFormat_32bpp>(); + } + if (newTexture != this->currTexture) { this->currTexture = newTexture; @@ -4619,7 +4623,12 @@ glUniform1i(OGLRef.uniformPolyEnableTexture, GL_TRUE); glUniform1i(OGLRef.uniformTexSingleBitAlpha, (params.texFormat != TEXMODE_A3I5 && params.texFormat != TEXMODE_A5I3) ? 
GL_TRUE : GL_FALSE); - TexCacheItem *newTexture = texCache.GetTexture(TexFormat_32bpp, thePoly.texParam, thePoly.texPalette); + TexCacheItem *newTexture = texCache.GetTexture(thePoly.texParam, thePoly.texPalette); + if (newTexture->unpackFormat != TexFormat_32bpp) + { + newTexture->Unpack<TexFormat_32bpp>(); + } + if (newTexture != this->currTexture) { this->currTexture = newTexture; Modified: trunk/desmume/src/OGLRender_3_2.cpp =================================================================== --- trunk/desmume/src/OGLRender_3_2.cpp 2016-10-30 23:16:49 UTC (rev 5567) +++ trunk/desmume/src/OGLRender_3_2.cpp 2016-11-01 21:07:17 UTC (rev 5568) @@ -1695,7 +1695,12 @@ return OGLERROR_NOERR; } - TexCacheItem *newTexture = texCache.GetTexture(TexFormat_32bpp, thePoly.texParam, thePoly.texPalette); + TexCacheItem *newTexture = texCache.GetTexture(thePoly.texParam, thePoly.texPalette); + if (newTexture->unpackFormat != TexFormat_32bpp) + { + newTexture->Unpack<TexFormat_32bpp>(); + } + if (newTexture != this->currTexture) { this->currTexture = newTexture; Modified: trunk/desmume/src/rasterize.cpp =================================================================== --- trunk/desmume/src/rasterize.cpp 2016-10-30 23:16:49 UTC (rev 5567) +++ trunk/desmume/src/rasterize.cpp 2016-11-01 21:07:17 UTC (rev 5568) @@ -1373,8 +1373,13 @@ const POLY &firstPoly = *firstClippedPoly.poly; u32 lastTexParams = firstPoly.texParam; u32 lastTexPalette = firstPoly.texPalette; - TexCacheItem *lastTexKey = texCache.GetTexture(TexFormat_15bpp, firstPoly.texParam, firstPoly.texPalette); + TexCacheItem *lastTexItem = texCache.GetTexture(firstPoly.texParam, firstPoly.texPalette); + if (lastTexItem->unpackFormat != TexFormat_15bpp) + { + lastTexItem->Unpack<TexFormat_15bpp>(); + } + for (size_t i = 0; i < this->_clippedPolyCount; i++) { const GFX3D_Clipper::TClippedPoly &clippedPoly = clippedPolys[i]; @@ -1386,13 +1391,18 @@ //and then it won't be safe. 
if (lastTexParams != thePoly.texParam || lastTexPalette != thePoly.texPalette) { - lastTexKey = texCache.GetTexture(TexFormat_15bpp, thePoly.texParam, thePoly.texPalette); + lastTexItem = texCache.GetTexture(thePoly.texParam, thePoly.texPalette); + if (lastTexItem->unpackFormat != TexFormat_15bpp) + { + lastTexItem->Unpack<TexFormat_15bpp>(); + } + lastTexParams = thePoly.texParam; lastTexPalette = thePoly.texPalette; } //printf("%08X %d\n",poly->texParam,rasterizerUnit[0].textures.currentNum); - polyTexKeys[i] = lastTexKey; + polyTexKeys[i] = lastTexItem; } } Modified: trunk/desmume/src/texcache.cpp =================================================================== --- trunk/desmume/src/texcache.cpp 2016-10-30 23:16:49 UTC (rev 5567) +++ trunk/desmume/src/texcache.cpp 2016-11-01 21:07:17 UTC (rev 5568) @@ -202,19 +202,22 @@ TexCache::TexCache() { + cacheTable.clear(); cache_size = 0; memset(paletteDump, 0, sizeof(paletteDump)); } void TexCache::list_remove(TexCacheItem *item) { - this->index.erase(item->iterator); + const TexCacheKey key = TexCache::GenerateKey(item->textureAttributes, item->paletteAttributes); + this->cacheTable.erase(key); this->cache_size -= item->unpackSize; } void TexCache::list_push_front(TexCacheItem *item) { - item->iterator = this->index.insert(std::make_pair(item->textureAttributes, item)); + const TexCacheKey key = TexCache::GenerateKey(item->textureAttributes, item->paletteAttributes); + this->cacheTable[key] = item; this->cache_size += item->unpackSize; } @@ -224,13 +227,13 @@ //TODO - we should handle this instead by setting dirty flags in the vram memory mapping and noting whether palette memory was dirty. 
//but this will work for now MemSpan mspal = MemSpan_TexPalette(0, PALETTE_DUMP_SIZE, true); - bool paletteDirty = mspal.memcmp(paletteDump); + bool paletteDirty = mspal.memcmp(this->paletteDump); if (paletteDirty) { - mspal.dump(paletteDump); + mspal.dump(this->paletteDump); } - for (TTexCacheItemMultimap::iterator it(this->index.begin()); it != this->index.end(); ++it) + for (TexCacheTable::iterator it(this->cacheTable.begin()); it != this->cacheTable.end(); ++it) { it->second->suspectedInvalid = true; @@ -259,9 +262,9 @@ //TODO - do this based on age and not arbitrarily while (this->cache_size > target) { - if (this->index.size() == 0) break; //just in case.. doesnt seem possible, cache_size wouldve been 0 + if (this->cacheTable.size() == 0) break; //just in case.. doesnt seem possible, cache_size wouldve been 0 - TexCacheItem *item = this->index.begin()->second; + TexCacheItem *item = this->cacheTable.begin()->second; this->list_remove(item); //printf("evicting! totalsize:%d\n",cache_size); delete item; @@ -273,15 +276,54 @@ this->Evict(0); } -TexCacheItem* TexCache::GetTexture(TexCache_TexFormat texCacheFormat, u32 texAttributes, u32 palAttributes) +TexCacheItem* TexCache::GetTexture(u32 texAttributes, u32 palAttributes) { //for each texformat, multiplier from numtexels to numbytes (fixed point 30.2) static const int texSizes[] = {0, 4, 1, 2, 4, 1, 4, 8}; - NDSTextureFormat texPackFormat = (NDSTextureFormat)((texAttributes>>26)&0x07); - u32 sizeX = (8 << ((texAttributes>>20)&0x07)); - u32 sizeY = (8 << ((texAttributes>>23)&0x07)); - u32 imageSize = sizeX*sizeY; + bool needLoadTexData = false; + bool needLoadPalette = false; + + //conditions where we reject matches: + //when the teximage or texpal params dont match + //(this is our key for identifying textures in the cache) + TexCacheItem *theTexture = NULL; + const TexCacheKey key = TexCache::GenerateKey(texAttributes, palAttributes); + const TexCacheTable::iterator cachedTexture = this->cacheTable.find(key); 
+ + if (cachedTexture == this->cacheTable.end()) + { + needLoadTexData = true; + needLoadPalette = true; + } + else + { + theTexture = cachedTexture->second; + + //if the texture is assumed invalid, reject it + if (theTexture->assumedInvalid) + { + needLoadTexData = true; + needLoadPalette = true; + } + + //the texture matches params, but isnt suspected invalid. accept it. + if (!theTexture->suspectedInvalid) + { + return theTexture; + } + } + + //we suspect the texture may be invalid. we need to do a byte-for-byte comparison to re-establish that it is valid: + const NDSTextureFormat texPackFormat = (NDSTextureFormat)((texAttributes>>26)&0x07); + const u32 sizeX = (8 << ((texAttributes>>20)&0x07)); + const u32 sizeY = (8 << ((texAttributes>>23)&0x07)); + const u32 imageSize = sizeX*sizeY; + + //dump the palette to a temp buffer, so that we don't have to worry about memory mapping. + //this isnt such a problem with texture memory, because we read sequentially from it. + //however, we read randomly from palette memory, so the mapping is more costly. + const u32 palSize = paletteSizeList[texPackFormat] * sizeof(u16); u32 palAddress; switch (texPackFormat) @@ -301,14 +343,6 @@ break; } - //analyze the texture memory mapping and the specifications of this texture - u32 texSize = (imageSize*texSizes[texPackFormat]) >> 2; //shifted because the texSizes multiplier is fixed point - MemSpan currentPackedTexDataMS = MemSpan_TexMem((texAttributes&0xFFFF)<<3, texSize); - - //dump the palette to a temp buffer, so that we don't have to worry about memory mapping. - //this isnt such a problem with texture memory, because we read sequentially from it. - //however, we read randomly from palette memory, so the mapping is more costly. 
- u32 palSize = paletteSizeList[texPackFormat] * sizeof(u16); MemSpan currentPaletteMS = MemSpan_TexPalette(palAddress, palSize, false); CACHE_ALIGN u16 currentPalette[256]; @@ -318,102 +352,98 @@ currentPaletteMS.dump(currentPalette); #endif - //determine the location for 4x4 index data - u32 indexBase; - if ((texAttributes & 0xc000) == 0x8000) indexBase = 0x30000; - else indexBase = 0x20000; - - u32 indexOffset = (texAttributes & 0x3FFF) << 2; - int indexSize = 0; - MemSpan currentPackedTexIndexMS; - if (texPackFormat == TEXMODE_4X4) + //when the palettes dont match: + //note that we are considering 4x4 textures to have a palette size of 0. + //they really have a potentially HUGE palette, too big for us to handle like a normal palette, + //so they go through a different system + if (theTexture != NULL) { - indexSize = imageSize >> 3; - currentPackedTexIndexMS = MemSpan_TexMem(indexOffset+indexBase, indexSize); + if ( (palSize > 0) && memcmp(theTexture->paletteColorTable, currentPalette, palSize) ) + { + needLoadPalette = true; + } } - //TODO - as a special optimization, keep the last item returned and check it first + //analyze the texture memory mapping and the specifications of this texture + const u32 texSize = (imageSize*texSizes[texPackFormat]) >> 2; //shifted because the texSizes multiplier is fixed point + MemSpan currentPackedTexDataMS = MemSpan_TexMem((texAttributes&0xFFFF)<<3, texSize); - TexCacheItem *cachedTexture = NULL; + //when the texture data doesn't match + if (theTexture != NULL) + { + if (currentPackedTexDataMS.memcmp(theTexture->packData, theTexture->packSize)) + { + needLoadTexData = true; + } + } - for(std::pair<TTexCacheItemMultimap::iterator,TTexCacheItemMultimap::iterator> - iters = index.equal_range(texAttributes); - iters.first != iters.second; - ++iters.first) + //if the texture is 4x4 then the index data must match + MemSpan currentPackedTexIndexMS; + if (texPackFormat == TEXMODE_4X4) { - cachedTexture = iters.first->second; + 
//determine the location for 4x4 index data + const u32 indexBase = ((texAttributes & 0xc000) == 0x8000) ? 0x30000 : 0x20000; + const u32 indexOffset = (texAttributes & 0x3FFF) << 2; + const int indexSize = imageSize >> 3; - //conditions where we reject matches: - //when the teximage or texpal params dont match - //(this is our key for identifying textures in the cache) - //NEW: due to using format as a key we dont need to check this anymore - //if(curr->texAttributes != texAttributes) continue; - if (cachedTexture->paletteAttributes != palAttributes) continue; + currentPackedTexIndexMS = MemSpan_TexMem(indexOffset+indexBase, indexSize); - //we're being asked for a different format than what we had cached. - //TODO - this could be done at the entire cache level instead of checking repeatedly - if (cachedTexture->unpackFormat != texCacheFormat) goto REJECT; - - //if the texture is assumed invalid, reject it - if (cachedTexture->assumedInvalid) goto REJECT; - - //the texture matches params, but isnt suspected invalid. accept it. - if (!cachedTexture->suspectedInvalid) return cachedTexture; - - //we suspect the texture may be invalid. we need to do a byte-for-byte comparison to re-establish that it is valid: - - //when the palettes dont match: - //note that we are considering 4x4 textures to have a palette size of 0. 
- //they really have a potentially HUGE palette, too big for us to handle like a normal palette, - //so they go through a different system - if (currentPaletteMS.size != 0 && memcmp(cachedTexture->paletteColorTable, currentPalette, currentPaletteMS.size)) goto REJECT; - - //when the texture data doesn't match - if (currentPackedTexDataMS.memcmp(cachedTexture->packData, cachedTexture->packSize)) goto REJECT; - - //if the texture is 4x4 then the index data must match - if (texPackFormat == TEXMODE_4X4) + if (theTexture != NULL) { - if (currentPackedTexIndexMS.memcmp(cachedTexture->packIndexData, cachedTexture->packIndexSize)) goto REJECT; + if (currentPackedTexIndexMS.memcmp(theTexture->packIndexData, theTexture->packIndexSize)) + { + needLoadTexData = true; + needLoadPalette = true; + } } + } + + if (needLoadTexData || needLoadPalette) + { + if (theTexture != NULL) + { + //we found a cached item for the current address, but the data is stale. + //for a variety of complicated reasons, we need to throw it out right this instant. + this->list_remove(theTexture); + delete theTexture; + theTexture = NULL; + } - //we found a match. just return it - //REMINDER to make it primary/newest when we have smarter code - //list_remove(curr); - //list_push_front(curr); - cachedTexture->suspectedInvalid = false; - return cachedTexture; + //item was not found. recruit an existing one (the oldest), or create a new one + //evict(); //reduce the size of the cache if necessary + //TODO - as a peculiarity of the texcache, eviction must happen after the entire 3d frame runs + //to support separate cache and read passes + TexCacheItem *newTexture = new TexCacheItem(); + newTexture->SetTextureData(texAttributes, currentPackedTexDataMS, currentPackedTexIndexMS); + newTexture->SetTexturePalette(palAttributes, currentPalette); - REJECT: - //we found a cached item for the current address, but the data is stale. - //for a variety of complicated reasons, we need to throw it out right this instant. 
- this->list_remove(cachedTexture); - delete cachedTexture; - break; + this->list_push_front(newTexture); + //printf("allocating: up to %d with %d items\n",cache_size,index.size()); + + theTexture = newTexture; } - - //item was not found. recruit an existing one (the oldest), or create a new one - //evict(); //reduce the size of the cache if necessary - //TODO - as a peculiarity of the texcache, eviction must happen after the entire 3d frame runs - //to support separate cache and read passes - TexCacheItem *newTexture = new TexCacheItem(); - newTexture->SetTextureData(texAttributes, currentPackedTexDataMS, currentPackedTexIndexMS); - newTexture->SetTexturePalette(palAttributes, currentPalette); - newTexture->unpackFormat = texCacheFormat; - - this->list_push_front(newTexture); - //printf("allocating: up to %d with %d items\n",cache_size,index.size()); - - switch (texCacheFormat) + else { - case TexFormat_32bpp: newTexture->Unpack<TexFormat_32bpp>(currentPackedTexDataMS); break; - case TexFormat_15bpp: newTexture->Unpack<TexFormat_15bpp>(currentPackedTexDataMS); break; - default: assert(false); return NULL; + if (theTexture != NULL) + { + //we found a match. just return it + //REMINDER to make it primary/newest when we have smarter code + //list_remove(curr); + //list_push_front(curr); + theTexture->suspectedInvalid = false; + } } - return newTexture; + return theTexture; } +TexCacheKey TexCache::GenerateKey(const u32 texAttributes, const u32 palAttributes) +{ + // Since the repeat, flip, and coordinate transformation modes are render settings + // and not data settings, we can mask out those bits to help reduce duplicate entries. 
+ return (TexCacheKey)( ((u64)palAttributes << 32) | (u64)(texAttributes & 0x3FF0FFFF) ); +} + TexCacheItem::TexCacheItem() { _deleteCallback = NULL; @@ -424,6 +454,7 @@ packSize = 0; packData = NULL; paletteColorTable = NULL; + isPalZeroTransparent = false; unpackFormat = TexFormat_None; unpackSize = 0; @@ -476,10 +507,8 @@ void TexCacheItem::SetTextureData(const u32 attr, const MemSpan &packedData, const MemSpan &packedIndexData) { - u8 *oldPackData = this->packData; - u32 *oldUnpackData = this->unpackData; - u32 w = (8 << ((attr >> 20) & 0x07)); - u32 h = (8 << ((attr >> 23) & 0x07)); + const u32 w = (8 << ((attr >> 20) & 0x07)); + const u32 h = (8 << ((attr >> 23) & 0x07)); this->textureAttributes = attr; this->packFormat = (NDSTextureFormat)((attr >> 26) & 0x07); @@ -488,67 +517,88 @@ this->sizeY = h; this->invSizeX = 1.0f / (float)w; this->invSizeY = 1.0f / (float)h; - this->unpackSize = w * h * sizeof(u32); - this->unpackData = (u32 *)malloc_alignedCacheLine(this->unpackSize); //dump texture and 4x4 index data for cache keying - this->packSize = packedData.size; - this->packIndexSize = packedIndexData.size; - this->packData = (u8 *)malloc_alignedCacheLine(this->packSize); this->packSizeFirstSlot = packedData.items[0].len; + if (this->packSize != packedData.size) + { + u8 *oldPackData = this->packData; + this->packSize = packedData.size; + this->packData = (u8 *)malloc_alignedCacheLine(this->packSize); + free_aligned(oldPackData); + } + packedData.dump(this->packData); - if (this->packFormat == TEXMODE_4X4) + if ( (this->packFormat == TEXMODE_I2) || (this->packFormat == TEXMODE_I4) || (this->packFormat == TEXMODE_I8) ) { - u8 *oldPackIndexData = this->packIndexData; - this->packIndexData = (u8 *)malloc_alignedCacheLine(this->packIndexSize); - packedIndexData.dump(this->packIndexData, this->packIndexSize); - free_aligned(oldPackIndexData); + this->isPalZeroTransparent = ( ((attr >> 29) & 1) != 0 ); } + else + { + this->isPalZeroTransparent = false; + + if 
(this->packFormat == TEXMODE_4X4) + { + if (this->packIndexSize != packedIndexData.size) + { + u8 *oldPackIndexData = this->packIndexData; + this->packIndexSize = packedIndexData.size; + this->packIndexData = (u8 *)malloc_alignedCacheLine(this->packIndexSize); + free_aligned(oldPackIndexData); + } + + packedIndexData.dump(this->packIndexData, this->packIndexSize); + } + } - free_aligned(oldPackData); - free_aligned(oldUnpackData); + const u32 currentUnpackSize = w * h * sizeof(u32); + if (this->unpackSize != currentUnpackSize) + { + u32 *oldUnpackData = this->unpackData; + this->unpackSize = currentUnpackSize; + this->unpackData = (u32 *)malloc_alignedCacheLine(currentUnpackSize); + free_aligned(oldUnpackData); + } } void TexCacheItem::SetTexturePalette(const u32 attr, const u16 *paletteBuffer) { - const u32 oldPaletteSize = this->paletteSize; + const u32 newPaletteSize = paletteSizeList[this->packFormat] * sizeof(u16); this->paletteAttributes = attr; this->paletteAddress = (this->packFormat == TEXMODE_I2) ? 
attr << 3 : attr << 4; - this->paletteSize = paletteSizeList[this->packFormat] * sizeof(u16); - if (this->paletteSize > 0) + if (newPaletteSize > 0) { - if (this->paletteSize != oldPaletteSize) + if (this->paletteSize != newPaletteSize) { u16 *oldPaletteColorTable = this->paletteColorTable; - this->paletteColorTable = (u16 *)malloc_alignedCacheLine(this->paletteSize); - memcpy(this->paletteColorTable, paletteBuffer, this->paletteSize); + this->paletteSize = newPaletteSize; + this->paletteColorTable = (u16 *)malloc_alignedCacheLine(newPaletteSize); free_aligned(oldPaletteColorTable); } - else - { - memcpy(this->paletteColorTable, paletteBuffer, this->paletteSize); - } + + memcpy(this->paletteColorTable, paletteBuffer, newPaletteSize); } else { free_aligned(this->paletteColorTable); + this->paletteSize = 0; this->paletteColorTable = NULL; } } template <TexCache_TexFormat TEXCACHEFORMAT> -void TexCacheItem::Unpack(const MemSpan &packedData) +void TexCacheItem::Unpack() { + this->unpackFormat = TEXCACHEFORMAT; + // Whenever a 1-bit alpha or no-alpha texture is unpacked (this means any texture // format that is not A3I5 or A5I3), set all transparent pixels to 0 so that 3D // renderers can assume that the transparent color is 0 during texture sampling. 
- bool isPalZeroTransparent; - switch (this->packFormat) { case TEXMODE_A3I5: @@ -556,18 +606,15 @@ break; case TEXMODE_I2: - isPalZeroTransparent = ( ((this->textureAttributes >> 29) & 1) != 0 ); - NDSTextureUnpackI2<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, isPalZeroTransparent, this->unpackData); + NDSTextureUnpackI2<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, this->isPalZeroTransparent, this->unpackData); break; case TEXMODE_I4: - isPalZeroTransparent = ( ((this->textureAttributes >> 29) & 1) != 0 ); - NDSTextureUnpackI4<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, isPalZeroTransparent, this->unpackData); + NDSTextureUnpackI4<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, this->isPalZeroTransparent, this->unpackData); break; case TEXMODE_I8: - isPalZeroTransparent = ( ((this->textureAttributes >> 29) & 1) != 0 ); - NDSTextureUnpackI8<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, isPalZeroTransparent, this->unpackData); + NDSTextureUnpackI8<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, this->isPalZeroTransparent, this->unpackData); break; case TEXMODE_4X4: @@ -1149,7 +1196,7 @@ } template <TexCache_TexFormat TEXCACHEFORMAT> -void NDSTextureUnpackI2(const size_t srcSize, const u8 *srcData, const u16 *srcPal, const bool isPalZeroTransparent, u32 *dstBuffer) +void NDSTextureUnpackI2(const size_t srcSize, const u8 *__restrict srcData, const u16 *__restrict srcPal, const bool isPalZeroTransparent, u32 *__restrict dstBuffer) { #ifdef ENABLE_SSSE3 const __m128i pal_vec128 = _mm_loadl_epi64((__m128i *)srcPal); @@ -1264,7 +1311,7 @@ } template <TexCache_TexFormat TEXCACHEFORMAT> -void NDSTextureUnpackI4(const size_t srcSize, const u8 *srcData, const u16 *srcPal, const bool isPalZeroTransparent, u32 *dstBuffer) +void NDSTextureUnpackI4(const size_t srcSize, const u8 *__restrict srcData, const u16 
*__restrict srcPal, const bool isPalZeroTransparent, u32 *__restrict dstBuffer) { #ifdef ENABLE_SSSE3 const __m128i palLo = _mm_load_si128((__m128i *)srcPal + 0); @@ -1382,7 +1429,7 @@ } template <TexCache_TexFormat TEXCACHEFORMAT> -void NDSTextureUnpackI8(const size_t srcSize, const u8 *srcData, const u16 *srcPal, const bool isPalZeroTransparent, u32 *dstBuffer) +void NDSTextureUnpackI8(const size_t srcSize, const u8 *__restrict srcData, const u16 *__restrict srcPal, const bool isPalZeroTransparent, u32 *__restrict dstBuffer) { if (isPalZeroTransparent) { @@ -1402,7 +1449,7 @@ } template <TexCache_TexFormat TEXCACHEFORMAT> -void NDSTextureUnpackA3I5(const size_t srcSize, const u8 *srcData, const u16 *srcPal, u32 *dstBuffer) +void NDSTextureUnpackA3I5(const size_t srcSize, const u8 *__restrict srcData, const u16 *__restrict srcPal, u32 *__restrict dstBuffer) { for (size_t i = 0; i < srcSize; i++, srcData++) { @@ -1413,7 +1460,7 @@ } template <TexCache_TexFormat TEXCACHEFORMAT> -void NDSTextureUnpackA5I3(const size_t srcSize, const u8 *srcData, const u16 *srcPal, u32 *dstBuffer) +void NDSTextureUnpackA5I3(const size_t srcSize, const u8 *__restrict srcData, const u16 *__restrict srcPal, u32 *__restrict dstBuffer) { #ifdef ENABLE_SSSE3 const __m128i pal_vec128 = _mm_load_si128((__m128i *)srcPal); @@ -1477,7 +1524,7 @@ } template <TexCache_TexFormat TEXCACHEFORMAT> -void NDSTextureUnpack4x4(const size_t srcSize, const u8 *srcData, const u8 *srcIndex, const u32 palAddress, const u32 texAttributes, const u32 sizeX, const u32 sizeY, u32 *dstBuffer) +void NDSTextureUnpack4x4(const size_t srcSize, const u8 *__restrict srcData, const u8 *__restrict srcIndex, const u32 palAddress, const u32 texAttributes, const u32 sizeX, const u32 sizeY, u32 *__restrict dstBuffer) { u16 *slot1; u32 *map = (u32 *)srcData; @@ -1619,7 +1666,7 @@ } template <TexCache_TexFormat TEXCACHEFORMAT> -void NDSTextureUnpackDirect16Bit(const size_t srcSize, const u8 *srcData, u32 *dstBuffer) +void 
NDSTextureUnpackDirect16Bit(const size_t srcSize, const u8 *__restrict srcData, u32 *__restrict dstBuffer) { const u16 *srcData16 = (const u16 *)srcData; const size_t pixCount = srcSize >> 1; @@ -1656,3 +1703,6 @@ *dstBuffer++ = (c & 0x8000) ? CONVERT(c & 0x7FFF) : 0; } } + +template void TexCacheItem::Unpack<TexFormat_15bpp>(); +template void TexCacheItem::Unpack<TexFormat_32bpp>(); Modified: trunk/desmume/src/texcache.h =================================================================== --- trunk/desmume/src/texcache.h 2016-10-30 23:16:49 UTC (rev 5567) +++ trunk/desmume/src/texcache.h 2016-11-01 21:07:17 UTC (rev 5568) @@ -44,7 +44,8 @@ class MemSpan; class TexCacheItem; -typedef std::multimap<u32,TexCacheItem*> TTexCacheItemMultimap; +typedef u64 TexCacheKey; +typedef std::map<TexCacheKey, TexCacheItem *> TexCacheTable; typedef void (*TexCacheItemDeleteCallback)(TexCacheItem *texItem, void *param1, void *param2); class TexCache @@ -52,7 +53,7 @@ public: TexCache(); - TTexCacheItemMultimap index; + TexCacheTable cacheTable; u32 cache_size; //this is not really precise, it is off by a constant factor u8 paletteDump[PALETTE_DUMP_SIZE]; @@ -63,7 +64,9 @@ void Evict(u32 target); void Reset(); - TexCacheItem* GetTexture(TexCache_TexFormat texCacheFormat, u32 texAttributes, u32 palAttributes); + TexCacheItem* GetTexture(u32 texAttributes, u32 palAttributes); + + static TexCacheKey GenerateKey(const u32 texAttributes, const u32 palAttributes); }; class TexCacheItem @@ -81,6 +84,7 @@ u32 packSize; u8 *packData; u16 *paletteColorTable; + bool isPalZeroTransparent; TexCache_TexFormat unpackFormat; u32 unpackSize; @@ -88,8 +92,7 @@ bool suspectedInvalid; bool assumedInvalid; - TTexCacheItemMultimap::iterator iterator; - + u32 textureAttributes; u32 paletteAttributes; u32 paletteAddress; @@ -114,7 +117,7 @@ void SetTextureData(const u32 attr, const MemSpan &packedData, const MemSpan &packedIndexData); void SetTexturePalette(const u32 attr, const u16 *paletteBuffer); - 
template<TexCache_TexFormat TEXCACHEFORMAT> void Unpack(const MemSpan &packedData); + template<TexCache_TexFormat TEXCACHEFORMAT> void Unpack(); void DebugDump(); }; @@ -128,13 +131,13 @@ template<TexCache_TexFormat TEXCACHEFORMAT> void NDSTextureUnpack4x4(const MemSpan &ms, const u32 palAddress, const u32 texAttributes, const u32 sizeX, const u32 sizeY, u32 *dstBuffer); template<TexCache_TexFormat TEXCACHEFORMAT> void NDSTextureUnpackDirect16Bit(const MemSpan &ms, u32 *dstBuffer); -template<TexCache_TexFormat TEXCACHEFORMAT> void NDSTextureUnpackI2(const size_t srcSize, const u8 *srcData, const u16 *srcPal, const bool isPalZeroTransparent, u32 *dstBuffer); -template<TexCache_TexFormat TEXCACHEFORMAT> void NDSTextureUnpackI4(const size_t srcSize, const u8 *srcData, const u16 *srcPal, const bool isPalZeroTransparent, u32 *dstBuffer); -template<TexCache_TexFormat TEXCACHEFORMAT> void NDSTextureUnpackI8(const size_t srcSize, const u8 *srcData, const u16 *srcPal, const bool isPalZeroTransparent, u32 *dstBuffer); -template<TexCache_TexFormat TEXCACHEFORMAT> void NDSTextureUnpackA3I5(const size_t srcSize, const u8 *srcData, const u16 *srcPal, u32 *dstBuffer); -template<TexCache_TexFormat TEXCACHEFORMAT> void NDSTextureUnpackA5I3(const size_t srcSize, const u8 *srcData, const u16 *srcPal, u32 *dstBuffer); -template<TexCache_TexFormat TEXCACHEFORMAT> void NDSTextureUnpack4x4(const size_t srcSize, const u8 *srcData, const u8 *srcIndex, const u32 palAddress, const u32 texAttributes, const u32 sizeX, const u32 sizeY, u32 *dstBuffer); -template<TexCache_TexFormat TEXCACHEFORMAT> void NDSTextureUnpackDirect16Bit(const size_t srcSize, const u8 *srcData, u32 *dstBuffer); +template<TexCache_TexFormat TEXCACHEFORMAT> void NDSTextureUnpackI2(const size_t srcSize, const u8 *__restrict srcData, const u16 *__restrict srcPal, const bool isPalZeroTransparent, u32 *__restrict dstBuffer); +template<TexCache_TexFormat TEXCACHEFORMAT> void NDSTextureUnpackI4(const size_t srcSize, const u8 
*__restrict srcData, const u16 *__restrict srcPal, const bool isPalZeroTransparent, u32 *__restrict dstBuffer); +template<TexCache_TexFormat TEXCACHEFORMAT> void NDSTextureUnpackI8(const size_t srcSize, const u8 *__restrict srcData, const u16 *__restrict srcPal, const bool isPalZeroTransparent, u32 *__restrict dstBuffer); +template<TexCache_TexFormat TEXCACHEFORMAT> void NDSTextureUnpackA3I5(const size_t srcSize, const u8 *__restrict srcData, const u16 *__restrict srcPal, u32 *__restrict dstBuffer); +template<TexCache_TexFormat TEXCACHEFORMAT> void NDSTextureUnpackA5I3(const size_t srcSize, const u8 *__restrict srcData, const u16 *__restrict srcPal, u32 *__restrict dstBuffer); +template<TexCache_TexFormat TEXCACHEFORMAT> void NDSTextureUnpack4x4(const size_t srcSize, const u8 *__restrict srcData, const u8 *__restrict srcIndex, const u32 palAddress, const u32 texAttributes, const u32 sizeX, const u32 sizeY, u32 *__restrict dstBuffer); +template<TexCache_TexFormat TEXCACHEFORMAT> void NDSTextureUnpackDirect16Bit(const size_t srcSize, const u8 *__restrict srcData, u32 *__restrict dstBuffer); extern TexCache texCache; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-10-30 23:16:52
|
Revision: 5567 http://sourceforge.net/p/desmume/code/5567 Author: rogerman Date: 2016-10-30 23:16:49 +0000 (Sun, 30 Oct 2016) Log Message: ----------- Texture Handler: - Do some heavy cleanup and code refactoring. - Add SSE2-enabled unpacking function for direct 16-bit color textures. Modified Paths: -------------- trunk/desmume/src/OGLRender.cpp trunk/desmume/src/OGLRender_3_2.cpp trunk/desmume/src/rasterize.cpp trunk/desmume/src/render3D.cpp trunk/desmume/src/texcache.cpp trunk/desmume/src/texcache.h Modified: trunk/desmume/src/OGLRender.cpp =================================================================== --- trunk/desmume/src/OGLRender.cpp 2016-10-25 06:02:33 UTC (rev 5566) +++ trunk/desmume/src/OGLRender.cpp 2016-10-30 23:16:49 UTC (rev 5567) @@ -1196,7 +1196,8 @@ DestroyMultisampledFBO(); // Kill the texture cache now before all of our texture IDs disappear. - TexCache_Reset(); + texCache.Reset(); + texCache.Reset(); while(!ref->freeTextureIDs.empty()) { @@ -2694,7 +2695,7 @@ Render3DError OpenGLRenderer_1_2::EndRender(const u64 frameCount) { //needs to happen before endgl because it could free some textureids for expired cache items - TexCache_EvictFrame(); + texCache.Evict(TEXCACHE_MAX_SIZE); this->ReadBackPixels(); @@ -2967,21 +2968,21 @@ glEnable(GL_TEXTURE_2D); } - TexCacheItem *newTexture = TexCache_SetTexture(TexFormat_32bpp, thePoly.texParam, thePoly.texPalette); - if(newTexture != this->currTexture) + TexCacheItem *newTexture = texCache.GetTexture(TexFormat_32bpp, thePoly.texParam, thePoly.texPalette); + if (newTexture != this->currTexture) { this->currTexture = newTexture; //has the ogl renderer initialized the texture? 
- if(this->currTexture->GetDeleteCallback() == NULL) + if (this->currTexture->GetDeleteCallback() == NULL) { this->currTexture->SetDeleteCallback(&texDeleteCallback, this, NULL); - if(OGLRef.freeTextureIDs.empty()) + if (OGLRef.freeTextureIDs.empty()) { this->ExpandFreeTextures(); } - this->currTexture->texid = (u64)OGLRef.freeTextureIDs.front(); + this->currTexture->texid = (u32)OGLRef.freeTextureIDs.front(); OGLRef.freeTextureIDs.pop(); glBindTexture(GL_TEXTURE_2D, (GLuint)this->currTexture->texid); @@ -2989,7 +2990,7 @@ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? OGLRef.stateTexMirroredRepeat : GL_REPEAT) : GL_CLAMP_TO_EDGE)); const NDSTextureFormat texFormat = this->currTexture->GetTextureFormat(); - const u32 *textureSrc = (u32 *)this->currTexture->decoded; + const u32 *textureSrc = this->currTexture->unpackData; size_t texWidth = this->currTexture->sizeX; size_t texHeight = this->currTexture->sizeY; @@ -3133,7 +3134,7 @@ memset(this->clearImagePolyIDBuffer, 0, sizeof(this->clearImagePolyIDBuffer)); memset(this->clearImageFogBuffer, 0, sizeof(this->clearImageFogBuffer)); - TexCache_Reset(); + texCache.Reset(); return OGLERROR_NOERR; } @@ -4618,21 +4619,21 @@ glUniform1i(OGLRef.uniformPolyEnableTexture, GL_TRUE); glUniform1i(OGLRef.uniformTexSingleBitAlpha, (params.texFormat != TEXMODE_A3I5 && params.texFormat != TEXMODE_A5I3) ? GL_TRUE : GL_FALSE); - TexCacheItem *newTexture = TexCache_SetTexture(TexFormat_32bpp, thePoly.texParam, thePoly.texPalette); - if(newTexture != this->currTexture) + TexCacheItem *newTexture = texCache.GetTexture(TexFormat_32bpp, thePoly.texParam, thePoly.texPalette); + if (newTexture != this->currTexture) { this->currTexture = newTexture; //has the ogl renderer initialized the texture? 
- if(this->currTexture->GetDeleteCallback() == NULL) + if (this->currTexture->GetDeleteCallback() == NULL) { this->currTexture->SetDeleteCallback(&texDeleteCallback, this, NULL); - if(OGLRef.freeTextureIDs.empty()) + if (OGLRef.freeTextureIDs.empty()) { this->ExpandFreeTextures(); } - this->currTexture->texid = (u64)OGLRef.freeTextureIDs.front(); + this->currTexture->texid = (u32)OGLRef.freeTextureIDs.front(); OGLRef.freeTextureIDs.pop(); glBindTexture(GL_TEXTURE_2D, (GLuint)this->currTexture->texid); @@ -4640,7 +4641,7 @@ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); const NDSTextureFormat texFormat = this->currTexture->GetTextureFormat(); - const u32 *textureSrc = (u32 *)this->currTexture->decoded; + const u32 *textureSrc = this->currTexture->unpackData; size_t texWidth = this->currTexture->sizeX; size_t texHeight = this->currTexture->sizeY; Modified: trunk/desmume/src/OGLRender_3_2.cpp =================================================================== --- trunk/desmume/src/OGLRender_3_2.cpp 2016-10-25 06:02:33 UTC (rev 5566) +++ trunk/desmume/src/OGLRender_3_2.cpp 2016-10-30 23:16:49 UTC (rev 5567) @@ -1695,21 +1695,21 @@ return OGLERROR_NOERR; } - TexCacheItem *newTexture = TexCache_SetTexture(TexFormat_32bpp, thePoly.texParam, thePoly.texPalette); - if(newTexture != this->currTexture) + TexCacheItem *newTexture = texCache.GetTexture(TexFormat_32bpp, thePoly.texParam, thePoly.texPalette); + if (newTexture != this->currTexture) { this->currTexture = newTexture; //has the ogl renderer initialized the texture? 
- if(this->currTexture->GetDeleteCallback() == NULL) + if (this->currTexture->GetDeleteCallback() == NULL) { this->currTexture->SetDeleteCallback(&texDeleteCallback, this, NULL); - if(OGLRef.freeTextureIDs.empty()) + if (OGLRef.freeTextureIDs.empty()) { this->ExpandFreeTextures(); } - this->currTexture->texid = (u64)OGLRef.freeTextureIDs.front(); + this->currTexture->texid = (u32)OGLRef.freeTextureIDs.front(); OGLRef.freeTextureIDs.pop(); glBindTexture(GL_TEXTURE_2D, (GLuint)this->currTexture->texid); @@ -1717,7 +1717,7 @@ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (params.enableRepeatT ? (params.enableMirroredRepeatT ? GL_MIRRORED_REPEAT : GL_REPEAT) : GL_CLAMP_TO_EDGE)); const NDSTextureFormat texFormat = this->currTexture->GetTextureFormat(); - const u32 *textureSrc = (u32 *)this->currTexture->decoded; + const u32 *textureSrc = this->currTexture->unpackData; size_t texWidth = this->currTexture->sizeX; size_t texHeight = this->currTexture->sizeY; Modified: trunk/desmume/src/rasterize.cpp =================================================================== --- trunk/desmume/src/rasterize.cpp 2016-10-25 06:02:33 UTC (rev 5566) +++ trunk/desmume/src/rasterize.cpp 2016-10-30 23:16:49 UTC (rev 5567) @@ -461,7 +461,7 @@ sampler.dowrap(iu, iv); FragmentColor color; - color.color = ((u32*)lastTexKey->decoded)[(iv<<sampler.wshift)+iu]; + color.color = lastTexKey->unpackData[(iv<<sampler.wshift)+iu]; return color; } @@ -1373,7 +1373,7 @@ const POLY &firstPoly = *firstClippedPoly.poly; u32 lastTexParams = firstPoly.texParam; u32 lastTexPalette = firstPoly.texPalette; - TexCacheItem *lastTexKey = TexCache_SetTexture(TexFormat_15bpp, firstPoly.texParam, firstPoly.texPalette); + TexCacheItem *lastTexKey = texCache.GetTexture(TexFormat_15bpp, firstPoly.texParam, firstPoly.texPalette); for (size_t i = 0; i < this->_clippedPolyCount; i++) { @@ -1386,7 +1386,7 @@ //and then it won't be safe. 
if (lastTexParams != thePoly.texParam || lastTexPalette != thePoly.texPalette) { - lastTexKey = TexCache_SetTexture(TexFormat_15bpp, thePoly.texParam, thePoly.texPalette); + lastTexKey = texCache.GetTexture(TexFormat_15bpp, thePoly.texParam, thePoly.texPalette); lastTexParams = thePoly.texParam; lastTexPalette = thePoly.texPalette; } @@ -1537,7 +1537,7 @@ { rasterizerUnit[0].mainLoop<false>(); this->_renderGeometryNeedsFinish = false; - TexCache_EvictFrame(); // Since we're finishing geometry rendering here and now, also check the texture cache now. + texCache.Evict(TEXCACHE_MAX_SIZE); // Since we're finishing geometry rendering here and now, also check the texture cache now. } // printf("rendered %d of %d polys after backface culling\n",gfx3d.polylist->count-culled,gfx3d.polylist->count); @@ -1888,7 +1888,7 @@ memset(this->clearImagePolyIDBuffer, 0, sizeof(this->clearImagePolyIDBuffer)); memset(this->clearImageFogBuffer, 0, sizeof(this->clearImageFogBuffer)); - TexCache_Reset(); + texCache.Reset(); return RENDER3DERROR_NOERR; } @@ -1947,7 +1947,7 @@ } // Now that geometry rendering is finished on all threads, check the texture cache. - TexCache_EvictFrame(); + texCache.Evict(TEXCACHE_MAX_SIZE); // Do multithreaded post-processing. 
if (this->currentRenderState->enableEdgeMarking || this->currentRenderState->enableFog) Modified: trunk/desmume/src/render3D.cpp =================================================================== --- trunk/desmume/src/render3D.cpp 2016-10-25 06:02:33 UTC (rev 5566) +++ trunk/desmume/src/render3D.cpp 2016-10-30 23:16:49 UTC (rev 5567) @@ -389,7 +389,7 @@ if (needTexCacheReset) { - TexCache_Reset(); + texCache.Reset(); } } @@ -616,7 +616,7 @@ this->_willFlushFramebufferRGBA6665 = true; this->_willFlushFramebufferRGBA5551 = true; - TexCache_Reset(); + texCache.Reset(); return RENDER3DERROR_NOERR; } @@ -658,7 +658,7 @@ Render3DError Render3D::VramReconfigureSignal() { - TexCache_Invalidate(); + texCache.Invalidate(); return RENDER3DERROR_NOERR; } Modified: trunk/desmume/src/texcache.cpp =================================================================== --- trunk/desmume/src/texcache.cpp 2016-10-25 06:02:33 UTC (rev 5566) +++ trunk/desmume/src/texcache.cpp 2016-10-30 23:16:49 UTC (rev 5567) @@ -20,7 +20,6 @@ #include <string.h> #include <algorithm> #include <assert.h> -#include <map> #include "texcache.h" @@ -41,8 +40,12 @@ //only dump this from ogl renderer. 
for now, softrasterizer creates things in an incompatible pixel format //#define DEBUG_DUMP_TEXTURE -#define CONVERT(color) ((TEXFORMAT == TexFormat_32bpp)?(COLOR555TO8888_OPAQUE(color)):COLOR555TO6665_OPAQUE(color)) +#if defined(DEBUG_DUMP_TEXTURE) && defined(WIN32) + #define DO_DEBUG_DUMP_TEXTURE +#endif +#define CONVERT(color) ((TEXCACHEFORMAT == TexFormat_32bpp)?(COLOR555TO8888_OPAQUE(color)):COLOR555TO6665_OPAQUE(color)) + //This class represents a number of regions of memory which should be viewed as contiguous class MemSpan { @@ -86,7 +89,7 @@ //dumps the memspan to the specified buffer //you may set size to limit the size to be copied - int dump(void* buf, int size=-1) + int dump(void* buf, int size=-1) const { if(size==-1) size = this->size; size = min(this->size,size); @@ -108,7 +111,7 @@ // this function does the same than dump // but works for both little and big endian // when buf is an u16 array - int dump16(void* buf, int size=-1) + int dump16(void* buf, int size=-1) const { if(size==-1) size = this->size; size = min(this->size,size); @@ -192,842 +195,1464 @@ return ret; } -#if defined (DEBUG_DUMP_TEXTURE) && defined (WIN32) -#define DO_DEBUG_DUMP_TEXTURE -static void DebugDumpTexture(TexCacheItem* item) +//for each texformat, number of palette entries +static const u32 paletteSizeList[] = {0, 32, 4, 16, 256, 0, 8, 0}; + +TexCache texCache; + +TexCache::TexCache() { - static int ctr=0; - char fname[100]; - sprintf(fname,"c:\\dump\\%d.bmp", ctr); - ctr++; + cache_size = 0; + memset(paletteDump, 0, sizeof(paletteDump)); +} - NDS_WriteBMP_32bppBuffer(item->sizeX,item->sizeY,item->decoded,fname); +void TexCache::list_remove(TexCacheItem *item) +{ + this->index.erase(item->iterator); + this->cache_size -= item->unpackSize; } -#endif -class TexCache +void TexCache::list_push_front(TexCacheItem *item) { -public: - TexCache() - : cache_size(0) + item->iterator = this->index.insert(std::make_pair(item->textureAttributes, item)); + this->cache_size += 
item->unpackSize; +} + +void TexCache::Invalidate() +{ + //check whether the palette memory changed + //TODO - we should handle this instead by setting dirty flags in the vram memory mapping and noting whether palette memory was dirty. + //but this will work for now + MemSpan mspal = MemSpan_TexPalette(0, PALETTE_DUMP_SIZE, true); + bool paletteDirty = mspal.memcmp(paletteDump); + if (paletteDirty) { - memset(paletteDump,0,sizeof(paletteDump)); + mspal.dump(paletteDump); } + + for (TTexCacheItemMultimap::iterator it(this->index.begin()); it != this->index.end(); ++it) + { + it->second->suspectedInvalid = true; + + //when the palette changes, we assume all 4x4 textures are dirty. + //this is because each 4x4 item doesnt carry along with it a copy of the entire palette, for verification + //instead, we just use the one paletteDump for verifying of all 4x4 textures; and if paletteDirty is set, verification has failed + if( (it->second->GetTextureFormat() == TEXMODE_4X4) && paletteDirty ) + { + it->second->assumedInvalid = true; + } + } +} - TTexCacheItemMultimap index; +void TexCache::Evict(u32 target) +{ + //debug print + //printf("%d %d/%d\n",index.size(),cache_size/1024,target/1024); + + //dont do anything unless we're over the target + if (cache_size < target) return; + + //aim at cutting the cache to half of the max size + target /= 2; + + //evicts items in an arbitrary order until it is less than the max cache size + //TODO - do this based on age and not arbitrarily + while (this->cache_size > target) + { + if (this->index.size() == 0) break; //just in case.. doesnt seem possible, cache_size wouldve been 0 + + TexCacheItem *item = this->index.begin()->second; + this->list_remove(item); + //printf("evicting! totalsize:%d\n",cache_size); + delete item; + } +} - //this ought to be enough for anyone - //static const u32 kMaxCacheSize = 64*1024*1024; - //changed by zeromus on 15-dec. 
I couldnt find any games that were getting anywhere NEAR 64 - static const u32 kMaxCacheSize = 16*1024*1024; - //metal slug burns through sprites so fast, it can test it pretty quickly though +void TexCache::Reset() +{ + this->Evict(0); +} - //this is not really precise, it is off by a constant factor - u32 cache_size; - - void list_remove(TexCacheItem* item) +TexCacheItem* TexCache::GetTexture(TexCache_TexFormat texCacheFormat, u32 texAttributes, u32 palAttributes) +{ + //for each texformat, multiplier from numtexels to numbytes (fixed point 30.2) + static const int texSizes[] = {0, 4, 1, 2, 4, 1, 4, 8}; + + NDSTextureFormat texPackFormat = (NDSTextureFormat)((texAttributes>>26)&0x07); + u32 sizeX = (8 << ((texAttributes>>20)&0x07)); + u32 sizeY = (8 << ((texAttributes>>23)&0x07)); + u32 imageSize = sizeX*sizeY; + u32 palAddress; + + switch (texPackFormat) { - index.erase(item->iterator); - cache_size -= item->decode_len; + case TEXMODE_I2: + palAddress = palAttributes << 3; + break; + + case TEXMODE_A3I5: + case TEXMODE_I4: + case TEXMODE_I8: + case TEXMODE_A5I3: + case TEXMODE_16BPP: + case TEXMODE_4X4: + default: + palAddress = palAttributes << 4; + break; } - - void list_push_front(TexCacheItem* item) + + //analyze the texture memory mapping and the specifications of this texture + u32 texSize = (imageSize*texSizes[texPackFormat]) >> 2; //shifted because the texSizes multiplier is fixed point + MemSpan currentPackedTexDataMS = MemSpan_TexMem((texAttributes&0xFFFF)<<3, texSize); + + //dump the palette to a temp buffer, so that we don't have to worry about memory mapping. + //this isnt such a problem with texture memory, because we read sequentially from it. + //however, we read randomly from palette memory, so the mapping is more costly. 
+ u32 palSize = paletteSizeList[texPackFormat] * sizeof(u16); + MemSpan currentPaletteMS = MemSpan_TexPalette(palAddress, palSize, false); + + CACHE_ALIGN u16 currentPalette[256]; +#ifdef WORDS_BIGENDIAN + currentPaletteMS.dump16(currentPalette); +#else + currentPaletteMS.dump(currentPalette); +#endif + + //determine the location for 4x4 index data + u32 indexBase; + if ((texAttributes & 0xc000) == 0x8000) indexBase = 0x30000; + else indexBase = 0x20000; + + u32 indexOffset = (texAttributes & 0x3FFF) << 2; + int indexSize = 0; + MemSpan currentPackedTexIndexMS; + if (texPackFormat == TEXMODE_4X4) { - item->iterator = index.insert(std::make_pair(item->texformat,item)); - cache_size += item->decode_len; + indexSize = imageSize >> 3; + currentPackedTexIndexMS = MemSpan_TexMem(indexOffset+indexBase, indexSize); } - - template<TexCache_TexFormat TEXFORMAT> - TexCacheItem* scan(u32 format, u32 texpal) + + //TODO - as a special optimization, keep the last item returned and check it first + + TexCacheItem *cachedTexture = NULL; + + for(std::pair<TTexCacheItemMultimap::iterator,TTexCacheItemMultimap::iterator> + iters = index.equal_range(texAttributes); + iters.first != iters.second; + ++iters.first) { - //for each texformat, number of palette entries - static const int palSizes[] = {0, 32, 4, 16, 256, 0, 8, 0}; + cachedTexture = iters.first->second; + + //conditions where we reject matches: + //when the teximage or texpal params dont match + //(this is our key for identifying textures in the cache) + //NEW: due to using format as a key we dont need to check this anymore + //if(curr->texAttributes != texAttributes) continue; + if (cachedTexture->paletteAttributes != palAttributes) continue; + + //we're being asked for a different format than what we had cached. 
+ //TODO - this could be done at the entire cache level instead of checking repeatedly + if (cachedTexture->unpackFormat != texCacheFormat) goto REJECT; + + //if the texture is assumed invalid, reject it + if (cachedTexture->assumedInvalid) goto REJECT; + + //the texture matches params, but isnt suspected invalid. accept it. + if (!cachedTexture->suspectedInvalid) return cachedTexture; + + //we suspect the texture may be invalid. we need to do a byte-for-byte comparison to re-establish that it is valid: + + //when the palettes dont match: + //note that we are considering 4x4 textures to have a palette size of 0. + //they really have a potentially HUGE palette, too big for us to handle like a normal palette, + //so they go through a different system + if (currentPaletteMS.size != 0 && memcmp(cachedTexture->paletteColorTable, currentPalette, currentPaletteMS.size)) goto REJECT; + + //when the texture data doesn't match + if (currentPackedTexDataMS.memcmp(cachedTexture->packData, cachedTexture->packSize)) goto REJECT; + + //if the texture is 4x4 then the index data must match + if (texPackFormat == TEXMODE_4X4) + { + if (currentPackedTexIndexMS.memcmp(cachedTexture->packIndexData, cachedTexture->packIndexSize)) goto REJECT; + } + + //we found a match. just return it + //REMINDER to make it primary/newest when we have smarter code + //list_remove(curr); + //list_push_front(curr); + cachedTexture->suspectedInvalid = false; + return cachedTexture; + + REJECT: + //we found a cached item for the current address, but the data is stale. + //for a variety of complicated reasons, we need to throw it out right this instant. + this->list_remove(cachedTexture); + delete cachedTexture; + break; + } + + //item was not found. 
recruit an existing one (the oldest), or create a new one + //evict(); //reduce the size of the cache if necessary + //TODO - as a peculiarity of the texcache, eviction must happen after the entire 3d frame runs + //to support separate cache and read passes + TexCacheItem *newTexture = new TexCacheItem(); + newTexture->SetTextureData(texAttributes, currentPackedTexDataMS, currentPackedTexIndexMS); + newTexture->SetTexturePalette(palAttributes, currentPalette); + newTexture->unpackFormat = texCacheFormat; + + this->list_push_front(newTexture); + //printf("allocating: up to %d with %d items\n",cache_size,index.size()); + + switch (texCacheFormat) + { + case TexFormat_32bpp: newTexture->Unpack<TexFormat_32bpp>(currentPackedTexDataMS); break; + case TexFormat_15bpp: newTexture->Unpack<TexFormat_15bpp>(currentPackedTexDataMS); break; + default: assert(false); return NULL; + } + + return newTexture; +} - //for each texformat, multiplier from numtexels to numbytes (fixed point 30.2) - static const int texSizes[] = {0, 4, 1, 2, 4, 1, 4, 8}; +TexCacheItem::TexCacheItem() +{ + _deleteCallback = NULL; + _deleteCallbackParam1 = NULL; + _deleteCallbackParam2 = NULL; + + packFormat = TEXMODE_NONE; + packSize = 0; + packData = NULL; + paletteColorTable = NULL; + + unpackFormat = TexFormat_None; + unpackSize = 0; + unpackData = NULL; + + suspectedInvalid = false; + assumedInvalid = false; + + textureAttributes = 0; + paletteAttributes = 0; + paletteAddress = 0; + paletteSize = 0; + sizeX = 0; + sizeY = 0; + invSizeX = 0.0f; + invSizeY = 0.0f; + + packIndexData = NULL; + packSizeFirstSlot = 0; + packIndexSize = 0; + + texid = 0; +} - //used to hold a copy of the palette specified for this texture - CACHE_ALIGN u16 pal[256]; +TexCacheItem::~TexCacheItem() +{ + free_aligned(this->packData); + free_aligned(this->unpackData); + free_aligned(this->paletteColorTable); + free_aligned(this->packIndexData); + if (this->_deleteCallback != NULL) this->_deleteCallback(this, 
this->_deleteCallbackParam1, this->_deleteCallbackParam2); +} - NDSTextureFormat textureMode = (NDSTextureFormat)((format>>26)&0x07); - u32 sizeX=(8 << ((format>>20)&0x07)); - u32 sizeY=(8 << ((format>>23)&0x07)); - u32 imageSize = sizeX*sizeY; +TexCacheItemDeleteCallback TexCacheItem::GetDeleteCallback() const +{ + return this->_deleteCallback; +} - u8 *adr; +void TexCacheItem::SetDeleteCallback(TexCacheItemDeleteCallback callbackFunc, void *inParam1, void *inParam2) +{ + this->_deleteCallback = callbackFunc; + this->_deleteCallbackParam1 = inParam1; + this->_deleteCallbackParam2 = inParam2; +} - u32 paletteAddress; +NDSTextureFormat TexCacheItem::GetTextureFormat() const +{ + return this->packFormat; +} - switch (textureMode) +void TexCacheItem::SetTextureData(const u32 attr, const MemSpan &packedData, const MemSpan &packedIndexData) +{ + u8 *oldPackData = this->packData; + u32 *oldUnpackData = this->unpackData; + u32 w = (8 << ((attr >> 20) & 0x07)); + u32 h = (8 << ((attr >> 23) & 0x07)); + + this->textureAttributes = attr; + this->packFormat = (NDSTextureFormat)((attr >> 26) & 0x07); + + this->sizeX = w; + this->sizeY = h; + this->invSizeX = 1.0f / (float)w; + this->invSizeY = 1.0f / (float)h; + this->unpackSize = w * h * sizeof(u32); + this->unpackData = (u32 *)malloc_alignedCacheLine(this->unpackSize); + + //dump texture and 4x4 index data for cache keying + this->packSize = packedData.size; + this->packIndexSize = packedIndexData.size; + this->packData = (u8 *)malloc_alignedCacheLine(this->packSize); + this->packSizeFirstSlot = packedData.items[0].len; + + packedData.dump(this->packData); + + if (this->packFormat == TEXMODE_4X4) + { + u8 *oldPackIndexData = this->packIndexData; + this->packIndexData = (u8 *)malloc_alignedCacheLine(this->packIndexSize); + packedIndexData.dump(this->packIndexData, this->packIndexSize); + free_aligned(oldPackIndexData); + } + + free_aligned(oldPackData); + free_aligned(oldUnpackData); +} + +void 
TexCacheItem::SetTexturePalette(const u32 attr, const u16 *paletteBuffer) +{ + const u32 oldPaletteSize = this->paletteSize; + + this->paletteAttributes = attr; + this->paletteAddress = (this->packFormat == TEXMODE_I2) ? attr << 3 : attr << 4; + this->paletteSize = paletteSizeList[this->packFormat] * sizeof(u16); + + if (this->paletteSize > 0) + { + if (this->paletteSize != oldPaletteSize) { - case TEXMODE_I2: - paletteAddress = texpal<<3; - break; - - case TEXMODE_A3I5: - case TEXMODE_I4: - case TEXMODE_I8: - case TEXMODE_A5I3: - case TEXMODE_16BPP: - case TEXMODE_4X4: - default: - paletteAddress = texpal<<4; - break; + u16 *oldPaletteColorTable = this->paletteColorTable; + this->paletteColorTable = (u16 *)malloc_alignedCacheLine(this->paletteSize); + memcpy(this->paletteColorTable, paletteBuffer, this->paletteSize); + free_aligned(oldPaletteColorTable); } - - //analyze the texture memory mapping and the specifications of this texture - int palSize = palSizes[textureMode]; - int texSize = (imageSize*texSizes[textureMode])>>2; //shifted because the texSizes multiplier is fixed point - MemSpan ms = MemSpan_TexMem((format&0xFFFF)<<3,texSize); - MemSpan mspal = MemSpan_TexPalette(paletteAddress,palSize*2,false); - - //determine the location for 4x4 index data - u32 indexBase; - if((format & 0xc000) == 0x8000) indexBase = 0x30000; - else indexBase = 0x20000; - - u32 indexOffset = (format&0x3FFF)<<2; - - int indexSize = 0; - MemSpan msIndex; - if(textureMode == TEXMODE_4X4) + else { - indexSize = imageSize>>3; - msIndex = MemSpan_TexMem(indexOffset+indexBase,indexSize); + memcpy(this->paletteColorTable, paletteBuffer, this->paletteSize); } + } + else + { + free_aligned(this->paletteColorTable); + this->paletteColorTable = NULL; + } +} - - //dump the palette to a temp buffer, so that we don't have to worry about memory mapping. - //this isnt such a problem with texture memory, because we read sequentially from it. 
- //however, we read randomly from palette memory, so the mapping is more costly. - #ifdef WORDS_BIGENDIAN - mspal.dump16(pal); - #else - mspal.dump(pal); - #endif - - //TODO - as a special optimization, keep the last item returned and check it first - - for(std::pair<TTexCacheItemMultimap::iterator,TTexCacheItemMultimap::iterator> - iters = index.equal_range(format); - iters.first != iters.second; - ++iters.first) +template <TexCache_TexFormat TEXCACHEFORMAT> +void TexCacheItem::Unpack(const MemSpan &packedData) +{ + // Whenever a 1-bit alpha or no-alpha texture is unpacked (this means any texture + // format that is not A3I5 or A5I3), set all transparent pixels to 0 so that 3D + // renderers can assume that the transparent color is 0 during texture sampling. + + bool isPalZeroTransparent; + + switch (this->packFormat) + { + case TEXMODE_A3I5: + NDSTextureUnpackA3I5<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, this->unpackData); + break; + + case TEXMODE_I2: + isPalZeroTransparent = ( ((this->textureAttributes >> 29) & 1) != 0 ); + NDSTextureUnpackI2<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, isPalZeroTransparent, this->unpackData); + break; + + case TEXMODE_I4: + isPalZeroTransparent = ( ((this->textureAttributes >> 29) & 1) != 0 ); + NDSTextureUnpackI4<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, isPalZeroTransparent, this->unpackData); + break; + + case TEXMODE_I8: + isPalZeroTransparent = ( ((this->textureAttributes >> 29) & 1) != 0 ); + NDSTextureUnpackI8<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, isPalZeroTransparent, this->unpackData); + break; + + case TEXMODE_4X4: { - TexCacheItem* curr = iters.first->second; - - //conditions where we reject matches: - //when the teximage or texpal params dont match - //(this is our key for identifying textures in the cache) - //NEW: due to using format as a key we dont need to check this anymore - 
//if(curr->texformat != format) continue; - if(curr->texpal != texpal) continue; - - //we're being asked for a different format than what we had cached. - //TODO - this could be done at the entire cache level instead of checking repeatedly - if(curr->cacheFormat != TEXFORMAT) goto REJECT; - - //if the texture is assumed invalid, reject it - if(curr->assumedInvalid) goto REJECT; - - //the texture matches params, but isnt suspected invalid. accept it. - if(!curr->suspectedInvalid) return curr; - - //we suspect the texture may be invalid. we need to do a byte-for-byte comparison to re-establish that it is valid: - - //when the palettes dont match: - //note that we are considering 4x4 textures to have a palette size of 0. - //they really have a potentially HUGE palette, too big for us to handle like a normal palette, - //so they go through a different system - if(mspal.size != 0 && memcmp(curr->dump.palette,pal,mspal.size)) goto REJECT; - - //when the texture data doesn't match - if(ms.memcmp(&curr->dump.texture[0],curr->dump.textureSize)) goto REJECT; - - //if the texture is 4x4 then the index data must match - if(textureMode == TEXMODE_4X4) + if (this->packSize > this->packSizeFirstSlot) { - if(msIndex.memcmp(curr->dump.texture + curr->dump.textureSize,curr->dump.indexSize)) goto REJECT; + PROGINFO("Your 4x4 texture has overrun its texture slot.\n"); } - - //we found a match. just return it - //REMINDER to make it primary/newest when we have smarter code - //list_remove(curr); - //list_push_front(curr); - curr->suspectedInvalid = false; - return curr; - - REJECT: - //we found a cached item for the current address, but the data is stale. - //for a variety of complicated reasons, we need to throw it out right this instant. 
- list_remove(curr); - delete curr; + + NDSTextureUnpack4x4<TEXCACHEFORMAT>(this->packSizeFirstSlot, this->packData, this->packIndexData, this->paletteAddress, this->textureAttributes, this->sizeX, this->sizeY, this->unpackData); break; } + + case TEXMODE_A5I3: + NDSTextureUnpackA5I3<TEXCACHEFORMAT>(this->packSize, this->packData, this->paletteColorTable, this->unpackData); + break; + + case TEXMODE_16BPP: + NDSTextureUnpackDirect16Bit<TEXCACHEFORMAT>(this->packSize, this->packData, this->unpackData); + break; + + default: + break; + } + +#ifdef DO_DEBUG_DUMP_TEXTURE + this->DebugDump(); +#endif +} - //item was not found. recruit an existing one (the oldest), or create a new one - //evict(); //reduce the size of the cache if necessary - //TODO - as a peculiarity of the texcache, eviction must happen after the entire 3d frame runs - //to support separate cache and read passes - TexCacheItem* newitem = new TexCacheItem(); - newitem->suspectedInvalid = false; - newitem->texformat = format; - newitem->cacheFormat = TEXFORMAT; - newitem->texpal = texpal; - newitem->sizeX=sizeX; - newitem->sizeY=sizeY; - newitem->invSizeX=1.0f/((float)(sizeX)); - newitem->invSizeY=1.0f/((float)(sizeY)); - newitem->decode_len = sizeX*sizeY*4; - newitem->format = textureMode; - newitem->decoded = (u8 *)malloc_alignedCacheLine(newitem->decode_len); - list_push_front(newitem); - //printf("allocating: up to %d with %d items\n",cache_size,index.size()); +#ifdef DO_DEBUG_DUMP_TEXTURE +void TexCacheItem::DebugDump() +{ + static int ctr=0; + char fname[100]; + sprintf(fname,"c:\\dump\\%d.bmp", ctr); + ctr++; + + NDS_WriteBMP_32bppBuffer(this->sizeX, this->sizeY, this->unpackData, fname); +} +#endif - u32 *dwdst = (u32*)newitem->decoded; - - //dump palette data for cache keying - if(palSize) +// TODO: Delete these MemSpan based functions after testing confirms that using the dumped texture data works properly. 
+template <TexCache_TexFormat TEXCACHEFORMAT> +void NDSTextureUnpackI2(const MemSpan &ms, const u16 *pal, const bool isPalZeroTransparent, u32 *dstBuffer) +{ + u8 *adr; + +#ifdef ENABLE_SSSE3 + const __m128i pal_vec128 = _mm_loadl_epi64((__m128i *)pal); +#endif + if (isPalZeroTransparent) + { + for (size_t j = 0; j < ms.numItems; j++) { - memcpy(newitem->dump.palette, pal, palSize*2); - } - - //dump texture and 4x4 index data for cache keying - const int texsize = newitem->dump.textureSize = ms.size; - const int indexsize = newitem->dump.indexSize = msIndex.size; - newitem->dump.texture = new u8[texsize+indexsize]; - ms.dump(&newitem->dump.texture[0],newitem->dump.maxTextureSize); //dump texture - if(textureMode == TEXMODE_4X4) - msIndex.dump(newitem->dump.texture+newitem->dump.textureSize,newitem->dump.indexSize); //dump 4x4 - - - //============================================================================ - //Texture conversion - //============================================================================ - - // Whenever a 1-bit alpha or no-alpha texture is unpacked (this means any texture - // format that is not A3I5 or A5I3), set all transparent pixels to 0 so that 3D - // renderers can assume that the transparent color is 0 during texture sampling. 
- - const bool isPalZeroTransparent = ( ((format >> 29) & 1) != 0 ); - - switch (newitem->format) - { - case TEXMODE_A3I5: + adr = ms.items[j].ptr; +#ifdef ENABLE_SSSE3 + for (size_t x = 0; x < ms.items[j].len; x+=4, adr+=4, dstBuffer+=16) { - for (size_t j = 0; j < ms.numItems; j++) + __m128i idx = _mm_set_epi32(0, 0, 0, *(u32 *)adr); + idx = _mm_unpacklo_epi8(idx, idx); + idx = _mm_unpacklo_epi8(idx, idx); + idx = _mm_or_si128( _mm_or_si128( _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi32(0x00000003)), _mm_and_si128(_mm_srli_epi32(idx, 2), _mm_set1_epi32(0x00000300)) ), _mm_and_si128(_mm_srli_epi32(idx, 4), _mm_set1_epi32(0x00030000)) ), _mm_and_si128(_mm_srli_epi32(idx, 6), _mm_set1_epi32(0x03000000)) ); + idx = _mm_slli_epi16(idx, 1); + + __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + + const __m128i palColor0 = _mm_shuffle_epi8(pal_vec128, idx0); + const __m128i palColor1 = _mm_shuffle_epi8(pal_vec128, idx1); + + __m128i convertedColor[4]; + + if (TEXCACHEFORMAT == TexFormat_15bpp) { - adr = ms.items[j].ptr; - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - const u16 c = pal[*adr & 31] & 0x7FFF; - const u8 alpha = *adr >> 5; - *dwdst++ = (TEXFORMAT == TexFormat_15bpp) ? COLOR555TO6665(c, material_3bit_to_5bit[alpha]) : COLOR555TO8888(c, material_3bit_to_8bit[alpha]); - } + ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); } - break; + else + { + ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); + } + + // Set converted colors to 0 if the palette index is 0. 
+ idx0 = _mm_cmpeq_epi16(idx0, _mm_set1_epi16(0x0100)); + idx1 = _mm_cmpeq_epi16(idx1, _mm_set1_epi16(0x0100)); + convertedColor[0] = _mm_andnot_si128(_mm_unpacklo_epi16(idx0, idx0), convertedColor[0]); + convertedColor[1] = _mm_andnot_si128(_mm_unpackhi_epi16(idx0, idx0), convertedColor[1]); + convertedColor[2] = _mm_andnot_si128(_mm_unpacklo_epi16(idx1, idx1), convertedColor[2]); + convertedColor[3] = _mm_andnot_si128(_mm_unpackhi_epi16(idx1, idx1), convertedColor[3]); + + _mm_store_si128((__m128i *)(dstBuffer + 0), convertedColor[0]); + _mm_store_si128((__m128i *)(dstBuffer + 4), convertedColor[1]); + _mm_store_si128((__m128i *)(dstBuffer + 8), convertedColor[2]); + _mm_store_si128((__m128i *)(dstBuffer + 12), convertedColor[3]); } - - case TEXMODE_I2: +#else + for (size_t x = 0; x < ms.items[j].len; x++, adr++) { + u8 idx; + + idx = *adr & 0x03; + *dstBuffer++ = (idx == 0) ? 0 : CONVERT(pal[idx] & 0x7FFF); + + idx = (*adr >> 2) & 0x03; + *dstBuffer++ = (idx == 0) ? 0 : CONVERT(pal[idx] & 0x7FFF); + + idx = (*adr >> 4) & 0x03; + *dstBuffer++ = (idx == 0) ? 0 : CONVERT(pal[idx] & 0x7FFF); + + idx = (*adr >> 6) & 0x03; + *dstBuffer++ = (idx == 0) ? 
0 : CONVERT(pal[idx] & 0x7FFF); + } +#endif + } + } + else + { + for (size_t j = 0; j < ms.numItems; j++) + { + adr = ms.items[j].ptr; #ifdef ENABLE_SSSE3 - const __m128i pal_vec128 = _mm_loadl_epi64((__m128i *)pal); -#endif - if (isPalZeroTransparent) + for (size_t x = 0; x < ms.items[j].len; x+=4, adr+=4, dstBuffer+=16) + { + __m128i idx = _mm_set_epi32(0, 0, 0, *(u32 *)adr); + idx = _mm_unpacklo_epi8(idx, idx); + idx = _mm_unpacklo_epi8(idx, idx); + idx = _mm_or_si128( _mm_or_si128( _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi32(0x00000003)), _mm_and_si128(_mm_srli_epi32(idx, 2), _mm_set1_epi32(0x00000300)) ), _mm_and_si128(_mm_srli_epi32(idx, 4), _mm_set1_epi32(0x00030000)) ), _mm_and_si128(_mm_srli_epi32(idx, 6), _mm_set1_epi32(0x03000000)) ); + idx = _mm_slli_epi16(idx, 1); + + const __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + const __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + + const __m128i palColor0 = _mm_shuffle_epi8(pal_vec128, idx0); + const __m128i palColor1 = _mm_shuffle_epi8(pal_vec128, idx1); + + __m128i convertedColor[4]; + + if (TEXCACHEFORMAT == TexFormat_15bpp) { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; -#ifdef ENABLE_SSSE3 - for (size_t x = 0; x < ms.items[j].len; x+=4, adr+=4, dwdst+=16) - { - __m128i idx = _mm_set_epi32(0, 0, 0, *(u32 *)adr); - idx = _mm_unpacklo_epi8(idx, idx); - idx = _mm_unpacklo_epi8(idx, idx); - idx = _mm_or_si128( _mm_or_si128( _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi32(0x00000003)), _mm_and_si128(_mm_srli_epi32(idx, 2), _mm_set1_epi32(0x00000300)) ), _mm_and_si128(_mm_srli_epi32(idx, 4), _mm_set1_epi32(0x00030000)) ), _mm_and_si128(_mm_srli_epi32(idx, 6), _mm_set1_epi32(0x03000000)) ); - idx = _mm_slli_epi16(idx, 1); - - __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - - const 
__m128i palColor0 = _mm_shuffle_epi8(pal_vec128, idx0); - const __m128i palColor1 = _mm_shuffle_epi8(pal_vec128, idx1); - - __m128i convertedColor[4]; - - if (TEXFORMAT == TexFormat_15bpp) - { - ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); - } - else - { - ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); - } - - // Set converted colors to 0 if the palette index is 0. - idx0 = _mm_cmpeq_epi16(idx0, _mm_set1_epi16(0x0100)); - idx1 = _mm_cmpeq_epi16(idx1, _mm_set1_epi16(0x0100)); - convertedColor[0] = _mm_andnot_si128(_mm_unpacklo_epi16(idx0, idx0), convertedColor[0]); - convertedColor[1] = _mm_andnot_si128(_mm_unpackhi_epi16(idx0, idx0), convertedColor[1]); - convertedColor[2] = _mm_andnot_si128(_mm_unpacklo_epi16(idx1, idx1), convertedColor[2]); - convertedColor[3] = _mm_andnot_si128(_mm_unpackhi_epi16(idx1, idx1), convertedColor[3]); - - _mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]); - _mm_store_si128((__m128i *)(dwdst + 4), convertedColor[1]); - _mm_store_si128((__m128i *)(dwdst + 8), convertedColor[2]); - _mm_store_si128((__m128i *)(dwdst + 12), convertedColor[3]); - } -#else - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - u8 idx; - - idx = *adr & 0x03; - *dwdst++ = (idx == 0) ? 0 : CONVERT(pal[idx] & 0x7FFF); - - idx = (*adr >> 2) & 0x03; - *dwdst++ = (idx == 0) ? 0 : CONVERT(pal[idx] & 0x7FFF); - - idx = (*adr >> 4) & 0x03; - *dwdst++ = (idx == 0) ? 0 : CONVERT(pal[idx] & 0x7FFF); - - idx = (*adr >> 6) & 0x03; - *dwdst++ = (idx == 0) ? 
0 : CONVERT(pal[idx] & 0x7FFF); - } -#endif - } + ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); } else { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; -#ifdef ENABLE_SSSE3 - for (size_t x = 0; x < ms.items[j].len; x+=4, adr+=4, dwdst+=16) - { - __m128i idx = _mm_set_epi32(0, 0, 0, *(u32 *)adr); - idx = _mm_unpacklo_epi8(idx, idx); - idx = _mm_unpacklo_epi8(idx, idx); - idx = _mm_or_si128( _mm_or_si128( _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi32(0x00000003)), _mm_and_si128(_mm_srli_epi32(idx, 2), _mm_set1_epi32(0x00000300)) ), _mm_and_si128(_mm_srli_epi32(idx, 4), _mm_set1_epi32(0x00030000)) ), _mm_and_si128(_mm_srli_epi32(idx, 6), _mm_set1_epi32(0x03000000)) ); - idx = _mm_slli_epi16(idx, 1); - - const __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - const __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - - const __m128i palColor0 = _mm_shuffle_epi8(pal_vec128, idx0); - const __m128i palColor1 = _mm_shuffle_epi8(pal_vec128, idx1); - - __m128i convertedColor[4]; - - if (TEXFORMAT == TexFormat_15bpp) - { - ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); - } - else - { - ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); - } - - _mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]); - _mm_store_si128((__m128i *)(dwdst + 4), convertedColor[1]); - _mm_store_si128((__m128i *)(dwdst + 8), convertedColor[2]); - _mm_store_si128((__m128i *)(dwdst + 12), convertedColor[3]); - } -#else - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - 
*dwdst++ = CONVERT(pal[ *adr & 0x03] & 0x7FFF); - *dwdst++ = CONVERT(pal[(*adr >> 2) & 0x03] & 0x7FFF); - *dwdst++ = CONVERT(pal[(*adr >> 4) & 0x03] & 0x7FFF); - *dwdst++ = CONVERT(pal[(*adr >> 6) & 0x03] & 0x7FFF); - } -#endif - } + ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); } - break; + + _mm_store_si128((__m128i *)(dstBuffer + 0), convertedColor[0]); + _mm_store_si128((__m128i *)(dstBuffer + 4), convertedColor[1]); + _mm_store_si128((__m128i *)(dstBuffer + 8), convertedColor[2]); + _mm_store_si128((__m128i *)(dstBuffer + 12), convertedColor[3]); } - - case TEXMODE_I4: +#else + for (size_t x = 0; x < ms.items[j].len; x++, adr++) { + *dstBuffer++ = CONVERT(pal[ *adr & 0x03] & 0x7FFF); + *dstBuffer++ = CONVERT(pal[(*adr >> 2) & 0x03] & 0x7FFF); + *dstBuffer++ = CONVERT(pal[(*adr >> 4) & 0x03] & 0x7FFF); + *dstBuffer++ = CONVERT(pal[(*adr >> 6) & 0x03] & 0x7FFF); + } +#endif + } + } +} + +template <TexCache_TexFormat TEXCACHEFORMAT> +void NDSTextureUnpackI4(const MemSpan &ms, const u16 *pal, const bool isPalZeroTransparent, u32 *dstBuffer) +{ + u8 *adr; + #ifdef ENABLE_SSSE3 - const __m128i palLo = _mm_load_si128((__m128i *)pal + 0); - const __m128i palHi = _mm_load_si128((__m128i *)pal + 1); + const __m128i palLo = _mm_load_si128((__m128i *)pal + 0); + const __m128i palHi = _mm_load_si128((__m128i *)pal + 1); #endif - if (isPalZeroTransparent) + if (isPalZeroTransparent) + { + for (size_t j = 0; j < ms.numItems; j++) + { + adr = ms.items[j].ptr; +#ifdef ENABLE_SSSE3 + for (size_t x = 0; x < ms.items[j].len; x+=8, adr+=8, dstBuffer+=16) + { + __m128i idx = _mm_loadl_epi64((__m128i *)adr); + idx = _mm_unpacklo_epi8(idx, idx); + idx = _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi16(0x000F)), _mm_and_si128(_mm_srli_epi16(idx, 4), _mm_set1_epi16(0x0F00)) ); + idx = _mm_slli_epi16(idx, 1); + + __m128i idx0 = _mm_add_epi8( 
_mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + + const __m128i palMask = _mm_cmpeq_epi8( _mm_and_si128(idx, _mm_set1_epi8(0x10)), _mm_setzero_si128() ); + const __m128i palColor0A = _mm_shuffle_epi8(palLo, idx0); + const __m128i palColor0B = _mm_shuffle_epi8(palHi, idx0); + const __m128i palColor1A = _mm_shuffle_epi8(palLo, idx1); + const __m128i palColor1B = _mm_shuffle_epi8(palHi, idx1); + + const __m128i palColor0 = _mm_blendv_epi8( palColor0B, palColor0A, _mm_unpacklo_epi8(palMask, palMask) ); + const __m128i palColor1 = _mm_blendv_epi8( palColor1B, palColor1A, _mm_unpackhi_epi8(palMask, palMask) ); + + __m128i convertedColor[4]; + + if (TEXCACHEFORMAT == TexFormat_15bpp) { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; -#ifdef ENABLE_SSSE3 - for (size_t x = 0; x < ms.items[j].len; x+=8, adr+=8, dwdst+=16) - { - __m128i idx = _mm_loadl_epi64((__m128i *)adr); - idx = _mm_unpacklo_epi8(idx, idx); - idx = _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi16(0x000F)), _mm_and_si128(_mm_srli_epi16(idx, 4), _mm_set1_epi16(0x0F00)) ); - idx = _mm_slli_epi16(idx, 1); - - __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - - const __m128i palMask = _mm_cmpeq_epi8( _mm_and_si128(idx, _mm_set1_epi8(0x10)), _mm_setzero_si128() ); - const __m128i palColor0A = _mm_shuffle_epi8(palLo, idx0); - const __m128i palColor0B = _mm_shuffle_epi8(palHi, idx0); - const __m128i palColor1A = _mm_shuffle_epi8(palLo, idx1); - const __m128i palColor1B = _mm_shuffle_epi8(palHi, idx1); - - const __m128i palColor0 = _mm_blendv_epi8( palColor0B, palColor0A, _mm_unpacklo_epi8(palMask, palMask) ); - const __m128i palColor1 = _mm_blendv_epi8( palColor1B, palColor1A, _mm_unpackhi_epi8(palMask, palMask) ); - - __m128i convertedColor[4]; - - if (TEXFORMAT == 
TexFormat_15bpp) - { - ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); - } - else - { - ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); - } - - // Set converted colors to 0 if the palette index is 0. - idx0 = _mm_cmpeq_epi16(idx0, _mm_set1_epi16(0x0100)); - idx1 = _mm_cmpeq_epi16(idx1, _mm_set1_epi16(0x0100)); - convertedColor[0] = _mm_andnot_si128(_mm_unpacklo_epi16(idx0, idx0), convertedColor[0]); - convertedColor[1] = _mm_andnot_si128(_mm_unpackhi_epi16(idx0, idx0), convertedColor[1]); - convertedColor[2] = _mm_andnot_si128(_mm_unpacklo_epi16(idx1, idx1), convertedColor[2]); - convertedColor[3] = _mm_andnot_si128(_mm_unpackhi_epi16(idx1, idx1), convertedColor[3]); - - _mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]); - _mm_store_si128((__m128i *)(dwdst + 4), convertedColor[1]); - _mm_store_si128((__m128i *)(dwdst + 8), convertedColor[2]); - _mm_store_si128((__m128i *)(dwdst + 12), convertedColor[3]); - } -#else - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - u8 idx; - - idx = *adr & 0xF; - *dwdst++ = (idx == 0) ? 0 : CONVERT(pal[idx] & 0x7FFF); - - idx = *adr >> 4; - *dwdst++ = (idx == 0) ? 
0 : CONVERT(pal[idx] & 0x7FFF); - } -#endif - } - + ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); } else { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; -#ifdef ENABLE_SSSE3 - for (size_t x = 0; x < ms.items[j].len; x+=8, adr+=8, dwdst+=16) - { - __m128i idx = _mm_loadl_epi64((__m128i *)adr); - idx = _mm_unpacklo_epi8(idx, idx); - idx = _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi16(0x000F)), _mm_and_si128(_mm_srli_epi16(idx, 4), _mm_set1_epi16(0x0F00)) ); - idx = _mm_slli_epi16(idx, 1); - - const __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - const __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); - - const __m128i palMask = _mm_cmpeq_epi8( _mm_and_si128(idx, _mm_set1_epi8(0x10)), _mm_setzero_si128() ); - const __m128i palColor0A = _mm_shuffle_epi8(palLo, idx0); - const __m128i palColor0B = _mm_shuffle_epi8(palHi, idx0); - const __m128i palColor1A = _mm_shuffle_epi8(palLo, idx1); - const __m128i palColor1B = _mm_shuffle_epi8(palHi, idx1); - - const __m128i palColor0 = _mm_blendv_epi8( palColor0B, palColor0A, _mm_unpacklo_epi8(palMask, palMask) ); - const __m128i palColor1 = _mm_blendv_epi8( palColor1B, palColor1A, _mm_unpackhi_epi8(palMask, palMask) ); - - __m128i convertedColor[4]; - - if (TEXFORMAT == TexFormat_15bpp) - { - ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); - } - else - { - ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); - } - - _mm_store_si128((__m128i *)(dwdst + 0), convertedColor[0]); - _mm_store_si128((__m128i *)(dwdst + 
4), convertedColor[1]); - _mm_store_si128((__m128i *)(dwdst + 8), convertedColor[2]); - _mm_store_si128((__m128i *)(dwdst + 12), convertedColor[3]); - } -#else - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - *dwdst++ = CONVERT(pal[*adr & 0x0F] & 0x7FFF); - *dwdst++ = CONVERT(pal[*adr >> 4] & 0x7FFF); - } -#endif - } + ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); } - break; + + // Set converted colors to 0 if the palette index is 0. + idx0 = _mm_cmpeq_epi16(idx0, _mm_set1_epi16(0x0100)); + idx1 = _mm_cmpeq_epi16(idx1, _mm_set1_epi16(0x0100)); + convertedColor[0] = _mm_andnot_si128(_mm_unpacklo_epi16(idx0, idx0), convertedColor[0]); + convertedColor[1] = _mm_andnot_si128(_mm_unpackhi_epi16(idx0, idx0), convertedColor[1]); + convertedColor[2] = _mm_andnot_si128(_mm_unpacklo_epi16(idx1, idx1), convertedColor[2]); + convertedColor[3] = _mm_andnot_si128(_mm_unpackhi_epi16(idx1, idx1), convertedColor[3]); + + _mm_store_si128((__m128i *)(dstBuffer + 0), convertedColor[0]); + _mm_store_si128((__m128i *)(dstBuffer + 4), convertedColor[1]); + _mm_store_si128((__m128i *)(dstBuffer + 8), convertedColor[2]); + _mm_store_si128((__m128i *)(dstBuffer + 12), convertedColor[3]); } +#else + for (size_t x = 0; x < ms.items[j].len; x++, adr++) + { + u8 idx; - case TEXMODE_I8: + idx = *adr & 0xF; + *dstBuffer++ = (idx == 0) ? 0 : CONVERT(pal[idx] & 0x7FFF); + + idx = *adr >> 4; + *dstBuffer++ = (idx == 0) ? 
0 : CONVERT(pal[idx] & 0x7FFF); + } +#endif + } + } + else + { + for (size_t j = 0; j < ms.numItems; j++) + { + adr = ms.items[j].ptr; +#ifdef ENABLE_SSSE3 + for (size_t x = 0; x < ms.items[j].len; x+=8, adr+=8, dstBuffer+=16) { - if (isPalZeroTransparent) + __m128i idx = _mm_loadl_epi64((__m128i *)adr); + idx = _mm_unpacklo_epi8(idx, idx); + idx = _mm_or_si128( _mm_and_si128(idx, _mm_set1_epi16(0x000F)), _mm_and_si128(_mm_srli_epi16(idx, 4), _mm_set1_epi16(0x0F00)) ); + idx = _mm_slli_epi16(idx, 1); + + const __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + const __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + + const __m128i palMask = _mm_cmpeq_epi8( _mm_and_si128(idx, _mm_set1_epi8(0x10)), _mm_setzero_si128() ); + const __m128i palColor0A = _mm_shuffle_epi8(palLo, idx0); + const __m128i palColor0B = _mm_shuffle_epi8(palHi, idx0); + const __m128i palColor1A = _mm_shuffle_epi8(palLo, idx1); + const __m128i palColor1B = _mm_shuffle_epi8(palHi, idx1); + + const __m128i palColor0 = _mm_blendv_epi8( palColor0B, palColor0A, _mm_unpacklo_epi8(palMask, palMask) ); + const __m128i palColor1 = _mm_blendv_epi8( palColor1B, palColor1A, _mm_unpackhi_epi8(palMask, palMask) ); + + __m128i convertedColor[4]; + + if (TEXCACHEFORMAT == TexFormat_15bpp) { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - *dwdst++ = (*adr == 0) ? 
0 : CONVERT(pal[*adr] & 0x7FFF); - } - } + ColorspaceConvert555To6665Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555To6665Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); } else { - for (size_t j = 0; j < ms.numItems; j++) - { - adr = ms.items[j].ptr; - for (size_t x = 0; x < ms.items[j].len; x++, adr++) - { - *dwdst++ = CONVERT(pal[*adr] & 0x7FFF); - } - } + ColorspaceConvert555To8888Opaque_SSE2<false>(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555To8888Opaque_SSE2<false>(palColor1, convertedColor[2], convertedColor[3]); } - break; + + _mm_store_si128((__m128i *)(dstBuffer + 0), convertedColor[0]); + _mm_store_si128((__m128i *)(dstBuffer + 4), convertedColor[1]); + _mm_store_si128((__m128i *)(dstBuffer + 8), convertedColor[2]); + _mm_store_si128((__m128i *)(dstBuffer + 12), convertedColor[3]); } - - case TEXMODE_4X4: +#else + for (size_t x = 0; x < ms.items[j].len; x++, adr++) { - if (ms.numItems != 1) - { - PROGINFO("Your 4x4 texture has overrun its texture slot.\n"); - } - //this check isnt necessary since the addressing is tied to the texture data which will also run out: - //if(msIndex.numItems != 1) PROGINFO("Your 4x4 texture index has overrun its slot.\n"); + *dstBuffer++ = CONVERT(pal[*adr & 0x0F] & 0x7FFF); + *dstBuffer++ = CONVERT(pal[*adr >> 4] & 0x7FFF); + } +#endif + } + } +} - #define PAL4X4(offset) ( LE_TO_LOCAL_16( *(u16*)( MMU.texInfo.texPalSlot[((paletteAddress + (offset)*2)>>14)&0x7] + ((paletteAddress + (offset)*2)&0x3FFF) ) ) & 0x7FFF ) +template <TexCache_TexFormat TEXCACHEFORMAT> +void NDSTextureUnpackI8(const MemSpan &ms, const u16 *srcPal, const bool isPalZeroTransparent, u32 *dstBuffer) +{ + u8 *adr; + + if (isPalZeroTransparent) + { + for (size_t j = 0; j < ms.numItems; j++) + { + adr = ms.items[j].ptr; + for (size_t x = 0; x < ms.items[j].len; x++, adr++) + { + *dstBuffer++ = (*adr == 0) ? 
0 : CONVERT(srcPal[*adr] & 0x7FFF); + } + } + } + else + { + for (size_t j = 0; j < ms.numItems; j++) + { + adr = ms.items[j].ptr; + for (size_t x = 0; x < ms.items[j].len; x++, adr++) + { + *dstBuffer++ = CONVERT(srcPal[*adr] & 0x7FFF); + } + } + } +} - u16* slot1; - u32* map = (u32*)ms.items[0].ptr; - u32 limit = ms.items[0].len<<2; - u32 d = 0; - if ( (format & 0xc000) == 0x8000) - // texel are in slot 2 - slot1=(u16*)&MMU.texInfo.textureSlotAddr[1][((format & 0x3FFF)<<2)+0x010000]; - else - slot1=(u16*)&MMU.texInfo.textureSlotAddr[1][(format & 0x3FFF)<<2]; +template <TexCache_TexFormat TEXCACHEFORMAT> +void NDSTextureUnpackA3I5(const MemSpan &ms, const u16 *pal, u32 *dstBuffer) +{ + u8 *adr; + + for (size_t j = 0; j < ms.numItems; j++) + { + adr = ms.items[j].ptr; + for (size_t x = 0; x < ms.items[j].len; x++, adr++) + { + const u16 c = pal[*adr & 0x1F] & 0x7FFF; + const u8 alpha = *adr >> 5; + *dstBuffer++ = (TEXCACHEFORMAT == TexFormat_15bpp) ? COLOR555TO6665(c, material_3bit_to_5bit[alpha]) : COLOR555TO8888(c, material_3bit_to_8bit[alpha]); + } + } +} - u16 yTmpSize = (sizeY>>2); - u16 xTmpSize = (sizeX>>2); +template <TexCache_TexFormat TEXCACHEFORMAT> +void NDSTextureUnpackA5I3(const MemSpan &ms, const u16 *pal, u32 *dstBuffer) +{ + u8 *adr; + +#ifdef ENABLE_SSSE3 + const __m128i pal_vec128 = _mm_load_si128((__m128i *)pal); +#endif + for (size_t j = 0; j < ms.numItems; j++) + { + adr = ms.items[j].ptr; +#ifdef ENABLE_SSSE3 + for (size_t x = 0; x < ms.items[j].len; x+=16, adr+=16, dstBuffer+=16) + { + const __m128i bits = _mm_loadu_si128((__m128i *)adr); + + const __m128i idx = _mm_slli_epi16( _mm_and_si128(bits, _mm_set1_epi8(0x07)), 1 ); + const __m128i idx0 = _mm_add_epi8( _mm_unpacklo_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + const __m128i idx1 = _mm_add_epi8( _mm_unpackhi_epi8(idx, idx), _mm_set1_epi16(0x0100) ); + + const __m128i palColor0 = _mm_shuffle_epi8(pal_vec128, idx0); + const __m128i palColor1 = _mm_shuffle_epi8(pal_vec128, idx1); + + 
__m128i tmpAlpha[2]; + __m128i convertedColor[4]; + + if (TEXCACHEFORMAT == TexFormat_15bpp) + { + const __m128i alpha = _mm_srli_epi16( _mm_and_si128(bits, _mm_set1_epi8(0xF8)), 3 ); + const __m128i alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha); + const __m128i alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha); + + tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo); + tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo); + ColorspaceConvert555To6665_SSE2<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]); + + tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi); + tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi); + ColorspaceConvert555To6665_SSE2<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]); + } + else + { + const __m128i alpha = _mm_or_si128( _mm_and_si128(bits, _mm_set1_epi8(0xF8)), _mm_srli_epi16(_mm_and_si128(bits, _mm_set1_epi8(0xE0)), 5) ); + const __m128i alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha); + const __m128i alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha); + + tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaLo); + tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaLo); + ColorspaceConvert555To8888_SSE2<false>(palColor0, tmpAlpha[0], tmpAlpha[1], convertedColor[0], convertedColor[1]); + + tmpAlpha[0] = _mm_unpacklo_epi16(_mm_setzero_si128(), alphaHi); + tmpAlpha[1] = _mm_unpackhi_epi16(_mm_setzero_si128(), alphaHi); + ColorspaceConvert555To8888_SSE2<false>(palColor1, tmpAlpha[0], tmpAlpha[1], convertedColor[2], convertedColor[3]); + } + + _mm_store_si128((__m128i *)(dstBuffer + 0), convertedColor[0]); + _mm_store_si128((__m128i *)(dstBuffer + 4), convertedColor[1]); + _mm_store_si128((__m128i *)(dstBuffer + 8), convertedColor[2]); + _mm_store_si128((__m128i *)(dstBuffer + 12), convertedColor[3]); + } +#else + for (size_t x = 0; x < ms.items[j].len; x++, adr++) + { + const u16 c = 
pal[*adr&0x07] & 0x7FFF; + const u8 alpha = (*adr>>3); + *dstBuffer++ = (TEXCACHEFORMAT == TexFormat_15bpp) ? COLOR555TO6665(c, alpha) : COLOR555TO8888(c, material_5bit_to_8bit[alpha]); + } +#endif + } +} - //this is flagged whenever a 4x4 overruns its slot. - //i am guessing we just generate black in that case - bool dead = false; +#define PAL4X4(offset) ( LE_TO_LOCAL_16( *(u16*)( MMU.texInfo.texPalSlot[((palAddress + (offset)*2)>>14)&0x7] + ((palAddress + (offset)*2)&0x3FFF) ) ) & 0x7FFF ) - for (size_t y = 0; y < y... [truncated message content] |
From: <ze...@us...> - 2016-10-25 06:02:35
|
Revision: 5566
          http://sourceforge.net/p/desmume/code/5566
Author:   zeromus
Date:     2016-10-25 06:02:33 +0000 (Tue, 25 Oct 2016)
Log Message:
-----------
add savetype hardcode for puzzler world

Modified Paths:
--------------
    trunk/desmume/src/mc.cpp

Modified: trunk/desmume/src/mc.cpp
===================================================================
--- trunk/desmume/src/mc.cpp 2016-10-05 08:21:38 UTC (rev 5565)
+++ trunk/desmume/src/mc.cpp 2016-10-25 06:02:33 UTC (rev 5566)
@@ -632,6 +632,7 @@
     else if(!memcmp(gameInfo.header.gameCode,"AH5", 3)) addr_size = 1; //over the hedge
     else if(!memcmp(gameInfo.header.gameCode,"AVH", 3)) addr_size = 1; //over the hedge - Hammy Goes Nuts!
     else if(!memcmp(gameInfo.header.gameCode,"AQ3", 3)) addr_size = 1; //spider-man 3
+    else if(!memcmp(gameInfo.header.gameCode,"BPV", 3)) addr_size = 2; //puzzler world (should be eeprom 64KBits)
 
     //if we found a whitelist match, we dont need to run detection
     if(addr_size) state = RUNNING;

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
From: <ze...@us...> - 2016-10-05 08:21:41
|
Revision: 5565
          http://sourceforge.net/p/desmume/code/5565
Author:   zeromus
Date:     2016-10-05 08:21:38 +0000 (Wed, 05 Oct 2016)
Log Message:
-----------
fix support importing newer style ardsi duc files

Modified Paths:
--------------
    trunk/desmume/src/mc.cpp

Modified: trunk/desmume/src/mc.cpp
===================================================================
--- trunk/desmume/src/mc.cpp 2016-10-03 01:48:05 UTC (rev 5564)
+++ trunk/desmume/src/mc.cpp 2016-10-05 08:21:38 UTC (rev 5565)
@@ -1504,7 +1504,7 @@
 bool BackupDevice::import_duc(const char* filename, u32 force_size)
 {
     u32 size;
-    u8 id16[16] = {0}, id4[4] = {0}, id2[2] = {0};
+    u8 id16[16] = {0}, id4[4] = {0}, id3[3] = {0};
     FILE* file = fopen(filename, "rb");
 
     if(!file) return false;
@@ -1516,12 +1516,13 @@
     if(!memcmp(id16, "ARDS000000000001", 16)) version = 1;
 
     //ID version 2
-    fseek(file,0xA2,SEEK_SET);
-    fread(id2,1,2,file);
-    if(!memcmp(id16,"\0\0\0\0",4) && !memcmp(id2,"\x04\xC0",2)) version = 2;
+    fseek(file,0xA1,SEEK_SET);
+    fread(id3,1,3,file);
+    if(!memcmp(id16,"\0\0\0\0",4) && id3[2] == 0xC0) version = 2;
 
     if(version == 0)
     {
+    INVALID_DUC:
         printf("Not recognized as a valid DUC file\n");
         fclose(file);
         return false;
@@ -1540,6 +1541,11 @@
     {
         size -= 0xA4;
         fseek(file, 0xA4, SEEK_SET);
+
+        //validate size
+        int specifiedSize = (id3[0]<<8)+(id3[1]<<16);
+        if(specifiedSize != size)
+            goto INVALID_DUC;
     }
 
     u32 left = 0;

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
From: <ze...@us...> - 2016-10-03 01:48:06
|
Revision: 5564 http://sourceforge.net/p/desmume/code/5564 Author: zeromus Date: 2016-10-03 01:48:05 +0000 (Mon, 03 Oct 2016) Log Message: ----------- update path format tooltip Modified Paths: -------------- trunk/desmume/src/windows/pathsettings.cpp Modified: trunk/desmume/src/windows/pathsettings.cpp =================================================================== --- trunk/desmume/src/windows/pathsettings.cpp 2016-09-30 05:41:00 UTC (rev 5563) +++ trunk/desmume/src/windows/pathsettings.cpp 2016-10-03 01:48:05 UTC (rev 5564) @@ -185,14 +185,17 @@ ti.uFlags = TTF_SUBCLASS | TTF_IDISHWND; ti.uId = (UINT_PTR)hwnd; ti.lpszText = - "The format a screenshot should be saved in.\r\n" + "The string format a screenshot should be saved with (google strftime).\r\n" "%f\t\tFilename\r\n" "%r\t\tRandom: 0 ~ RAND_MAX\r\n" "%t\t\tTick: Reset on startup\r\n" "%Y\t\tYear:Four Digit\r\n" + "%y\t\tYear:Two Digit\r\n" "%m\t\tMonth:Two Digit\r\n" - "%D\t\tDay:Two Digit\r\n" - "%H\t\tHour:Two Digit\r\n" + "%d\t\tDay:Two Digit\r\n" + "%H\t\tHour (24):Two Digit\r\n" + "%I\t\tHour (12):Two Digit\r\n" + "%p\t\tAM/PM\r\n" "%M\t\tMinute: Two Digit\r\n" "%S\t\tSecond: Two Digit\r\n"; GetClientRect(hwnd, &ti.rect); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-09-30 05:41:02
|
Revision: 5563 http://sourceforge.net/p/desmume/code/5563 Author: rogerman Date: 2016-09-30 05:41:00 +0000 (Fri, 30 Sep 2016) Log Message: ----------- Filters: - Simplify the functionality of the Deposterize filter by making the threshold a constant value. - Increase the Deposterize threshold from 21 to 23. Modified Paths: -------------- trunk/desmume/src/filter/deposterize.cpp trunk/desmume/src/render3D.cpp trunk/desmume/src/render3D.h Modified: trunk/desmume/src/filter/deposterize.cpp =================================================================== --- trunk/desmume/src/filter/deposterize.cpp 2016-09-30 05:08:23 UTC (rev 5562) +++ trunk/desmume/src/filter/deposterize.cpp 2016-09-30 05:41:00 UTC (rev 5563) @@ -18,7 +18,10 @@ #include "../types.h" #include "filter.h" -static u32 Deposterize_InterpLTE(const u32 pixA, const u32 pixB, const u32 threshold) +#define DEPOSTERIZE_THRESHOLD 23 // Possible values are [0-255], where lower a value prevents blending and a higher value allows for more blending + + +static u32 Deposterize_InterpLTE(const u32 pixA, const u32 pixB) { const u32 aB = (pixB & 0xFF000000) >> 24; if (aB == 0) @@ -35,10 +38,10 @@ const u32 gB = (pixB & 0x0000FF00) >> 8; const u32 bB = (pixB & 0x00FF0000) >> 16; - const u32 rC = ( (rB - rA <= threshold) || (rA - rB <= threshold) ) ? ( ((rA+rB)>>1) ) : rA; - const u32 gC = ( (gB - gA <= threshold) || (gA - gB <= threshold) ) ? ( ((gA+gB)>>1) ) : gA; - const u32 bC = ( (bB - bA <= threshold) || (bA - bB <= threshold) ) ? ( ((bA+bB)>>1) ) : bA; - const u32 aC = ( (bB - aA <= threshold) || (aA - aB <= threshold) ) ? ( ((aA+aB)>>1) ) : aA; + const u32 rC = ( (rB - rA <= DEPOSTERIZE_THRESHOLD) || (rA - rB <= DEPOSTERIZE_THRESHOLD) ) ? ( ((rA+rB)>>1) ) : rA; + const u32 gC = ( (gB - gA <= DEPOSTERIZE_THRESHOLD) || (gA - gB <= DEPOSTERIZE_THRESHOLD) ) ? ( ((gA+gB)>>1) ) : gA; + const u32 bC = ( (bB - bA <= DEPOSTERIZE_THRESHOLD) || (bA - bB <= DEPOSTERIZE_THRESHOLD) ) ? 
( ((bA+bB)>>1) ) : bA; + const u32 aC = ( (bB - aA <= DEPOSTERIZE_THRESHOLD) || (aA - aB <= DEPOSTERIZE_THRESHOLD) ) ? ( ((aA+aB)>>1) ) : aA; return (rC | (gC << 8) | (bC << 16) | (aC << 24)); } @@ -84,7 +87,6 @@ u32 *src = (u32 *)Src.Surface; u32 *workingDst = (u32 *)Dst.workingSurface[0]; u32 *finalDst = (u32 *)Dst.Surface; - u32 threshold = *(u32 *)Dst.userData; int i = 0; for (int y = 0; y < h; y++) @@ -108,14 +110,14 @@ color[8] = ((x < w-1) && (y > 0)) ? src[i-w+1] : src[i]; blend[0] = color[0]; - blend[1] = Deposterize_InterpLTE(color[0], color[1], threshold); - blend[2] = Deposterize_InterpLTE(color[0], color[2], threshold); - blend[3] = Deposterize_InterpLTE(color[0], color[3], threshold); - blend[4] = Deposterize_InterpLTE(color[0], color[4], threshold); - blend[5] = Deposterize_InterpLTE(color[0], color[5], threshold); - blend[6] = Deposterize_InterpLTE(color[0], color[6], threshold); - blend[7] = Deposterize_InterpLTE(color[0], color[7], threshold); - blend[8] = Deposterize_InterpLTE(color[0], color[8], threshold); + blend[1] = Deposterize_InterpLTE(color[0], color[1]); + blend[2] = Deposterize_InterpLTE(color[0], color[2]); + blend[3] = Deposterize_InterpLTE(color[0], color[3]); + blend[4] = Deposterize_InterpLTE(color[0], color[4]); + blend[5] = Deposterize_InterpLTE(color[0], color[5]); + blend[6] = Deposterize_InterpLTE(color[0], color[6]); + blend[7] = Deposterize_InterpLTE(color[0], color[7]); + blend[8] = Deposterize_InterpLTE(color[0], color[8]); workingDst[i] = Deposterize_Blend(Deposterize_Blend(Deposterize_Blend(Deposterize_Blend(blend[0], blend[5], 1, 7), Deposterize_Blend(blend[0], blend[1], 1, 7), @@ -157,14 +159,14 @@ color[8] = ((x < w-1) && (y > 0)) ? 
workingDst[i-w+1] : workingDst[i]; blend[0] = color[0]; - blend[1] = Deposterize_InterpLTE(color[0], color[1], threshold); - blend[2] = Deposterize_InterpLTE(color[0], color[2], threshold); - blend[3] = Deposterize_InterpLTE(color[0], color[3], threshold); - blend[4] = Deposterize_InterpLTE(color[0], color[4], threshold); - blend[5] = Deposterize_InterpLTE(color[0], color[5], threshold); - blend[6] = Deposterize_InterpLTE(color[0], color[6], threshold); - blend[7] = Deposterize_InterpLTE(color[0], color[7], threshold); - blend[8] = Deposterize_InterpLTE(color[0], color[8], threshold); + blend[1] = Deposterize_InterpLTE(color[0], color[1]); + blend[2] = Deposterize_InterpLTE(color[0], color[2]); + blend[3] = Deposterize_InterpLTE(color[0], color[3]); + blend[4] = Deposterize_InterpLTE(color[0], color[4]); + blend[5] = Deposterize_InterpLTE(color[0], color[5]); + blend[6] = Deposterize_InterpLTE(color[0], color[6]); + blend[7] = Deposterize_InterpLTE(color[0], color[7]); + blend[8] = Deposterize_InterpLTE(color[0], color[8]); finalDst[i] = Deposterize_Blend(Deposterize_Blend(Deposterize_Blend(Deposterize_Blend(blend[0], blend[5], 1, 7), Deposterize_Blend(blend[0], blend[1], 1, 7), Modified: trunk/desmume/src/render3D.cpp =================================================================== --- trunk/desmume/src/render3D.cpp 2016-09-30 05:08:23 UTC (rev 5562) +++ trunk/desmume/src/render3D.cpp 2016-09-30 05:41:00 UTC (rev 5563) @@ -32,7 +32,6 @@ #include "./filter/filter.h" #include "./filter/xbrz.h" -#define TEXTURE_DEPOSTERIZE_THRESHOLD 21 // Possible values are [0-255], where lower a value prevents blending and a higher value allows for more blending int cur3DCore = GPU3D_NULL; @@ -238,7 +237,6 @@ _textureScalingFactor = 1; _textureSmooth = false; _textureUpscaleBuffer = NULL; - _textureDeposterizeThreshold = TEXTURE_DEPOSTERIZE_THRESHOLD; memset(&_textureDeposterizeSrcSurface, 0, sizeof(_textureDeposterizeSrcSurface)); memset(&_textureDeposterizeDstSurface, 0, 
sizeof(_textureDeposterizeDstSurface)); @@ -246,7 +244,6 @@ _textureDeposterizeSrcSurface.Width = _textureDeposterizeDstSurface.Width = 1; _textureDeposterizeSrcSurface.Height = _textureDeposterizeDstSurface.Height = 1; _textureDeposterizeSrcSurface.Pitch = _textureDeposterizeDstSurface.Pitch = 1; - _textureDeposterizeDstSurface.userData = &_textureDeposterizeThreshold; Reset(); } Modified: trunk/desmume/src/render3D.h =================================================================== --- trunk/desmume/src/render3D.h 2016-09-30 05:08:23 UTC (rev 5562) +++ trunk/desmume/src/render3D.h 2016-09-30 05:41:00 UTC (rev 5563) @@ -134,9 +134,7 @@ SSurface _textureDeposterizeSrcSurface; SSurface _textureDeposterizeDstSurface; - u32 _textureDeposterizeThreshold; - //u32 *_textureDeposterizeBuffer; u32 *_textureUpscaleBuffer; CACHE_ALIGN u16 clearImageColor16Buffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ze...@us...> - 2016-09-30 05:08:25
|
Revision: 5562 http://sourceforge.net/p/desmume/code/5562 Author: zeromus Date: 2016-09-30 05:08:23 +0000 (Fri, 30 Sep 2016) Log Message: ----------- support importing newer style ardsi duc files Modified Paths: -------------- trunk/desmume/src/mc.cpp Modified: trunk/desmume/src/mc.cpp =================================================================== --- trunk/desmume/src/mc.cpp 2016-09-29 00:58:04 UTC (rev 5561) +++ trunk/desmume/src/mc.cpp 2016-09-30 05:08:23 UTC (rev 5562) @@ -1504,27 +1504,44 @@ bool BackupDevice::import_duc(const char* filename, u32 force_size) { u32 size; - char id[16]; + u8 id16[16] = {0}, id4[4] = {0}, id2[2] = {0}; FILE* file = fopen(filename, "rb"); if(!file) return false; - fseek(file, 0, SEEK_END); - size = (u32)ftell(file) - 500; - fseek(file, 0, SEEK_SET); + int version = 0; - // Make sure we really have the right file - fread((void *)id, sizeof(char), 16, file); + //ID version 1 + fread(id16, 1, 16, file); + if(!memcmp(id16, "ARDS000000000001", 16)) version = 1; - if (memcmp(id, "ARDS000000000001", 16) != 0) + //ID version 2 + fseek(file,0xA2,SEEK_SET); + fread(id2,1,2,file); + if(!memcmp(id16,"\0\0\0\0",4) && !memcmp(id2,"\x04\xC0",2)) version = 2; + + if(version == 0) { printf("Not recognized as a valid DUC file\n"); fclose(file); return false; } - // Skip the rest of the header since we don't need it - fseek(file, 500, SEEK_SET); + fseek(file, 0, SEEK_END); + size = (u32)ftell(file); + + //skip to raw data + if(version == 1) + { + size -= 500; + fseek(file, 500, SEEK_SET); + } + if(version == 2) + { + size -= 0xA4; + fseek(file, 0xA4, SEEK_SET); + } + u32 left = 0; if (force_size > 0) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-09-29 00:58:06
|
Revision: 5561 http://sourceforge.net/p/desmume/code/5561 Author: rogerman Date: 2016-09-29 00:58:04 +0000 (Thu, 29 Sep 2016) Log Message: ----------- Filters: - Remove the Deposterize texture filter from render3D.cpp and make it a general-purpose standalone filter. Modified Paths: -------------- trunk/desmume/src/Makefile.am trunk/desmume/src/OGLRender.cpp trunk/desmume/src/OGLRender_3_2.cpp trunk/desmume/src/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj trunk/desmume/src/cocoa/DeSmuME (XCode 3).xcodeproj/project.pbxproj trunk/desmume/src/filter/filter.h trunk/desmume/src/render3D.cpp trunk/desmume/src/render3D.h trunk/desmume/src/windows/DeSmuME.vcxproj trunk/desmume/src/windows/DeSmuME.vcxproj.filters Added Paths: ----------- trunk/desmume/src/filter/deposterize.cpp Modified: trunk/desmume/src/Makefile.am =================================================================== --- trunk/desmume/src/Makefile.am 2016-09-25 22:09:13 UTC (rev 5560) +++ trunk/desmume/src/Makefile.am 2016-09-29 00:58:04 UTC (rev 5561) @@ -88,15 +88,45 @@ utils/tinyxml/tinyxmlerror.cpp \ utils/tinyxml/tinyxmlparser.cpp \ utils/glcorearb.h \ - addons/slot2_auto.cpp addons/slot2_mpcf.cpp addons/slot2_paddle.cpp addons/slot2_gbagame.cpp addons/slot2_none.cpp addons/slot2_rumblepak.cpp addons/slot2_guitarGrip.cpp addons/slot2_expMemory.cpp addons/slot2_piano.cpp addons/slot2_passme.cpp addons/slot1_none.cpp addons/slot1_r4.cpp addons/slot1_retail_nand.cpp addons/slot1_retail_auto.cpp addons/slot1_retail_mcrom.cpp addons/slot1_retail_mcrom_debug.cpp addons/slot1comp_mc.cpp addons/slot1comp_mc.h addons/slot1comp_rom.h addons/slot1comp_rom.cpp addons/slot1comp_protocol.h addons/slot1comp_protocol.cpp \ + addons/slot2_auto.cpp \ + addons/slot2_mpcf.cpp \ + addons/slot2_paddle.cpp \ + addons/slot2_gbagame.cpp \ + addons/slot2_none.cpp \ + addons/slot2_rumblepak.cpp \ + addons/slot2_guitarGrip.cpp \ + addons/slot2_expMemory.cpp \ + addons/slot2_piano.cpp \ + addons/slot2_passme.cpp \ + 
addons/slot1_none.cpp \ + addons/slot1_r4.cpp \ + addons/slot1_retail_nand.cpp \ + addons/slot1_retail_auto.cpp \ + addons/slot1_retail_mcrom.cpp \ + addons/slot1_retail_mcrom_debug.cpp \ + addons/slot1comp_mc.cpp \ + addons/slot1comp_mc.h \ + addons/slot1comp_rom.h \ + addons/slot1comp_rom.cpp \ + addons/slot1comp_protocol.h \ + addons/slot1comp_protocol.cpp \ cheatSystem.cpp cheatSystem.h \ texcache.cpp texcache.h rasterize.cpp rasterize.h \ metaspu/metaspu.cpp metaspu/metaspu.h \ - filter/2xsai.cpp filter/bilinear.cpp filter/epx.cpp filter/filter.h \ - filter/hq2x.cpp filter/hq2x.h \ - filter/hq3x.cpp filter/hq3x.dat \ - filter/hq4x.cpp filter/hq4x.dat \ - filter/interp.h filter/lq2x.cpp filter/lq2x.h filter/scanline.cpp \ + filter/2xsai.cpp \ + filter/bilinear.cpp \ + filter/deposterize.cpp \ + filter/epx.cpp \ + filter/filter.h \ + filter/hq2x.cpp \ + filter/hq2x.h \ + filter/hq3x.cpp \ + filter/hq3x.dat \ + filter/hq4x.cpp \ + filter/hq4x.dat \ + filter/interp.h \ + filter/lq2x.cpp filter/lq2x.h \ + filter/scanline.cpp \ filter/videofilter.cpp filter/videofilter.h \ filter/xbrz.cpp filter/xbrz.h \ version.cpp version.h \ Modified: trunk/desmume/src/OGLRender.cpp =================================================================== --- trunk/desmume/src/OGLRender.cpp 2016-09-25 22:09:13 UTC (rev 5560) +++ trunk/desmume/src/OGLRender.cpp 2016-09-29 00:58:04 UTC (rev 5561) @@ -2993,10 +2993,10 @@ size_t texWidth = this->currTexture->sizeX; size_t texHeight = this->currTexture->sizeY; - if (this->_textureDeposterizeBuffer != NULL) + if (this->_textureDeposterizeDstSurface.Surface != NULL) { this->TextureDeposterize(textureSrc, texWidth, texHeight); - textureSrc = this->_textureDeposterizeBuffer; + textureSrc = (u32 *)this->_textureDeposterizeDstSurface.Surface; } switch (this->_textureScalingFactor) @@ -4644,10 +4644,10 @@ size_t texWidth = this->currTexture->sizeX; size_t texHeight = this->currTexture->sizeY; - if (this->_textureDeposterizeBuffer != NULL) + if 
(this->_textureDeposterizeDstSurface.Surface != NULL) { this->TextureDeposterize(textureSrc, texWidth, texHeight); - textureSrc = this->_textureDeposterizeBuffer; + textureSrc = (u32 *)this->_textureDeposterizeDstSurface.Surface; } switch (this->_textureScalingFactor) Modified: trunk/desmume/src/OGLRender_3_2.cpp =================================================================== --- trunk/desmume/src/OGLRender_3_2.cpp 2016-09-25 22:09:13 UTC (rev 5560) +++ trunk/desmume/src/OGLRender_3_2.cpp 2016-09-29 00:58:04 UTC (rev 5561) @@ -1721,10 +1721,10 @@ size_t texWidth = this->currTexture->sizeX; size_t texHeight = this->currTexture->sizeY; - if (this->_textureDeposterizeBuffer != NULL) + if (this->_textureDeposterizeDstSurface.Surface != NULL) { this->TextureDeposterize(textureSrc, texWidth, texHeight); - textureSrc = this->_textureDeposterizeBuffer; + textureSrc = (u32 *)this->_textureDeposterizeDstSurface.Surface; } switch (this->_textureScalingFactor) Modified: trunk/desmume/src/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj =================================================================== --- trunk/desmume/src/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj 2016-09-25 22:09:13 UTC (rev 5560) +++ trunk/desmume/src/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj 2016-09-29 00:58:04 UTC (rev 5561) @@ -112,6 +112,10 @@ AB2EE13117D57F5000F68622 /* fsnitro.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB2EE13017D57F5000F68622 /* fsnitro.cpp */; }; AB2EE13217D57F5000F68622 /* fsnitro.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB2EE13017D57F5000F68622 /* fsnitro.cpp */; }; AB2EE13317D57F5000F68622 /* fsnitro.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB2EE13017D57F5000F68622 /* fsnitro.cpp */; }; + AB301BDF1D9C8BAC00246A93 /* deposterize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB301BDE1D9C8BAC00246A93 /* deposterize.cpp */; }; + AB301BE01D9C8BCD00246A93 /* deposterize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
AB301BDE1D9C8BAC00246A93 /* deposterize.cpp */; }; + AB301BE11D9C8BCE00246A93 /* deposterize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB301BDE1D9C8BAC00246A93 /* deposterize.cpp */; }; + AB301BE21D9C8BCF00246A93 /* deposterize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB301BDE1D9C8BAC00246A93 /* deposterize.cpp */; }; AB350BA51478AC96007165AC /* IOKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AB350BA41478AC96007165AC /* IOKit.framework */; }; AB350D3B147A1D93007165AC /* HID_usage_strings.plist in Resources */ = {isa = PBXBuildFile; fileRef = AB350D3A147A1D93007165AC /* HID_usage_strings.plist */; }; AB3701E5173A3FBF006E573E /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AB74EC891738499C0026C41E /* Carbon.framework */; }; @@ -1323,6 +1327,7 @@ AB2EE12B17D57ED500F68622 /* slot1_retail_mcrom_debug.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = slot1_retail_mcrom_debug.cpp; sourceTree = "<group>"; }; AB2EE12F17D57F5000F68622 /* fsnitro.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = fsnitro.h; sourceTree = "<group>"; }; AB2EE13017D57F5000F68622 /* fsnitro.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fsnitro.cpp; sourceTree = "<group>"; }; + AB301BDE1D9C8BAC00246A93 /* deposterize.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deposterize.cpp; sourceTree = "<group>"; }; AB350BA41478AC96007165AC /* IOKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = IOKit.framework; path = System/Library/Frameworks/IOKit.framework; sourceTree = SDKROOT; }; AB350D38147A1D8D007165AC /* English */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = English; path = translations/English.lproj/HID_usage_strings.plist; sourceTree = "<group>"; }; 
AB3A655C16CC5416001F5D4A /* EmuControllerDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = EmuControllerDelegate.h; sourceTree = "<group>"; }; @@ -2926,6 +2931,7 @@ children = ( ABFE14FA14C92FF5005D6699 /* 2xsai.cpp */, ABFE14FB14C92FF5005D6699 /* bilinear.cpp */, + AB301BDE1D9C8BAC00246A93 /* deposterize.cpp */, ABFE14FC14C92FF5005D6699 /* epx.cpp */, ABFE14FE14C92FF5005D6699 /* hq2x.cpp */, AB4C81E31B21676C00ACECD5 /* hq3x.cpp */, @@ -3847,6 +3853,7 @@ ABD1041C1346652500AF11D1 /* cocoa_input.mm in Sources */, AB3E34C9134AF4500056477A /* cocoa_output.mm in Sources */, ABFEA8CB1BB4EC1100B08C25 /* smooth.c in Sources */, + AB301BE11D9C8BCE00246A93 /* deposterize.cpp in Sources */, ABD1041E1346652500AF11D1 /* cocoa_rom.mm in Sources */, AB80E04D142BC4A800A52038 /* cocoa_util.mm in Sources */, ABE5DFE5143FB1DA00835AD8 /* cocoa_videofilter.mm in Sources */, @@ -3951,6 +3958,7 @@ AB796CF815CDCBA200C59155 /* cp15.cpp in Sources */, AB796CF915CDCBA200C59155 /* cpu_detect_x86_gcc.cpp in Sources */, AB796CFA15CDCBA200C59155 /* crc.cpp in Sources */, + AB301BDF1D9C8BAC00246A93 /* deposterize.cpp in Sources */, AB796CFB15CDCBA200C59155 /* datetime.cpp in Sources */, AB796CFC15CDCBA200C59155 /* debug.cpp in Sources */, ABFEA82E1BB4EC1100B08C25 /* ftlcdfil.c in Sources */, @@ -4157,6 +4165,7 @@ ABFEA8361BB4EC1100B08C25 /* ftmm.c in Sources */, ABFEA81E1BB4EC1000B08C25 /* ftfstype.c in Sources */, ABA731601BB51E7000B26147 /* pshinter.c in Sources */, + AB301BE01D9C8BCD00246A93 /* deposterize.cpp in Sources */, ABFEA8211BB4EC1000B08C25 /* ftgasp.c in Sources */, ABFEA83C1BB4EC1100B08C25 /* ftotval.c in Sources */, ABFEA8181BB4EC1000B08C25 /* ftdebug.c in Sources */, @@ -4448,6 +4457,7 @@ ABB3C6B81501C04F00E0C22E /* common.cpp in Sources */, ABB3C6B91501C04F00E0C22E /* cp15.cpp in Sources */, AB407F371A6206FB00313213 /* xbrz.cpp in Sources */, + AB301BE21D9C8BCF00246A93 /* deposterize.cpp in Sources */, ABB3C6BA1501C04F00E0C22E 
/* debug.cpp in Sources */, ABB3C6BB1501C04F00E0C22E /* Disassembler.cpp in Sources */, ABB3C6BC1501C04F00E0C22E /* driver.cpp in Sources */, Modified: trunk/desmume/src/cocoa/DeSmuME (XCode 3).xcodeproj/project.pbxproj =================================================================== --- trunk/desmume/src/cocoa/DeSmuME (XCode 3).xcodeproj/project.pbxproj 2016-09-25 22:09:13 UTC (rev 5560) +++ trunk/desmume/src/cocoa/DeSmuME (XCode 3).xcodeproj/project.pbxproj 2016-09-29 00:58:04 UTC (rev 5561) @@ -1418,6 +1418,11 @@ ABB9212317CEB4110049D4C5 /* slot1comp_protocol.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABB9212017CEB4110049D4C5 /* slot1comp_protocol.cpp */; }; ABB9212417CEB4110049D4C5 /* slot1comp_protocol.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABB9212017CEB4110049D4C5 /* slot1comp_protocol.cpp */; }; ABB9212517CEB4110049D4C5 /* slot1comp_protocol.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABB9212017CEB4110049D4C5 /* slot1comp_protocol.cpp */; }; + ABBB4ACD1D9C927C00794E08 /* deposterize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBB4ACC1D9C927C00794E08 /* deposterize.cpp */; }; + ABBB4ACE1D9C927C00794E08 /* deposterize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBB4ACC1D9C927C00794E08 /* deposterize.cpp */; }; + ABBB4ACF1D9C927C00794E08 /* deposterize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBB4ACC1D9C927C00794E08 /* deposterize.cpp */; }; + ABBB4AD01D9C927C00794E08 /* deposterize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBB4ACC1D9C927C00794E08 /* deposterize.cpp */; }; + ABBB4AD11D9C927C00794E08 /* deposterize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBB4ACC1D9C927C00794E08 /* deposterize.cpp */; }; ABBCE29715ACB1FF00A2C965 /* arm_jit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBCE29515ACB1FF00A2C965 /* arm_jit.cpp */; }; ABBCE29815ACB1FF00A2C965 /* arm_jit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = ABBCE29515ACB1FF00A2C965 /* arm_jit.cpp */; }; 
ABBF04A614B515F300E505A0 /* AppIcon_ROMCheats.icns in Resources */ = {isa = PBXBuildFile; fileRef = ABBF04A414B515F300E505A0 /* AppIcon_ROMCheats.icns */; }; @@ -1953,6 +1958,7 @@ ABB97873144E89CC00793FA3 /* Icon_ActionReplay_32x32.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_ActionReplay_32x32.png; path = Images/Icon_ActionReplay_32x32.png; sourceTree = "<group>"; }; ABB97874144E89CC00793FA3 /* Icon_CodeBreaker_32x32.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_CodeBreaker_32x32.png; path = Images/Icon_CodeBreaker_32x32.png; sourceTree = "<group>"; }; ABB97875144E89CC00793FA3 /* Icon_DeSmuME_32x32.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_DeSmuME_32x32.png; path = Images/Icon_DeSmuME_32x32.png; sourceTree = "<group>"; }; + ABBB4ACC1D9C927C00794E08 /* deposterize.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deposterize.cpp; sourceTree = "<group>"; }; ABBC0F8C1394B1AA0028B6BD /* DefaultUserPrefs.plist */ = {isa = PBXFileReference; lastKnownFileType = file.bplist; path = DefaultUserPrefs.plist; sourceTree = "<group>"; }; ABBCE29415ACB1E600A2C965 /* arm_jit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = arm_jit.h; path = ../arm_jit.h; sourceTree = SOURCE_ROOT; }; ABBCE29515ACB1FF00A2C965 /* arm_jit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = arm_jit.cpp; path = ../arm_jit.cpp; sourceTree = SOURCE_ROOT; }; @@ -3453,6 +3459,7 @@ children = ( ABFE14FA14C92FF5005D6699 /* 2xsai.cpp */, ABFE14FB14C92FF5005D6699 /* bilinear.cpp */, + ABBB4ACC1D9C927C00794E08 /* deposterize.cpp */, ABFE14FC14C92FF5005D6699 /* epx.cpp */, ABFE14FE14C92FF5005D6699 /* hq2x.cpp */, ABAAEFFE1B22361800E1269D /* hq3x.cpp */, @@ -4540,6 +4547,7 @@ AB50200C1D09E712002FA150 /* retro_stat.c in Sources */, AB7BB17F1D62C8CC00A7A6E2 /* colorspacehandler.cpp 
in Sources */, AB7BB1801D62C8CF00A7A6E2 /* colorspacehandler_AltiVec.cpp in Sources */, + ABBB4AD11D9C927C00794E08 /* deposterize.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -4721,6 +4729,7 @@ AB5020181D09E712002FA150 /* retro_stat.c in Sources */, AB37E3801D6188BC004A2C0D /* colorspacehandler.cpp in Sources */, AB37E38A1D61895F004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */, + ABBB4AD01D9C927C00794E08 /* deposterize.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -4932,6 +4941,7 @@ AB50200F1D09E712002FA150 /* retro_stat.c in Sources */, AB37E3741D6188BC004A2C0D /* colorspacehandler.cpp in Sources */, AB37E3771D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */, + ABBB4ACD1D9C927C00794E08 /* deposterize.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -5143,6 +5153,7 @@ AB5020121D09E712002FA150 /* retro_stat.c in Sources */, AB37E3781D6188BC004A2C0D /* colorspacehandler.cpp in Sources */, AB37E37B1D6188BC004A2C0D /* colorspacehandler_SSE2.cpp in Sources */, + ABBB4ACE1D9C927C00794E08 /* deposterize.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -5324,6 +5335,7 @@ AB5020151D09E712002FA150 /* retro_stat.c in Sources */, AB37E37C1D6188BC004A2C0D /* colorspacehandler.cpp in Sources */, AB37E37D1D6188BC004A2C0D /* colorspacehandler_AltiVec.cpp in Sources */, + ABBB4ACF1D9C927C00794E08 /* deposterize.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; Added: trunk/desmume/src/filter/deposterize.cpp =================================================================== --- trunk/desmume/src/filter/deposterize.cpp (rev 0) +++ trunk/desmume/src/filter/deposterize.cpp 2016-09-29 00:58:04 UTC (rev 5561) @@ -0,0 +1,186 @@ +/* + Copyright (C) 2016 DeSmuME team + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + 
(at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the this software. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "../types.h" +#include "filter.h" + +static u32 Deposterize_InterpLTE(const u32 pixA, const u32 pixB, const u32 threshold) +{ + const u32 aB = (pixB & 0xFF000000) >> 24; + if (aB == 0) + { + return pixA; + } + + const u32 rA = (pixA & 0x000000FF); + const u32 gA = (pixA & 0x0000FF00) >> 8; + const u32 bA = (pixA & 0x00FF0000) >> 16; + const u32 aA = (pixA & 0xFF000000) >> 24; + + const u32 rB = (pixB & 0x000000FF); + const u32 gB = (pixB & 0x0000FF00) >> 8; + const u32 bB = (pixB & 0x00FF0000) >> 16; + + const u32 rC = ( (rB - rA <= threshold) || (rA - rB <= threshold) ) ? ( ((rA+rB)>>1) ) : rA; + const u32 gC = ( (gB - gA <= threshold) || (gA - gB <= threshold) ) ? ( ((gA+gB)>>1) ) : gA; + const u32 bC = ( (bB - bA <= threshold) || (bA - bB <= threshold) ) ? ( ((bA+bB)>>1) ) : bA; + const u32 aC = ( (bB - aA <= threshold) || (aA - aB <= threshold) ) ? 
( ((aA+aB)>>1) ) : aA; + + return (rC | (gC << 8) | (bC << 16) | (aC << 24)); +} + +static u32 Deposterize_Blend(const u32 pixA, const u32 pixB, const u32 weightA, const u32 weightB) +{ + const u32 aB = (pixB & 0xFF000000) >> 24; + if (aB == 0) + { + return pixA; + } + + const u32 weightSum = weightA + weightB; + + const u32 rbA = pixA & 0x00FF00FF; + const u32 gA = pixA & 0x0000FF00; + const u32 aA = (pixA & 0xFF000000) >> 24; + + const u32 rbB = pixB & 0x00FF00FF; + const u32 gB = pixB & 0x0000FF00; + + const u32 rbC = ( ((rbA * weightA) + (rbB * weightB)) / weightSum ) & 0x00FF00FF; + const u32 gC = ( (( gA * weightA) + ( gB * weightB)) / weightSum ) & 0x0000FF00; + const u32 aC = ( (( aA * weightA) + ( aB * weightB)) / weightSum ) << 24; + + return (rbC | gC | aC); +} + +void RenderDeposterize(SSurface Src, SSurface Dst) +{ + //---------------------------------------\n\ + // Input Pixel Mapping: 06|07|08 + // 05|00|01 + // 04|03|02 + // + // Output Pixel Mapping: 00 + + const int w = Src.Width; + const int h = Src.Height; + + u32 color[9]; + u32 blend[9]; + u32 *src = (u32 *)Src.Surface; + u32 *workingDst = (u32 *)Dst.workingSurface[0]; + u32 *finalDst = (u32 *)Dst.Surface; + u32 threshold = *(u32 *)Dst.userData; + + int i = 0; + for (int y = 0; y < h; y++) + { + for (int x = 0; x < w; x++, i++) + { + if ((src[i] & 0xFF000000) == 0) + { + workingDst[i] = src[i]; + continue; + } + + color[0] = src[i]; + color[1] = (x < w-1) ? src[i+1] : src[i]; + color[2] = ((x < w-1) && (y < h-1)) ? src[i+w+1] : src[i]; + color[3] = (y < h-1) ? src[i+w] : src[i]; + color[4] = ((x > 0) && (y < h-1)) ? src[i+w-1] : src[i]; + color[5] = (x > 0) ? src[i-1] : src[i]; + color[6] = ((x > 0) && (y > 0)) ? src[i-w-1] : src[i]; + color[7] = (y > 0) ? src[i-w] : src[i]; + color[8] = ((x < w-1) && (y > 0)) ? 
src[i-w+1] : src[i]; + + blend[0] = color[0]; + blend[1] = Deposterize_InterpLTE(color[0], color[1], threshold); + blend[2] = Deposterize_InterpLTE(color[0], color[2], threshold); + blend[3] = Deposterize_InterpLTE(color[0], color[3], threshold); + blend[4] = Deposterize_InterpLTE(color[0], color[4], threshold); + blend[5] = Deposterize_InterpLTE(color[0], color[5], threshold); + blend[6] = Deposterize_InterpLTE(color[0], color[6], threshold); + blend[7] = Deposterize_InterpLTE(color[0], color[7], threshold); + blend[8] = Deposterize_InterpLTE(color[0], color[8], threshold); + + workingDst[i] = Deposterize_Blend(Deposterize_Blend(Deposterize_Blend(Deposterize_Blend(blend[0], blend[5], 1, 7), + Deposterize_Blend(blend[0], blend[1], 1, 7), + 1, 1), + Deposterize_Blend(Deposterize_Blend(blend[0], blend[7], 1, 7), + Deposterize_Blend(blend[0], blend[3], 1, 7), + 1, 1), + 1, 1), + Deposterize_Blend(Deposterize_Blend(Deposterize_Blend(blend[0], blend[6], 7, 9), + Deposterize_Blend(blend[0], blend[2], 7, 9), + 1, 1), + Deposterize_Blend(Deposterize_Blend(blend[0], blend[8], 7, 9), + Deposterize_Blend(blend[0], blend[4], 7, 9), + 1, 1), + 1, 1), + 3, 1); + } + } + + i = 0; + for (int y = 0; y < h; y++) + { + for (int x = 0; x < w; x++, i++) + { + if ((src[i] & 0xFF000000) == 0) + { + finalDst[i] = src[i]; + continue; + } + + color[0] = workingDst[i]; + color[1] = (x < w-1) ? workingDst[i+1] : workingDst[i]; + color[2] = ((x < w-1) && (y < h-1)) ? workingDst[i+w+1] : workingDst[i]; + color[3] = (y < h-1) ? workingDst[i+w] : workingDst[i]; + color[4] = ((x > 0) && (y < h-1)) ? workingDst[i+w-1] : workingDst[i]; + color[5] = (x > 0) ? workingDst[i-1] : workingDst[i]; + color[6] = ((x > 0) && (y > 0)) ? workingDst[i-w-1] : workingDst[i]; + color[7] = (y > 0) ? workingDst[i-w] : workingDst[i]; + color[8] = ((x < w-1) && (y > 0)) ? 
workingDst[i-w+1] : workingDst[i]; + + blend[0] = color[0]; + blend[1] = Deposterize_InterpLTE(color[0], color[1], threshold); + blend[2] = Deposterize_InterpLTE(color[0], color[2], threshold); + blend[3] = Deposterize_InterpLTE(color[0], color[3], threshold); + blend[4] = Deposterize_InterpLTE(color[0], color[4], threshold); + blend[5] = Deposterize_InterpLTE(color[0], color[5], threshold); + blend[6] = Deposterize_InterpLTE(color[0], color[6], threshold); + blend[7] = Deposterize_InterpLTE(color[0], color[7], threshold); + blend[8] = Deposterize_InterpLTE(color[0], color[8], threshold); + + finalDst[i] = Deposterize_Blend(Deposterize_Blend(Deposterize_Blend(Deposterize_Blend(blend[0], blend[5], 1, 7), + Deposterize_Blend(blend[0], blend[1], 1, 7), + 1, 1), + Deposterize_Blend(Deposterize_Blend(blend[0], blend[7], 1, 7), + Deposterize_Blend(blend[0], blend[3], 1, 7), + 1, 1), + 1, 1), + Deposterize_Blend(Deposterize_Blend(Deposterize_Blend(blend[0], blend[6], 7, 9), + Deposterize_Blend(blend[0], blend[2], 7, 9), + 1, 1), + Deposterize_Blend(Deposterize_Blend(blend[0], blend[8], 7, 9), + Deposterize_Blend(blend[0], blend[4], 7, 9), + 1, 1), + 1, 1), + 3, 1); + } + } +} Modified: trunk/desmume/src/filter/filter.h =================================================================== --- trunk/desmume/src/filter/filter.h 2016-09-25 22:09:13 UTC (rev 5560) +++ trunk/desmume/src/filter/filter.h 2016-09-29 00:58:04 UTC (rev 5561) @@ -1,20 +1,23 @@ /* -Copyright (C) 2009-2014 DeSmuME team + Copyright (C) 2009-2016 DeSmuME team + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the this software. If not, see <http://www.gnu.org/licenses/>. + */ -This file is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 2 of the License, or -(at your option) any later version. +#ifndef _IMAGE_FILTER_ +#define _IMAGE_FILTER_ -This file is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with the this software. If not, see <http://www.gnu.org/licenses/>. -*/ - #define FILTER_MAX_WORKING_SURFACE_COUNT 8 typedef struct { @@ -27,6 +30,8 @@ void *userData; } SSurface; +void RenderDeposterize(SSurface Src, SSurface Dst); + void RenderNearest2X (SSurface Src, SSurface Dst); void RenderLQ2X (SSurface Src, SSurface Dst); void RenderLQ2XS (SSurface Src, SSurface Dst); @@ -52,3 +57,5 @@ void Render4xBRZ(SSurface Src, SSurface Dst); void Render5xBRZ(SSurface Src, SSurface Dst); void Render6xBRZ(SSurface Src, SSurface Dst); + +#endif // _IMAGE_FILTER_ Modified: trunk/desmume/src/render3D.cpp =================================================================== --- trunk/desmume/src/render3D.cpp 2016-09-25 22:09:13 UTC (rev 5560) +++ trunk/desmume/src/render3D.cpp 2016-09-29 00:58:04 UTC (rev 5561) @@ -29,6 +29,7 @@ #include "gfx3d.h" #include "MMU.h" #include "texcache.h" +#include "./filter/filter.h" #include "./filter/xbrz.h" #define TEXTURE_DEPOSTERIZE_THRESHOLD 21 // Possible values are [0-255], where lower a value prevents blending and a higher value allows for more blending @@ -127,55 +128,6 @@ } } -static u32 TextureDeposterize_InterpLTE(const u32 pixA, const u32 
pixB, const u32 threshold) -{ - const u32 aB = (pixB & 0xFF000000) >> 24; - if (aB == 0) - { - return pixA; - } - - const u32 rA = (pixA & 0x000000FF); - const u32 gA = (pixA & 0x0000FF00) >> 8; - const u32 bA = (pixA & 0x00FF0000) >> 16; - const u32 aA = (pixA & 0xFF000000) >> 24; - - const u32 rB = (pixB & 0x000000FF); - const u32 gB = (pixB & 0x0000FF00) >> 8; - const u32 bB = (pixB & 0x00FF0000) >> 16; - - const u32 rC = ( (rB - rA <= threshold) || (rA - rB <= threshold) ) ? ( ((rA+rB)>>1) ) : rA; - const u32 gC = ( (gB - gA <= threshold) || (gA - gB <= threshold) ) ? ( ((gA+gB)>>1) ) : gA; - const u32 bC = ( (bB - bA <= threshold) || (bA - bB <= threshold) ) ? ( ((bA+bB)>>1) ) : bA; - const u32 aC = ( (bB - aA <= threshold) || (aA - aB <= threshold) ) ? ( ((aA+aB)>>1) ) : aA; - - return (rC | (gC << 8) | (bC << 16) | (aC << 24)); -} - -static u32 TextureDeposterize_Blend(const u32 pixA, const u32 pixB, const u32 weightA, const u32 weightB) -{ - const u32 aB = (pixB & 0xFF000000) >> 24; - if (aB == 0) - { - return pixA; - } - - const u32 weightSum = weightA + weightB; - - const u32 rbA = pixA & 0x00FF00FF; - const u32 gA = pixA & 0x0000FF00; - const u32 aA = (pixA & 0xFF000000) >> 24; - - const u32 rbB = pixB & 0x00FF00FF; - const u32 gB = pixB & 0x0000FF00; - - const u32 rbC = ( ((rbA * weightA) + (rbB * weightB)) / weightSum ) & 0x00FF00FF; - const u32 gC = ( (( gA * weightA) + ( gB * weightB)) / weightSum ) & 0x0000FF00; - const u32 aC = ( (( aA * weightA) + ( aB * weightB)) / weightSum ) << 24; - - return (rbC | gC | aC); -} - FragmentAttributesBuffer::FragmentAttributesBuffer(size_t newCount) { count = newCount; @@ -285,15 +237,28 @@ _textureScalingFactor = 1; _textureSmooth = false; - _textureDeposterizeBuffer = NULL; _textureUpscaleBuffer = NULL; + _textureDeposterizeThreshold = TEXTURE_DEPOSTERIZE_THRESHOLD; + memset(&_textureDeposterizeSrcSurface, 0, sizeof(_textureDeposterizeSrcSurface)); + memset(&_textureDeposterizeDstSurface, 0, 
sizeof(_textureDeposterizeDstSurface)); + + _textureDeposterizeSrcSurface.Width = _textureDeposterizeDstSurface.Width = 1; + _textureDeposterizeSrcSurface.Height = _textureDeposterizeDstSurface.Height = 1; + _textureDeposterizeSrcSurface.Pitch = _textureDeposterizeDstSurface.Pitch = 1; + _textureDeposterizeDstSurface.userData = &_textureDeposterizeThreshold; + Reset(); } Render3D::~Render3D() { - // Do nothing. + if (this->_textureDeposterizeDstSurface.Surface != NULL) + { + free_aligned(this->_textureDeposterizeDstSurface.Surface); + this->_textureDeposterizeDstSurface.Surface = NULL; + this->_textureDeposterizeDstSurface.workingSurface[0] = NULL; + } } const Render3DDeviceInfo& Render3D::GetDeviceInfo() @@ -385,20 +350,24 @@ const size_t newScalingFactor = (isScaleValid) ? scalingFactor : 1; bool needTexCacheReset = false; - if ( willDeposterize && (this->_textureDeposterizeBuffer == NULL) ) + if ( willDeposterize && (this->_textureDeposterizeDstSurface.Surface == NULL) ) { // 1024x1024 texels is the largest possible texture size. // We need two buffers, one for each deposterize stage. 
const size_t bufferSize = 1024 * 1024 * 2 * sizeof(u32); - this->_textureDeposterizeBuffer = (u32 *)malloc_alignedCacheLine(bufferSize); - memset(this->_textureDeposterizeBuffer, 0, bufferSize); + this->_textureDeposterizeDstSurface.Surface = (unsigned char *)malloc_alignedCacheLine(bufferSize); + this->_textureDeposterizeDstSurface.workingSurface[0] = (unsigned char *)((u32 *)this->_textureDeposterizeDstSurface.Surface + (1024 * 1024)); + + memset(this->_textureDeposterizeDstSurface.Surface, 0, bufferSize); + needTexCacheReset = true; } - else if ( !willDeposterize && (this->_textureDeposterizeBuffer != NULL) ) + else if ( !willDeposterize && (this->_textureDeposterizeDstSurface.Surface != NULL) ) { - free_aligned(this->_textureDeposterizeBuffer); - this->_textureDeposterizeBuffer = NULL; + free_aligned(this->_textureDeposterizeDstSurface.Surface); + this->_textureDeposterizeDstSurface.Surface = NULL; + this->_textureDeposterizeDstSurface.workingSurface[0] = NULL; needTexCacheReset = true; } @@ -429,119 +398,12 @@ Render3DError Render3D::TextureDeposterize(const u32 *src, const size_t srcTexWidth, const size_t srcTexHeight) { - //---------------------------------------\n\ - // Input Pixel Mapping: 06|07|08 - // 05|00|01 - // 04|03|02 - // - // Output Pixel Mapping: 00 + this->_textureDeposterizeSrcSurface.Width = this->_textureDeposterizeDstSurface.Width = srcTexWidth; + this->_textureDeposterizeSrcSurface.Height = this->_textureDeposterizeDstSurface.Height = srcTexHeight; + this->_textureDeposterizeSrcSurface.Surface = (unsigned char *)src; - const int w = srcTexWidth; - const int h = srcTexHeight; + RenderDeposterize(this->_textureDeposterizeSrcSurface, this->_textureDeposterizeDstSurface); - u32 color[9]; - u32 blend[9]; - u32 *dst = this->_textureDeposterizeBuffer + (1024 * 1024); - u32 *finalDst = this->_textureDeposterizeBuffer; - - size_t i = 0; - for (int y = 0; y < h; y++) - { - for (int x = 0; x < w; x++, i++) - { - if ((src[i] & 0xFF000000) == 0) - { - 
dst[i] = src[i]; - continue; - } - - color[0] = src[i]; - color[1] = (x < w-1) ? src[i+1] : src[i]; - color[2] = ((x < w-1) && (y < h-1)) ? src[i+w+1] : src[i]; - color[3] = (y < h-1) ? src[i+w] : src[i]; - color[4] = ((x > 0) && (y < h-1)) ? src[i+w-1] : src[i]; - color[5] = (x > 0) ? src[i-1] : src[i]; - color[6] = ((x > 0) && (y > 0)) ? src[i-w-1] : src[i]; - color[7] = (y > 0) ? src[i-w] : src[i]; - color[8] = ((x < w-1) && (y > 0)) ? src[i-w+1] : src[i]; - - blend[0] = color[0]; - blend[1] = TextureDeposterize_InterpLTE(color[0], color[1], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[2] = TextureDeposterize_InterpLTE(color[0], color[2], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[3] = TextureDeposterize_InterpLTE(color[0], color[3], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[4] = TextureDeposterize_InterpLTE(color[0], color[4], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[5] = TextureDeposterize_InterpLTE(color[0], color[5], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[6] = TextureDeposterize_InterpLTE(color[0], color[6], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[7] = TextureDeposterize_InterpLTE(color[0], color[7], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[8] = TextureDeposterize_InterpLTE(color[0], color[8], TEXTURE_DEPOSTERIZE_THRESHOLD); - - dst[i] = TextureDeposterize_Blend(TextureDeposterize_Blend(TextureDeposterize_Blend(TextureDeposterize_Blend(blend[0], blend[5], 1, 7), - TextureDeposterize_Blend(blend[0], blend[1], 1, 7), - 1, 1), - TextureDeposterize_Blend(TextureDeposterize_Blend(blend[0], blend[7], 1, 7), - TextureDeposterize_Blend(blend[0], blend[3], 1, 7), - 1, 1), - 1, 1), - TextureDeposterize_Blend(TextureDeposterize_Blend(TextureDeposterize_Blend(blend[0], blend[6], 7, 9), - TextureDeposterize_Blend(blend[0], blend[2], 7, 9), - 1, 1), - TextureDeposterize_Blend(TextureDeposterize_Blend(blend[0], blend[8], 7, 9), - TextureDeposterize_Blend(blend[0], blend[4], 7, 9), - 1, 1), - 1, 1), - 3, 1); - } - } - - i = 0; - for (int y = 0; y < h; y++) - { - for (int x = 0; x < w; 
x++, i++) - { - if ((src[i] & 0xFF000000) == 0) - { - finalDst[i] = src[i]; - continue; - } - - color[0] = dst[i]; - color[1] = (x < w-1) ? dst[i+1] : dst[i]; - color[2] = ((x < w-1) && (y < h-1)) ? dst[i+w+1] : dst[i]; - color[3] = (y < h-1) ? dst[i+w] : dst[i]; - color[4] = ((x > 0) && (y < h-1)) ? dst[i+w-1] : dst[i]; - color[5] = (x > 0) ? dst[i-1] : dst[i]; - color[6] = ((x > 0) && (y > 0)) ? dst[i-w-1] : dst[i]; - color[7] = (y > 0) ? dst[i-w] : dst[i]; - color[8] = ((x < w-1) && (y > 0)) ? dst[i-w+1] : dst[i]; - - blend[0] = color[0]; - blend[1] = TextureDeposterize_InterpLTE(color[0], color[1], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[2] = TextureDeposterize_InterpLTE(color[0], color[2], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[3] = TextureDeposterize_InterpLTE(color[0], color[3], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[4] = TextureDeposterize_InterpLTE(color[0], color[4], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[5] = TextureDeposterize_InterpLTE(color[0], color[5], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[6] = TextureDeposterize_InterpLTE(color[0], color[6], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[7] = TextureDeposterize_InterpLTE(color[0], color[7], TEXTURE_DEPOSTERIZE_THRESHOLD); - blend[8] = TextureDeposterize_InterpLTE(color[0], color[8], TEXTURE_DEPOSTERIZE_THRESHOLD); - - finalDst[i] = TextureDeposterize_Blend(TextureDeposterize_Blend(TextureDeposterize_Blend(TextureDeposterize_Blend(blend[0], blend[5], 1, 7), - TextureDeposterize_Blend(blend[0], blend[1], 1, 7), - 1, 1), - TextureDeposterize_Blend(TextureDeposterize_Blend(blend[0], blend[7], 1, 7), - TextureDeposterize_Blend(blend[0], blend[3], 1, 7), - 1, 1), - 1, 1), - TextureDeposterize_Blend(TextureDeposterize_Blend(TextureDeposterize_Blend(blend[0], blend[6], 7, 9), - TextureDeposterize_Blend(blend[0], blend[2], 7, 9), - 1, 1), - TextureDeposterize_Blend(TextureDeposterize_Blend(blend[0], blend[8], 7, 9), - TextureDeposterize_Blend(blend[0], blend[4], 7, 9), - 1, 1), - 1, 1), - 3, 1); - } - } - 
return RENDER3DERROR_NOERR; } Modified: trunk/desmume/src/render3D.h =================================================================== --- trunk/desmume/src/render3D.h 2016-09-25 22:09:13 UTC (rev 5560) +++ trunk/desmume/src/render3D.h 2016-09-29 00:58:04 UTC (rev 5561) @@ -21,6 +21,7 @@ #include "gfx3d.h" #include "types.h" +#include "./filter/filter.h" #define kUnsetTranslucentPolyID 255 @@ -130,7 +131,12 @@ size_t _textureScalingFactor; bool _textureSmooth; - u32 *_textureDeposterizeBuffer; + + SSurface _textureDeposterizeSrcSurface; + SSurface _textureDeposterizeDstSurface; + u32 _textureDeposterizeThreshold; + + //u32 *_textureDeposterizeBuffer; u32 *_textureUpscaleBuffer; CACHE_ALIGN u16 clearImageColor16Buffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; Modified: trunk/desmume/src/windows/DeSmuME.vcxproj =================================================================== --- trunk/desmume/src/windows/DeSmuME.vcxproj 2016-09-25 22:09:13 UTC (rev 5560) +++ trunk/desmume/src/windows/DeSmuME.vcxproj 2016-09-29 00:58:04 UTC (rev 5561) @@ -87,6 +87,7 @@ <ClCompile Include="..\FIFO.cpp" /> <ClCompile Include="..\filter\2xsai.cpp" /> <ClCompile Include="..\filter\bilinear.cpp" /> + <ClCompile Include="..\filter\deposterize.cpp" /> <ClCompile Include="..\filter\epx.cpp" /> <ClCompile Include="..\filter\hq2x.cpp" /> <ClCompile Include="..\filter\hq4x.cpp" /> Modified: trunk/desmume/src/windows/DeSmuME.vcxproj.filters =================================================================== --- trunk/desmume/src/windows/DeSmuME.vcxproj.filters 2016-09-25 22:09:13 UTC (rev 5560) +++ trunk/desmume/src/windows/DeSmuME.vcxproj.filters 2016-09-29 00:58:04 UTC (rev 5561) @@ -975,6 +975,9 @@ <ClCompile Include="..\utils\colorspacehandler\colorspacehandler_SSE2.cpp"> <Filter>Core\utils\colorspacehandler</Filter> </ClCompile> + <ClCompile Include="..\filter\deposterize.cpp"> + <Filter>Core\filter</Filter> + </ClCompile> </ItemGroup> <ItemGroup> <ClInclude 
Include="..\armcpu.h"> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ze...@us...> - 2016-09-25 22:09:15
|
Revision: 5560 http://sourceforge.net/p/desmume/code/5560 Author: zeromus Date: 2016-09-25 22:09:13 +0000 (Sun, 25 Sep 2016) Log Message: ----------- fix bugs in MC import. I dont want to talk about it. Modified Paths: -------------- trunk/desmume/src/emufile.h trunk/desmume/src/mc.cpp Modified: trunk/desmume/src/emufile.h =================================================================== --- trunk/desmume/src/emufile.h 2016-09-14 21:49:47 UTC (rev 5559) +++ trunk/desmume/src/emufile.h 2016-09-25 22:09:13 UTC (rev 5560) @@ -280,6 +280,7 @@ { mPositionCacheEnabled = false; mCondition = eCondition_Clean; + mFilePosition = 0; fp = fopen(fname,mode); if(!fp) failbit = true; Modified: trunk/desmume/src/mc.cpp =================================================================== --- trunk/desmume/src/mc.cpp 2016-09-14 21:49:47 UTC (rev 5559) +++ trunk/desmume/src/mc.cpp 2016-09-25 22:09:13 UTC (rev 5560) @@ -294,7 +294,9 @@ else { printf("BackupDevice: Converting old raw .sav file.\n"); - sz = trim(buf, sz); + //dont TRIM this! it will wreck the searchFileSaveType below. + //was this intended for egregiously over-sized save files? too bad. 
+ //sz = trim(buf, sz); } if (fpOut->fwrite(buf, sz) == sz) @@ -305,6 +307,7 @@ info.type = (res + 1); addr_size = info.addr_size = save_types[info.type].addr_size; info.size = fsize = sz; + fpMC = fpOut; //so ensure() works ensure(sz, fpOut); fsize = 0; } @@ -1069,8 +1072,10 @@ bool res = false; if (strlen(filename) < 4) return res; - if ((memcmp(filename + strlen(filename) - 4, ".duc", 4) == 0) || - (memcmp(filename + strlen(filename) - 4, ".dss", 4) == 0)) + std::string ext = strright(filename,4); + bool isDuc = strncasecmp(ext.c_str(), ".duc", 4) == 0; + bool isDss = strncasecmp(ext.c_str(), ".dss", 4) == 0; + if(isDuc || isDss) res = import_duc(filename, force_size); else if (import_no_gba(filename, force_size)) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <rog...@us...> - 2016-09-14 21:49:50
|
Revision: 5559 http://sourceforge.net/p/desmume/code/5559 Author: rogerman Date: 2016-09-14 21:49:47 +0000 (Wed, 14 Sep 2016) Log Message: ----------- Cocoa Port: - OS X App Debug builds now use Xcode 8's new Incremental LTO feature. Modified Paths: -------------- trunk/desmume/src/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj Modified: trunk/desmume/src/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj =================================================================== --- trunk/desmume/src/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj 2016-09-08 22:39:31 UTC (rev 5558) +++ trunk/desmume/src/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj 2016-09-14 21:49:47 UTC (rev 5559) @@ -4577,13 +4577,13 @@ buildSettings = { GCC_OPTIMIZATION_LEVEL = fast; GCC_UNROLL_LOOPS = YES; + LLVM_LTO = YES_THIN; }; name = Debug; }; AB796D6F15CDCBA200C59155 /* Release */ = { isa = XCBuildConfiguration; buildSettings = { - GCC_OPTIMIZATION_LEVEL = fast; GCC_UNROLL_LOOPS = YES; LLVM_LTO = YES; }; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ze...@us...> - 2016-09-08 22:39:33
|
Revision: 5558 http://sourceforge.net/p/desmume/code/5558 Author: zeromus Date: 2016-09-08 22:39:31 +0000 (Thu, 08 Sep 2016) Log Message: ----------- do a better job on the language setting, probably Modified Paths: -------------- trunk/desmume/src/commandline.cpp trunk/desmume/src/windows/main.cpp trunk/desmume/src/windows/main.h Modified: trunk/desmume/src/commandline.cpp =================================================================== --- trunk/desmume/src/commandline.cpp 2016-09-08 22:31:31 UTC (rev 5557) +++ trunk/desmume/src/commandline.cpp 2016-09-08 22:39:31 UTC (rev 5558) @@ -67,7 +67,7 @@ , start_paused(FALSE) , autodetect_method(-1) , render3d(COMMANDLINE_RENDER3D_DEFAULT) -, language(-1) +, language(1) //english by default { #ifndef HOST_WINDOWS disable_sound = 0; @@ -118,7 +118,9 @@ " --bios-arm9 BIN_FILE Uses the ARM9 BIOS provided at the specified path" ENDL " --bios-arm7 BIN_FILE Uses the ARM7 BIOS provided at the specified path" ENDL " --bios-swi Uses SWI from the provided bios files (else HLE)" ENDL -" --lang N Pick firmware language (can affect game translations)" ENDL +" --lang N Firmware language (can affect game translations)" ENDL +" 0 = Japanese, 1 = English (default), 2 = French" ENDL +" 3 = German, 4 = Italian, 5 = Spanish" ENDL ENDL "Arguments affecting contents of SLOT-1:" ENDL " --slot1 [RETAIL|RETAILAUTO|R4|RETAILNAND|RETAILMCDROM|RETAILDEBUG]" ENDL Modified: trunk/desmume/src/windows/main.cpp =================================================================== --- trunk/desmume/src/windows/main.cpp 2016-09-08 22:31:31 UTC (rev 5557) +++ trunk/desmume/src/windows/main.cpp 2016-09-08 22:39:31 UTC (rev 5558) @@ -3383,8 +3383,8 @@ } } - if(cmdline.language != -1) - CommonSettings.fw_config.language = cmdline.language; + //not supported; use the GUI + //if(cmdline.language != -1) CommonSettings.fw_config.language = cmdline.language; cmdline.process_movieCommands(); Modified: trunk/desmume/src/windows/main.h 
=================================================================== --- trunk/desmume/src/windows/main.h 2016-09-08 22:31:31 UTC (rev 5557) +++ trunk/desmume/src/windows/main.h 2016-09-08 22:39:31 UTC (rev 5558) @@ -57,15 +57,6 @@ #define GPU3D_SWRAST 2 #define GPU3D_OPENGL_OLD 3 -static const int LANGUAGE_ENGLISH = 0; -static const int LANGUAGE_FRENCH = 1; -static const int LANGUAGE_CHINESE = 3; -static const int LANGUAGE_ITALIAN = 4; -static const int LANGUAGE_JAPANESE = 5; -static const int LANGUAGE_SPANISH = 6; -static const int LANGUAGE_KOREAN = 7; -static const int LANGUAGE_BRAZILIAN = 8; - extern void Change3DCoreWithFallbackAndSave(int newCore); extern int backupmemorytype; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ze...@us...> - 2016-09-08 22:31:33
|
Revision: 5557 http://sourceforge.net/p/desmume/code/5557 Author: zeromus Date: 2016-09-08 22:31:31 +0000 (Thu, 08 Sep 2016) Log Message: ----------- try to apply patches from #1593 Modified Paths: -------------- trunk/desmume/src/commandline.cpp trunk/desmume/src/commandline.h trunk/desmume/src/gtk/main.cpp trunk/desmume/src/windows/main.cpp Modified: trunk/desmume/src/commandline.cpp =================================================================== --- trunk/desmume/src/commandline.cpp 2016-09-06 21:17:32 UTC (rev 5556) +++ trunk/desmume/src/commandline.cpp 2016-09-08 22:31:31 UTC (rev 5557) @@ -67,6 +67,7 @@ , start_paused(FALSE) , autodetect_method(-1) , render3d(COMMANDLINE_RENDER3D_DEFAULT) +, language(-1) { #ifndef HOST_WINDOWS disable_sound = 0; @@ -117,6 +118,7 @@ " --bios-arm9 BIN_FILE Uses the ARM9 BIOS provided at the specified path" ENDL " --bios-arm7 BIN_FILE Uses the ARM7 BIOS provided at the specified path" ENDL " --bios-swi Uses SWI from the provided bios files (else HLE)" ENDL +" --lang N Pick firmware language (can affect game translations)" ENDL ENDL "Arguments affecting contents of SLOT-1:" ENDL " --slot1 [RETAIL|RETAILAUTO|R4|RETAILNAND|RETAILMCDROM|RETAILDEBUG]" ENDL @@ -163,6 +165,7 @@ #define OPT_CONSOLE_TYPE 200 #define OPT_ARM9 201 #define OPT_ARM7 202 +#define OPT_LANGUAGE 203 #define OPT_SLOT1 300 #define OPT_SLOT1_FAT_DIR 301 @@ -224,7 +227,8 @@ { "console-type", required_argument, NULL, OPT_CONSOLE_TYPE }, { "bios-arm9", required_argument, NULL, OPT_ARM9}, { "bios-arm7", required_argument, NULL, OPT_ARM7}, - { "bios-swi", required_argument, &_bios_swi, 1}, + { "bios-swi", no_argument, &_bios_swi, 1}, + { "lang", required_argument, NULL, OPT_LANGUAGE}, //slot-1 contents { "slot1", required_argument, NULL, OPT_SLOT1}, @@ -308,6 +312,7 @@ //utilities case OPT_ADVANSCENE: CommonSettings.run_advanscene_import = optarg; break; + case OPT_LANGUAGE: language = atoi(optarg); break; } } //arg parsing loop @@ -354,10 +359,10 @@ //process 3d 
renderer _render3d = strtoupper(_render3d); if(_render3d == "NONE") render3d = COMMANDLINE_RENDER3D_NONE; - if(_render3d == "SW") render3d = COMMANDLINE_RENDER3D_SW; - if(_render3d == "OLDGL") render3d = COMMANDLINE_RENDER3D_OLDGL; - if(_render3d == "AUTOGL") render3d = COMMANDLINE_RENDER3D_AUTOGL; - if(_render3d == "GL") render3d = COMMANDLINE_RENDER3D_GL; + else if(_render3d == "SW") render3d = COMMANDLINE_RENDER3D_SW; + else if(_render3d == "OLDGL") render3d = COMMANDLINE_RENDER3D_OLDGL; + else if(_render3d == "AUTOGL") render3d = COMMANDLINE_RENDER3D_AUTOGL; + else if(_render3d == "GL") render3d = COMMANDLINE_RENDER3D_GL; if (autodetect_method != -1) CommonSettings.autodetectBackupMethod = autodetect_method; Modified: trunk/desmume/src/commandline.h =================================================================== --- trunk/desmume/src/commandline.h 2016-09-06 21:17:32 UTC (rev 5556) +++ trunk/desmume/src/commandline.h 2016-09-08 22:31:31 UTC (rev 5557) @@ -47,6 +47,7 @@ int depth_threshold; int autodetect_method; int render3d; + int language; std::string nds_file; std::string play_movie_file; std::string record_movie_file; Modified: trunk/desmume/src/gtk/main.cpp =================================================================== --- trunk/desmume/src/gtk/main.cpp 2016-09-06 21:17:32 UTC (rev 5556) +++ trunk/desmume/src/gtk/main.cpp 2016-09-08 22:31:31 UTC (rev 5557) @@ -1,6 +1,6 @@ /* main.cpp - this file is part of DeSmuME * - * Copyright (C) 2006-2015 DeSmuME Team + * Copyright (C) 2006-2016 DeSmuME Team * Copyright (C) 2007 Pascal Giard (evilynux) * * This file is free software; you can redistribute it and/or modify @@ -649,24 +649,30 @@ }; static void -init_configured_features( class configured_features *config) +init_configured_features( class configured_features *config ) { - config->engine_3d = 1; + if(config->render3d == COMMANDLINE_RENDER3D_GL || config->render3d == COMMANDLINE_RENDER3D_OLDGL || config->render3d == COMMANDLINE_RENDER3D_AUTOGL) + 
config->engine_3d = 2; + else + config->engine_3d = 1; config->savetype = 0; config->timeout = 0; /* use the default language */ - config->firmware_language = -1; + config->firmware_language = -1; + + /* If specified by --lang option the lang will change to choosed one */ + config->firmware_language = config->language; } static int fill_configured_features( class configured_features *config, - int argc, char ** argv) + char ** argv) { GOptionEntry options[] = { - { "3d-engine", 0, 0, G_OPTION_ARG_INT, &config->engine_3d, "Select 3d rendering engine. Available engines:\n" + { "3d-render", 0, 0, G_OPTION_ARG_INT, &config->engine_3d, "Select 3D rendering engine. Available engines:\n" "\t\t\t\t 0 = 3d disabled\n" "\t\t\t\t 1 = internal rasterizer (default)\n" #if defined(HAVE_LIBOSMESA) || defined(HAVE_GL_GLX) @@ -696,7 +702,6 @@ //g_option_context_add_main_entries (config->ctx, options, "options"); //g_option_context_add_group (config->ctx, gtk_get_option_group (TRUE)); - config->parse(argc,argv); if(!config->validate()) goto error; @@ -3252,6 +3257,7 @@ // The global menu screws up the window size... unsetenv("UBUNTU_MENUPROXY"); + my_config.parse(argc, argv); init_configured_features( &my_config); if (!g_thread_supported()) @@ -3259,7 +3265,7 @@ gtk_init(&argc, &argv); - if ( !fill_configured_features( &my_config, argc, argv)) { + if ( !fill_configured_features( &my_config, argv)) { exit(0); } Modified: trunk/desmume/src/windows/main.cpp =================================================================== --- trunk/desmume/src/windows/main.cpp 2016-09-06 21:17:32 UTC (rev 5556) +++ trunk/desmume/src/windows/main.cpp 2016-09-08 22:31:31 UTC (rev 5557) @@ -3383,6 +3383,9 @@ } } + if(cmdline.language != -1) + CommonSettings.fw_config.language = cmdline.language; + cmdline.process_movieCommands(); if(cmdline.load_slot != -1) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ze...@us...> - 2016-09-06 21:17:34
|
Revision: 5556 http://sourceforge.net/p/desmume/code/5556 Author: zeromus Date: 2016-09-06 21:17:32 +0000 (Tue, 06 Sep 2016) Log Message: ----------- winport: fix #1590 1 pixel black line on the right side of the emulator Modified Paths: -------------- trunk/desmume/src/windows/CWindow.cpp Modified: trunk/desmume/src/windows/CWindow.cpp =================================================================== --- trunk/desmume/src/windows/CWindow.cpp 2016-09-06 18:09:00 UTC (rev 5555) +++ trunk/desmume/src/windows/CWindow.cpp 2016-09-06 21:17:32 UTC (rev 5556) @@ -621,8 +621,10 @@ ZeroMemory(&mbi, sizeof(mbi)); mbi.cbSize = sizeof(mbi); GetMenuBarInfo(hwnd, OBJID_MENU, 0, &mbi); - //int menuHeight = (mbi.rcBar.bottom - mbi.rcBar.top + 1); //zero 07-aug-2016 - why did I do this? it isn't normal in windows and in the case of no menu bar it was making a 1 instead of a 0 (r3184 in 2009) + + //if the menubar exists, its height is off by 1 (frame between bar and client area?) int menuHeight = (mbi.rcBar.bottom - mbi.rcBar.top); + if(menuHeight != 0) menuHeight++; rect->bottom -= cymenu; rect->bottom += menuHeight; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ze...@us...> - 2016-09-06 18:09:03
|
Revision: 5555 http://sourceforge.net/p/desmume/code/5555 Author: zeromus Date: 2016-09-06 18:09:00 +0000 (Tue, 06 Sep 2016) Log Message: ----------- placement of fastbuild hacks in gpu.cpp breaks some build types. tired of fixing it locally. not a safe hack anyway. Modified Paths: -------------- trunk/desmume/src/GPU.cpp Modified: trunk/desmume/src/GPU.cpp =================================================================== --- trunk/desmume/src/GPU.cpp 2016-09-02 01:15:26 UTC (rev 5554) +++ trunk/desmume/src/GPU.cpp 2016-09-06 18:09:00 UTC (rev 5555) @@ -18,14 +18,6 @@ along with the this software. If not, see <http://www.gnu.org/licenses/>. */ -#ifdef FASTBUILD - #undef FORCEINLINE - #define FORCEINLINE - //compilation speed hack (cuts time exactly in half by cutting out permutations) - #define DISABLE_MOSAIC - #define DISABLE_COLOREFFECTDISABLEHINT -#endif - #include "GPU.h" #include <assert.h> @@ -48,6 +40,13 @@ #include "matrix.h" #include "emufile.h" +#ifdef FASTBUILD + #undef FORCEINLINE + #define FORCEINLINE + //compilation speed hack (cuts time exactly in half by cutting out permutations) + #define DISABLE_MOSAIC + #define DISABLE_COLOREFFECTDISABLEHINT +#endif //instantiate static instance u16 GPUEngineBase::_brightnessUpTable555[17][0x8000]; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ze...@us...> - 2016-09-02 01:15:28
|
Revision: 5554 http://sourceforge.net/p/desmume/code/5554 Author: zeromus Date: 2016-09-02 01:15:26 +0000 (Fri, 02 Sep 2016) Log Message: ----------- w32 scond: reorder variable declarations and statements for ancient compilers Modified Paths: -------------- trunk/desmume/src/libretro-common/rthreads/rthreads.c Modified: trunk/desmume/src/libretro-common/rthreads/rthreads.c =================================================================== --- trunk/desmume/src/libretro-common/rthreads/rthreads.c 2016-08-29 20:08:07 UTC (rev 5553) +++ trunk/desmume/src/libretro-common/rthreads/rthreads.c 2016-09-02 01:15:26 UTC (rev 5554) @@ -396,11 +396,11 @@ /* add ourselves to a queue of waiting threads */ struct QueueEntry myentry; - myentry.next = NULL; - struct QueueEntry** ptr = &cond->head; + struct QueueEntry** ptr = &cond->head; while(*ptr) /* walk to the end of the linked list */ ptr = &((*ptr)->next); *ptr = &myentry; + myentry.next = NULL; cond->waiters++; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |