From: Jed B. <je...@59...> - 2011-10-14 03:45:09
|
I'm tripping an assert that Derek and John are not seeing. From the debugging session below, I have no idea why, unless they really aren't building with DEBUG on. Reproduce with: ./ex3-dbg -d 2 #ifdef DEBUG libmesh_assert (tot_size == di.size()); #endif ... because tot_size is 6, di.size() is 9 tot_size is set by tot_size += FEInterface::n_dofs(dim, fe_type, type); The arguments were dim = 2 fe_type = {.order = libMeshEnums::SECOND, .family = libMeshEnums::LAGRANGE} type = libMeshEnums::QUAD9 and the value 6 is returned from fe_xyz.C:931 by case TRI3: case TRI6: case QUAD4: case QUAD8: case QUAD9: return 6; I don't understand this interpretation of n_dofs(), but elem->n_nodes() returns 9 which makes perfect sense to me. Internals of elem below. (gdb) p *elem $22 = { <libMesh::ReferenceCountedObject<libMesh::Elem>> = { <libMesh::ReferenceCounter> = { _vptr.ReferenceCounter = 0x7ffff7d8b3f0, static _counts = { <std::__cxx1998::map<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<unsigned int, unsigned int>, std::less<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::pair<unsigned int, unsigned int> > > >> = { _M_t = { _M_impl = { <std::allocator<std::_Rb_tree_node<std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::pair<unsigned int, unsigned int> > > >> = { <__gnu_cxx::new_allocator<std::_Rb_tree_node<std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::pair<unsigned int, unsigned int> > > >> = {<No data fields>}, <No data fields>}, members of std::_Rb_tree<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::pair<unsigned int, unsigned int> >, std::_Select1st<std::pair<std::basic_string<char, 
std::char_traits<char>, std::allocator<char> > const, std::pair<unsigned int, unsigned int> > >, std::less<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::pair<unsigned int, unsigned int> > > >::_Rb_tree_impl<std::less<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, false>: _M_key_compare = { <std::binary_function<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::basic_string<char, std::char_traits<char>, std::allocator<char> >, bool>> = {<No data fields>}, <No data fields>}, _M_header = { _M_color = std::_S_red, _M_parent = 0x946c50, _M_left = 0x946d10, _M_right = 0x67feb0 }, _M_node_count = 11 } } }, <__gnu_debug::_Safe_sequence<std::__debug::map<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<unsigned int, unsigned int>, std::less<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::pair<unsigned int, unsigned int> > > > >> = { <__gnu_debug::_Safe_sequence_base> = { _M_iterators = 0x0, _M_const_iterators = 0x0, _M_version = 1 }, <No data fields>}, <No data fields>}, static _n_objects = { <tbb::atomic<unsigned int>> = { <tbb::internal::atomic_impl_with_arithmetic<unsigned int, unsigned int, char>> = { <tbb::internal::atomic_impl<unsigned int>> = { rep = { value = 2384 } }, <No data fields>}, <No data fields>}, <No data fields>}, static _mutex = { flag = 0 '\000', static is_rw_mutex = false, static is_recursive_mutex = false, static is_fair_mutex = false }, static _enable_print_counter = true }, <No data fields>}, <libMesh::DofObject> = { <libMesh::ReferenceCountedObject<libMesh::DofObject>> = { <libMesh::ReferenceCounter> = { _vptr.ReferenceCounter = 0x7ffff7d8b578, static _counts = { 
<std::__cxx1998::map<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<unsigned int, unsigned int>, std::less<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::pair<unsigned int, unsigned int> > > >> = { _M_t = { _M_impl = { <std::allocator<std::_Rb_tree_node<std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::pair<unsigned int, unsigned int> > > >> = { <__gnu_cxx::new_allocator<std::_Rb_tree_node<std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::pair<unsigned int, unsigned int> > > >> = {<No data fields>}, <No data fields>}, members of std::_Rb_tree<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::pair<unsigned int, unsigned int> >, std::_Select1st<std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::pair<unsigned int, unsigned int> > >, std::less<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::pair<unsigned int, unsigned int> > > >::_Rb_tree_impl<std::less<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, false>: _M_key_compare = { <std::binary_function<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::basic_string<char, std::char_traits<char>, std::allocator<char> >, bool>> = {<No data fields>}, <No data fields>}, _M_header = { _M_color = std::_S_red, _M_parent = 0x946c50, _M_left = 0x946d10, _M_right = 0x67feb0 }, _M_node_count = 11 } } }, <__gnu_debug::_Safe_sequence<std::__debug::map<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<unsigned int, 
unsigned int>, std::less<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::pair<unsigned int, unsigned int> > > > >> = { <__gnu_debug::_Safe_sequence_base> = { _M_iterators = 0x0, _M_const_iterators = 0x0, _M_version = 1 }, <No data fields>}, <No data fields>}, static _n_objects = { <tbb::atomic<unsigned int>> = { <tbb::internal::atomic_impl_with_arithmetic<unsigned int, unsigned int, char>> = { <tbb::internal::atomic_impl<unsigned int>> = { rep = { value = 2384 } }, <No data fields>}, <No data fields>}, <No data fields>}, static _mutex = { flag = 0 '\000', static is_rw_mutex = false, static is_recursive_mutex = false, static is_fair_mutex = false }, static _enable_print_counter = true }, <No data fields>}, members of libMesh::DofObject: old_dof_object = 0x0, static invalid_id = 4294967295, static invalid_processor_id = 65535, _id = 0, _processor_id = 0, _idx_buf = { <std::__cxx1998::vector<unsigned int, std::allocator<unsigned int> >> = { <std::__cxx1998::_Vector_base<unsigned int, std::allocator<unsigned int> >> = { _M_impl = { <std::allocator<unsigned int>> = { <__gnu_cxx::new_allocator<unsigned int>> = {<No data fields>}, <No data fields>}, members of std::__cxx1998::_Vector_base<unsigned int, std::allocator<unsigned int> >::_Vector_impl: _M_start = 0x94f430, _M_finish = 0x94f43c, _M_end_of_storage = 0x94f43c } }, <No data fields>}, <__gnu_debug::_Safe_sequence<std::__debug::vector<unsigned int, std::allocator<unsigned int> > >> = { <__gnu_debug::_Safe_sequence_base> = { _M_iterators = 0x0, _M_const_iterators = 0x0, _M_version = 1 }, <No data fields>}, members of std::__debug::vector<unsigned int, std::allocator<unsigned int> >: _M_guaranteed_capacity = 3 } }, members of libMesh::Elem: static type_to_n_nodes_map = {2, 3, 4, 3, 6, 4, 8, 9, 4, 10, 8, 20, 27, 6, 15, 18, 5, 2, 4, 6, 8, 16, 18, 6, 16, 1, 0}, _nodes = 0x937b78, 
_elemlinks = 0x937b48, _children = 0x0, _rflag = 1 '\001', _pflag = 1 '\001', _p_level = 0 '\000', _sbd_id = 0, static _bp1 = 65449, static _bp2 = 48661 } |
From: Roy S. <roy...@ic...> - 2011-10-14 04:26:46
|
On Thu, 13 Oct 2011, Jed Brown wrote: > fe_type = {.order = libMeshEnums::SECOND, .family = libMeshEnums::LAGRANGE} > > and the value 6 is returned from fe_xyz.C:931 by Here's your problem: the linker isn't calling the right function. It's a linker problem I've seen before, too, where for some reason the XYZ instantiation gets triggered to handle a LAGRANGE call. I don't remember exactly what caused it, though. Something about the fe_* object files getting built or linked out of their usual order, maybe? Try hunting the mailing list archives. Note that I don't say it's a linker error: I'm pretty sure that Ben's fancy template partial partial specialization tricks aren't actually legal C++. We just haven't fixed that because the tricks work using our Makefiles with all the compiler/linkers we try, and because fixing them would require a major refactoring. --- Roy |
From: Derek G. <fri...@gm...> - 2011-10-14 04:33:05
|
Indeed - I suspected as much. Are you coming to the meeting tomorrow morning Jed? We can definitely work on this some then… Derek On Oct 13, 2011, at 10:26 PM, Roy Stogner wrote: > > On Thu, 13 Oct 2011, Jed Brown wrote: > >> fe_type = {.order = libMeshEnums::SECOND, .family = libMeshEnums::LAGRANGE} >> and the value 6 is returned from fe_xyz.C:931 by > > Here's your problem: the linker isn't calling the right function. > > It's a linker problem I've seen before, too, where for some reason the > XYZ instantiation gets triggered to handle a LAGRANGE call. I don't > remember exactly what caused it, though. Something about the fe_* > object files getting built or linked out of their usual order, maybe? > Try hunting the mailing list archives. > > Note that I don't say it's a linker error: I'm pretty sure that Ben's > fancy template partial partial specialization tricks aren't actually > legal C++. We just haven't fixed that because the tricks work using > our Makefiles with all the compiler/linkers we try, and because fixing > them would require a major refactoring. > --- > Roy------------------------------------------------------------------------------ > All the data continuously generated in your IT infrastructure contains a > definitive record of customers, application performance, security > threats, fraudulent activity and more. Splunk takes this data and makes > sense of it. Business sense. IT sense. Common sense. > http://p.sf.net/sfu/splunk-d2d-oct_______________________________________________ > Libmesh-devel mailing list > Lib...@li... > https://lists.sourceforge.net/lists/listinfo/libmesh-devel |
From: Jed B. <je...@59...> - 2011-10-14 05:16:07
|
On Thu, Oct 13, 2011 at 23:32, Derek Gaston <fri...@gm...> wrote: > Indeed - I suspected as much. > > Are you coming to the meeting tomorrow morning Jed? We can definitely work > on this some then… > Yeah, I'll be there. I was using gcc-4.6.1 on Arch Linux. I tried to rebuild with clang-2.9, but ran into -fPIC issues that I haven't tried debugging yet. |
From: John P. <jwp...@gm...> - 2011-10-14 12:48:37
|
On Thu, Oct 13, 2011 at 10:26 PM, Roy Stogner <roy...@ic...> wrote: > > On Thu, 13 Oct 2011, Jed Brown wrote: > >> fe_type = {.order = libMeshEnums::SECOND, .family = >> libMeshEnums::LAGRANGE} >> >> and the value 6 is returned from fe_xyz.C:931 by > > Here's your problem: the linker isn't calling the right function. > > It's a linker problem I've seen before, too, where for some reason the > XYZ instantiation gets triggered to handle a LAGRANGE call. I don't > remember exactly what caused it, though. Something about the fe_* > object files getting built or linked out of their usual order, maybe? > Try hunting the mailing list archives. http://sourceforge.net/mailarchive/message.php?msg_id=27134833 > Note that I don't say it's a linker error: I'm pretty sure that Ben's > fancy template partial partial specialization tricks aren't actually > legal C++. Interesting, can you be more specific? If we are invoking some kind of undefined behavior that would be good to fix... -- John |
From: Cody P. <Cod...@gm...> - 2011-10-18 23:38:02
|
On Oct 14, 2011, at 6:48 AM, John Peterson wrote: > On Thu, Oct 13, 2011 at 10:26 PM, Roy Stogner <roy...@ic...> wrote: >> >> On Thu, 13 Oct 2011, Jed Brown wrote: >> >>> fe_type = {.order = libMeshEnums::SECOND, .family = >>> libMeshEnums::LAGRANGE} >>> >>> and the value 6 is returned from fe_xyz.C:931 by >> >> Here's your problem: the linker isn't calling the right function. >> >> It's a linker problem I've seen before, too, where for some reason the >> XYZ instantiation gets triggered to handle a LAGRANGE call. I don't >> remember exactly what caused it, though. Something about the fe_* >> object files getting built or linked out of their usual order, maybe? >> Try hunting the mailing list archives. > > http://sourceforge.net/mailarchive/message.php?msg_id=27134833 > >> Note that I don't say it's a linker error: I'm pretty sure that Ben's >> fancy template partial partial specialization tricks aren't actually >> legal C++. > > Interesting, can you be more specific? If we are invoking some kind > of undefined behavior that would be good to fix... > We just checked in a couple of extra template instantiations as we were running into a different but similar error that you showed us last week. We are still having a debate as to what causes the error but "missing" instantiations appear to have caused a different ordering of the symbols in the final library file. We were able to force a different order simply by touching "fe_map.C" and rebuilding which indicates a problem. The additional explicit lines we added appear to have resolved our problems for now. Jed, you may want to try an update from HEAD and see if your problem disappears too. Cody > -- > John > > ------------------------------------------------------------------------------ > All the data continuously generated in your IT infrastructure contains a > definitive record of customers, application performance, security > threats, fraudulent activity and more. Splunk takes this data and makes > sense of it. 
Business sense. IT sense. Common sense. > http://p.sf.net/sfu/splunk-d2d-oct > _______________________________________________ > Libmesh-devel mailing list > Lib...@li... > https://lists.sourceforge.net/lists/listinfo/libmesh-devel |
From: John P. <jwp...@gm...> - 2011-10-24 15:24:38
|
On Tue, Oct 18, 2011 at 10:18 PM, Jed Brown <jed...@mc...> wrote: > > I rebuilt everything and sadly, the same problem is still present with > r4873. One other difference between Mac and Linux we noted was the ordering of linking imposed by make's $(wildcard ) functionality. (This ordering may also be file-system dependent?) Anyway, the Makefile patch below seems to fix things for us on openSUSE 11.4/GCC 4.5.1/GNU ld 2.21. If you'd like to try applying the patch below and see if it fixes anything, it would at least give us another data point. If this does fix the problem on other Linux systems, it could imply some issue with the libmesh source (probably explicit template instantiations) but I'm not really sure where to start looking? -- John Index: Makefile.const =================================================================== --- Makefile.const (revision 4876) +++ Makefile.const (working copy) @@ -49,7 +49,7 @@ # # object files -objects := $(patsubst %.C, %.$(obj-suffix), $(srcfiles)) $(patsubst %.c, %.$(obj-suffix), $(csrcfiles)) +objects := $(sort $(patsubst %.C, %.$(obj-suffix), $(srcfiles))) $(sort $(patsubst %.c, %.$(obj-suffix), $(csrcfiles))) # # logged files -- all files you might want to log information for |
From: Jed B. <je...@59...> - 2011-10-24 19:01:57
|
On Mon, Oct 24, 2011 at 10:24, John Peterson <jwp...@gm...> wrote: > On Tue, Oct 18, 2011 at 10:18 PM, Jed Brown <jed...@mc...> wrote: > > > > I rebuilt everything and sadly, the same problem is still present with > > r4873. > > One other difference between Mac and Linux we noted was the ordering > of linking imposed by make's $(wildcard ) functionality. (This > ordering may also be file-system dependent?) > > Anyway, the Makefile patch below seems to fix things for us on > openSUSE 11.4/GCC 4.5.1/GNU ld 2.21. > > If you'd like try applying the patch below and seeing if it fixes > anything, it would at least give us another data point. > > If this does fix the problem on other linux's, it could imply some > issue with the libmesh source (probably explicit template > instantiations) but I'm not really sure where to start looking? > Yes, this patch fixes the problem for me too. Thanks for tracking this down. |
From: John P. <jwp...@gm...> - 2011-10-27 17:58:06
|
On Mon, Oct 24, 2011 at 1:01 PM, Jed Brown <je...@59...> wrote: > On Mon, Oct 24, 2011 at 10:24, John Peterson <jwp...@gm...> wrote: >> >> On Tue, Oct 18, 2011 at 10:18 PM, Jed Brown <jed...@mc...> wrote: >> > >> > I rebuilt everything and sadly, the same problem is still present with >> > r4873. >> >> One other difference between Mac and Linux we noted was the ordering >> of linking imposed by make's $(wildcard ) functionality. (This >> ordering may also be file-system dependent?) >> >> Anyway, the Makefile patch below seems to fix things for us on >> openSUSE 11.4/GCC 4.5.1/GNU ld 2.21. >> >> If you'd like try applying the patch below and seeing if it fixes >> anything, it would at least give us another data point. >> >> If this does fix the problem on other linux's, it could imply some >> issue with the libmesh source (probably explicit template >> instantiations) but I'm not really sure where to start looking? > > Yes, this patch fixes the problem for me too. Thanks for tracking this down. I've investigated this issue a bit further, and as Jed deduced, the FE<Dim,XYZ> version of n_dofs() is indeed being called where the FE<Dim,LAGRANGE> version should be. This issue is not limited to ex4, e.g. it happens in ex3 as well. But in that case, it causes the Newton iteration in inverse_map() to fail rather than tripping an assert. Interestingly, the problem is not present in debug mode, and the problem goes away if you link the object files in alphabetical order (as the previous patch I sent demonstrated.) This makes the bug really annoying and somewhat delicate to reproduce on different systems. I don't know anything about the internals of the linker that would explain why this happens (buggy linker function name resolution?) but I do have what I think is a reasonable fix that should be portable. 
First, I contend we are doing something slightly strange with the FE template functions to begin with: we're using explicit *instantiation* to generate template code rather than explicit *specialization*. Conceptually I'd say these two techniques are similar, but they are syntactically different and the standard certainly distinguishes between them[0]. What our code looks like in practice is essentially: fe_lagrange.C ------------- // function definition unsigned int FE<Dim,T>::n_dofs(const ElemType t, const Order o) { // function body specialized for Lagrange } ... // explicit instantiation (for 2D, but all Dims are instantiated with a macro) unsigned int FE<2,LAGRANGE>::n_dofs(const ElemType t, const Order o); fe_xyz.C -------- // function definition unsigned int FE<Dim,T>::n_dofs(const ElemType t, const Order o) { // function body specialized for XYZ } ... // explicit instantiation (for 2D, but all Dims are instantiated with a macro) unsigned int FE<2,XYZ>::n_dofs(const ElemType t, const Order o); I think this is legal C++... at least I haven't found anything stating it's illegal, but it's not exactly in the spirit of what we're doing. We're specializing the behavior of n_dofs() for different template arguments; it follows that we should probably use the template specialization syntax to do so. And indeed, changing the n_dofs() function in fe_lagrange.C to use explicit specialization[1] rather than instantiation *does* fix the problem on the system I have access to. (It works regardless of what order the object files are linked in, etc.) There's no reason the problem will always be limited to n_dofs(), so I'm planning to change all the FE function instantiations to specializations. There's also no reason for this problem to be limited to Lagrange FEM, so I'm changing the other FE types as well. 
Obviously this will be a fairly large patch, so I would appreciate it if folks using/testing some of the less common FE types will let me know if I've broken their stuff once the patch comes through! -- John [0] For example, 14.7 p5 states: "both an explicit instantiation and a declaration of an explicit specialization shall not appear in a program unless the explicit instantiation follows a declaration of the explicit specialization." [1] For those interested, my fix is to declare a function "lagrange_n_dofs()" in an anonymous namespace in fe_lagrange.C, and then do the explicit specialization: template <> unsigned int FE<2,LAGRANGE>::n_dofs(const ElemType t, const Order o) { return lagrange_n_dofs(t, o); } for all relevant dimensions. The logic of lagrange_n_dofs() is the same for all dimensions. |
From: John P. <jwp...@gm...> - 2011-11-02 19:48:01
|
On Thu, Oct 27, 2011 at 11:57 AM, John Peterson <jwp...@gm...> wrote: > > There's no reason the problem will always be limited to n_dofs(), so > I'm planning to change all the FE function instantiations to > specializations. There's also no reason for this problem to be > limited to Lagrange FEM, so I'm changing the other FE types as well. > Obviously this will be a fairly large patch, so I would appreciate it > if folks using/testing some of the less common FE types will let me > know if I've broken their stuff once the patch comes through! The first round of template FE function refactoring has been checked in. There is a separate SVN revision for each file (and the examples should run cleanly for all versions) so it should be easy to bisect back to the initial appearance of any bugs, if necessary. I believe the only types of FE's we don't regularly test are BERNSTEIN and SZABAB (?) so if anyone has code which depends on those it would be nice if you could test it out with the new patches. Finally, these patches should fix all of the weird "linker calls the wrong template function" errors we've seen lately on various Linux machines, so please also let me know if that's not the case. -- John |
From: Roy S. <roy...@ic...> - 2011-10-27 18:05:41
|
On Thu, 27 Oct 2011, John Peterson wrote: > We're specializing the behavior of n_dofs() for different template > arguments; it follows that we should probably use the template > specialization syntax to do so. Here's the catch: template specialization doesn't exist for templated functions in C++, only for templated classes. I assume that's why we've been "cheating" by trying to trick the linker in the first place. The "right" thing to do is probably to indeed provide specialized declarations/definitions for the whole subclass in each case, but that's going to be an annoyingly huge amount of code. --- Roy |
From: John P. <jwp...@gm...> - 2011-10-27 18:19:24
|
On Thu, Oct 27, 2011 at 12:05 PM, Roy Stogner <roy...@ic...> wrote: > > On Thu, 27 Oct 2011, John Peterson wrote: > >> We're specializing the behavior of n_dofs() for different template >> arguments; it follows that we should probably use the template >> specialization syntax to do so. > > Here's the catch: template specialization doesn't exist for templated > functions in C++, only for templated classes. 14.7.2 p1 of my standard draft states: "A class, a function or member template specialization can be explicitly instantiated from its template." Perhaps you are thinking of *partial* specialization for template functions? That is not allowed AFAIK. -- John |
From: Roy S. <roy...@ic...> - 2011-10-27 18:21:14
|
On Thu, 27 Oct 2011, John Peterson wrote: > On Thu, Oct 27, 2011 at 12:05 PM, Roy Stogner <roy...@ic...> wrote: >> >> On Thu, 27 Oct 2011, John Peterson wrote: >> >>> We're specializing the behavior of n_dofs() for different template >>> arguments; it follows that we should probably use the template >>> specialization syntax to do so. >> >> Here's the catch: template specialization doesn't exist for templated >> functions in C++, only for templated classes. > > 14.7.2 p1 of my standard draft states: > > "A class, a function or member template specialization can be > explicitly instantiated from its template." > > Perhaps you are thinking of *partial* specialization for template > functions? That is not allowed AFAIK. Ah, right; that's how I got confused. Thanks. --- Roy |