I experimented a bit with expression templates and rope data structures to understand what benefits they can offer. In simple cases it works very well. However, if I concatenate more than 5 arguments, the compiler no longer optimizes the expression fully and creates unnecessary temporaries. Can someone enlighten me as to why this is happening? Is this a compiler limitation, or do my optimization options need tuning?
I am using g++ 4.4.1 (mingw32) with the following options: -O3 -Winline -Wextra -Wall -fno-exceptions -fno-rtti -fomit-frame-pointer -fexpensive-optimizations -fverbose-asm -S
The code below is just an experiment, so it does not try to follow any coding standards:
#include <stdio.h>
// CRTP base shared by the rope expression nodes: `Derived` is the concrete
// node type that inherits from rope_base<Derived>.
template<class Derived>
struct rope_base {
    // Returns this object viewed as the concrete node type `Derived`.
    Derived const & ref() const;
};
// Owning, heap-allocated, NUL-terminated character buffer that is produced by
// evaluating a rope expression.
//
// The destructor releases m_value with delete[], so a member-wise copy would
// leave two objects freeing the same buffer. Copying is therefore disabled:
// the copy constructor and copy assignment are declared private and never
// defined (the pre-C++11 idiom, matching the rest of this file).
struct string {
    size_t m_length;   // number of characters, not counting the terminating NUL
    char * m_value;    // buffer of m_length + 1 chars, owned by this object

    // Evaluates `rope`: sizes the buffer from rope.ref().length() and lets the
    // rope write its characters into it.
    template<typename Derived>
    string(const rope_base<Derived> & rope);
    // Frees the owned buffer.
    ~string();
    // Pointer to the NUL-terminated characters.
    const char * data() const;
    // Number of characters, not counting the terminating NUL.
    size_t length() const;
    // NOTE(review): declared here, but no definition is visible in this part
    // of the file; presumably it copies the characters to dst like
    // static_string::write_to -- confirm before using a string inside a rope.
    char * write_to(char * dst) const;

private:
    // Not copyable: see the ownership note above. Intentionally left undefined.
    string(const string &);
    string & operator=(const string &);
};
// Non-owning view of a C string together with its cached length.
struct static_string {
    char const * m_value;    // borrowed pointer; this type declares no destructor and never frees it
    size_t const m_length;   // length of m_value, fixed at construction

    // Not explicit: main() relies on string literals converting implicitly
    // when they appear as operands of `+`.
    static_string(char const * value);

    // Cached length of the viewed string.
    size_t length() const;

    // Copies the viewed characters to dst and returns the position just past them.
    char * write_to(char * dst) const;
};
// Selects how a rope stores an operand of type T.
// Primary template: the operand is held as a const object, i.e. by value.
template<class T>
struct rope_traits {
    typedef T const type;
};
// A `string` operand is stored as a reference to const instead of a copy.
template<>
struct rope_traits<string> {
    typedef string const & type;
};
// A `static_string` operand is stored as a reference to const instead of a copy.
template<>
struct rope_traits<static_string> {
    typedef static_string const & type;
};
// Expression node for the concatenation of a left and a right operand.
// How each operand is stored (const reference or const copy) is decided by
// rope_traits for the operand's type.
template<class Left, class Right>
struct rope : public rope_base< rope<Left, Right> > {
    typename rope_traits<Left>::type m_left;     // left operand
    typename rope_traits<Right>::type m_right;   // right operand

    // Binds or copies both operands according to their storage types.
    rope(Left const & left, Right const & right);

    // Combined length of both operands.
    size_t length() const;

    // Writes the left operand, then the right one, starting at dst; returns
    // the position just past the last character written.
    char * write_to(char * dst) const;
};
// Keeps the pointer as-is (no copy of the characters) and caches its strlen.
inline static_string::static_string(const char * value)
    : m_value(value),
      m_length(__builtin_strlen(value)) {
}
inline size_t static_string::length() const {
return m_length;
}
// Copies m_length characters (no terminator) to dst and returns the position
// just past the copied characters.
inline char * static_string::write_to(char * dst) const {
    // memcpy returns its destination pointer, so the end can be derived from it.
    char * const start = static_cast<char *>(__builtin_memcpy(dst, m_value, m_length));
    return start + m_length;
}
// Evaluates a rope expression into a freshly allocated buffer.
// m_length is declared before m_value in the struct, so it is already
// initialised when it is used to size the allocation.
template<class Derived>
inline string::string(const rope_base<Derived> & rope)
    : m_length(rope.ref().length()),
      m_value(new char[m_length + 1]) {   // one extra char for the terminator
    char * const end = rope.ref().write_to(m_value);
    *end = 0;
}
// Releases the buffer allocated with new[] in the constructor.
inline string::~string() {
    delete[] this->m_value;
}
inline const char * string::data() const {
return m_value;
}
inline size_t string::length() const {
return m_length;
}
// Downcasts this base sub-object to the concrete node type `Derived`.
template<class Derived>
inline const Derived & rope_base<Derived>::ref() const {
    return *static_cast<const Derived *>(this);
}
// Initialises both operand members from the given operands.
template<class Left, class Right>
inline rope<Left, Right>::rope(Left const & left, Right const & right)
    : m_left(left),
      m_right(right) {
}
// Total length of the concatenation: left length plus right length.
template<class Left, class Right>
inline size_t rope<Left, Right>::length() const {
    const size_t left_len = m_left.length();
    const size_t right_len = m_right.length();
    return left_len + right_len;
}
// Writes the left operand at dst, then the right operand directly after it,
// and returns the position just past the right operand's characters.
template<class Left, class Right>
inline char * rope<Left, Right>::write_to(char * dst) const {
    char * const after_left = m_left.write_to(dst);
    return m_right.write_to(after_left);
}
// static_string + static_string: the leaf case of a rope expression.
inline rope<static_string, static_string> operator+(const static_string & left, const static_string & right) {
    typedef rope<static_string, static_string> result_type;
    return result_type(left, right);
}
// rope node + static_string: appends a static_string to an existing expression.
template<class Left>
inline rope<Left, static_string> operator+(const rope_base<Left> & left, const static_string & right) {
    typedef rope<Left, static_string> result_type;
    return result_type(left.ref(), right);
}
// static_string + rope node: prepends a static_string to an existing expression.
template<class Right>
inline rope<static_string, Right> operator+(const static_string & left, const rope_base<Right> & right) {
    typedef rope<static_string, Right> result_type;
    return result_type(left, right.ref());
}
// rope node + rope node: joins two existing expressions.
template<class Left, class Right>
inline rope<Left, Right> operator+(const rope_base<Left> & left, const rope_base<Right> & right) {
    typedef rope<Left, Right> result_type;
    return result_type(left.ref(), right.ref());
}
// Short alias so the first operand of a `+` chain can be written as ss("...").
typedef static_string ss;

// Builds one string from a five-operand rope expression and prints its length
// followed by its contents.
int main(int, char **)
{
    string s(ss("111111111111") + "222222222222" + "333333333333" + "444444444444" + "555555555555");
    // length() returns size_t, which does not match "%d" (and has a different
    // width than int on 64-bit targets). Convert explicitly and use the
    // matching "%lu" conversion; "%zu" is avoided because older C runtimes,
    // such as the one used by mingw32, do not support it.
    printf("%lu %s\n", static_cast<unsigned long>(s.length()), s.data());
    return 0;
}
With five arguments, as in the code above, the generated assembly contains no unnecessary temporaries:
# Compiler output for the five-operand expression: the string constants below
# are the five literals used in main().
# No intermediate objects are stored on the stack here; the only work left is
# one allocation, five fixed-size copies, the terminator store and printf.
.def ___main; .scl 2; .type 32; .endef
.section .rdata,"dr"
LC0:
.ascii "444444444444\0"
LC1:
.ascii "333333333333\0"
LC2:
.ascii "222222222222\0"
LC3:
.ascii "111111111111\0"
LC4:
.ascii "555555555555\0"
LC5:
.ascii "%d %s\12\0"
.text
.p2align 2,,3
.globl _main
.def _main; .scl 2; .type 32; .endef
_main:
pushl %ebp
movl %esp, %ebp
andl $-16, %esp
pushl %edi
pushl %esi
pushl %ebx
# Only 20 bytes of locals: just room for outgoing call arguments.
subl $20, %esp
call ___main
movl $LC3, %esi
# Allocation size is the constant 61 (five 12-character literals plus one
# terminator); __Znaj is the mangled name of operator new[](unsigned int).
movl $61, (%esp)
call __Znaj
# %ebx keeps the start of the new buffer for the rest of the function.
movl %eax, %ebx
# Each literal is copied as 3 dwords (12 bytes) with rep movsl.
movl $3, %ecx
movl %eax, %edi
rep movsl
leal 12(%eax), %eax
# %ecx is zero after rep movsl, so writing %cl alone resets the count to 3.
movb $3, %cl
movl %eax, %edi
movl $LC2, %esi
rep movsl
leal 24(%ebx), %eax
movb $3, %cl
movl %eax, %edi
movl $LC1, %esi
rep movsl
leal 36(%ebx), %eax
movb $3, %cl
movl %eax, %edi
movl $LC0, %esi
rep movsl
leal 48(%ebx), %eax
movl $LC4, %esi
movb $3, %cl
movl %eax, %edi
rep movsl
# Terminator: %eax is buffer+48, so this writes the zero byte at offset 60.
movb $0, 12(%eax)
# printf(LC5, 60, buffer): the length is passed as the constant 60.
movl %ebx, 8(%esp)
movl $60, 4(%esp)
movl $LC5, (%esp)
call _printf
# The buffer is released only if the pointer is non-null; __ZdaPv is the
# mangled name of operator delete[](void*).
testl %ebx, %ebx
je L2
movl %ebx, (%esp)
call __ZdaPv
L2:
# Return value 0.
xorl %eax, %eax
addl $20, %esp
popl %ebx
popl %esi
popl %edi
leave
ret
.def __Znaj; .scl 2; .type 32; .endef
.def _printf; .scl 2; .type 32; .endef
.def __ZdaPv; .scl 2; .type 32; .endef
With eight arguments, however, the compiler creates unnecessary temporaries:
# Compiler output for an eight-operand expression (literals "111111111111"
# through "888888888888").
# Unlike the five-operand listing, intermediate objects are built on the stack,
# block-copied from slot to slot with rep movsl, and the lengths and source
# pointers are re-read from memory at run time.
.def ___main; .scl 2; .type 32; .endef
.section .rdata,"dr"
LC0:
.ascii "777777777777\0"
LC1:
.ascii "666666666666\0"
LC2:
.ascii "555555555555\0"
LC3:
.ascii "444444444444\0"
LC4:
.ascii "333333333333\0"
LC5:
.ascii "222222222222\0"
LC6:
.ascii "111111111111\0"
LC7:
.ascii "888888888888\0"
LC8:
.ascii "%d %s\12\0"
.text
.p2align 2,,3
.globl _main
.def _main; .scl 2; .type 32; .endef
_main:
pushl %ebp
movl %esp, %ebp
andl $-16, %esp
pushl %edi
pushl %esi
pushl %ebx
# The frame grows to 228 bytes (it was 20 in the five-operand listing).
subl $228, %esp
call ___main
# Seven {pointer, length 12} pairs are stored at 168..223(%esp), one for each
# of LC0..LC6. NOTE(review): these look like the static_string temporaries
# (m_value, m_length) -- confirm against the C++ source. LC7 gets no such pair.
movl $LC0, 168(%esp)
movl $12, 172(%esp)
movl $LC1, 176(%esp)
movl $12, 180(%esp)
movl $LC2, 184(%esp)
movl $12, 188(%esp)
movl $LC3, 192(%esp)
movl $12, 196(%esp)
movl $LC4, 200(%esp)
movl $12, 204(%esp)
movl $LC5, 208(%esp)
movl $12, 212(%esp)
movl $LC6, 216(%esp)
movl $12, 220(%esp)
# The addresses of the pairs for LC6, LC5, LC4 and LC3 are gathered into four
# consecutive words at 152..167(%esp).
leal 216(%esp), %eax
movl %eax, 152(%esp)
leal 208(%esp), %eax
movl %eax, 156(%esp)
leal 200(%esp), %eax
movl %eax, 160(%esp)
leal 192(%esp), %eax
movl %eax, 164(%esp)
# Those four words are block-copied to 132(%esp) ...
# NOTE(review): this and the following rep movsl copies (4, 5, 6, then 7
# dwords) look like each nested rope being copied by value into the next
# rope's m_left -- confirm against the C++ source.
leal 132(%esp), %edi
leal 152(%esp), %esi
movl $4, %ecx
rep movsl
# ... and the address of the LC2 pair is appended, giving five words.
leal 184(%esp), %eax
movl %eax, 148(%esp)
# The five words are block-copied to 108(%esp); the LC1 pair address is appended.
leal 108(%esp), %edi
leal 132(%esp), %esi
movb $5, %cl
rep movsl
leal 176(%esp), %eax
movl %eax, 128(%esp)
# The six words are block-copied to 80(%esp); the LC0 pair address is appended.
leal 80(%esp), %edi
leal 108(%esp), %esi
movb $6, %cl
rep movsl
leal 168(%esp), %eax
movl %eax, 104(%esp)
# The seven words are block-copied once more, to 48(%esp).
leal 48(%esp), %edi
leal 80(%esp), %esi
movb $7, %cl
rep movsl
# The seven pair addresses are read back from that last copy: two stay in
# registers (%ebx, %edx), the other five go to stack slots 36..20(%esp).
movl 48(%esp), %ebx
movl 52(%esp), %edx
movl 56(%esp), %eax
movl %eax, 36(%esp)
movl 60(%esp), %eax
movl %eax, 32(%esp)
movl 64(%esp), %eax
movl %eax, 28(%esp)
movl 68(%esp), %eax
movl %eax, 24(%esp)
movl 72(%esp), %eax
movl %eax, 20(%esp)
# Total length: the length field (offset 4) of each of the seven pairs is
# loaded from memory and summed into 44(%esp); only the constant 12 added
# below is known at compile time.
movl 4(%ebx), %eax
addl 4(%edx), %eax
addl $12, %eax
movl %eax, 44(%esp)
movl 36(%esp), %eax
movl 4(%eax), %eax
addl %eax, 44(%esp)
movl 32(%esp), %eax
movl 4(%eax), %eax
addl %eax, 44(%esp)
movl 28(%esp), %eax
movl 4(%eax), %eax
addl %eax, 44(%esp)
movl 24(%esp), %eax
movl 4(%eax), %eax
addl %eax, 44(%esp)
movl 20(%esp), %eax
movl 4(%eax), %eax
addl %eax, 44(%esp)
# Allocate total length + 1 bytes; __Znaj is the mangled name of
# operator new[](unsigned int). %edx is saved across the call at 16(%esp).
movl 44(%esp), %eax
incl %eax
movl %eax, (%esp)
movl %edx, 16(%esp)
call __Znaj
# The buffer start is kept at 40(%esp).
movl %eax, 40(%esp)
# Seven byte-wise copies follow (rep movsb): each takes its source pointer
# from offset 0 and its count from offset 4 of one pair, then advances the
# destination in %eax by that pair's length.
movl (%ebx), %esi
movl 4(%ebx), %ecx
movl %eax, %edi
rep movsb
movl 40(%esp), %eax
addl 4(%ebx), %eax
movl 16(%esp), %edx
movl (%edx), %esi
movl 4(%edx), %ecx
movl %eax, %edi
rep movsb
addl 4(%edx), %eax
movl 36(%esp), %edx
movl (%edx), %esi
movl 4(%edx), %ecx
movl %eax, %edi
rep movsb
addl 4(%edx), %eax
movl 32(%esp), %edx
movl (%edx), %esi
movl 4(%edx), %ecx
movl %eax, %edi
rep movsb
addl 4(%edx), %eax
movl 28(%esp), %edx
movl (%edx), %esi
movl 4(%edx), %ecx
movl %eax, %edi
rep movsb
addl 4(%edx), %eax
movl 24(%esp), %edx
movl (%edx), %esi
movl 4(%edx), %ecx
movl %eax, %edi
rep movsb
addl 4(%edx), %eax
movl 20(%esp), %edx
movl (%edx), %esi
movl 4(%edx), %ecx
movl %eax, %edi
rep movsb
addl 4(%edx), %eax
# The eighth literal (LC7) is the only one still copied with a constant size:
# 3 dwords via rep movsl, as in the five-operand listing.
movl $LC7, %esi
movb $3, %cl
movl %eax, %edi
rep movsl
# Terminator byte, 12 bytes past the start of the last copy.
movb $0, 12(%eax)
# printf(LC8, length from 44(%esp), buffer from 40(%esp)).
movl 40(%esp), %eax
movl %eax, 8(%esp)
movl 44(%esp), %edx
movl %edx, 4(%esp)
movl $LC8, (%esp)
call _printf
# The buffer is released only if the pointer is non-null; __ZdaPv is the
# mangled name of operator delete[](void*).
movl 40(%esp), %eax
testl %eax, %eax
je L2
movl 40(%esp), %eax
movl %eax, (%esp)
call __ZdaPv
L2:
# Return value 0.
xorl %eax, %eax
addl $228, %esp
popl %ebx
popl %esi
popl %edi
leave
ret
.def __Znaj; .scl 2; .type 32; .endef
.def _printf; .scl 2; .type 32; .endef
.def __ZdaPv; .scl 2; .type 32; .endef