Try that with VC++/GCC. It will likely not matter one bit.
I've put a link to my binary. It's the same source I posted compiled with Visual Studio 2010 (as C++ code, though; though I don't think that matters).
Printable View
By the way, what's the difference between
andCode:for (x = 0; x < 14; x++)
{
if (buffer[x] == '+' || buffer[x] == '-' || buffer[x] == sep || buffer[x] >= '0' && buffer[x] <= '9')
printf("%c",buffer[x]);
if (buffer[x] == '\0')
break;
}
?Code:printf("%s", buffer);
And I believe in Elysia's version,
This is the bottleneck.Code:int len_str = (int)log10((double)num);
The casting is very expensive (from integral to IEEE double), and log10(double) is also very expensive. A binary search loop would be much faster. Or linear search if number of digits is small.
The version I'm using is:
and the output is:Code:// ----------------------------------------------------------------------------------------------
// Prog_name: comma_sep.c / ver 0.5
// A routine for formatting integer numbers with thousand separators.
// Created with Pelles C for Windows 6.00.4
//-----------------------------------------------------------------------------------------------
// This version takes into account the sign and the possibility to have different
// separator like space, comma, point, and so on.
// Moveover this version creates a function that can be tested for performance.
// Added the choice for right and left alignment.
// In this version Global variables have been removed.
//-----------------------------------------------------------------------------------------------
// Date: 04 july 2010
// Author: frktons @ cprogramming forum.
//-----------------------------------------------------------------------------------------------
#include <stdio.h>
#include <stdbool.h>
#include <time.h>
#undef getchar
#pragma comment(lib, "\\masm32\\lib\\msvcrt.lib")
//-----------------------------------------------------------------------------------------------
// Function prototype for int_format(). Takes the number to format,
// the address of the string to fill, some switches and thousand separator.
// Returns nothing.
//-----------------------------------------------------------------------------------------------
void int_format(int num, char *, bool sign, bool neg, char sep, char alignment);
//-----------------------------------------------------------------------------------------------
int main(int argc, char *argv[])
{
//-----------------------------------------------------------------------------------------------
// Local variables.
//-----------------------------------------------------------------------------------------------
bool sign = true; // if the number has to be displayed with sign
bool neg = false; // if the number is negative this flag will be set
// from the function to reflect the state
char sep = ','; // here I choose the separator
char buffer[15] = {' '}; // string array for the formatted number
char alignment = ' '; // choice to [R]ight-align or [L]eft-align the number
int num = -1234567890; // test number
int cycles = 0;
float ver = 0.5; // Program version
int x; // generic integer counter
time_t init_time = 0; // initial time
time_t end_time = 0; // end time
alignment = 'L'; // the formatted number will be Left-aligned
int times = 500000000; // for testing performance set this number for repetitions
printf("\n\t\t\t Testing version : %.2f \n",ver);
printf("\n\t\t\t ----------------------\n");
printf("\n The value of num is: %d\n",num);
time(&init_time);
printf("\n init_time = %d \n",init_time);
for (cycles = 0; cycles < times; cycles++) {
for (x=0; x < 14; x++)
buffer[x] = ' ';
int_format(num, buffer, sign, neg, sep, alignment);
} // end for
printf("\n The formatted value of num is: ");
for (x = 0; x < 14; x++)
{
if (buffer[x] == '+' || buffer[x] == '-' || buffer[x] == sep || buffer[x] >= '0' && buffer[x] <= '9')
printf("%c",buffer[x]);
else if (buffer[x] == '\0')
break;
}
printf("\n\n");
time(&end_time);
printf(" end_time = %d \n",end_time);
printf("\n\n The routine test has taken about %d seconds\n", end_time - init_time);
for (x=0; x < 14; x++)
buffer[x] = ' ';
// buffer[14] = '\0';
neg = false;
sign = false;
int_format(times, buffer, sign, neg, sep, alignment);
printf("\n to perform ");
for (x = 0; x < 14; x++)
{
if (buffer[x] == '+' || buffer[x] == '-' || buffer[x] == sep || buffer[x] >= '0' && buffer[x] <= '9')
printf("%c",buffer[x]);
else if (buffer[x] == '\0')
break;
}
printf(" cycles of the formatting function");
getchar();
return 0;
}
//--------------------------------------------------------------------------------------------------------------------
// Function int_format()
//--------------------------------------------------------------------------------------------------------------------
void int_format(int number, char *buffer, bool sign, bool neg, char sep, char alignment){
register int num = number;
int x = 0;
int y = 0;
const int ten = 10;
register int remain = 0; // integer variable to store the remainder
int count = 0; // integer counter for positioning the separator
bool zero = false; // if the number is zero the function sets this
// flag for its decisions
char digit[10] = {'0','1','2','3','4','5','6','7','8','9'}; // the digits to display in char shape
int len_str = 14; // string lenght less the terminator NULL
if (num != 0)
{
if (num < 0)
{
neg = true;
num = num * -1 ; // transform number to positive if negative
}
for (x = len_str; x >= 0; x--)
{
if (num == 0)
break;
if (count == 3)
{
count = 0;
buffer[x] = sep;
x--;
}
remain = num % ten;
num = num / ten;
buffer[x] = digit[remain];
// buffer[x] = '0' + remain;
count++;
}
}
else
{
buffer[len_str] = '0';
zero = true;
}
if (sign == true && neg == true)
buffer[x] = '-';
else if (sign == true && zero == false)
buffer[x] = '+';
if (alignment == 'L') {
if (x == 0)
return;
for (y = 0; y < 15; y++, x++) {
buffer[y] = buffer[x];
if (buffer[y] == '\0')
return;
} // end for
}
return;
}
Code:
Testing version : 0.50
----------------------
The value of num is: -1234567890
init_time = 1278270428
The formatted value of num is: -1,234,567,890
end_time = 1278270501
The routine test has taken about 73 seconds
to perform 500,000,000 cycles of the formatting function
Funny, when I used -123456 on my system, it took 9 seconds to run with:
gcc -Wall -Wextra -o speed speed.c
When I did -O3, it took 1 second to run it.
When I changed the number to -1234567890, it took 14 seconds to run with:
gcc -Wall -Wextra -o speed speed.c
And zero seconds with the -O3 switch.
My system:
gcc version 4.4.1
Linux 2.6.31
AMD Athlon II X2 250 (3.0GHz)
I wish I had more time so I could run some tests on OpenSolaris with Sun's C compiler....
Time to switch compiler I think :).Code:Testing version : 0.50
----------------------
The value of num is: -1234567890
init_time = 1278270636
The formatted value of num is: -1,234,567,890
end_time = 1278270641
The routine test has taken about 5 seconds
to perform 500,000,000 cycles of the formatting function
Dude, I have no idea. Not my code.
Also, I think I have found the reason for the speedup.
For some reason, VC++ is able to inline int_format on x64. Indeed, this provides the speedup. Disabling it ramps the time up to 29 seconds.
This is not done in x86, though. Why, I don't know, but there you have it.
True. It does take a lot of time, I've noticed.
The question is how to optimize that. I threw off a couple of suggestions before, but I haven't tried anything.
Core 2 Duo at 3GHz, 64-bit Linux, GCC 4.4.1.Quote:
Time to tell me the HW/SW configuration you are using as well. :-)
Un-optimized version runs in 150 seconds, so your compiler is most certainly optimizing, just not nearly as much.
Yeah I was talking to frktons.Quote:
Dude, I have no idea. Not my code.
I've managed to cut 10 seconds by getting rid of log10. Horribly slow function, that. Regular division is faster. Unfortunately, now the compiler refuses to inline the function :(
Passing the length of the number to the function as an argument allowed the compiler to inline it again. The new optimization to remove log10 shaved off a second off that time (9 seconds now).
I only got warnings for using %d for time_t, so I ran it as is.
I'm not sure about MSVC, but gcc has a compiler flag that sets inline aggressiveness.
Yes, there is __forceinline to try to force it to inline. I've been successful so far to make it inline. Now trying a division table.
Funny how my test all hate lookup tables. They always turn out slower.
Both in filling and accessing. It seems it's faster to do the division twice.
64 bit doesn't make any difference. I got similar results in 32 bit. It's the compiler.
Compilers are picky things. I know I got it down to around 9 seconds, but I cannot seem to reproduce it now.
AMD Athlon II X2 250 (3 GHz).
I will try yours shortly.
EDIT: Your code: 39 seconds.
EDIT2: My code with your integer: 29 seconds.
o_OCode:Address Line Source Code Bytes Timer samples
0x13f3f11e1 181 remain = num % 10; 48.99
One optimization I would do is to precompute a mapping of integers 0-999 to their 3 characters strings.
That would take 1000*3 = ~3KB of memory. No null terminators needed because you know they are exactly 3 characters long.
Then you can do % 1000 instead of % 10. I'm guessing that will make it at least twice as fast.
It will also make the logic simpler (since you want a comma every 3 digits), and eliminate a bunch of branching, which are also slow with modern processors.
And switching to a modern compiler will make this all not matter ;).
Branching? Slow? I don't buy that argument so much.
Today's processors are pretty clever, and since the loop is pretty deterministic, it should easily be able to avoid branch misprediction.
I'm just speculating, though.
But that idea would probably see some speed gains.
I'm going to try it.
EDIT: 14 seconds vs 29 on x64.
EDIT2: With a slightly larger table and x64, it is possible to reduce the runtime to ~8 seconds (if you are willing to sacrifice ~8 MB memory).
The only problem is that you have to correct the numbers:
Code:The value of num is: -1234567890
init_time = 1278283073
The formatted value of num is: -001,234,567,890
end_time = 1278283081
The routine test has taken about 8 seconds
to perform +000,500,000,000 cycles of the formatting function
Hmm when you go to 8MB of memory, I don't see how it can be faster.
8MB is larger than the L2 of most/all CPUs, and the values are pretty much random. Cache misses should be much more expensive than doing the calculations.
The loop is fine. The compiler will probably at least partially unroll it anyways.
I was talking about this one
It's true once every 3 iterations in each function call (not globally). I'm not sure if the branch predictor is that smart.Code:if (count == 3)
But the biggest difference would be the elimination of division (%).
Actually, I took a look with the profiler. I got 2 L2 cache misses.
The rest was L2 cache hits and retired instructions (whatever that means).
Nevertheless, it was faster.
For the sake of it, my current code:
Code:// ----------------------------------------------------------------------------------------------
// Prog_name: comma_sep.c / ver 0.5
// A routine for formatting integer numbers with thousand separators.
// Created with Pelles C for Windows 6.00.4
//-----------------------------------------------------------------------------------------------
// This version takes into account the sign and the possibility to have different
// separator like space, comma, point, and so on.
// Moveover this version creates a function that can be tested for performance.
// Added the choice for right and left alignment.
// In this version Global variables have been removed.
//-----------------------------------------------------------------------------------------------
// Date: 04 july 2010
// Author: frktons @ cprogramming forum.
//-----------------------------------------------------------------------------------------------
#include <stdio.h>
#include <time.h>
#include <math.h>
#include <string.h>
//-----------------------------------------------------------------------------------------------
// Function prototype for int_format(). Takes the number to format,
// the address of the string to fill, some switches and thousand separator.
// Returns nothing.
//-----------------------------------------------------------------------------------------------
inline int GetNumLength(int num)
{
int len_str = 0;
int temp_num = num;
while (temp_num /= 10) len_str++;
return len_str + 1;
}
void int_format(int num, char * buffer, bool sign, char sep);
//-----------------------------------------------------------------------------------------------
char digits_table[999999][8];
int main()
{
for (int i = 0; i < 999999; i++)
{
for (int j = 0; j < 8; j++)
{
digits_table[i][0] = ',';
digits_table[i][1] = '0' + char(i / 100000);
digits_table[i][2] = '0' + char(i / 10000 % 10);
digits_table[i][3] = '0' + char(i / 1000 % 10);
digits_table[i][4] = ',';
digits_table[i][5] = '0' + char(i / 100 % 10);
digits_table[i][6] = '0' + char(i / 10 % 10);
digits_table[i][7] = '0' + char(i % 10);
}
}
//-----------------------------------------------------------------------------------------------
// Local variables.
//-----------------------------------------------------------------------------------------------
bool sign = true; // if the number has to be displayed with sign
// from the function to reflect the state
char sep = ','; // here I choose the separator
char buffer[30] = {0}; // string array for the formatted number
//char alignment = ' '; // choice to [R]ight-align or [L]eft-align the number
int num = -1234567890; // test number
int cycles = 0;
//int len_str = GetNumLength(num);
int x; // generic integer counter
time_t init_time = 0; // initial time
time_t end_time = 0; // end time
//alignment = 'L'; // the formatted number will be Left-aligned
int times = 500000000; // for testing performance set this number for repetitions
printf("\n The value of num is: %d\n",num);
time(&init_time);
printf("\n init_time = %d \n",init_time);
for (cycles = 0; cycles < times; cycles++) {
memset(buffer, ' ', sizeof(buffer));
int_format(num, /*len_str, */buffer, sign, sep/*, alignment*/);
} // end for
printf("\n The formatted value of num is: %s", buffer);
printf("\n\n");
time(&end_time);
printf(" end_time = %d \n",end_time);
printf("\n\n The routine test has taken about %d seconds\n", end_time - init_time);
memset(buffer, ' ', sizeof(buffer));
int_format(times, buffer, sign, sep);
printf("\n to perform %s cycles of the formatting function\n", buffer);
return 0;
}
//--------------------------------------------------------------------------------------------------------------------
// Function int_format()
//--------------------------------------------------------------------------------------------------------------------
__forceinline void int_format(int num, /*int length_of_num, */char *buffer, bool sign, /*bool neg, */char sep/* char alignment*/){
int remain = 0; // integer variable to store the remainder
bool neg = false;
const int sizeof_buffer = 30;
if (num == 0)
{
strcpy(buffer, "0");
return;
}
if (num < 0)
{
neg = true;
buffer[0] = '-';
num = -num; // transform number to positive if negative
}
else
{
if (sign) buffer[0] = '+';
}
int x;
for (x = sizeof_buffer - 1; num; x -= 8)
{
remain = num % 1000000;
num /= 1000000;
memcpy(&buffer[x - 8], digits_table[remain], 8);
}
int copy_to = 0;
if (neg || sign) copy_to++;
memmove(&buffer[copy_to], &buffer[x + 1], sizeof_buffer - x - 1);
buffer[sizeof_buffer - x - 1] = '\0';
return;
}
Ah! of course.
We are running it on the same number over and over :). It's not random at all.
If you do it on a bunch of random numbers, it should be much slower.
The 2 cache misses are probably in the first function call. The digits table would be mostly flushed out of cache by then, because of the order you are building it.
-1234567890 takes 2 lookups.
Let me catch up here, I've been messing with hardware and RL for a bit. ;)
Is this still the objective?
And no, I wasn't joking about deleting some of this code, if this is the objective. I did not say to delete the entire program!Quote:
I recently started studying C, and so far I didn't find a function to format
integer numbers with thousand comma separators:
The number you want to test on is: -1234567890, and you want to format it 500 Million times, for timing/testing purposes, right?
I'll put together some code for it, and show you what I mean.
Hmmm. Well, not feeling up to try right now.
The OP is trying to optimize his/her 1000 separator formatting function.
Basically we've been able to hit 8 seconds. Still trying to figure if it's possible to optimize it more. Preferably getting rid of the modulus.
EDIT: Ran cache test again.
I got 5 misses this time (not inlining code this time).
The lines that were causing it were:
So it probably has something to do with the assembly, and that makes it tricky to find the actual cause.Code:Address Line Source Code Bytes Ret inst L2 fill/writeback L2 requests L2 misses
0x13f1f1290 107 16966 0 2 1
108 __forceinline void int_format(...){ 0 0 0 0
0x13f1f12ab 131 int x; 22588 1 0 2
132 for (x = sizeof_buffer - 1; num; x -= 8) 0 0 0 0
1 file, 1 function, 4 lines, 0 instructions, Summary: 39560 samples, 26.00% of module samples, 13.84% of total samples
You'd think sprintf or something would make this a whole lot easier to implement.
Didn't someone post a printf solution earlier? It seems to be gone now.
@Elysia
My compiler says the following are illegal:
Are these standard C99 instructions? :-(Code:digits_table[i][1] = '0' + char(i / 100000);
digits_table[i][2] = '0' + char(i / 10000 % 10);
digits_table[i][3] = '0' + char(i / 1000 % 10);
digits_table[i][4] = ',';
digits_table[i][5] = '0' + char(i / 100 % 10);
digits_table[i][6] = '0' + char(i / 10 % 10);
digits_table[i][7] = '0' + char(i % 10);
@Adak
This is a good idea.
@cyberfishQuote:
Let me catch up here, I've been messing with hardware and RL for a bit.
Is this still the objective?
Quote:
I recently started studying C, and so far I didn't find a function to format
integer numbers with thousand comma separators:
And no, I wasn't joking about deleting some of this code, if this is the objective. I did not say to delete the entire program!
The number you want to test on is: -1234567890, and you want to format it 500 Million times, for timing/testing purposes, right?
I'll put together some code for it, and show you what I mean.
Yes, that is one of the options I was thinking about, we need to handle the last remainder
if less than 100, with leading blanks/zeroes. :-)
Quote:
One optimization I would do is to precompute a mapping of integers 0-999 to their 3 characters strings.
That would take 1000*3 = ~3KB of memory. No null terminators needed because you know they are exactly 3 characters long.
Then you can do % 1000 instead of % 10. I'm guessing that will make it at least twice as fast.
It will also make the logic simpler (since you want a comma every 3 digits), and eliminate a bunch of branching, which are also slow with modern processors.
And switching to a modern compiler will make this all not matter .
@others
If you have viable alternatives, please feel free to partecipate with
your code and suggestions, not to speak about testing it and posting
the results you get. :-)
Well, sometimes a little math and some recursion can be a beautiful thing.
The sprintf version is not much different, but I have to leave you something to figure out. Can't be doing your homework.Code:#include <stdio.h>
#include <math.h>
void bar (unsigned long n , char sep)
{
if (n > 0UL) {
bar (n / 1000UL , ',');
printf ("%lu%c" , n % 1000UL , sep);
/* If you can print it you can copy it */
}
}
void foo (long n)
{
unsigned long m = n;
if (n < 0) {
m = n * -1L;
printf ("%c" , '-');
}
bar (m , '\n');
}
int main (void)
{
foo (-1500300L);
foo (1500300L);
return 0;
}
Eylsia, you've made the classic mistake of not handling -2147483648 (0x80000000) correctly.
When you find the number is negative and negate it, put the result into an unsigned number and deal with only unsigned variables from then on.
You also don't need temp_num as well as the parameter num, in GetNumLength. Since it's passed by value there is no difference between them. Just modify num (which will of course be unsigned right ;))
You do realise that a 7MB lookup table is absurd right?
Wow! Who'd a thunk - a race to print 500 million huge numbers, with comma's!! :p :p
This one takes a smarter route, and only prints the big number once, unless it has changed. That brings the time down to 1-2 seconds.
@iMalc, what's the solution to this 2147483648 * -1 > 9999999999 problem?Code:/*
I recently started studying C, and so far I didn't find a function to format
integer numbers with thousand comma separators:
The number you want to test on is: -1234567890, and you want to format it
500 Million times, for timing/testing purposes, right?
-2147483648 for num, fails :(
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
int main() {
clock_t start, stop;
char neg;
char s[15] = { '\0' };
long num = -123, orig_num=0;
unsigned long i, num2;
/* for testing
num = -1234;
num = -12345;
num = -123456;
num = 1234567;
num = -12345678;
num = 123456789;
*/
num = -1234567890; //1,234,567,890
// num = -2147483648; //this fails 2147483648 * -1 > 9999999999 ??
printf("\n\n");
start=clock();
for(i=0;i<500000000;i++) {
if(num==orig_num)
continue;
if(num<0) {
neg='-';
num2=(num* -1);
}
else {
neg=' ';
num2=num;
}
ultoa(num2, s, 10);
printf("\n%c%s", neg,s);
putchar('\n');
putchar(neg);
if(num2>9999999999) { //10 Billion to 99 Billion
cprintf("%c%c,%c%c%c,%c%c%c,%c%c%c",
s[0],s[1],s[2],s[3],s[4],s[5],s[6],s[7],s[8],s[9],s[10]);
}
else if(num2>999999999) { //1 Billion to 9 Billion
cprintf("%c,%c%c%c,%c%c%c,%c%c%c",
s[0],s[1],s[2],s[3],s[4],s[5],s[6],s[7],s[8],s[9]);
}
else if(num2>99999999) { //100 Million to 999 Million (US)
cprintf("%c%c%c,%c%c%c,%c%c%c",
s[0],s[1],s[2],s[3],s[4],s[5],s[6],s[7],s[8]);
}
else if(num2>9999999) { //10 Million to 99 Million (US)
cprintf("%c%c,%c%c%c,%c%c%c", s[0],s[1],s[2],s[3],s[4],s[5],s[6],s[7]);
}
else if(num2>999999) { //1 Million to 9 Million (US)
cprintf("%c,%c%c%c,%c%c%c", s[0],s[1],s[2],s[3],s[4],s[5],s[6]);
}
else if(num2>99999) { //100 Thousand to 999 Thousand (US)
cprintf("%c%c%c,%c%c%c", s[0],s[1],s[2],s[3],s[4],s[5]);
}
else if(num2>9999) { //10 Thousand to 99 Thousand (US)
cprintf("%c%c,%c%c%c", s[0],s[1],s[2],s[3],s[4]);
}
else if(num2>999) { //1 Thousand to 9 Thousand (US)
cprintf("%c,%c%c%c", s[0],s[1],s[2],s[3]);
}
else //less than 1 Thousand
cprintf("%s", s);
orig_num=num;
//if num is to be changed, that
//code would go right here
}
stop = clock();
printf("\nElapsed Time: %f", (stop-start)/CLK_TCK);
printf("\n\n\t\t\t press enter when ready");
i = getchar(); ++i;
return 0;
}
No.
Does
Work?Code:digits_table[i][1] = '0' + (char)(i / 100000)
Also, please use quotes for quoting text and not code tags.
:(:(:(
Aye? Does it matter? If so, how?
I know. I just broke out some code and copied it into the function.Quote:
You also don't need temp_num as well as the parameter num, in GetNumLength. Since it's passed by value there is no difference between them. Just modify num (which will of course be unsigned right ;))
But the compiler will probably optimize it anyway. Plus it's test code and time isn't spent there.
True, well, maybe. It was a test and it turned out to be faster for this particular number.Quote:
You do realise that a 7MB lookup table is absurd right?
But I think I've made a classic mistake. I've only tried for one particular input.
O_oQuote:
Does it matter? If so, how?
Would the answer to these questions be more obvious if your code didn't handle a different integer value correctly?
Soma
Could you please add some comments about your code
in order to help C n00bs like me to get a better idea of what
and why you use a kind of code instead of a different one?
Thanks
What part are you confused about?
Whiteflags and Adak, for example, posted alternative ways of
doing the same task, and I don't know some of the code they used,
the syntax is only partially clear for me.
The L or UL terminated numbers are quite new for me, and they
just printed the result, not explaining what performance and why they
obtained that. The recursive function is another new stuff I'm not accustomed
with. It is partially clear though.
Those are really interesting ways of coding that require some studying from
me, and I'll be glad to do it as soon as I get the time to do it. At the moment I hope
during the week-end I'll get some free time :-)
At the same time if the code is followed with some explanation, it
could be a lot easier and give me some good new point of view as well.
Experienced programmers tend to be quick to code and slow to explain.
I cannot blame them, but I dare to ask. ;-)
Adak code, gives me these messages:
when I try to compile it, so I'm just spaced out.Code:C:\comma_sep_adak\comma_sep.c(48): warning #2027: Missing prototype for 'ultoa'.
C:\comma_sep.c(53): warning #2027: Missing prototype for 'cprintf'.
C:\comma_sep.c(57): warning #2027: Missing prototype for 'cprintf'.
C:\\comma_sep.c(61): warning #2027: Missing prototype for 'cprintf'.
C:\comma_sep.c(65): warning #2027: Missing prototype for 'cprintf'.
C:\comma_sep.c(68): warning #2027: Missing prototype for 'cprintf'.
C:\comma_sep.c(71): warning #2027: Missing prototype for 'cprintf'.
C:\comma_sep.c(74): warning #2027: Missing prototype for 'cprintf'.
C:\comma_sep.c(77): warning #2027: Missing prototype for 'cprintf'.
C:\comma_sep.c(80): warning #2027: Missing prototype for 'cprintf'.
C:\comma_sep.c(86): error #2048: Undeclared identifier 'CLK_TCK'.
C:\comma_sep.c(86): warning #2234: Argument 2 to 'printf' does not match the format
string; expected 'double' but found 'unsigned int'.
If the code posted is not standard ANSI C, how could I understand what are you
doing? How could it help me? :-(
Whiteflags's code compiles and runs just fine. But what about performance?
Why is it better or smarter? what tricks is it using to get a better performance?
What system are they using? At least this last example is ANSI C standard. :-)
U and UL are integer prefixes (postfixes?). U stands for unsigned and UL stands for unsigned long and L stands for long. Normally, any integer you write is considered an int. By putting those *fixes after, you can tell the compiler that they should be a different type.
Much like 1.0f is a float instead of a double.
I don't think Whiteflag's code is any faster. It's probably much worse since it uses recursion. I believe it was a demonstration that it could be done in an alternate way.
Of course, to be sure, you really should time it.
Recursion can be mind boggling. Try writing down the flow on a piece of paper. That is my best advice. You should be able to see how it works.
Adak, did you compile your code in Turbo C again?
Oh wait, it's called suffix, isn't it?
@Adak:
your code , at a first sight, looks like it's doing a conversion and an empty
500,000,000 cycles. Could you check it please?
ANSI standards have changed a lot over the years - and vendors don't support all those standards.
Re: my code
I'm working on a version that you can run, and of course, it shows the elapsed time (1-2 second range, currently).
Yes, unless the variable being printed has changed since the last loop, it just continues to iterate and does no processing.
Why would you want it to? Once you have formatted a number with comma's, there is no point in formatting it again and again. If you have a function call that changes the number, then the new number will be formatted (once), and printed, once. That's why it's looping - every loop checks to see if the value of num has changed, or not.
A smarter program can be faster. :D
I use Turbo C for all these little programs.
.
I make no apologies if it actually is slower, frktons, than the other solutions here. I had no idea this was a speed contest. I never really wrote anything so time sensitive that it required profiling, and I really have zero grasp on profiling in a meaningful way.
Instead of letting Elysia have the last word on the topic: recursion is a handy technique that everyone should know, so if my code is the first time you've seen it and so forth, I'm glad I could be so helpful.
As I sayd before, I really like your sense of humour :-)
We are repeating the same formatting task over the same number
just to test the speed of the function.
Of course if you do nothing inside the cycle it'll be faster :D
If you post something that comply with the target we are working on
you are very welcome. Anyway your code shows me something useful
about timings and printing, something I did'nt faced with yet.
So thanks for your contribution.
I agree. Your code shows some interesting way of coding, and
it is elegant, well formatted and stilish :-)
If you have some idea about improving the performance as well
I'll be glad to have a look at your code. :D
Grrr. Adak can be frustrating at times. Adak's inability to discard Turbo C is a liability for anyone looking at Adak's code.
So don't believe Adak's code is standard compliant. In fact, be very skeptic about it, or if you can, ignore it.
Compile it and sort out its problems first, if you can. If you can't, then demand that Adak fix it. Otherwise it's of no use.
@Elysia:
No need to time it. A function that calls itself generate some stackQuote:
I don't think Whiteflag's code is any faster. It's probably much worse since it uses recursion. I believe it was a demonstration that it could be done in an alternate way.
Of course, to be sure, you really should time it.
overhead. It is not the fastest code around, but the concept is interesting
in itself from an algorithmic point of view. :-)
Stack overhead is just a few clocks, if you don't need to preserve registers.
On x86:
call f
push ebp
mov ebp, esp
...
pop ebp
ret
With a loop, you need to do initialize, increment, check, and conditional jump.
They are about equal. Especially when you take into account things like the % operator, which takes 20-40 cycles.
Careful there. Unless you will be doing that in real usage, it may not be an accurate test.Quote:
We are repeating the same formatting task over the same number
just to test the speed of the function.
Read up on CPU caches. CPU cache - Wikipedia, the free encyclopedia
The effect is, essentially, the first call will be a few hundred times slower than subsequent calls on the same number, if you are not careful (access memory in some pattern).
But in the real world, with more or less random numbers, all your calls will be slow.
Yes, in some occasion this could be the best choice, it
depends on what you are doing.
Yes cyberfish, in real coding nothing like this usually happens.
Tests are always approximative. I'm not using random numbers
because the time to generate the number itself and calling a couple of
functions more would make the test pointless.
When you use the same variable, you use the same memory address,
so if it is cached, even if the number changes, it is already in the cache.
At least this is what I'm assuming here. Data properly aligned and cached.
We have many other factors and variables to consider as well,
because in modern OSes there are some dozen tasks running at the same
time, so approximative tests have to be done more than once
to have an average performance idea. :-)
I think you already know that, so let's carry on the experiment.
Some good ideas have already shown up. What's next?
No. It would actually be a very good idea. Generating numbers is typically many times faster than your formatting routine and as you should know, not all inputs are equal. Therefore, it is best to use random numbers to properly test your optimizations.
Also, compilers typically have more problem optimizing recursion than iteration. But we cannot rule out anything for certain, of course. That is why we should time it before making a conclusion.
Well, this is a good point. Maybe we should test the function
this way and use milliseconds instead of seconds in order to
have more accurate performance reports. Agreed. From 0.6 on
let's implement some random code :-)
Well it's actually 3.30 am here and in a few hours I have to
go to work. See you later. :-)
You can generate the numbers beforehand, and not count it in the time.
If that takes too much memory, you can also do it, say, 100000 at a time, and subtract all the random number generation times.
Performance testing is tricky business. There will always be errors no matter what you do, but errors of a few hundred times when you are tracking a few % difference makes your tests pointless.
The effect of CPU cache is much greater than you seem to think.
A cache-hit is almost free, a L2 miss takes about 50-100 cycles.
For comparison, add, subtract, shift, etc take 1 cycle. Multiply take ~10. Divide takes 20-40.
If you have a few cache misses per function call, the rest doesn't matter.
This is the algorithm I've used to print out "numbers", with commas. It's WAY slower than the earlier program I posted, but it's accurate, portable, relatively easy to understand.
The fastest way to do this, is by using bit fields and bit operators - MUCH faster, but it's not portable, and more difficult to code. The bottleneck is the string handling, modulus operations, and division, of course.
I get 2.5 seconds per million numbers formatted.Code:/*
Commas.c ver. 0.2
Puts commas into numerical print outs.
Adak, July 5, 2010
Status: OK
Timing: 2.5 seconds for each million numbers C2D @2.66GHz Turbo C compiler
Note: There is no overflow error checking. Without ultoa, it is very slow.
*/
#include <stdio.h>
#include <time.h>
#include <string.h>
#define NUMBER 1000000 //number of repetitions
#define SIZE 15 //size of char array s[]
int num2a(long num2, char *s);
int main() {
clock_t start, stop;
char neg;
char s[SIZE] = { "zzzzzzzzzzzzzz" };
double etime; //the elapsed time, as a double
long num = -123, orig_num=0;
unsigned long i, num2;
int len, j, k;
// for testing
// num = -1234;
// num = -12345;
// num = -123456;
// num = 1234567;
// num = -12345678;
// num = 123456789;
num = -1234567890; //1,234,567,890
// num = 2147483648; //exceeds maximum long int value on 16 bit compiler
start=clock();
for(i=0;i<NUMBER;i++) {
//if(num==orig_num)
//continue;
if(num<0) {
neg='-';
num2=num * -1;
}
else {
neg=' ';
num2=num;
}
//put the digits from num2, into char s[] and return the length of the string in s[]
len = num2a(num2, s);
if(i==0) {
printf("\n%c%s\n", neg,s); //check sign and s[]
putchar(neg);
for(j=0;j<len;j++) {
if((len-j) % 3 == 0 && j) //need a comma here?
putchar(',');
putchar(s[j]);
}
}
}
//orig_num = num;
stop = clock();
etime=stop-start;
printf("\nElapsed Time: %f", etime/18.2);
printf("\n\n\t\t\t press enter when ready");
i = getchar(); ++i;
return 0;
}
int num2a(long num2, char s[SIZE]) {
int i, j;
i=SIZE;
while(num2) {
s[--i] = (num2 % 10) + '0';
num2 /= 10;
}
//shift the digits to the low end of s
for(j=0;j+i<SIZE;j++) {
s[j]=s[i+j];
}
s[j]='\0';
return j;
}
Edit: slightly cleaner version.
Try executing num = -num; with -2147483648 and see what the value is.
It's possible that it's faster for this example, afterall there isn't anything else that the cache is needed for in that program, so it's free to hog as much as it wants.Quote:
True, well, maybe. It was a test and it turned out to be faster for this particular number.
But I think I've made a classic mistake. I've only tried for one particular input.
Throw it into a real-world program though and the effects should become apparent.
Here's another option. Someone else can time it relative to other examples.
If that's too slow then the sprintf can be replaced with something that uses the multiplicative inverse to calculate the mod 10 for each char.Code:int num2a(long num2, char s[])
{
unsigned long n = (num2 < 0) : -num2 : num2;
if (num2 <= -1000000000) {
return sprintf("-%u,%03u,%03u,%03u", n/1000000000, (n/1000000)%1000, (n/1000)%1000, n%1000);
} else if (num2 <= -1000000) {
return sprintf("-%u,%03u,%03u", n/1000000, (n/1000)%1000, n%1000);
} else if (num2 <= -1000) {
return sprintf("-%u,%03u", n/1000, n%1000);
} else if (num2 < 0) {
return sprintf("-%u", n);
} else if (num2 < 1000) {
return sprintf("%u", n);
} else if (num2 < 1000000) {
return sprintf("%u,%03u", n/1000, n%1000);
} else if (num2 < 1000000000) {
return sprintf("%u,%03u,%03u", n/1000000, (n/1000)%1000, n%1000);
} else {
return sprintf("%u,%03u,%03u,%03u", n/1000000000, (n/1000000)%1000, (n/1000)%1000, n%1000);
}
}
I can't be bothered reading the whole thread to work out whether people found a valid reason to not simply do it by modifying the locale as originally suggested.
No this is basically a dick waving contest. Although AFAIK the only thing printf is guaranteed to do in response to locale changes is change the decimal point. Still important.
Anyway thank you for the contribution iMalc! This made for a fun profile. When I discovered my IDE had a profiler.
Anyway, in the interest of full disclosure, I unwound the recursion in my method and used another stack instead. It could probably be smaller. I did also profile my recursive version, but did not include it in this test.
I had to make significant changes to Adak's code to get it to compile. It probably isn't fair, but I had to do things like delete C++ comments and make sure his function actually printed output like the rest can.
In an upset, Adak's method is the slowest, with me trailing iMalc by a hair. If frktons wants to read the profile properly, I suggest this page. Profile output is not execution time -- it's something better. The actual execution time was something not pretty, like 800-900 seconds.
Because it may be pertinent, this is a debug build with no optimization from mingw (gcc 3.4.5), on a P4 2.6GHz architecture. I hope this is fair to everyone. I did my utmost.
In order to carry on the experiment of building a new
itoa() or ultoa() or whatever you like to call it, being it
not included in standard libs, and given the features
it has to format the final string with sign, thousand separator
and left or right alignment, I'm going to prepare a testpad
for comparing the various posted routines, and the future to
come, with some definite conditions:
1) 1 million random integer numbers to generate and format
2) timing in milliseconds the proposed functions
3) displaying the results in a format like the following:
........... --------------------------------------
........... | New function for integers formatting |
........... | ----------------------------------- |
........... | testing 1 million random integers |
........... --------------------------------------
proposed by ..............version...........elapsed clock ticks
---------------------------------------------------------------------------------
xxxxxxxxxxxxx ........... xx.xx ........... x.xxx.xxx.xxx
---------------------------------------------------------------------------------
xxxxxxxxxxxxx ........... xx.xx ........... x.xxx.xxx.xxx
---------------------------------------------------------------------------------
and so on.
This will give us an idea of what we get and we can choose
among the proposed algorithms the ones we like the most
for our personal use.
The source should be standard enough to permit cross
compiling and testing on different OSes.
I hope some of you will enjoy the task and partecipate with
your own idea and code. :-)
If you have ultoa, try this out:
I get 111 seconds for 500 million repetitions on:Code:/*
Commas, ver. 0.3
Puts commas into numerical print outs.
Adak, July 5, 2010
Status: OK
Timing: 2.5 seconds for each million numbers C2D @2.66GHz
with ver. .02. With version .03, 0.21 seconds per million
numbers, 111 seconds for 500 million with 0.3
Compiler is Turbo C ver. 1.01
Note: There is no overflow error checking.
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#define NUMBER 500000000 //1000000
#define SIZE 15
int main() {
clock_t start, stop;
char neg;
char s[SIZE] = { "zzzzzzzzzzzzzz" };
double etime; //the elapsed time, as a double
long num = -123;
unsigned long i, num2;
int len, j, k;
// for testing
// num = -1234;
// num = -12345;
// num = -123456;
// num = 1234567;
// num = -12345678;
// num = 123456789;
num = -1234567890; //1,234,567,890
// num = 2147483648; //exceeds maximum long int value on 16 bit compiler
start=clock();
for(i=0;i<NUMBER;i++) {
if(num<0) {
neg='-';
num2=num * -1;
}
else {
neg=' ';
num2=num;
}
//put the digits from num2, into char s[]
ultoa(num2, s, 10);
len=strlen(s);
if(i==0) {
printf("\n%c%s\n", neg,s); //check sign and s[]
putchar(neg);
for(j=0;j<len;j++) {
if((len-j) % 3 == 0 && j) //need a comma here?
putchar(',');
putchar(s[j]);
}
}
}
//orig_num = num;
stop = clock();
etime=stop-start;
printf("\nElapsed Time: %f", etime/18.2);
printf("\n\n\t\t\t press enter when ready");
i = getchar(); ++i;
return 0;
}
CPU: E6700 C2D @2.66GHz
OS: WindowsXP 32 bit
Compiler: Turbo C ver. 1.01.
@frktons: You shouldn't have a left or right alignment, because the printf() function already HAS a built in format specifier just for that.
My version of ultoa() will not format anything, and will not handle negative numbers. Are you confusing it with printf()?
Thanks Adak.Quote:
I get 111 seconds for 500 million repetitions
In order to properly evaluate your code you should add
some info:
- CPU
- OS
- compiler
:-)
Do you even have to ask? Adak uses Turbo C, which you should avoid like the plague.
Well, here's a Microsoft Visual C/C++ 6.0 example, OF itoa(), so it IS standard, and let's see how much better it is than Turbo C.
Frktons, are you sure you don't have ultoa() on your Pelles compiler?Code:
Example
/* ITOA.C: This program converts integers of various
* sizes to strings in various radixes.
*/
#include <stdlib.h>
#include <stdio.h>
void main( void ) //<<--- Note the void main(), Turbo C has int main()!
{
char buffer[20];
int i = 3445;
long l = -344115L; //<<--- And what's this? A suffix!! Just like Turbo C can use!
unsigned long ul = 1234567890UL; //<< Shock! Another suffix!! Oh Noes!!
//Quick! Grab the smelling salts for Elysia!
_itoa( i, buffer, 10 );
printf( "String of integer %d (radix 10): %s\n", i, buffer );
_itoa( i, buffer, 16 );
printf( "String of integer %d (radix 16): 0x%s\n", i, buffer );
_itoa( i, buffer, 2 );
printf( "String of integer %d (radix 2): %s\n", i, buffer );
_ltoa( l, buffer, 16 );
printf( "String of long int %ld (radix 16): 0x%s\n", l,
buffer );
_ultoa( ul, buffer, 16 );
printf( "String of unsigned long %lu (radix 16): 0x%s\n", ul,
buffer );
}
ultoa? non-standard function?
sprintf/snprintf with %lu does not work?
c-faq
Sorry, but itoa() is not part of the C standard library. You are using another compiler's library extension to claim that a library extension of your compiler is standard. Either that, or you mean standard as in "de facto standard".Quote:
Originally Posted by Adak
...You do realize that VC++ 6 isn't much better, right? It's another stoneage compiler which isn't the least bit standards compliant.
Microsoft didn't really care about standards until version 2005.
Plus void main is not standard, even though compilers have extensions to support it. VC++ does, and yes, even GCC.
And as you can see, _itoa and _ultoa are not standard because of the leading _.
Here's a version done on MSVC 6.0:
Oddly, it's a few seconds slower than the same program version, on Turbo C, on the same computer.Code:/*
Commas, ver. 0.3
Puts commas into numerical print outs.
Adak, July 5, 2010
Status: OK
Timing: C2D E6700 cpu @2.66GHz running 32 bit WinXP: 122 seconds for 500 million
Note: There is no overflow error checking.
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#define NUMBER 500000000 //1000000
#define SIZE 15
int main() {
clock_t start, stop;
char neg;
char s[SIZE] = { "zzzzzzzzzzzzzz" };
double etime; //the elapsed time, as a double
long num = -123;
unsigned long i, num2;
int len, j;
// for testing
// num = -1234;
// num = -12345;
// num = -123456;
// num = 1234567;
// num = -12345678;
// num = 123456789;
num = -1234567890; //1,234,567,890
// num = 2147483648; //exceeds maximum long int value on 16 bit compiler
start=clock();
for(i=0;i<NUMBER;i++) {
if(num<0) {
neg='-';
num2=num * -1;
}
else {
neg=' ';
num2=num;
}
//put the digits from num2, into char s[]
ultoa(num2, s, 10);
len=strlen(s);
if(i==0) {
printf("\n%c%s\n", neg,s); //check sign and s[]
putchar(neg);
for(j=0;j<len;j++) {
if((len-j) % 3 == 0 && j) //need a comma here?
putchar(',');
putchar(s[j]);
}
}
}
//orig_num = num;
stop = clock();
etime=(double)(stop-start);
printf("\nElapsed Time: %lf", etime/CLOCKS_PER_SEC);
printf("\n\n\t\t\t press enter when ready");
i=getchar();
return 0;
}
I haven't tried sprintf() yet.
I fail to see why you insist on using outdated compilers.
I also found this:
Warning 1 warning C4996: 'ultoa': The POSIX name for this item is deprecated. Instead, use the ISO C++ conformant name: _ultoa. See online help for details.
(Yes, it says C++ for some reason even when it's compiling C code with a C compiler.)
MSVC 6.0 works with either ultoa or _ultoa. I'm running Windows, not a POSIX OS, as you know - or did you fail to see that, too? :p :p
I'm testing sprintf(), right now, in my program, on MSVC 6.0. It works accurately, but it increased the run time from 122 seconds, to 205 seconds. <Yikes!>
I'll stick with ulta/_ulta. :)
All standard functions are big chunk of code for the simple
reason they handle generic situations, quite a lot indeed, like
printf(). This is one of the reasons they are usually slower than
simpler versions you can manage to code, if you really want to.
If I'm trying to have a faster function, a faster itoa() or ultoa()
I have to bother with coding it myself. I need to do some experimentation.
Pelles'C has indeed a _ultoa() and a _itoa(), named this way
to indicate that they are not ANSI standard C, but extensions.
I can find an hundred version of them. But where is the fun?
What I learn? How do I understand how to code in C if I
don't bother with some reinventing the wheel here and there?
If I had to build a massive 10 million lines application I'd
probably use whatever fits the task.
Here I'm just learning some C syntax, so it is more fun to
reinvent the wheel sometime.
So far no standard function was faster than the crumpled
example I posted. It is the product of a beginner C learner
and it is already faster than usual standard printf(), itoa()
or the like.
So please don't come again with this story of using what is
available, I'm not in a hurry, I can bother spending some weeks
enjoyng coding, trying, learning from your examples and
aiming at a faster function. :-)
It is a game, if you enjoy it, just play with it for a while. ;-)
Yeah, but presumably they both print the result faster that you can read. ;)
Outputs "-1,234,567,153"Code:void insert_commas(char *inp, char *outp) {
int a = (21 - (int)strlen(inp)) % 3, d;
for (; outp[ d = a / 3] = *inp++; a += 4)
outp[d + 1] = ',';
outp[d - 1] = '\0'; }
...
char buffer[100], buffer2[100];
int number = -1234567153;
sprintf(buffer, "%d", number);
insert_commas(buffer, buffer2);
printf("\"%s\"", buffer2);
Good and standard solution.
It is a bit slow due to the use of standard functions
like sprintf().
I elaborated 10 million cycles in 5,262 clock ticks:
after inserting the missing code:Code:-1,234,567,153
Elapsed time = 5,262
Press any key to continue...
Code://-----------------------------------
// comma_sep.c
//-----------------------------------
// version proposed by nonoob
//-----------------------------------
#include <stdio.h>
#include <string.h>
#include <time.h>
void insert_commas(char *inp, char *outp) {
int a = (21 - (int)strlen(inp)) % 3, d;
for (; outp[ d = a / 3] = *inp++; a += 4)
outp[d + 1] = ',';
outp[d - 1] = '\0';
}
int main(void){
int times = 10000000; // iterations to test
int x = 0;
char buffer[100], buffer2[100];
int number = -1234567153; // number to format
clock_t start = 0; // initial time
clock_t stop = 0; // end time
start = clock();
for (x = 0; x < times; x++){
sprintf(buffer, "%d", number);
insert_commas(buffer, buffer2);
}
stop = clock();
int elapsed = (int)(stop-start);
printf("%s\n", buffer2);
sprintf(buffer,"%d",elapsed);
insert_commas(buffer, buffer2);
printf("\n Elapsed time = %s\n",buffer2);
return 0;
}