99 ways to program a hex, Part 24: more lookup tables

So we went from a character encoding specific version [1] to a character encoding agnostic version [2] to today's version—another character encoding specific version (ASCII (American Standard Code for Information Interchange) [3] to be exact). But today's version also eliminates a branch point in the code, using a 256-element string to pick which character to display as part of the hexadecimal dump.

/*************************************************************************
*************************************************************************/
/* Style: C89, const correctness, assertive, system calls, full buffering */
/* lookup tables */
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
/* Maximum number of input bytes rendered on one line of the dump. */
#define LINESIZE 16
/********************************************************************/
/*
 * Error message table and its entry count, declared by hand here and
 * consulted by myperror().  NOTE(review): these are expected to be
 * supplied by the C library at link time -- confirm on the target system.
 */
extern const char *sys_errlist[];
extern int sys_nerr;
/* Dump everything readable from the first descriptor to the second. */
static void do_dump (const int,const int);
/* Format one dump line into the output buffer; returns bytes consumed. */
static size_t dump_line (char **const,unsigned char *,size_t,const unsigned long);
/* Write a fixed-width uppercase hex number followed by a pad character. */
static void hexout (char *const,unsigned long,size_t,const int);
/* Report the current errno on standard error, prefixed by the given text. */
static void myperror (const char *const);
/* read() repeatedly until the buffer is full or end of input. */
static size_t myread (const int,char *,size_t);
/* write() the given bytes; terminates the program on failure. */
static void mywrite (const int,const char *const,const size_t);
/********************************************************************/
/*
 * Program entry point.
 *
 * With no command line arguments, standard input is dumped to standard
 * output.  Otherwise each argument is opened read-only and dumped in
 * turn, preceded by a "-----<name>-----" banner line.  A file that cannot
 * be opened (or closed) is reported via myperror() and processing carries
 * on with the next argument.
 *
 * Returns EXIT_SUCCESS whenever it returns at all; read and write
 * failures terminate the process inside myread() and mywrite().
 */
int main(const int argc,const char *const argv[])
{
  int idx;

  /* No file names given: act as a filter on stdin. */
  if (argc == 1)
  {
    do_dump(STDIN_FILENO,STDOUT_FILENO);
    return EXIT_SUCCESS;
  }

  for (idx = 1 ; idx < argc ; idx++)
  {
    const char *const name = argv[idx];
    const int         fd   = open(name,O_RDONLY);

    if (fd == -1)
    {
      myperror(name);
      continue;
    }

    /* Banner identifying which file the following dump belongs to. */
    mywrite(STDOUT_FILENO,"-----",5);
    mywrite(STDOUT_FILENO,name,strlen(name));
    mywrite(STDOUT_FILENO,"-----\n",6);

    do_dump(fd,STDOUT_FILENO);

    if (close(fd) < 0)
      myperror(name);
  }

  return EXIT_SUCCESS;
}
/************************************************************************/
/*
 * Read everything available from fhin and write its hex dump to fhout.
 *
 * Input is pulled in 4096-byte chunks with myread().  Formatted lines are
 * accumulated by dump_line() in an output buffer that is pre-filled with
 * spaces (dump_line() only stores the non-blank columns).  The buffer has
 * room for 109 lines of up to 75 bytes each; it is written out and reset
 * every time 109 lines have been produced, and once more at the end if
 * anything is still pending.
 */
static void do_dump(const int fhin,const int fhout)
{
  enum { OUT_LINE_MAX = 75 , OUT_LINES = 109 };

  unsigned char  inbuf[4096];
  char           outbuffer[OUT_LINE_MAX * OUT_LINES];
  char          *cursor = outbuffer;  /* next free byte in outbuffer    */
  unsigned long  offset = 0;          /* file offset of the next line   */
  size_t         nlines = 0;          /* lines currently held in buffer */
  size_t         remain;              /* unread bytes left in inbuf     */

  assert(fhin  >= 0);
  assert(fhout >= 0);

  memset(outbuffer,' ',sizeof(outbuffer));

  for (;;)
  {
    unsigned char *src = inbuf;

    remain = myread(fhin,(char *)inbuf,sizeof(inbuf));
    if (remain == 0)
      break;

    while (remain > 0)
    {
      const size_t used = dump_line(&cursor,src,remain,offset);

      src    += used;
      remain -= used;
      offset += used;
      nlines++;

      if (nlines == OUT_LINES)
      {
        /* Buffer is full: flush it and start again from a blank slate. */
        mywrite(fhout,outbuffer,(size_t)(cursor - outbuffer));
        memset(outbuffer,' ',sizeof(outbuffer));
        nlines = 0;
        cursor = outbuffer;
      }
    }
  }

  /* Flush whatever lines remain from the last, partially filled buffer. */
  if (cursor != outbuffer)
    mywrite(fhout,outbuffer,(size_t)(cursor - outbuffer));
}
/********************************************************************/
/*
 * Format one line of the dump at *pline and advance *pline past it.
 *
 *   pline  in/out: where the line is written; on return it points just
 *          past the '\n' that ends the line.  The destination must
 *          already be filled with spaces (do_dump() does this), since
 *          only the non-blank columns are stored here.
 *   p      the input bytes to render.
 *   bytes  number of bytes available at p (> 0); at most LINESIZE of
 *          them are used.
 *   off    file offset printed at the start of the line.
 *
 * Line layout: columns 0-7 hold the offset as 8 hex digits, column 8 is
 * ':', the hex bytes start at column 10 (three columns per byte), the
 * character column starts at column 58, and '\n' follows the last
 * character.
 *
 * Returns the number of input bytes consumed.
 */
static size_t dump_line(
  char **const pline,
  unsigned char *p,
  size_t bytes,
  const unsigned long off
)
{
  /*
   * Byte value -> display character, ASCII specific.  Printable
   * characters (0x20 through 0x7E) map to themselves and everything else
   * maps to '.', which removes the per-byte "is it printable?" branch.
   * One row per 16 byte values.
   */
  static const char display[] =
    "................"      /* 0x00 */
    "................"      /* 0x10 */
    " !\"#$%&'()*+,-./"    /* 0x20 */
    "0123456789:;<=>?"      /* 0x30 */
    "@ABCDEFGHIJKLMNO"      /* 0x40 */
    "PQRSTUVWXYZ[\\]^_"    /* 0x50 */
    "`abcdefghijklmno"      /* 0x60 */
    "pqrstuvwxyz{|}~."      /* 0x70 */
    "................"      /* 0x80 */
    "................"      /* 0x90 */
    "................"      /* 0xA0 */
    "................"      /* 0xB0 */
    "................"      /* 0xC0 */
    "................"      /* 0xD0 */
    "................"      /* 0xE0 */
    "................";     /* 0xF0 */

  char   *line;
  char   *dh;     /* cursor in the hex column       */
  char   *da;     /* cursor in the character column */
  size_t  count;

  assert(pline  != NULL);
  assert(*pline != NULL);
  assert(p      != NULL);
  assert(bytes  >  0);
  assert(sizeof(display) == 256 + 1);  /* 256 entries plus the NUL */

  line = *pline;
  hexout(line,off,8,':');

  if (bytes > LINESIZE)
    bytes = LINESIZE;

  /* Start one past the last byte of each column and fill backwards. */
  p  += bytes;
  dh  = &line[10 + bytes * 3];
  da  = &line[58 + bytes];

  for (count = 0 ; count < bytes ; count++)
  {
    p--;
    da--;
    dh -= 3;
    *da = display[*p];
    hexout(dh,(unsigned long)*p,2,' ');
  }

  line[58 + count] = '\n';
  *pline           = &line[59 + count];
  return count;
}
/**********************************************************************/
/*
 * Store the low `size` hexadecimal digits of `value` in dest[0..size-1]
 * (uppercase, most significant digit first, zero filled) and the
 * `padding` character in dest[size].  No NUL terminator is written, so
 * dest must have room for size + 1 characters.
 *
 * `padding` must be a printable ASCII character (' ' through '~').
 */
static void hexout(char *const dest,unsigned long value,size_t size,const int padding)
{
  static const char digits[] = "0123456789ABCDEF";
  char *cursor;

  assert(dest != NULL);
  assert(size > 0);
  assert((padding >= ' ') && (padding <= '~'));

  /* Begin at the pad position and walk back toward dest[0]. */
  cursor  = dest + size;
  *cursor = padding;

  while (cursor != dest)
  {
    *--cursor = digits[value & 0x0f];
    value >>= 4;
  }
}
/************************************************************************/
/*
 * Write "<s>: <error text>\n" to standard error, where the error text
 * describes the value errno had on entry.  "(unknown)" is printed when
 * errno does not select a usable entry of sys_errlist[].
 *
 *   s  text identifying what failed (a file name or an operation);
 *      must not be NULL, may be empty.
 */
static void myperror(const char *const s)
{
  const int err = errno;  /* capture first; the writes below may change it */
  size_t    len;

  assert(s != NULL);

  /* mywrite() asserts a non-zero length, so skip an empty prefix. */
  len = strlen(s);
  if (len > 0)
    mywrite(STDERR_FILENO,s,len);
  mywrite(STDERR_FILENO,": ",2);

  /*
   * Valid indices are 0 .. sys_nerr - 1.  Also reject entries that are
   * NULL or empty, which could be neither measured nor written.
   */
  if (
          (err < 0)
       || (err >= sys_nerr)
       || (sys_errlist[err] == NULL)
       || (sys_errlist[err][0] == '\0')
     )
    mywrite(STDERR_FILENO,"(unknown)",9);
  else
    mywrite(STDERR_FILENO,sys_errlist[err],strlen(sys_errlist[err]));

  mywrite(STDERR_FILENO,"\n",1);
}
/************************************************************************/
/*
 * Fill buf with up to `size` bytes from descriptor fh, calling read() as
 * many times as it takes.  Stops early only when read() reports end of
 * input (returns 0).
 *
 * Returns the number of bytes stored in buf, which is less than `size`
 * only if end of input was reached.  A read() error is reported with
 * myperror() and terminates the process with EXIT_FAILURE.
 */
static size_t myread(const int fh,char *buf,size_t size)
{
  char *start;  /* first byte of the caller's buffer   */
  char *limit;  /* one past the last byte of the buffer */

  assert(fh >= 0);
  assert(buf != NULL);
  assert(size > 0);

  start = buf;
  limit = buf + size;

  while (buf != limit)
  {
    const ssize_t got = read(fh,buf,(size_t)(limit - buf));

    if (got < 0)
    {
      myperror("read()");
      exit(EXIT_FAILURE);
    }

    if (got == 0)
      break;

    buf += got;
  }

  return (size_t)(buf - start);
}
/*********************************************************************/
/*
 * Write all `size` bytes of msg to descriptor fh.
 *
 * write() is allowed to accept fewer bytes than requested (for example
 * on a pipe or a terminal) without that being an error, so keep writing
 * the remainder until everything has gone out.
 *
 * On failure the error is reported with myperror() -- unless the failing
 * descriptor is standard error itself, where reporting would only fail
 * again -- and the process terminates with EXIT_FAILURE.
 *
 *   fh    descriptor to write to (>= 0)
 *   msg   bytes to write (not NULL)
 *   size  number of bytes to write (> 0)
 */
static void mywrite(const int fh,const char *const msg,const size_t size)
{
  const char *cursor;  /* next byte still to be written */
  size_t      left;    /* bytes still to be written     */

  assert(fh >= 0);
  assert(msg != NULL);
  assert(size > 0);

  cursor = msg;
  left   = size;

  while (left > 0)
  {
    const ssize_t done = write(fh,cursor,left);

    /*
     * A negative result is an error.  Zero is treated the same way so
     * that a descriptor making no progress cannot spin this loop forever.
     */
    if (done <= 0)
    {
      if (fh != STDERR_FILENO)
        myperror("output");
      exit(EXIT_FAILURE);
    }

    cursor += done;
    left   -= (size_t)done;
  }
}
/***********************************************************************/

And it is faster:

[spc]lucy:~/projects/99/src>time ./23 ~/bin/firefox/libxul.so >/dev/null
real 0m0.258s
user 0m0.247s
sys 0m0.011s
[spc]lucy:~/projects/99/src>time ./24 ~/bin/firefox/libxul.so >/dev/null
real 0m0.186s
user 0m0.178s
sys 0m0.008s

Only about 1.4 times faster, but it is faster.

The conversion string is fixed, but that doesn't preclude a port to, say, an EBCDIC (Extended Binary Coded Decimal Interchange Code) [4] system from using a different one, or the string being constructed at run time. The runtime generation would be more portable, but to me, that's wasted time spent generating a string that will always be the same (and frankly, if we're using this hack for speed, that's just wasted time).

Perhaps better might be several such strings—ASCII (American Standard Code for Information Interchange), EBCDIC (Extended Binary Coded Decimal Interchange Code), Baudot [5], PETSCII (Personal Electronic Transactor Standard Code of Information Interchange) [6]—with a command line option to select which one to use (defaulting to whatever character set is native for the platform the program is running on). It could be a useful thing.

But such a modification I'm leaving as an exercise for the reader.

Now, is this the fastest version possible? I'm not going to say yes this time. There might be something else that could be done to wring that last bit of performance out of this code, but at this point, I am definitely done with wringing out the speed.

I think.

=> [1] /boston/2012/01/30.1 | [2] /boston/2012/01/31.2 | [3] http://en.wikipedia.org/wiki/ASCII | [4] http://en.wikipedia.org/wiki/EBCDIC | [5] http://en.wikipedia.org/wiki/Baudot_code | [6] http://en.wikipedia.org/wiki/PETSCII | [7] /boston/2012/01/31.2 | [8] /boston/2012/02/02.1

=> Gemini Mention this post | Contact the author

Proxy Information
Original URL
gemini://gemini.conman.org/boston/2012/02/01.3
Status Code
Success (20)
Meta
text/gemini
Capsule Response Time
567.484402 milliseconds
Gemini-to-HTML Time
6.881521 milliseconds

This content has been proxied by September (3851b).