#include <stdlib.h>
#include <sys/time.h>
#include <stdio.h>

/*
 * Buffer the benchmark loops read from: 4 groups of 64 ints.
 * It has static storage duration, so every element starts out as zero.
 */
int buf[64*4];

/*
 * Number of iterations of each timed loop.
 * Parenthesized so the product stays a single operand when the macro is
 * used inside a larger expression (e.g. `x / LOOPS`).
 */
#define LOOPS (10*1000*1000)

/*
 * Unrolled int-sized reads: READ_INTn(i) adds buf[i] .. buf[i+n-1] to `a`.
 *
 * The macros refer to `a` (the accumulator) and `buf` by name, so both must
 * be in scope wherever they are expanded.
 *
 * The multi-read variants are wrapped in do { } while (0) so that each one
 * expands to exactly one statement (safe under an unbraced if/else/for), and
 * the index argument is parenthesized so any expression can be passed.
 */
#define READ_INT(i)   (a += buf[(i)])
#define READ_INT4(i)  do { READ_INT(i); READ_INT((i)+1); READ_INT((i)+2); READ_INT((i)+3); } while (0)
#define READ_INT16(i) do { READ_INT4(i); READ_INT4((i)+4); READ_INT4((i)+8); READ_INT4((i)+12); } while (0)
#define READ_INT64(i) do { READ_INT16(i); READ_INT16((i)+16); READ_INT16((i)+32); READ_INT16((i)+48); } while (0)

/*
 * Unrolled byte-sized reads: READ_CHARn(i) adds the first byte (lowest
 * address) of each of buf[i] .. buf[i+n-1] to `a`, reading it as a `char`.
 *
 * The macros refer to `a` (the accumulator) and `buf` by name, so both must
 * be in scope wherever they are expanded.
 *
 * The multi-read variants are wrapped in do { } while (0) so that each one
 * expands to exactly one statement (safe under an unbraced if/else/for), and
 * the index argument is parenthesized so any expression can be passed.
 */
#define READ_CHAR(i)   (a += *(const char *)&buf[(i)])
#define READ_CHAR4(i)  do { READ_CHAR(i); READ_CHAR((i)+1); READ_CHAR((i)+2); READ_CHAR((i)+3); } while (0)
#define READ_CHAR16(i) do { READ_CHAR4(i); READ_CHAR4((i)+4); READ_CHAR4((i)+8); READ_CHAR4((i)+12); } while (0)
#define READ_CHAR64(i) do { READ_CHAR16(i); READ_CHAR16((i)+16); READ_CHAR16((i)+32); READ_CHAR16((i)+48); } while (0)

/*
 * Microseconds elapsed between two gettimeofday() samples.
 *
 * The seconds and microseconds fields are subtracted separately in
 * long long before scaling, so the absolute time is never multiplied by
 * 1000000 in the (possibly 32-bit) type of tv_sec.  The result is reduced
 * to unsigned for printing with %u.
 */
static unsigned elapsed_usec(const struct timeval *from, const struct timeval *to)
{
	long long sec = (long long)to->tv_sec - (long long)from->tv_sec;
	long long usec = (long long)to->tv_usec - (long long)from->tv_usec;

	return (unsigned)(sec * 1000000LL + usec);
}

/*
 * Times LOOPS iterations of 256 unrolled reads from buf, first as
 * byte-sized reads (READ_CHAR64) and then as int-sized reads (READ_INT64),
 * and prints the wall-clock duration of each loop in microseconds.
 */
int main(void)
{
	struct timeval t0, t1;
	int i;
	char a = 0;	/* accumulator; the READ_* macros refer to it by name */

	gettimeofday(&t0, NULL);
	for (i = 0; i < LOOPS; i++) {
		/*
		 * Compiler-only barrier: the "memory" clobber makes the
		 * compiler assume buf may have changed, so the reads below
		 * are redone on every iteration.
		 */
		__asm__ __volatile__ ("" ::: "memory");
		READ_CHAR64(0);
		READ_CHAR64(0+64);
		READ_CHAR64(0+64*2);
		READ_CHAR64(0+64*3);
	}
	gettimeofday(&t1, NULL);
	printf("char reads: %u us\n", elapsed_usec(&t0, &t1));

	/* Take a fresh start sample so the printf above is not timed. */
	gettimeofday(&t0, NULL);
	for (i = 0; i < LOOPS; i++) {
		__asm__ __volatile__ ("" ::: "memory");
		READ_INT64(0);
		READ_INT64(0+64);
		READ_INT64(0+64*2);
		READ_INT64(0+64*3);
	}
	gettimeofday(&t1, NULL);
	printf("int reads: %u us\n", elapsed_usec(&t0, &t1));

	/*
	 * Use the accumulator in the return value so the reads have an
	 * observable result; the mask discards its low eight bits.
	 */
	return (a & 0xffffff00);
}
