The problem interested me, so here is a sketch of what I came up with.
Note that it does no error checking and comes with no guarantees...
// Number of worker threads pulling work items off the completion port.
// Rule of thumb: one or two per processor.
const UINT NUM_WORKER_THREADS = 8;

// Number of blocks in the shared input buffer: one and a half times the
// number of worker threads, so that blocks are always queued while the
// workers are busy. Derived from NUM_WORKER_THREADS so the ratio cannot
// drift from the comment (8 workers -> 12 blocks).
const UINT NUM_BLOCKS = NUM_WORKER_THREADS + NUM_WORKER_THREADS / 2;

// Input buffer, split into NUM_BLOCKS consecutive blocks of BLOCK_SIZE pixels.
PIXEL g_PixelArray[BLOCK_SIZE * NUM_BLOCKS];

// Per-block "in use" flag: TRUE while a block is being filled or processed,
// FALSE when it is free for the reader to claim.
// NOTE(review): volatile is not a synchronisation primitive in standard
// C/C++; this relies on compiler/platform behaviour - confirm for the target.
volatile BOOL g_BlockStates[NUM_BLOCKS];
// Create the completion port used as the work queue; at most
// NUM_WORKER_THREADS threads are allowed to run concurrently on it.
g_hCompletionPort = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, NUM_WORKER_THREADS);

// Create an auto-reset, initially non-signalled event to be signalled when a
// work item is complete.
g_hWorkItemFinished = CreateEvent(NULL, FALSE, FALSE, NULL);

// NOTE(review): neither handle above is checked for NULL; what to do on
// failure depends on the enclosing function, which is not visible here.

// Create the worker threads.
for (UINT i = 0; i < NUM_WORKER_THREADS; i++)
{
    HANDLE hWorker = CreateThread(NULL, 0, WorkerThread, NULL, 0, NULL);

    // The worker handle is never used again, so release it immediately.
    // Closing a thread handle does not terminate the thread; it only stops
    // the handle from leaking.
    if (hWorker != NULL)
    {
        CloseHandle(hWorker);
    }
}

// Create the IO thread. Its handle is kept in hThread for the code that follows.
HANDLE hThread = CreateThread(NULL, 0, ReadThread, NULL, 0, NULL);
// IO thread: fills free blocks of g_PixelArray from g_hInputFile and posts one
// work item per filled block to g_hCompletionPort.
//
// Each work item carries the number of bytes read (as the byte count) and the
// block index (as the completion key).
//
// lpParam is unused.
// Returns 0 once the input is exhausted, or the Win32 error code if ReadFile
// or PostQueuedCompletionStatus fails.
static DWORD CALLBACK ReadThread(LPVOID lpParam)
{
    (void)lpParam;

    const DWORD cbBlock = (DWORD)(BLOCK_SIZE * sizeof(PIXEL));

    for (;;)
    {
        // Records whether this pass over the blocks performed a read.
        BOOL bReadFromFile = FALSE;

        for (UINT i = 0; i < NUM_BLOCKS; i++)
        {
            // Look for an available block in the input buffer...
            if (g_BlockStates[i] != FALSE)
            {
                continue;
            }

            // We found an unused block, mark it as being used...
            g_BlockStates[i] = TRUE;

            // Read from file into our selected block.
            DWORD dwRead = 0;
            if (!ReadFile(g_hInputFile, &g_PixelArray[BLOCK_SIZE * i], cbBlock, &dwRead, NULL))
            {
                DWORD dwError = GetLastError();
                g_BlockStates[i] = FALSE;   // nothing was queued for this block
                return dwError;
            }

            // A successful zero-byte read means end of input; do not queue
            // an empty work item.
            if (dwRead == 0)
            {
                g_BlockStates[i] = FALSE;
                return 0;
            }

            // Record that we have read from file in this pass...
            bReadFromFile = TRUE;

            // Issue a work item to the thread pool, telling it which block
            // to process and how many bytes of it are valid.
            if (!PostQueuedCompletionStatus(g_hCompletionPort, dwRead, (ULONG_PTR)i, NULL))
            {
                DWORD dwError = GetLastError();
                g_BlockStates[i] = FALSE;   // the block was never handed out
                return dwError;
            }

            // A short read means we are out of input data.
            if (dwRead < cbBlock)
            {
                return 0;
            }
        }

        // Only take the time to wait if this pass did not spend time in
        // ReadFile(); if it did, there is probably already a block free.
        // The wait is bounded, so this loop behaves like a short sleep and
        // keeps making progress even if the event is never signalled.
        if (!bReadFromFile)
        {
            WaitForSingleObject(g_hWorkItemFinished, 1);
        }
    }
}
// Worker thread: repeatedly takes a work item from g_hCompletionPort,
// processes the block it names, then marks the block free and signals
// g_hWorkItemFinished.
//
// The dequeued byte count is the number of valid bytes in the block and the
// completion key is the block index.
//
// lpParam is unused.
// Returns the Win32 error code when GetQueuedCompletionStatus fails (for
// example because the port was closed); otherwise it does not return.
static DWORD CALLBACK WorkerThread(LPVOID lpParam)
{
    (void)lpParam;

    for (;;)
    {
        DWORD dwSize = 0;
        ULONG_PTR ulBlock = 0;          // the completion key is pointer-sized
        LPOVERLAPPED lpOverlapped = NULL;

        // Get a work item posted from the IO thread...
        if (!GetQueuedCompletionStatus(g_hCompletionPort, &dwSize, &ulBlock, &lpOverlapped, INFINITE))
        {
            return GetLastError();
        }

        // Ignore keys that do not name a block, rather than indexing out of
        // bounds.
        if (ulBlock >= NUM_BLOCKS)
        {
            continue;
        }

        // Process the block of pixels we have been allocated: dwSize bytes
        // starting at &g_PixelArray[BLOCK_SIZE * ulBlock].
        (void)dwSize;   // consumed by the processing step, which is not shown

        // Mark the block as being free again...
        g_BlockStates[ulBlock] = FALSE;

        // Signal that a work item is finished so the block can be reused.
        SetEvent(g_hWorkItemFinished);
    }
}