The problem interested me, so here is what I came up with:
Code:
// Number of worker threads created to drain the completion port; also passed
// to CreateIoCompletionPort as the port's concurrency value.
// One or two for each processor...
const UINT NUM_WORKER_THREADS = 8;
// Number of BLOCK_SIZE-element slots in the shared input buffer.
// One and a half times the number of worker threads
// so that blocks are always queued...
const UINT NUM_BLOCKS = 12;
// Shared input buffer. ReadThread fills slot i starting at element
// BLOCK_SIZE * i with up to BLOCK_SIZE * sizeof(PIXEL) bytes read from file.
PIXEL g_PixelArray[BLOCK_SIZE * NUM_BLOCKS];
// Per-slot "in use" flag: set to TRUE by ReadThread when it claims a slot and
// reset to FALSE by WorkerThread once the slot has been processed.
// NOTE(review): volatile by itself is not a portable inter-thread
// synchronisation mechanism - confirm the target compiler's volatile
// semantics or switch to Interlocked* operations.
volatile BOOL g_BlockStates[NUM_BLOCKS];
// Auto-reset event signalled by a worker each time it finishes a block;
// ReadThread waits on it when no slot was free during a scan.
HANDLE g_hWorkItemFinished;
// NOTE(review): g_hCompletionPort, g_hInputFile, BLOCK_SIZE and PIXEL are used
// by the code below but are not declared in this excerpt - presumably they are
// defined elsewhere; verify.
void SetupOperation(void)
{
// Create completion port...
g_hCompletionPort = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, NUM_WORKER_THREADS);
// Create an event to be signalled when a work item is complete...
g_hWorkItemFinished = CreateEvent(NULL, FALSE, FALSE, NULL);
// Create worker threads...
for (int i = 0;i < NUM_WORKER_THREADS;i++)
{
HANDLE hThread = CreateThread(NULL, 0, WorkerThread, NULL, 0, NULL);
CloseHandle(hThread);
}
// Create IO thread...
HANDLE hThread = CreateThread(NULL, 0, ReadThread, NULL, 0, NULL);
CloseHandle(hThread);
}
// Reader thread: repeatedly scans the slot table for a free slot, fills it
// from g_hInputFile and posts it to the completion port as a work item
// (bytes transferred = number of bytes read, completion key = slot index).
//
// The thread ends when
//   - ReadFile fails or returns no data (nothing is posted, slot is released),
//   - the work item cannot be posted (slot is released), or
//   - a short read indicates the end of the input (the partial block is still
//     posted before the thread ends).
//
// lpParam is unused. Always returns 0.
static DWORD CALLBACK ReadThread(LPVOID lpParam)
{
    DWORD dwRead;

    (void)lpParam;

    while (TRUE)
    {
        BOOL bReadFromFile = FALSE;

        for (int i = 0; i < (int)NUM_BLOCKS; i++)
        {
            // Look for an available block in the input buffer...
            if (g_BlockStates[i] != FALSE)
            {
                continue;
            }

            // We found an unused block, mark it as being used...
            g_BlockStates[i] = TRUE;

            // Read from file into our selected block. A failed or empty read
            // carries no data, so do not hand a zero-length item to a worker.
            dwRead = 0;
            if (!ReadFile(g_hInputFile, &g_PixelArray[BLOCK_SIZE * i], BLOCK_SIZE * sizeof(PIXEL), &dwRead, NULL)
                || dwRead == 0)
            {
                g_BlockStates[i] = FALSE;
                return 0;
            }

            // Record that we have read from file in this iteration...
            bReadFromFile = TRUE;

            // Issue work item to thread pool with PostQueuedCompletionStatus.
            // Tell it which block to process and the size...
            if (!PostQueuedCompletionStatus(g_hCompletionPort, dwRead, i, NULL))
            {
                // No worker will ever see this block, so release it here;
                // otherwise the slot would stay marked as busy forever.
                g_BlockStates[i] = FALSE;
                return 0;
            }

            // Check if we are out of input data...
            if (dwRead < BLOCK_SIZE * sizeof(PIXEL))
            {
                return 0;
            }
        }

        if (!bReadFromFile)
        {
            // Only take the time to Wait if we haven't been in a timely ReadFile() call.
            // If we have, there is probably already a block free.
            // Wait for a work item to complete before we check for free input blocks again.
            // The event is auto-reset and stays signalled until a wait consumes it,
            // so a completion that happened during the scan is not lost.
            WaitForSingleObject(g_hWorkItemFinished, INFINITE);
        }
    }
}
// Worker thread: dequeues work items posted by ReadThread, processes the
// corresponding slot of g_PixelArray and then marks the slot free again.
//
// For each item, the completion key is the slot index and the transferred
// byte count is the number of bytes ReadThread read into that slot.
// NOTE(review): dwSize is passed to YourFunc_ProcessPixels as a byte count,
// exactly as posted - confirm that function expects bytes rather than pixels.
//
// The thread ends (returning 0) when GetQueuedCompletionStatus fails, e.g.
// because the completion port handle is invalid or has been closed.
// lpParam is unused.
static DWORD CALLBACK WorkerThread(LPVOID lpParam)
{
    DWORD dwSize = 0;
    ULONG_PTR dwBlock = 0;
    LPOVERLAPPED lpOverlapped = NULL;

    (void)lpParam;

    while (TRUE)
    {
        // Get a work item posted from the IO thread. On failure the output
        // values are not meaningful, so they must not be used.
        if (!GetQueuedCompletionStatus(g_hCompletionPort, &dwSize, &dwBlock, &lpOverlapped, INFINITE))
        {
            break;
        }

        // Process the block of pixels we have been allocated. Slots are
        // BLOCK_SIZE elements apart, matching how ReadThread fills them.
        YourFunc_ProcessPixels(&g_PixelArray[BLOCK_SIZE * dwBlock], dwSize);

        // Mark the block as being free again...
        g_BlockStates[dwBlock] = FALSE;

        // Signal the reader thread that a work item is finished so it
        // can reuse the block...
        SetEvent(g_hWorkItemFinished);
    }

    return 0;
}
No error checking, no guarantees...