Overlapping disk-bound and CPU-bound work in the same thread can achieve a speedup without adding any threads. If there is enough CPU-bound work that it could benefit from multithreading, then the CPU work overlapped on the reader thread could simply be breaking the file into lines and handing that work off to the other threads.
Does C++14 cover async/overlapped I/O?
Here's an example using the Win32 API. This is my code for applying MD5 to a file.
Code:
// Computes the MD5 digest of the file at `pathname`, overlapping disk reads
// with hashing on the calling thread.
//
// Two buffers (m_buff1 / m_buff2, each used for reads of m_buffsz bytes) are
// ping-ponged: while the overlapped read into one buffer is in flight, the
// previously filled buffer is fed to m_md5.Update().
//
// pathname - file to hash; opened for shared reading via CreateFileA.
// digest   - receives the 16-byte MD5 result on success.
//
// Returns true when the whole file was hashed, false if opening or reading
// failed (the failure is logged through LogMessage). On failure `digest` is
// not written.
bool MD5File::HashFile(const char *pathname, BYTE digest[16])
{
    OVERLAPPED osReader = {0};
    osReader.hEvent = m_ev;

    void *read_buff = m_buff1,
         *work_buff = m_buff2;
    OVERLAPPED *read_os = &osReader;

    // View the OVERLAPPED offset union as one 64-bit value so the file
    // position can be advanced with a single addition.
    LONGLONG *read_offset =
        &(reinterpret_cast<LARGE_INTEGER*>(&read_os->Pointer)->QuadPart);
    DWORD nLastRead = 0;

    const DWORD flags = FILE_FLAG_NO_BUFFERING |
                        FILE_FLAG_SEQUENTIAL_SCAN |
                        FILE_FLAG_OVERLAPPED;
    HANDLE hFile = CreateFileA(pathname, GENERIC_READ, FILE_SHARE_READ, 0,
                               OPEN_EXISTING, flags, 0);
    scoped_HANDLE shf1(hFile);
    if (hFile == INVALID_HANDLE_VALUE)
    {
        LogMessage(LOG_LVL_ERROR,
                   "Hash: CreateFile failed for [%hs], le = %u",
                   pathname, ::GetLastError());
        return false;
    }//if

    // Prime the pipeline with the first read.
    bool at_eof = false;
    if (!ReadFile(hFile, read_buff, m_buffsz, 0, read_os))
    {
        const DWORD le = ::GetLastError();
        if (le == ERROR_HANDLE_EOF)
        {
            // Empty file: nothing to read, but hashing zero bytes is still a
            // valid result rather than an error.
            at_eof = true;
        }//if
        else if (le != ERROR_IO_PENDING)
        {
            // Log the captured code; do not call GetLastError() a second time.
            LogMessage(LOG_LVL_ERROR,
                       "Hash: ReadFile failed for [%hs], le = %u",
                       pathname, le);
            return false;
        }//else if
    }//if

    m_md5.Init();
    while (!at_eof)
    {
        // The buffer that was being filled becomes the one to hash; the other
        // one is free to receive the next read.
        std::swap(read_buff, work_buff);

        // Block until the outstanding read has finished.
        if (!GetOverlappedResult(hFile, read_os, &nLastRead, TRUE))
        {
            const DWORD le = ::GetLastError();
            if (le != ERROR_HANDLE_EOF)
            {
                LogMessage(LOG_LVL_ERROR,
                           "Hash: GetOverlappedResult failed for [%hs], le = %u",
                           pathname, le);
                return false;
            }//if
            // last ReadFile() issued with read_offset right at EOF
            break;
        }//if

        if (nLastRead < m_buffsz)
        {
            // A short read is taken to be the final chunk of the file. Do not
            // issue another read: with FILE_FLAG_NO_BUFFERING the next offset
            // would generally not be sector-aligned and the request would be
            // rejected, which previously forced every ReadFile error to be
            // treated as end-of-file.
            m_md5.Update(work_buff, nLastRead);
            break;
        }//if

        // Kick off the next read before hashing so disk and CPU overlap.
        *read_offset += nLastRead;
        if (!ReadFile(hFile, read_buff, m_buffsz, 0, read_os))
        {
            const DWORD le = ::GetLastError();
            if (le == ERROR_HANDLE_EOF)
            {
                // File size is an exact multiple of m_buffsz; the chunk in
                // work_buff is the last one.
                m_md5.Update(work_buff, nLastRead);
                break;
            }//if
            if (le != ERROR_IO_PENDING)
            {
                // Genuine I/O failure: report it instead of silently
                // returning the hash of a truncated file.
                LogMessage(LOG_LVL_ERROR,
                           "Hash: ReadFile failed for [%hs], le = %u",
                           pathname, le);
                return false;
            }//if
        }//if

        // CPU work that runs while the read issued above is in flight.
        m_md5.Update(work_buff, nLastRead);
    }//while

    m_md5.Final(digest);
    return true;
}//HashFile
m_buff1 and m_buff2 are 128 KB each. The call to m_md5.Update() is the CPU work being done while the disk I/O completes in the "background".
gg