This is based on the previous implementation that can be found here:
Win32 Thread Object Model Revisited

Enhancements:
- Added support for CE platforms
- Added support for compilation under the old Platform SDK (that ships with VC++ 6.0)
- Added support for threads under MFC
- Spelling corrections

Code:
//-----------------------------------------------------------------------------
#ifndef CP_THREAD_H
#define CP_THREAD_H

#include <stddef.h> // size_t
#include <windows.h>

// assert() support. Windows CE builds get a do-nothing assert() macro (unless
// one has already been defined) because that platform doesn't have assert.h;
// every other build includes assert.h.
#if defined(_WIN32_WCE) // doesn't have assert.h
#   ifndef assert
#       define assert(exp) ((void)0)
#   endif
#else
#   include <assert.h>
#endif

// If building with the SDK that ships with VC++ 6.0 then OLD_PLATFORM_SDK
// should be defined (because it is....very old). In that case, and when
// _WIN32_WCE is defined with a value below 400, InterlockedExchangePointer()
// is supplied here as a macro layered on InterlockedExchange().
// NOTE: the macro casts both the target address and the value through LONG,
//       so it is only correct where a pointer has the same size as a LONG.
#if defined(OLD_PLATFORM_SDK) || (defined(_WIN32_WCE) && (_WIN32_WCE < 400))
#   define InterlockedExchangePointer(Target, Value) \
                (PVOID)InterlockedExchange((PLONG)(Target), (LONG)(Value))
#endif

namespace CP_Thread
{
    
// Forward decl.
class ThreadBase; 

//-----------------------------------------------------------------------------
// Compile time assertion utility. The primary template is declared but never
// defined; only the <true> case is given a body. Creating an object of
// compile_assert<false> therefore fails to compile (incomplete type).
template <bool condition> struct compile_assert;

template <>
struct compile_assert<true>
{
};

//-----------------------------------------------------------------------------
// ThreadFunc_t - Thread function type used by the ThreadBase class. All
//                thread functions return DWORD and take a ThreadBase*
//                parameter. No calling convention is specified in the
//                typedef, so the compiler's default convention applies.
typedef DWORD (*ThreadFunc_t)(ThreadBase*);

//-----------------------------------------------------------------------------
// Base class for functor object which invokes the user [member] function.
// ThreadBase holds its functor as a ThreadFunctor* and deletes it through
// that pointer, so the destructor has to be virtual for the derived functor
// to be destroyed correctly.
struct ThreadFunctor
{
    virtual ~ThreadFunctor() {}

    // Invoke the user's thread function.
    //   t - the thread object passed on to the user's function
    // Returns the DWORD produced by the user's function.
    virtual DWORD Invoke(ThreadBase *t) = 0;
};//ThreadFunctor

//-----------------------------------------------------------------------------
// Functor that forwards Invoke() to a free (non-member) thread function
struct NonMemFunctor : public ThreadFunctor
{
    ThreadFunc_t m_tfp; // function handed to the constructor

    NonMemFunctor(ThreadFunc_t tfp)
        : m_tfp(tfp)
    {
    }

    // Calls the stored function with t and hands back whatever it returns
    virtual DWORD Invoke(ThreadBase *t)
    {
        const DWORD result = m_tfp(t);
        return result;
    }
};//NonMemFunctor

//-----------------------------------------------------------------------------
// Functor that forwards Invoke() to a member function of an Obj_t instance.
// Only the object's address is stored; this struct never deletes it.
template <typename Obj_t>
struct MemFunctor : public ThreadFunctor
{
    // Signature required of the member function
    typedef DWORD (Obj_t::*MemberFunc_t)(ThreadBase*);

    MemberFunc_t m_tfp; // member function to call
    Obj_t *m_obj;       // instance the member function is called on

    MemFunctor(Obj_t *obj, MemberFunc_t tfp )
        : m_tfp(tfp),
          m_obj(obj)
    {
    }

    // Calls the stored member function on the stored object, passing t
    virtual DWORD Invoke(ThreadBase *t)
    {
        Obj_t &target = *m_obj;
        return (target.*m_tfp)(t);
    }
};//MemFunctor

//-----------------------------------------------------------------------------
// ThreadBase - Provides interface for thread objects
class ThreadBase
{
protected:
    //-------------------------------------------------------------------------
    // Virtual method for actually creating the thread. This is an abstract 
    // interface in order to accommodate both the CRT and Win32 API's for
    // creating threads.
    virtual HANDLE create_thread(DWORD flags = 0, LPSECURITY_ATTRIBUTES psa = 0, 
                                 DWORD stack_sz = 0) = 0;

    //-------------------------------------------------------------------------
    // Thread entry point for Win32 threads
    static DWORD WINAPI Win32ThreadProc(LPVOID pv)
    {
        ThreadBase *t = reinterpret_cast<ThreadBase*>(pv);
        return t->m_functor->Invoke(t);
    }//Win32ThreadProc

    //-------------------------------------------------------------------------
    // Thread entry point for CRT threads
    static unsigned __stdcall CrtThreadProc(void *pv)
    {
        ThreadBase *t = reinterpret_cast<ThreadBase*>(pv);
        return t->m_functor->Invoke(t);
    }//CrtThreadProc

    //-------------------------------------------------------------------------
    // Member data
    ThreadFunctor *m_functor; 
    HANDLE m_hthread;
    volatile void *m_thread_data;
    volatile LONG m_flags;
 
public:
    //-------------------------------------------------------------------------
    // enum used by SetPriority() for type safety
    enum Priority_t
    {
        priority_above_normal  = THREAD_PRIORITY_ABOVE_NORMAL,
        priority_below_normal  = THREAD_PRIORITY_BELOW_NORMAL,
        priority_highest       = THREAD_PRIORITY_HIGHEST,
        priority_idle          = THREAD_PRIORITY_IDLE,
        priority_lowest        = THREAD_PRIORITY_LOWEST,
        priority_normal        = THREAD_PRIORITY_NORMAL,
        priority_time_critical = THREAD_PRIORITY_TIME_CRITICAL,
    };//Priority_t

    //-------------------------------------------------------------------------
    // Constructor
    ThreadBase() : m_functor(0), m_hthread(0), 
                   m_thread_data(0), m_flags(0) {}
 
    //-------------------------------------------------------------------------
    // Destructor - If the thread is running, it is killed (not good)
    virtual ~ThreadBase() 
    {
        if (m_hthread != 0)
        {
            if (IsRunning())
            {
                assert(false); // thread shouldn't be running!
                Kill(); // not good
            }//if

            ::CloseHandle(m_hthread);
        }//if

        delete m_functor;
    }//destructor

    //-------------------------------------------------------------------------
    // Sets the thread entry point function. This is the function which is 
    // called when the thread is started.
    //   fun - pointer to function to execute in this thread's context
    void SetThreadEntry(ThreadFunc_t fun)
    {
        delete m_functor;
        m_functor = new NonMemFunctor(fun);
    }//SetThreadEntry

    //-------------------------------------------------------------------------
    // Use this version to run object member functions in the context of this 
    // thread.
    //   obj - pointer to struct or class instance to run the member function 
    //         thread on
    //   fun - pointer to object member function to run in this thread's 
    //         context
    template <typename Obj_t>
    void SetThreadEntry(Obj_t *obj, DWORD (Obj_t::*fun)(ThreadBase*))
    {
        delete m_functor;
        m_functor = new MemFunctor<Obj_t>(obj, fun);
    }//SetThreadEntry
 
    //-------------------------------------------------------------------------
    // HANDLE operator for access to the thread handle
    operator HANDLE() {return m_hthread;}
 
    //-------------------------------------------------------------------------
    // Set a custom data value in the thread. Member templates  are used for 
    // convenience but only pointer-sized data types or smaller may be used. 
    template <typename T>
    void SetData(T data) 
    {
        // only pointer-sized data or smaller allowed
        compile_assert<sizeof(T) <= sizeof(void*)>();
        InterlockedExchangePointer(&m_thread_data,
            reinterpret_cast<void*>(data));
    }//SetData
    
    // For const pointer types
    template <typename T>
    void SetData(const T *data) 
    {
        InterlockedExchangePointer(&m_thread_data, 
            reinterpret_cast<void*>(const_cast<T*>(data)));
    }//SetData

    //-------------------------------------------------------------------------
    // Get a custom data value in the thread. Member templates  are used for 
    // convenience but only pointer-sized data types or smaller may be used. 
    // The same type used in the SetData() call should also be used in
    // the GetData() call.
    template <typename T>
    void GetData(T &data) const
    {
        // only pointer-sized data or smaller allowed
        compile_assert<sizeof(T) <= sizeof(void*)>();
        InterlockedExchangePointer(&data, m_thread_data);
    }//GetData

    //-------------------------------------------------------------------------
    // Set user-defined flags for this thread. Subsequent calls to GetFlags() 
    // will return the last value set by this method. The default value is 
    // zero. Returns the previous flags value.
    LONG SetFlags(LONG flags)
    {
        // casts needed by dev-c++
        return InterlockedExchange((LONG*)&m_flags, flags);
    }//SetFlag
    
    //-------------------------------------------------------------------------
    // Set user-defined flags for this thread. The default value returned is
    // zero, otherwise it's the last value set by SetFlags().
    LONG GetFlags() const 
    {
        // LONG cast needed by dev-c++
        return InterlockedExchange((LONG*)&m_flags, (LONG)m_flags);
    }//GetFlag

    //-------------------------------------------------------------------------
    // Returns false if the thread hasn't been created. Otherwise, the current
    // execution state of thread is returned. Note that this will return true 
    // if the thread is suspended.
    bool IsRunning() const
    {
        if (m_hthread == 0)
            return false;
        return ::WaitForSingleObject(m_hthread, 0) == WAIT_TIMEOUT;
    }//IsRunning

    //-------------------------------------------------------------------------
    // Start the threads execution. Returns true if the thread is already 
    // running or if the thread was started successfully. Returns false if the
    // thread could not be created.
    //    flags, psa, stack_sz - see CreateThread() documentation
    bool Start(DWORD flags = 0, LPSECURITY_ATTRIBUTES psa = 0, 
               DWORD stack_sz = 0)
    {
        assert(m_functor != 0);

        if (IsRunning())
            return true;

        if (m_hthread != 0)
            ::CloseHandle(m_hthread);

        m_hthread = create_thread(flags, psa, stack_sz);
        
        return m_hthread != 0;
    }//Start

    //-------------------------------------------------------------------------
    // Wait for thread execution to complete. Returns true if the thread
    // execution has completed. Returns false if the thread was never started
    // or if the timeout occurred before thread execution was complete.
    //    timeout - number of milliseconds to wait before giving up. If not
    //              specified, an infinite timeout is used.
    bool Join(DWORD timeout = INFINITE)
    {
        if (m_hthread == 0)
            return false;
        return ::WaitForSingleObject(m_hthread, timeout) == WAIT_OBJECT_0;
    }//Join

    //-------------------------------------------------------------------------
    // Terminate the currently running thread. Returns true if the thread is 
    // not running or if the thread was terminated successfully.
    // NOTE: This is a dangerous function that should only be used in the most
    //       extreme cases. 
    bool Kill(DWORD exitcode = 0xDEADBEAF)
    {
        if (!IsRunning())
            return true;
        return ::TerminateThread(m_hthread, exitcode) == TRUE;
    }//Kill

    //-------------------------------------------------------------------------
    // Get the DWORD return value of the thread function. Returns false if the
    // thread was never started or the exit code could not be retrieved.
    //    exitcode - reference parameter to receive the exit code
    bool GetExitCode(DWORD &exitcode) 
    {
        if (m_hthread == 0)
            return false;
        return ::GetExitCodeThread(m_hthread, &exitcode) == TRUE;
    }//GetExitCode

    //-------------------------------------------------------------------------
    // Set the threads priority. Returns false if the thread isn't running. 
    // Returns true if the priority of the thread is set successfully.
    //    p - A value from the Priority_t enum.
    bool SetPriority(Priority_t p)
    {
        if (!IsRunning())
            return false;
        return ::SetThreadPriority(m_hthread, p) == TRUE;
    }//SetPriority

protected:
    // don't allow copies or assignment
    ThreadBase(const ThreadBase&); // no implementation
    ThreadBase& operator=(const ThreadBase&); // no implementation 
};//ThreadBase

//-----------------------------------------------------------------------------
// The thread creation API's that Thread<> can be built on. Each enumerator
// selects one template specialization of Thread<>.
enum ThreadAPI
{
    win_thread_api = 0, // Win32 API
    crt_thread_api = 1, // CRT API
    mfc_thread_api = 2  // MFC API
};//ThreadAPI

//-----------------------------------------------------------------------------
// Thread<> template - this class is specialized for each ThreadAPI enum.
// Only the declaration appears here; the primary template is never defined,
// so a Thread<> is usable only where a specialization for that enum exists.
template <ThreadAPI thread_api> class Thread;

//-----------------------------------------------------------------------------
// Thread<> specialization whose threads are created with the Win32 
// CreateThread() call
template <>
class Thread<win_thread_api> : public ThreadBase
{
protected:
    //-------------------------------------------------------------------------
    // Creates the thread with ::CreateThread(), using Win32ThreadProc as the
    // entry point and this object as its argument. Returns whatever handle 
    // ::CreateThread() returns.
    //    flags, psa, stack_sz - passed straight through to ::CreateThread()
    virtual HANDLE create_thread(DWORD flags = 0, LPSECURITY_ATTRIBUTES psa = 0, 
                                 DWORD stack_sz = 0)
    {
        DWORD id_not_kept; // receives the thread id, which is not stored
        HANDLE created = ::CreateThread(psa, stack_sz, &Win32ThreadProc, 
                                        this, flags, &id_not_kept);
        return created;
    }//create_thread
};//Thread<win_thread_api>

//-----------------------------------------------------------------------------
// Thread<> specialization whose threads are created with the CRT 
// _beginthreadex() call
template <>
class Thread<crt_thread_api> : public ThreadBase
{
protected:
    //-------------------------------------------------------------------------
    // Creates the thread with _beginthreadex(), using CrtThreadProc as the 
    // entry point and this object as its argument. The value returned by 
    // _beginthreadex() is cast to HANDLE and returned.
    //    flags, psa, stack_sz - passed straight through to _beginthreadex()
    virtual HANDLE create_thread(DWORD flags = 0, LPSECURITY_ATTRIBUTES psa = 0, 
                                 DWORD stack_sz = 0)
    {
        // _beginthreadex() is declared by hand here so that process.h does 
        // not have to be included.
        // NOTE(review): this prototype hard-codes an `unsigned long` return
        //               type - confirm that it matches the CRT's own 
        //               declaration on every toolchain this is built with.
        typedef unsigned (__stdcall *CrtCallback_t)(void*);
        extern unsigned long __cdecl _beginthreadex(void*, unsigned, 
                                                    CrtCallback_t, void*, 
                                                    unsigned, unsigned*);

        unsigned id_not_kept; // receives the thread id, which is not stored
        unsigned long raw = _beginthreadex(psa, stack_sz, &CrtThreadProc, 
                                           this, flags, &id_not_kept);
        return (HANDLE)raw;
    }//create_thread
};//Thread<crt_thread_api>

//-----------------------------------------------------------------------------
#ifdef _MFC_VER
// Thread<> specialization which uses the MFC API
template <>
class Thread<mfc_thread_api> : public ThreadBase
{
protected:
    //-------------------------------------------------------------------------
    // create_thread using MFC thread API
    virtual HANDLE create_thread(DWORD flags = 0, LPSECURITY_ATTRIBUTES psa = 0, 
                                 DWORD stack_sz = 0)
    {
        // create the thread suspended so that we can safely duplicate the 
        // thread handle - CWinThread will clean up its copy and we'll clean up
        // ours.
        CWinThread *t;
        t = AfxBeginThread((AFX_THREADPROC)&Win32ThreadProc, this, 
                           THREAD_PRIORITY_NORMAL, stack_sz, 
                           flags | CREATE_SUSPENDED, psa);
        
        HANDLE proc = GetCurrentProcess(),
               thrd = 0;
        BOOL ret = DuplicateHandle(proc, t->m_hThread, proc, &thrd, 0, FALSE, 
                                   DUPLICATE_SAME_ACCESS);
        assert(ret);
        
        if (!(flags & CREATE_SUSPENDED))
            t->ResumeThread();
        
        return thrd;
    }//create_thread
};//Thread<mfc_thread_api>
#endif //_MFC_VER

//-----------------------------------------------------------------------------
// Nice typedefs - one short name per Thread<> specialization.
// NOTE: the Thread<mfc_thread_api> specialization is only defined when 
//       _MFC_VER is defined, so MfcThread names an undefined class in 
//       builds without MFC.
typedef Thread<win_thread_api> WinThread;
typedef Thread<crt_thread_api> CrtThread;
typedef Thread<mfc_thread_api> MfcThread;

}// namespace CP_Thread

#endif //CP_THREAD_H
gg