Here is some code that can be used to fetch the HTML of a web page. You can then write the HTML to disk, parse it, or do whatever else you want with it.
Code:
/* SendHttp reports failures through DisplayError, which is defined later. */
void DisplayError(LPCTSTR lpszError, int errornum);

/**
 * Sends a raw HTTP request to TCP port 80 of a server and copies everything
 * the server sends back (status line, headers and body) into a caller buffer.
 *
 * The response is read until the server closes the connection or the buffer
 * is full, so the request should normally ask the server to close the
 * connection (e.g. "Connection: close"). A response larger than the buffer
 * is truncated; the function still returns TRUE in that case.
 *
 * Every failure is reported to the user through DisplayError().
 *
 * NOTE: the string arguments are passed directly to the char-based Winsock
 * calls, so this function is meant for builds where TCHAR is char.
 *
 * lpszServer - The webserver host name or dotted IP address
 * lpszHttp   - The complete, NUL-terminated http request to send
 * data       - buffer to hold the response; always NUL-terminated once the
 *              receive phase has been reached, untouched on earlier failures
 * datasize   - the size of the data buffer in characters (must be > 0)
 *
 * Returns TRUE when the request was sent and a response (possibly empty or
 * truncated) was stored in data, FALSE otherwise.
 */
BOOL SendHttp(LPCTSTR lpszServer, LPCTSTR lpszHttp, LPTSTR data, UINT datasize)
{
    const size_t max_chunk = 0x7FFFFFFF; /* send()/recv() lengths are ints */
    SOCKET s = INVALID_SOCKET;
    WSADATA wsaData;
    struct sockaddr_in hostaddr;
    struct hostent *serverent;
    const char *cursor;
    size_t remaining;
    UINT used = 0;
    int bytes, err;
    BOOL ok = FALSE;

    if (!lpszServer || !lpszHttp || !data || datasize == 0)
    {
        DisplayError("Invalid argument passed to SendHttp.", 0);
        return FALSE;
    }

    /* Initialize sockets. WSAStartup returns its error code directly. */
    err = WSAStartup(MAKEWORD(2,2), &wsaData);
    if (err != 0)
    {
        DisplayError("Error Initializing Sockets", err);
        return FALSE;
    }

    /* create a socket descriptor */
    s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (s == INVALID_SOCKET)
    {
        DisplayError("Error creating socket.", WSAGetLastError());
        goto cleanup;
    }

    /* Get a hostent structure from the domain name */
    serverent = gethostbyname(lpszServer);
    if (!serverent)
    {
        DisplayError("Could not resolve host name.", WSAGetLastError());
        goto cleanup;
    }
    if (serverent->h_addrtype != AF_INET ||
        serverent->h_length != (int)sizeof(hostaddr.sin_addr) ||
        !serverent->h_addr_list[0])
    {
        DisplayError("Could not resolve host name.", 0);
        goto cleanup;
    }

    /* The hostent address is already in network byte order: copy it as is. */
    memset(&hostaddr, 0, sizeof(hostaddr));
    hostaddr.sin_family = AF_INET;
    memcpy(&hostaddr.sin_addr, serverent->h_addr_list[0], sizeof(hostaddr.sin_addr));
    hostaddr.sin_port = htons(80); /* standard HTTP port */

    /* Connect to the server */
    if (connect(s, (struct sockaddr *)&hostaddr, (int)sizeof(hostaddr)) == SOCKET_ERROR)
    {
        DisplayError("Unable to connect to server.", WSAGetLastError());
        goto cleanup;
    }

    /* Send the http headers; send() may accept only part of the data. */
    cursor = lpszHttp;
    remaining = strlen(lpszHttp);
    while (remaining > 0)
    {
        int chunk = (int)(remaining > max_chunk ? max_chunk : remaining);

        bytes = send(s, cursor, chunk, 0);
        if (bytes <= 0)
        {
            DisplayError("Error Sending HTTP data.", WSAGetLastError());
            goto cleanup;
        }
        cursor += bytes;
        remaining -= (size_t)bytes;
    }

    /*
     * Receive the response straight into the caller's buffer. data is not
     * written before this point, so the caller may reuse one buffer for
     * both the request and the response.
     */
    for (;;)
    {
        /* space left, always keeping one slot for the terminator */
        UINT room = datasize - 1 - used;

        if (room == 0)
            break; /* buffer full: the rest of the response is dropped */
        if (room > max_chunk)
            room = (UINT)max_chunk;

        bytes = recv(s, data + used, (int)room, 0);
        if (bytes == SOCKET_ERROR && used == 0)
        {
            /* Nothing was received at all: report it instead of
               returning an empty "successful" response. */
            err = WSAGetLastError();
            data[0] = 0;
            DisplayError("Error receiving HTTP data.", err);
            goto cleanup;
        }
        if (bytes <= 0)
            break; /* connection closed (or lost after partial data) */
        used += (UINT)bytes;
    }
    data[used] = 0;
    ok = TRUE;

cleanup:
    if (s != INVALID_SOCKET)
        closesocket(s);
    WSACleanup(); /* balances the successful WSAStartup above */
    return ok;
}
/**
 * Shows a modal error message box titled "ERROR".
 *
 * lpszError - The message text (NULL is treated as an empty message)
 * errornum  - An error code appended to the message as "Error Number: N";
 *             pass 0 to show the message text alone
 *
 * The message text is truncated if necessary so that the formatted result
 * always fits the local 256-byte buffer.
 */
void DisplayError(LPCTSTR lpszError, int errornum)
{
    char szError[256];

    if (!lpszError)
    {
        lpszError = "";
    }

    /* The precision caps how much of the message is copied: 200 characters
       plus the fixed suffix and the longest possible int stay under 256. */
    if (errornum)
    {
        sprintf(szError, "%.200s\n\nError Number: %d", lpszError, errornum);
    }
    else
    {
        sprintf(szError, "%.255s", lpszError);
    }
    MessageBox(NULL, szError, "ERROR", MB_OK | MB_ICONERROR);
}
The HTTP request might look something like this (every line ends with \r\n, and a blank line terminates the headers):
GET / HTTP/1.1
Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*
Accept-Language: en-us
Content-Type: application/x-www-form-urlencoded
User-Agent: Mozilla/4.0
Host: website_server.com
Content-Length: 0
Cache-Control: no-cache
\r\n\r\n
This should be enough to get you started.