10 November 2009

wget

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <netdb.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <sys/socket.h>

#include <fcntl.h>
#include <libgen.h>

/*
 * Isolate the host part of a URL, in place.
 *
 * A leading "http://" or "https://" scheme is skipped, and the first '/'
 * after it (if any) is overwritten with '\0', so the returned string holds
 * only the host.
 *
 * url: writable, NUL-terminated URL; it is modified by this call.
 *
 * Returns a pointer INTO url (not a copy) at the first character of the
 * host. The path of the original URL started at offset
 * (result - url) + strlen(result).
 */
char *gethostname(char *url)
{
    static const char http_scheme[] = "http://";
    static const char https_scheme[] = "https://";
    char *slash;

    /*
     * Compare as a prefix only: a scheme that merely appears later in the
     * URL (for example inside a query string) must not shift the start.
     */
    if (strncmp(url, http_scheme, sizeof http_scheme - 1) == 0) {
        url += sizeof http_scheme - 1;
    } else if (strncmp(url, https_scheme, sizeof https_scheme - 1) == 0) {
        url += sizeof https_scheme - 1;
    }

    /* cut the string at the end of the host */
    slash = strchr(url, '/');
    if (slash != NULL) {
        *slash = '\0';
    }

    return url;
}

/*
 * Choose the name to use for a URL path.
 *
 * file: NUL-terminated path part of a URL; never written to.
 *
 * Returns file itself when it names something. When file is empty or is
 * just "/", returns a pointer to an internal buffer holding "test.txt"
 * instead. That buffer is shared between calls, so the result must not be
 * freed and is not safe to use from several threads at once.
 */
char *getfilename(char *file)
{
    static const char default_name[] = "test.txt";
    static char fallback[sizeof default_name];

    if (file[0] == '\0' || (file[0] == '/' && file[1] == '\0')) {
        /*
         * The input has room for at most "/" plus the terminator, so the
         * default name cannot be copied into it; hand out our own storage,
         * refreshed on every call in case a caller modified it.
         */
        memcpy(fallback, default_name, sizeof fallback);
        return fallback;
    }

    return file;
}


/* TCP port every connection is made to */
#define PORT 80

/*
 * Resolve host and open a TCP connection to it on PORT.
 *
 * host: host name or dotted-quad address, as accepted by gethostbyname().
 *
 * Returns the connected socket descriptor. Any failure (lookup, socket
 * creation, connect) prints a diagnostic to stderr and terminates the
 * process with exit(1), so the function only returns on success.
 * Progress messages are printed to stdout.
 */
int connect_to_ip(char *host)
{
    int sockfd;
    struct hostent *he;
    struct sockaddr_in their_addr;

    he = gethostbyname(host);
    if (he == NULL) {
        /*
         * gethostbyname() reports failures through h_errno, not errno, so
         * perror() would print an unrelated message here.
         */
        fprintf(stderr, "gethostbyname(): fail: cannot resolve %s (h_errno %d)\n",
                host, h_errno);
        exit(1);
    }
    if (he->h_addrtype != AF_INET ||
        he->h_length != (int)sizeof their_addr.sin_addr ||
        he->h_addr_list[0] == NULL) {
        fprintf(stderr, "gethostbyname(): fail: no IPv4 address for %s\n", host);
        exit(1);
    }
    printf("Client-The remote host is: %s\n", host);

    sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd == -1) {
        perror("socket()");
        exit(1);
    }
    printf("Client-The socket() sockfd is OK...\n");

    /* zero the whole address first so no field is left uninitialised */
    memset(&their_addr, 0, sizeof their_addr);
    their_addr.sin_family = AF_INET;
    printf("Server-Using %s and port %d...\n", host, PORT);
    their_addr.sin_port = htons(PORT);
    /* copy the bytes rather than dereferencing a cast char pointer */
    memcpy(&their_addr.sin_addr, he->h_addr_list[0], sizeof their_addr.sin_addr);

    if (connect(sockfd, (struct sockaddr *)&their_addr, sizeof their_addr) == -1) {
        perror("connect()");
        exit(1);
    }
    printf("Client-The connect() is OK...\n");

    return sockfd;
}

/*
 * Read one header line from sfp into buff and strip its line ending.
 *
 * buff: destination buffer; cleared before reading.
 * size: capacity of buff in bytes. A line longer than size - 1 characters
 *       is split across successive calls (fgets() semantics).
 * sfp:  stream to read from.
 *
 * Returns buff, cut at the first '\r' or '\n', when a header line was
 * read. Returns NULL when the line is blank (the end of the header
 * section), when the stream is at end-of-file or in error, or when there
 * is no usable buffer.
 */
char *gethdr(char *buff, int size, FILE *sfp)
{
    char *s;

    if (buff == NULL || size <= 0) {
        return NULL;
    }
    memset(buff, 0, (size_t)size);

    /* without this check EOF looks like an endless run of empty headers */
    if (fgets(buff, size, sfp) == NULL) {
        return NULL;
    }

    /* a line made only of CRs followed by LF is the blank separator line */
    for (s = buff; *s == '\r'; ++s) {
        continue;
    }
    if (*s == '\n') {
        return NULL;
    }

    /* locate the end of the header and terminate the string there */
    s[strcspn(s, "\r\n")] = '\0';

    return buff;
}


/*
 * True when c separates tokens on a header line: a space or a horizontal
 * tab. This is a private helper with its own name so that it cannot be
 * confused with, or clash with, the standard isspace() from <ctype.h>.
 */
static int is_blank_char(char c)
{
    return c == ' ' || c == '\t';
}

/*
 * Advance past leading blanks (spaces and tabs).
 *
 * str: NUL-terminated string; not modified.
 *
 * Returns a pointer into str at the first character that is not a blank,
 * which is the terminating NUL when the rest of the string is all blanks.
 */
char *skip_whitespace(char *str)
{
    while (is_blank_char(*str)) {
        ++str;
    }

    return str;
}

/*
 * Advance past the current token.
 *
 * str: NUL-terminated string; not modified.
 *
 * Returns a pointer into str at the first blank (space or tab), or at the
 * terminating NUL when the string contains none.
 */
char *skip_non_whitespace(char *str)
{
    while (*str != '\0' && !is_blank_char(*str)) {
        ++str;
    }

    return str;
}


/*
 * Send the whole buffer over a connected socket, retrying after partial
 * sends. Returns 0 on success, -1 when send() fails.
 */
static int send_all(int sockfd, const char *data, size_t len)
{
    while (len > 0) {
        ssize_t sent = send(sockfd, data, len, 0);

        if (sent <= 0) {
            return -1;
        }
        data += sent;
        len -= (size_t)sent;
    }

    return 0;
}

/*
 * Request a file over an already connected socket and save the response
 * body in the current directory.
 *
 * sockfd: connected TCP socket. It is consumed by this call: it is closed
 *         (or, if it could not be wrapped in a stream, shut down) before
 *         returning.
 * host:   value sent in the Host: header.
 * file:   path part of the URL. Its basename is the output file name. A
 *         value that does not start with '/' is used as the output name
 *         only, and "/" is requested from the server.
 *
 * The request line, the status line and every response header are echoed
 * to stdout. The body is written whatever the status code is. Failures are
 * reported on stderr; the function has no return value to signal them.
 */
void getfile(int sockfd, char *host, char *file)
{
    char cmd[1024] = {0};
    char buff[512] = {0};
    const char *path;
    char *name_copy = NULL;
    char *fname;
    char *str;
    FILE *sfp = NULL;
    FILE *out = NULL;
    size_t len;
    int n;

    /* basename() is allowed to modify its argument; keep file intact */
    len = strlen(file) + 1;
    name_copy = malloc(len);
    if (name_copy == NULL) {
        perror("malloc()");
        goto done;
    }
    memcpy(name_copy, file, len);
    fname = basename(name_copy);

    /* a request target must be an absolute path */
    path = (file[0] == '/') ? file : "/";

    /*
     * HTTP/1.0 plus "Connection: close": the server then ends the body by
     * closing the connection and may not use chunked transfer coding,
     * which this client does not decode.
     */
    n = snprintf(cmd, sizeof cmd,
                 "GET %s HTTP/1.0\r\nHost: %s\r\nConnection: close\r\n\r\n",
                 path, host);
    if (n < 0 || (size_t)n >= sizeof cmd) {
        fprintf(stderr, "getfile(): request does not fit in %lu bytes\n",
                (unsigned long)sizeof cmd);
        goto done;
    }

    if (send_all(sockfd, cmd, (size_t)n) == -1) {
        perror("send()");
        goto done;
    }
    printf("cmd: %s\n", cmd);

    /*
     * From here on the socket is read only through this stream. Mixing
     * buffered and raw reads would lose whatever part of the body stdio
     * had already pulled into its buffer while reading the headers.
     */
    sfp = fdopen(sockfd, "r");
    if (sfp == NULL) {
        perror("fdopen()");
        goto done;
    }

    /* status line: "<version> <code> <reason>" */
    if (gethdr(buff, sizeof(buff), sfp) == NULL) {
        fprintf(stderr, "getfile(): no status line received\n");
        goto done;
    }
    str = buff;
    str = skip_non_whitespace(str);
    str = skip_whitespace(str);
    printf("%s, status: %d\n", buff, atoi(str));

    /* echo the headers up to the blank line that ends them */
    while (gethdr(buff, sizeof(buff), sfp) != NULL) {
        /* an empty line at end-of-stream means the response was cut short */
        if (buff[0] == '\0' && (feof(sfp) || ferror(sfp))) {
            break;
        }
        printf("%s\n", buff);
    }

    /* "wb" creates the file or truncates an existing one */
    out = fopen(fname, "wb");
    if (out == NULL) {
        perror(fname);
        goto done;
    }

    while ((len = fread(buff, 1, sizeof buff, sfp)) > 0) {
        if (fwrite(buff, 1, len, out) != len) {
            perror("fwrite()");
            goto done;
        }
    }
    if (ferror(sfp)) {
        perror("fread()");
    }

done:
    if (out != NULL && fclose(out) == EOF) {
        perror("fclose()");
    }
    if (sfp != NULL) {
        /* closing the stream also closes sockfd */
        fclose(sfp);
    } else {
        /*
         * No stream owns the descriptor. close() is declared in
         * <unistd.h>, which this file cannot include because its own
         * gethostname() conflicts with the POSIX declaration there, so the
         * connection is torn down with shutdown() instead.
         */
        shutdown(sockfd, SHUT_RDWR);
    }
    free(name_copy);

    return;
}

/*
 * Entry point: download the URL given as the first argument.
 *
 * The URL is split into a host and a path, a connection is opened to the
 * host, and the path is fetched into a file in the current directory.
 * Exits with status 1 when no URL is given or the URL is too long.
 */
int main(int argc, char *argv[])
{
    char *host, *file;
    char url[256] = {0};
    /* path copy, with spare room for a default file name */
    char path[sizeof url + sizeof "test.txt"] = {0};
    size_t url_len;
    size_t offset;
    int sockfd;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <url>\n", argv[0]);
        exit(1);
    }

    url_len = strlen(argv[1]);
    if (url_len >= sizeof url) {
        fprintf(stderr, "%s: url too long (at most %lu characters)\n",
                argv[0], (unsigned long)(sizeof url - 1));
        exit(1);
    }
    memcpy(url, argv[1], url_len + 1);

    /* gethostname() truncates url at the end of the host */
    host = gethostname(url);

    /*
     * The path starts in argv[1] where the host ends in url. Work on a
     * private copy so argv is never written to.
     */
    offset = (size_t)(host - url) + strlen(host);
    snprintf(path, sizeof path, "%s", argv[1] + offset);
    file = getfilename(path);
    printf("url: %s, file: %s\n", url, file);

    sockfd = connect_to_ip(host);
    getfile(sockfd, host, file);

    printf("\nsuccessfully written to \"%s\"\n\n", basename(file));

    return 0;
}

gcc -o wget wget.c
./wget http://www.dhpc.adelaide.edu.au/education/dhpc/lab/MPI/hello.c
./wget www.physics.arizona.edu/~physreu/dox/cprogs/day1/hello.c