How to make an HTTP get request in C without libcurl?

Using BSD sockets or, if you're somewhat limited, say you have some RTOS, some simpler TCP stack, like lwIP, you can form the GET/POST request.

There are a number of open-source implementations. See the "happyhttp" as a sample ( http://scumways.com/happyhttp/happyhttp.html ). I know, it is C++, not C, but the only thing that is "C++-dependant" there is a string/array management, so it is easily ported to pure C.

Beware, there are no "packets", since HTTP is usually transfered over the TCP connection, so technically there is only a stream of symbols in RFC format. Since http requests are usually done in a connect-send-disconnect manner, one might actually call this a "packet".

Basically, once you have an open socket (sockfd) "all" you have to do is something like

char sendline[MAXLINE + 1], recvline[MAXLINE + 1];
char* ptr;

size_t n;

/// Form request
snprintf(sendline, MAXSUB, 
     "GET %s HTTP/1.0\r\n"  // POST or GET, both tested and works. Both HTTP 1.0 HTTP 1.1 works, but sometimes 
     "Host: %s\r\n"     // but sometimes HTTP 1.0 works better in localhost type
     "Content-type: application/x-www-form-urlencoded\r\n"
     "Content-length: %d\r\n\r\n"
     "%s\r\n", page, host, (unsigned int)strlen(poststr), poststr);

/// Write the request
if (write(sockfd, sendline, strlen(sendline))>= 0) 
{
    /// Read the response
    while ((n = read(sockfd, recvline, MAXLINE)) > 0) 
    {
        recvline[n] = '\0';

        if(fputs(recvline, stdout) == EOF)
        {
            printf("fputs() error\n");
        }

        /// Remove the trailing chars
        ptr = strstr(recvline, "\r\n\r\n");

        // check len for OutResponse here ?
        snprintf(OutResponse, MAXRESPONSE,"%s", ptr);
    }          
}

POSIX 7 minimal runnable example

Let's fetch http://example.com.

wget.c

#define _XOPEN_SOURCE 700
#include <arpa/inet.h>
#include <assert.h>
#include <netdb.h> /* getprotobyname */
#include <netinet/in.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(int argc, char** argv) {
    char buffer[BUFSIZ];
    enum CONSTEXPR { MAX_REQUEST_LEN = 1024};
    char request[MAX_REQUEST_LEN];
    char request_template[] = "GET / HTTP/1.1\r\nHost: %s\r\n\r\n";
    struct protoent *protoent;
    char *hostname = "example.com";
    in_addr_t in_addr;
    int request_len;
    int socket_file_descriptor;
    ssize_t nbytes_total, nbytes_last;
    struct hostent *hostent;
    struct sockaddr_in sockaddr_in;
    unsigned short server_port = 80;

    if (argc > 1)
        hostname = argv[1];
    if (argc > 2)
        server_port = strtoul(argv[2], NULL, 10);

    request_len = snprintf(request, MAX_REQUEST_LEN, request_template, hostname);
    if (request_len >= MAX_REQUEST_LEN) {
        fprintf(stderr, "request length large: %d\n", request_len);
        exit(EXIT_FAILURE);
    }

    /* Build the socket. */
    protoent = getprotobyname("tcp");
    if (protoent == NULL) {
        perror("getprotobyname");
        exit(EXIT_FAILURE);
    }
    socket_file_descriptor = socket(AF_INET, SOCK_STREAM, protoent->p_proto);
    if (socket_file_descriptor == -1) {
        perror("socket");
        exit(EXIT_FAILURE);
    }

    /* Build the address. */
    hostent = gethostbyname(hostname);
    if (hostent == NULL) {
        fprintf(stderr, "error: gethostbyname(\"%s\")\n", hostname);
        exit(EXIT_FAILURE);
    }
    in_addr = inet_addr(inet_ntoa(*(struct in_addr*)*(hostent->h_addr_list)));
    if (in_addr == (in_addr_t)-1) {
        fprintf(stderr, "error: inet_addr(\"%s\")\n", *(hostent->h_addr_list));
        exit(EXIT_FAILURE);
    }
    sockaddr_in.sin_addr.s_addr = in_addr;
    sockaddr_in.sin_family = AF_INET;
    sockaddr_in.sin_port = htons(server_port);

    /* Actually connect. */
    if (connect(socket_file_descriptor, (struct sockaddr*)&sockaddr_in, sizeof(sockaddr_in)) == -1) {
        perror("connect");
        exit(EXIT_FAILURE);
    }

    /* Send HTTP request. */
    nbytes_total = 0;
    while (nbytes_total < request_len) {
        nbytes_last = write(socket_file_descriptor, request + nbytes_total, request_len - nbytes_total);
        if (nbytes_last == -1) {
            perror("write");
            exit(EXIT_FAILURE);
        }
        nbytes_total += nbytes_last;
    }

    /* Read the response. */
    fprintf(stderr, "debug: before first read\n");
    while ((nbytes_total = read(socket_file_descriptor, buffer, BUFSIZ)) > 0) {
        fprintf(stderr, "debug: after a read\n");
        write(STDOUT_FILENO, buffer, nbytes_total);
    }
    fprintf(stderr, "debug: after last read\n");
    if (nbytes_total == -1) {
        perror("read");
        exit(EXIT_FAILURE);
    }

    close(socket_file_descriptor);
    exit(EXIT_SUCCESS);
}

GitHub upstream.

Compile:

gcc -ggdb3 -std=c99 -Wall -Wextra -o wget wget.c

Get http://example.com and output to stdout:

./wget example.com

We see something like:

debug: before first read
debug: after a read
HTTP/1.1 200 OK
Age: 540354
Cache-Control: max-age=604800
Content-Type: text/html; charset=UTF-8
Date: Tue, 02 Feb 2021 15:21:14 GMT
Etag: "3147526947+ident"
Expires: Tue, 09 Feb 2021 15:21:14 GMT
Last-Modified: Thu, 17 Oct 2019 07:18:26 GMT
Server: ECS (nyb/1D11)
Vary: Accept-Encoding
X-Cache: HIT
Content-Length: 1256

<!doctype html>
<html>
...
</html>

After printing the reply, this command hangs for most servers until timeout, and that is expected:

  • either server or client must close the connection
  • we (client) are not doing it
  • most HTTP servers leave the connection open until a timeout expecting further requests, e.g. JavaScript, CSS and images following an HTML page
  • we could parse the response, and close when Content-Length bytes are read, but we didn't for simplicity. What HTTP response headers are required says that if Content-Length is not sent, the server can just close to determine length.

We could however make the host close by passing adding the HTTP 1.1 standard header Connection: close to the server:

char request_template[] = "GET / HTTP/1.1\r\nHost: %s\r\nConnection: close\r\n\r\n";

The connection part also works with the IP:

host example.com

gives:

example.com has address 93.184.216.34
example.com has IPv6 address 2606:2800:220:1:248:1893:25c8:1946

and so we do:

./wget 93.184.216.34

however, the reply is an error, because we are not setting the Host: properly in our program, and that is required in HTTP 1.1.

Tested on Ubuntu 18.04.

Server examples

  • minimal POSIX C example: Send and Receive a file in socket programming in Linux with C/C++ (GCC/G++)
  • minimal Android Java example: how to create Socket connection in Android?

“Without any external libraries” strictly speaking would exclude libc as well, so you'd have to write all syscalls yourself. I doubt you mean it that strict, though. If you don't want to link to another library, and don't want to copy source code from another library into your application, then directly dealing with the TCP stream using the socket API is your best approach.

Creating the HTTP request and sending it over a TCP socket connection is easy, as is reading the answer. It's parsing the answer which is going to be real tricky, particularly if you aim to support a reasonably large portion of the standard. Things like error pages, redirects, content negotiation and so on can make our life quite hard if you're talking to arbitrary web servers. If on the other hand the server is known to be well-behaved, and a simple error message is all right for any unexpected server response, then that is reasonably simple as well.