Lecture 12

Note: Reading these lecture notes is not a substitute for watching the lecture. I frequently go off script, and you are responsible for understanding everything I talk about in lecture unless I specify otherwise.

Writing an HTTP server

Last lecture, we wrote a simple HTTP client that issues an HTTP request and saves the returned payload. Let’s write a simple Scrabble HTTP server that takes a set of letters and returns a list of words that can be formed with those letters.

An example request might look like this:

GET /balloon HTTP/1.1
Host: myth58:12345
User-Agent: curl/7.47.0
Accept: */*

The server responds with a list of words that can be spelled with the letters balloon:

HTTP/1.1 200 OK
Content-Type: text/javascript; charset=UTF-8
Content-Length: 503

{
  status: "success",
  possibilities: [
    'abo',
    'aboon',
    'al',
    'alb',
    'all',
    'an',
    'ba',
    'bal',
    'ball',
    'ballon',
    'balloon',
    'ban',
    'bo',
    <more output omitted>
  ]
}

Assuming we have an executable scrabble-solver that takes the letters as a command-line argument and prints out a list of words (one on each line) to its standard output, we can write the following server:

/**
 * Entry point: listens on port 12345 and hands every accepted connection to a
 * 16-thread pool, where publishScrabbleWords services it.
 *
 * Returns 1 if the listening socket could not be created; otherwise it loops
 * forever and never returns.
 */
int main() {
    int serverSocket = createServerSocket(12345);
    // createServerSocket reports failure with a negative value. Without this
    // check every accept() below would fail immediately and the loop would spin.
    if (serverSocket < 0) return 1;

    ThreadPool pool(16);
    while (true) {
        int clientSocket = accept(serverSocket, NULL, NULL);
        // accept() returns -1 on error; never hand an invalid descriptor to a
        // worker. Just go back and wait for the next connection.
        if (clientSocket < 0) continue;

        // Capture the descriptor by value: the thunk runs later on a pool
        // thread, after this iteration's local variable is gone.
        pool.schedule([clientSocket]() {
            publishScrabbleWords(clientSocket);
        });
    }
}

/**
 * Services one client connection: parses the letters out of the request line,
 * runs ./scrabble-solver with those letters as its only argument, and writes
 * the words it prints back to the client as the HTTP response.
 *
 * @param clientSocket descriptor for the accepted connection; it is wrapped in
 *                     a sockbuf/iosockstream for the duration of the call.
 */
static void publishScrabbleWords(int clientSocket) {
    sockbuf sb(clientSocket);
    iosockstream ss(&sb);

    string letters = readRequest(ss);
    skipHeaders(ss);

    const char *command[] = {"./scrabble-solver", letters.c_str(), NULL};
    // NOTE(review): the two flags are presumably "supply child input" = false and
    // "ingest child output" = true, since only sp.ingestfd is used below -- confirm
    // against the subprocess declaration.
    subprocess_t sp = subprocess(const_cast<char **>(command), false, true);

    // Drain the child's output *before* reaping it. A pipe holds only a bounded
    // amount of data: if we waited first, a child that prints more than the pipe
    // can hold would block in write() while we block in waitpid() -- a deadlock.
    vector<string> words = readLines(sp.ingestfd);
    waitpid(sp.pid, NULL, 0);

    publishResponse(ss, words);
}

/**
 * Consumes the HTTP request line ("METHOD PATH PROTOCOL") from the stream and
 * returns whatever follows the last '/' in PATH -- the letters to solve for.
 *
 * Returns the empty string when three whitespace-separated tokens cannot be
 * read (i.e. the request is not HTTP). A path containing no '/' is returned
 * unchanged.
 */
static string readRequest(iosockstream& ss) {
    string method, path, protocol;
    if (!(ss >> method >> path >> protocol)) return "";

    // Discard the remainder of the request line (normally just the line ending)
    string remainder;
    getline(ss, remainder);

    const size_t lastSlash = path.rfind("/");
    if (lastSlash == string::npos) return path;
    return path.substr(lastSlash + 1);
}

/**
 * Reads and discards request header lines up to and including the blank line
 * (either "" or "\r") that terminates the header section.
 *
 * Also stops as soon as a read fails. Once the stream is at EOF or in a failed
 * state, getline() no longer touches `line`, so testing only the line's
 * contents could loop forever on a truncated request whose last header has no
 * trailing newline.
 */
static void skipHeaders(iosockstream& ss) {
    string line;
    while (getline(ss, line) && !line.empty() && line != "\r") {
        // Header contents are intentionally ignored.
    }
}

/**
 * Reads the given descriptor to exhaustion and returns its contents split into
 * lines, in order, without the newline terminators. A final line lacking a
 * trailing newline is still included.
 *
 * @param ingestfd descriptor to read from; it is wrapped in a stdio_filebuf for
 *                 the duration of the call.
 */
static vector<string> readLines(int ingestfd) {
    stdio_filebuf<char> source(ingestfd, ios::in);
    istream input(&source);

    vector<string> collected;
    // getline() yields a stream that tests false once a read fails, which ends the loop
    for (string current; getline(input, current); ) {
        collected.push_back(current);
    }
    return collected;
}

/**
 * Renders a list of strings as a bracketed array literal with every element
 * wrapped in single quotes.
 *
 *   - empty list      -> []
 *   - one element     -> ['word']      (kept on a single line)
 *   - several elements -> one element per line, each indented by `indent`
 *     two-space units, with the closing bracket indented by `indent - 1` units.
 *
 * The single-element form previously emitted the word without quotes, which
 * was inconsistent with the multi-element form.
 *
 * @param vec    the strings to render (not modified)
 * @param indent nesting depth, in two-space units, of the elements
 */
static string serializeVector(vector<string>& vec, int indent) {
    if (vec.empty()) return "[]";
    if (vec.size() == 1) return "['" + vec.front() + "']";

    // Non-positive depths produce no indentation at all
    const string elementIndent(indent > 0 ? 2 * static_cast<size_t>(indent) : 0, ' ');
    const string closingIndent(indent > 1 ? 2 * static_cast<size_t>(indent - 1) : 0, ' ');

    ostringstream oss;
    oss << "[";
    bool first = true;
    for (const string& possibility : vec) {
        if (!first) oss << ", ";
        first = false;
        oss << "\n" << elementIndent << "'" << possibility << "'";
    }
    oss << "\n" << closingIndent << "]";
    return oss.str();
}

/**
 * Builds the response body: an object literal with a fixed "success" status
 * and the given words listed under `possibilities` (serialized at depth 2).
 * The body ends with a newline.
 */
static string generatePayload(vector<string>& words) {
    string payload = "{\n";
    payload += "  status: \"success\",\n";
    payload += "  possibilities: " + serializeVector(words, 2) + "\n";
    payload += "}\n";
    return payload;
}

/**
 * Writes a complete "200 OK" HTTP response to the client: the status line,
 * Content-Type and Content-Length headers, a blank line, and then the payload
 * generated from `words`. The stream is flushed once everything is written.
 */
static void publishResponse(iosockstream& ss, vector<string>& words) {
    const string body = generatePayload(words);

    // Content-Length must be the exact byte count of the body that follows
    ss << "HTTP/1.1 200 OK\r\n"
       << "Content-Type: text/javascript; charset=UTF-8\r\n"
       << "Content-Length: " << body.size() << "\r\n"
       << "\r\n"
       << body << flush;
}

Implementing createServerSocket and createClientSocket

Note: This section involves a lot of clunky/cumbersome structs and function calls. This is because operating system designers tried to make network-related functions as flexible as possible, so that they can support both current and future network protocols. We won’t ask you to write code with these functions on an exam, but we might ask you questions about them.

createServerSocket

Let’s implement the internals of the createServerSocket function that we’ve been using.

First, we call the socket system call:

int socket(int domain, int type, int protocol);

This allocates resources within the operating system for a new stream of network communication. The file descriptor that is returned can be used to communicate over this new socket.

The socket syscall can be used for many purposes, but in this class, we’re only going to use it for network communication (specifically TCP over IPv4). As such, we’ll always pass AF_INET for domain (indicating IPv4), SOCK_STREAM for type (indicating TCP), and 0 for protocol (indicating “give me the default protocol meeting the requirements for AF_INET and SOCK_STREAM”).

int fd = socket(AF_INET, SOCK_STREAM, 0);

Notes:

Once we’ve called socket, the socket data structures exist, but they don’t do anything. We need to configure the socket to bind to a port and receive network connections.

To specify what IP address and port number we’d like to listen on, we fill out a struct sockaddr. This is actually a polymorphic C struct, intended to be capable of representing IP/port pairs for multiple protocols. Definitions are as follows:

// Generic, abstract base "class" for all socket addresses. Functions such as
// bind and connect take a pointer to this type; the sa_family field tells them
// which protocol-specific struct the pointer really refers to.
struct sockaddr {
    short sa_family;      // AF_INET or AF_INET6
    char so_unused[14];   // padding to make this struct 16 bytes
};

// IPv4-specific version. Same size as struct sockaddr, so a pointer to one of
// these can be cast to struct sockaddr* when calling bind or connect.
struct sockaddr_in {
    short sin_family;         // Should only ever be populated with AF_INET
    unsigned short sin_port;  // Port number (0-65535); store it with htons()
    struct in_addr sin_addr;  // IPv4 address; see struct in_addr
    char sin_zero[8];         // padding to make this struct 16 bytes
};

// Stores an IPv4 address (4 bytes). The field is a 32-bit unsigned integer:
// `unsigned long` would be 8 bytes on 64-bit Linux, which would contradict the
// 4-byte address and the 16-byte size of struct sockaddr_in.
struct in_addr {
    unsigned int s_addr;
};

// IPv6-specific version:
struct sockaddr_in6 {
    short sin6_family;    // Should only ever be populated with AF_INET6
    // Other fields here (port, flow info, 16-byte address, scope id). The struct
    // has a fixed size, but it is larger than the 16-byte struct sockaddr, which
    // is why bind and connect also take the size of the struct being passed.
};

Filling in one of these looks like this:

// First part of createServerSocket: creates the socket and fills in the
// address (any local IP address, the given port) that it should listen on.
// The remaining steps are intentionally elided from this listing.
int createServerSocket(short port) {
    int s = socket(AF_INET, SOCK_STREAM, 0);
    if (s < 0) return -1; // socket() failed; report the error to the caller

    struct sockaddr_in server;
    bzero(&server, sizeof(struct sockaddr_in)); // Zero out all 16 bytes
    server.sin_family = AF_INET;                // IPv4
    server.sin_port = htons(port);              // port, converted with htons
    server.sin_addr.s_addr = htonl(INADDR_ANY); // Bind to *any* possible
                                                // incoming IP address

    // More code here...
}

Once we initialize a sockaddr containing the IP address and port number we want to listen to, we can bind to that port. (This is like walking into one of the apartments in the apartment complex and setting up shop there. If there’s someone already in the apartment – if the port is already taken – this is where we’ll see an error occur.)

bind(s, (struct sockaddr*) &server, sizeof(struct sockaddr_in));

Finally, we need to call listen to start listening for people trying to connect to our server. Under the hood, this is what allocates the “waiting list” – the list of people waiting at the door – which we can read by calling accept on the server socket. When calling listen, we need to tell it how long we want the waiting list to be. If anyone tries to connect to our server and the waiting list is full, the operating system will turn them away and tell them to try coming back later.

listen(s, 128);

The complete function:

/**
 * Creates a TCP/IPv4 socket that listens for incoming connections on the given
 * port, on all of this machine's IP addresses.
 *
 * @param port port number to listen on
 * @return a descriptor suitable for passing to accept(), or -1 if the socket
 *         could not be created, bound (e.g. the port is already taken), or put
 *         into the listening state. No descriptor is leaked on failure.
 */
int createServerSocket(short port) {
    int s = socket(AF_INET, SOCK_STREAM, 0);
    if (s < 0) return -1;

    // We need to bind this socket to one of our IP addresses. A machine might have
    // many IP addresses; it might be connected over WiFi and Ethernet, and it's
    // possible it could be doing networking over Bluetooth, and it might have virtual
    // network interfaces; it'll have an IP address for each of those
    struct sockaddr_in server;
    bzero(&server, sizeof(struct sockaddr_in)); // Zero out all 16 bytes
    server.sin_family = AF_INET;
    server.sin_port = htons(port);
    server.sin_addr.s_addr = htonl(INADDR_ANY); // Bind the socket to *all* of our
                                                // potential IP addresses

    // Start moving into our apartment, then listen for incoming connections with
    // a waiting list of up to 128 pending clients. Either step can fail (most
    // commonly because the port is already taken), so only hand the socket back
    // if both succeeded.
    if (bind(s, reinterpret_cast<struct sockaddr*>(&server), sizeof(struct sockaddr_in)) == 0 &&
        listen(s, 128) == 0) {
        return s;
    }

    // Release s so that we don't leak descriptors
    close(s);
    return -1;
}

createClientSocket

The implementation of createClientSocket is similar, differing in two ways:

* We don’t need to call bind or listen, because we aren’t setting up shop in any apartment. We’re establishing an outgoing connection, not listening for incoming ones.
* We don’t know what IP address we’re trying to connect to. We need to look it up.

At the beginning of our discussion of networking, I mentioned that the DNS protocol takes human-friendly hostnames and resolves them to IP addresses. The gethostbyname standard library function performs a DNS lookup, returning information about the host in question.

// Performs a DNS lookup for hostname and returns information about that host,
// or NULL if the host couldn't be found.
struct hostent *gethostbyname(const char *hostname);

// Stores DNS info about a host
struct hostent {
    char  *h_name;          // Domain name of the host
    char  **h_aliases;      // A NULL-terminated array of alternate names
    short h_addrtype;       // Type of addresses being returned (e.g. AF_INET)
    short h_length;         // Length (in bytes) of each address (4 for IPv4)
    char  **h_addr_list;    // NULL-terminated array of pointers to in_addr structs
};

// Stores an IPv4 address (4 bytes). The field is a 32-bit unsigned integer:
// `unsigned long` would be 8 bytes on 64-bit Linux, which would contradict the
// stated 4-byte size of an IPv4 address.
struct in_addr {
    unsigned int s_addr;
};

Usage looks like this:

/**
 * Opens a TCP/IPv4 connection to the given host and port.
 *
 * @param host hostname to look up via gethostbyname
 * @param port port number on the remote host
 * @return a connected socket descriptor, or -1 if the host could not be found,
 *         the socket could not be created, or the connection attempt failed.
 */
int createClientSocket(const string& host, unsigned short port) {
    // Look the host up first; without an address there is nothing to connect to
    struct hostent *hostInfo = gethostbyname(host.c_str());
    if (hostInfo == NULL) return -1;

    // Describe the remote endpoint, using the first address the lookup returned
    struct sockaddr_in remote;
    bzero(remote.sin_zero, 8);
    remote.sin_family = AF_INET;
    remote.sin_port = htons(port);
    struct in_addr *firstAddress = (struct in_addr*)(hostInfo->h_addr_list[0]);
    remote.sin_addr.s_addr = firstAddress->s_addr;

    // Allocate the data structures for this network connection
    int sock = socket(AF_INET, SOCK_STREAM, 0);
    if (sock < 0) return -1;

    // Establish the actual connection. On failure, release the descriptor
    // before bailing out so that it isn't leaked.
    if (connect(sock, (struct sockaddr*) &remote, sizeof(remote)) != 0) {
        close(sock);
        return -1;
    }
    return sock;
}