Just to be sure, I whipped up a pair of C programs (using POSIX.1-2008, so they should compile and run on Linux, the *BSDs and Mac OS X at least).
The idea of these is to roughly and minimally simulate a Blinkenlights system, where clients connect to a server using UDP/IP, telling their grid coordinates, and the server first waits for all clients to connect. Then, the server sends a message to each client in turn, with a 16-byte payload (simulating 4x4 grayscale samples). A full update is called a frame, because it would update the entire display. The wall-clock time taken by each frame is measured, as well as the total wall-clock time taken over the desired number of frames.
Here is the service end, server.c:
Here is the client end, client.c:
Code:
#define _POSIX_C_SOURCE 200809L
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <time.h>
#include <netdb.h>
#include <signal.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#define PAYLOAD 16
/*
 * One slot of the server's client map.
 *
 * The server's main() stores the sender address of a client's login
 * datagram here and later passes it to sendto(). A slot whose addrlen
 * is zero is treated by main() as not yet taken.
 */
struct client {
    /* Peer address; one member per address family the struct can hold. */
    union {
        struct sockaddr_in  ipv4;
        struct sockaddr_in6 ipv6;
    } address;

    /* Number of meaningful bytes stored in 'address'. */
    socklen_t addrlen;

    /* Any other state/information? */
};
/* Number of the most recently caught signal; stays zero until one arrives. */
static volatile sig_atomic_t interrupted = 0;

/*
 * Signal handler: records which signal was delivered so that the main
 * loops, which poll 'interrupted', can wind down.
 */
static void catch_interrupt(const int signum)
{
    interrupted = signum;
}
/*
 * Install catch_interrupt() as the handler for signal 'signum'.
 *
 * The handler is installed with an empty signal mask and no flags
 * (in particular, SA_RESTART is not requested).
 *
 * Returns 0 on success, or the errno value left by sigaction() on failure.
 */
static int interrupt_on(const int signum)
{
    struct sigaction action;

    action.sa_flags = 0;
    action.sa_handler = catch_interrupt;
    sigemptyset(&action.sa_mask);

    return (sigaction(signum, &action, NULL) == -1) ? errno : 0;
}
/*
 * Benchmark server.
 *
 * Usage: server ADDRESS PORT XCLIENTS YCLIENTS FRAMES
 *
 * Binds a UDP socket to ADDRESS:PORT, waits until XCLIENTS*YCLIENTS
 * clients have each sent a two-byte login datagram (x, y), then sends
 * FRAMES rounds of one PAYLOAD-byte datagram per client, timing each
 * round and the whole run.
 *
 * Returns 1 on usage, allocation, address, or receive errors; 0 when the
 * test completes or is interrupted by a signal.
 */
int main(int argc, char *argv[])
{
    struct addrinfo hints, *socket_list, *curr;
    struct client *map = NULL;
    struct timespec all_started, all_ended, started, ended;
    double *duration = NULL, all_duration;
    const char *lasterr;
    ssize_t n;
    int xclients, yclients, clients, frames, socketfd = -1, unconnected;
    int c, i, result;
    char dummy;

    if (interrupt_on(SIGINT) ||
        interrupt_on(SIGHUP) ||
        interrupt_on(SIGQUIT) ||
        interrupt_on(SIGTERM)) {
        fprintf(stderr, "Cannot set interrupt handlers: %s.\n", strerror(errno));
        return 1;
    }

    if (argc != 6 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
        fprintf(stderr, " %s ADDRESS PORT XCLIENTS YCLIENTS FRAMES\n", argv[0]);
        fprintf(stderr, "\n");
        fprintf(stderr, "This program creates an 8-bit grayscale framebuffer,\n");
        fprintf(stderr, "XCLIENTS*4 samples wide, YCLIENTS*4 samples tall.\n");
        fprintf(stderr, "It waits for XCLIENTS*YCLIENTS clients to connect to\n");
        fprintf(stderr, "ADDRESS and PORT using UDP/IP, with the payload being\n");
        fprintf(stderr, "two bytes: x and y coordinates for the client.\n");
        fprintf(stderr, "\n");
        fprintf(stderr, "When all clients have connected, this program will\n");
        fprintf(stderr, "send a total of FRAMES full-frame updates to the clients,\n");
        fprintf(stderr, "and measure and report the wall-clock frame rate.\n");
        fprintf(stderr, "Each client will get 16 samples (4x4 grid) per frame.\n");
        fprintf(stderr, "\n");
        fprintf(stderr, "You need to start this program before the clients.\n");
        fprintf(stderr, "\n");
        return 1;
    }

    /* The trailing " %c" makes sscanf return 2 if junk follows the number. */
    if (sscanf(argv[3], " %d %c", &xclients, &dummy) != 1 ||
        xclients < 0 || xclients > 256) {
        fprintf(stderr, "%s: Invalid number of clients in the X direction.\n", argv[3]);
        return 1;
    }
    if (sscanf(argv[4], " %d %c", &yclients, &dummy) != 1 ||
        yclients < 0 || yclients > 256) {
        fprintf(stderr, "%s: Invalid number of clients in the Y direction.\n", argv[4]);
        return 1;
    }
    if (sscanf(argv[5], " %d %c", &frames, &dummy) != 1 ||
        frames < 1) {
        fprintf(stderr, "%s: Invalid number of frames.\n", argv[5]);
        return 1;
    }
    clients = xclients * yclients;

    /* Allocate duration map (for each frame); calloc checks the size product. */
    duration = calloc((size_t)frames, sizeof *duration);
    if (!duration) {
        fprintf(stderr, "Not enough memory for measuring duration of %d frames.\n", frames);
        return 1;
    }

    /* Allocate the client map zero-filled, so every addrlen starts at 0
     * (unused). At least one element is requested so that an empty grid
     * is not mistaken for an allocation failure. */
    map = calloc(clients > 0 ? (size_t)clients : 1, sizeof *map);
    if (!map) {
        fprintf(stderr, "Not enough memory for %d x %d clients.\n", xclients, yclients);
        free(duration);
        return 1;
    }

    /* Bind to datagram socket, specified address and port.
     * All hints members other than the four set below must be zero/null. */
    memset(&hints, 0, sizeof hints);
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_DGRAM;
    hints.ai_protocol = 0;
    hints.ai_flags = AI_PASSIVE;
    result = getaddrinfo(argv[1], argv[2], &hints, &socket_list);
    if (result) {
        fprintf(stderr, "%s %s: %s.\n", argv[1], argv[2], gai_strerror(result));
        free(map);
        free(duration);
        return 1;
    }

    lasterr = NULL;
    for (curr = socket_list; curr != NULL; curr = curr->ai_next) {
        socketfd = socket(curr->ai_family, curr->ai_socktype, curr->ai_protocol);
        if (socketfd == -1) {
            lasterr = strerror(errno);
            continue;
        }
        if (bind(socketfd, curr->ai_addr, curr->ai_addrlen) == 0)
            break;
        lasterr = strerror(errno);
        do {
            result = close(socketfd);
        } while (result == -1 && errno == EINTR);
    }
    if (!curr) {
        if (lasterr)
            fprintf(stderr, "%s %s: %s.\n", argv[1], argv[2], lasterr);
        else
            fprintf(stderr, "%s %s: Invalid address and/or port.\n", argv[1], argv[2]);
        freeaddrinfo(socket_list);
        free(map);
        free(duration);
        return 1;
    }
    freeaddrinfo(socket_list);

    printf("Waiting for clients to connect to %s, port %s.\n", argv[1], argv[2]);
    fflush(stdout);

    /* Login phase: collect one two-byte (x, y) datagram per grid cell. */
    unconnected = clients;
    while (!interrupted && unconnected > 0) {
        struct sockaddr_in6 address;
        socklen_t addrlen = sizeof address;
        char name_buffer[256];
        char port_buffer[128];
        unsigned char coords[2];
        int slot;

        n = recvfrom(socketfd, coords, sizeof coords, 0,
                     (struct sockaddr *)&address, &addrlen);
        if (n == (ssize_t)-1 && errno != EINTR) {
            fprintf(stderr, "%s.\n", strerror(errno));
            shutdown(socketfd, SHUT_RDWR);
            do {
                result = close(socketfd);
            } while (result == -1 && errno == EINTR);
            free(map);
            free(duration);
            return 1;
        }
        /* EINTR, short, or oversized datagram: not a valid login. */
        if (n != (ssize_t)2)
            continue;

        /* recvfrom reports the full address length even when it had to
         * truncate the address; never copy more than was actually stored. */
        if (addrlen > sizeof address) {
            fprintf(stderr, "Ignoring client with an unsupported address type.\n");
            fflush(stderr);
            continue;
        }

        /* getnameinfo returns a nonzero EAI_* code on failure, not -1. */
        if (getnameinfo((struct sockaddr *)&address, addrlen,
                        name_buffer, sizeof name_buffer,
                        port_buffer, sizeof port_buffer,
                        NI_NUMERICSERV) != 0) {
            name_buffer[0] = '?';
            name_buffer[1] = '\0';
            port_buffer[0] = '?';
            port_buffer[1] = '\0';
        }

        if (coords[0] >= xclients ||
            coords[1] >= yclients) {
            fprintf(stderr, "Ignoring client %s:%s due to invalid coordinates.\n",
                    name_buffer, port_buffer);
            fflush(stderr);
            continue;
        }

        /* Row-major index of the client's grid cell. */
        slot = (int)(coords[0]) + xclients * (int)(coords[1]);
        if (map[slot].addrlen > 0) {
            fprintf(stderr, "Ignoring client %s:%s (%d) due to address (%d,%d) being already taken.\n",
                    name_buffer, port_buffer, slot, (int)coords[0], (int)coords[1]);
            fflush(stderr);
            continue;
        }

        memcpy(&(map[slot].address.ipv6), &address, addrlen);
        map[slot].addrlen = addrlen;
        unconnected--;
    }

    if (!interrupted) {
        printf("All clients connected. Starting test.\n");
        fflush(stdout);
    }

    /* Test phase: one datagram per client per frame, timed per frame. */
    clock_gettime(CLOCK_REALTIME, &all_started);
    for (i = 0; i < frames && !interrupted; i++) {
        clock_gettime(CLOCK_REALTIME, &started);
        for (c = 0; c < clients && !interrupted; c++) {
            char payload[PAYLOAD];

            /* Payload content is just the frame number, truncated to a byte. */
            memset(payload, i, PAYLOAD);
            do {
                n = sendto(socketfd, payload, sizeof payload, MSG_EOR | MSG_NOSIGNAL,
                           (struct sockaddr *)&(map[c].address), map[c].addrlen);
            } while (n == (ssize_t)-1 && errno == EINTR);
            if (n == (ssize_t)-1) {
                fprintf(stderr, "(%s)\n", strerror(errno));
                fflush(stderr);
            } else
            if (n != (ssize_t)sizeof payload) {
                fprintf(stderr, "(incomplete write)\n");
                fflush(stderr);
            }
        }
        clock_gettime(CLOCK_REALTIME, &ended);
        duration[i] = difftime(ended.tv_sec, started.tv_sec)
                    + ((double)ended.tv_nsec - (double)started.tv_nsec) / 1000000000.0;
    }
    clock_gettime(CLOCK_REALTIME, &all_ended);
    all_duration = difftime(all_ended.tv_sec, all_started.tv_sec)
                 + ((double)all_ended.tv_nsec - (double)all_started.tv_nsec) / 1000000000.0;

    if (!interrupted) {
        fprintf(stderr, "Test completed.\n");
        fflush(stderr);
        for (i = 0; i < frames; i++)
            printf("Frame %d of %d: %.6f seconds wall clock time (%.1f FPS)\n",
                   i + 1, frames, duration[i], 1.0 / duration[i]);
        printf("Total duration: %.6f seconds (average %.1f frames per second)\n",
               all_duration, (double)frames / all_duration);
        fflush(stdout);
    } else
        fprintf(stderr, "%s.\n", strsignal(interrupted));

    shutdown(socketfd, SHUT_RDWR);
    do {
        result = close(socketfd);
    } while (result == -1 && errno == EINTR);

    free(map);
    free(duration);
    return 0;
}
Both are interruptible via signals (e.g. INT (CTRL+C) and HUP).
Code:
#define _POSIX_C_SOURCE 200809L
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <signal.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#define PAYLOAD 16
/* Signal number last seen by catch_interrupt(); zero means none so far. */
static volatile sig_atomic_t interrupted = 0;

/*
 * Signal handler: stores the delivered signal number in 'interrupted',
 * which the send/receive loops in main() check in order to stop.
 */
static void catch_interrupt(const int signum)
{
    interrupted = signum;
}
/*
 * Route signal 'signum' to catch_interrupt().
 *
 * The handler is registered with an empty signal mask and sa_flags of 0,
 * so SA_RESTART is not requested.
 *
 * Returns 0 if the handler was installed, otherwise the errno value
 * set by sigaction().
 */
static int interrupt_on(const int signum)
{
    struct sigaction action;

    action.sa_flags = 0;
    action.sa_handler = catch_interrupt;
    sigemptyset(&action.sa_mask);

    return (sigaction(signum, &action, NULL) == -1) ? errno : 0;
}
/*
 * Benchmark client.
 *
 * Usage: client ADDRESS PORT CLIENTX CLIENTY FRAMES
 *
 * Connects a UDP socket to ADDRESS:PORT, sends a two-byte login datagram
 * (CLIENTX, CLIENTY), then receives FRAMES datagrams of up to PAYLOAD
 * bytes each, discarding their contents.
 *
 * Returns 1 on usage, address, send, or receive errors; 0 when all frames
 * were received or the run was interrupted by a signal.
 */
int main(int argc, char *argv[])
{
    struct addrinfo hints, *socket_list, *curr;
    unsigned char buffer[PAYLOAD + 2];
    const char *lasterr;
    ssize_t n;
    int clientx, clienty, frames, socketfd = -1;
    int i, result, status = 0;
    char dummy;

    if (interrupt_on(SIGINT) ||
        interrupt_on(SIGHUP) ||
        interrupt_on(SIGQUIT) ||
        interrupt_on(SIGTERM)) {
        fprintf(stderr, "Cannot set interrupt handlers: %s.\n", strerror(errno));
        return 1;
    }

    if (argc != 6 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
        fprintf(stderr, " %s ADDRESS PORT CLIENTX CLIENTY FRAMES\n", argv[0]);
        fprintf(stderr, "\n");
        fprintf(stderr, "This program will connect to ADDRESS port PORT\n");
        fprintf(stderr, "using UDP, asking to become (CLIENTX, CLIENTY),\n");
        fprintf(stderr, "and will receive FRAMES %d-byte messages.\n", PAYLOAD);
        fprintf(stderr, "\n");
        return 1;
    }

    /* Each coordinate is sent as a single byte, so it must fit in 0..255.
     * The trailing " %c" makes sscanf return 2 if junk follows the number. */
    if (sscanf(argv[3], " %d %c", &clientx, &dummy) != 1 ||
        clientx < 0 || clientx > 255) {
        fprintf(stderr, "%s: Invalid client X.\n", argv[3]);
        return 1;
    }
    if (sscanf(argv[4], " %d %c", &clienty, &dummy) != 1 ||
        clienty < 0 || clienty > 255) {
        fprintf(stderr, "%s: Invalid client Y.\n", argv[4]);
        return 1;
    }
    if (sscanf(argv[5], " %d %c", &frames, &dummy) != 1 ||
        frames < 1) {
        fprintf(stderr, "%s: Invalid number of frames.\n", argv[5]);
        return 1;
    }

    /* Connect a datagram socket to the specified address and port.
     * All hints members other than the four set below must be zero/null. */
    memset(&hints, 0, sizeof hints);
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_DGRAM;
    hints.ai_protocol = 0;
    hints.ai_flags = 0;
    result = getaddrinfo(argv[1], argv[2], &hints, &socket_list);
    if (result) {
        fprintf(stderr, "%s %s: %s.\n", argv[1], argv[2], gai_strerror(result));
        return 1;
    }

    lasterr = NULL;
    for (curr = socket_list; curr != NULL; curr = curr->ai_next) {
        socketfd = socket(curr->ai_family, curr->ai_socktype, curr->ai_protocol);
        if (socketfd == -1) {
            lasterr = strerror(errno);
            continue;
        }
        if (connect(socketfd, curr->ai_addr, curr->ai_addrlen) == 0)
            break;
        lasterr = strerror(errno);
        do {
            result = close(socketfd);
        } while (result == -1 && errno == EINTR);
    }
    if (!curr) {
        if (lasterr)
            fprintf(stderr, "%s %s: %s.\n", argv[1], argv[2], lasterr);
        else
            fprintf(stderr, "%s %s: Invalid address and/or port.\n", argv[1], argv[2]);
        freeaddrinfo(socket_list);
        return 1;
    }
    freeaddrinfo(socket_list);

    /* Send a login message. */
    buffer[0] = (unsigned char)clientx;
    buffer[1] = (unsigned char)clienty;
    do {
        n = send(socketfd, buffer, 2, 0);
    } while (n == (ssize_t)-1 && errno == EINTR && !interrupted);
    if (n != 2) {
        if (n == (ssize_t)-1)
            fprintf(stderr, "%s %s: %s.\n", argv[1], argv[2], strerror(errno));
        else
            fprintf(stderr, "%s %s: Failed to send a login message.\n", argv[1], argv[2]);
        shutdown(socketfd, SHUT_RDWR);
        do {
            result = close(socketfd);
        } while (result == -1 && errno == EINTR);
        return 1;
    }

    /* Receive the 'frames' PAYLOAD-byte messages; contents are discarded. */
    for (i = 0; i < frames && !interrupted; i++) {
        do {
            n = recv(socketfd, buffer, PAYLOAD, 0);
        } while (n == (ssize_t)-1 && errno == EINTR && !interrupted);

        /* A real receive error (not a signal) must not count as a frame. */
        if (n == (ssize_t)-1 && errno != EINTR) {
            fprintf(stderr, "%s %s: %s.\n", argv[1], argv[2], strerror(errno));
            status = 1;
            break;
        }
    }

    if (interrupted)
        fprintf(stderr, "(%s)\n", strsignal(interrupted));

    shutdown(socketfd, SHUT_RDWR);
    do {
        result = close(socketfd);
    } while (result == -1 && errno == EINTR);

    return status;
}
To compile both, I use
To have any real-world meaning, the clients should run on a separate machine; best if run on a number of different machines.
Code:
gcc -W -Wall -O3 server.c -lrt -o server
gcc -W -Wall -O3 client.c -o client
Both take five command-line parameters: address port x y frames. For the server, x and y are the number of clients to expect in the x and y dimensions, and for the client, they are the clients' coordinates.
The server is completely sequential (does not even use non-blocking I/O), and uses only one core. The clients just consume the data, they don't actually even access it.
In one terminal (or machine; but then use real IP address instead of loopback address!), first start the server. This uses 100 clients horizontally and vertically (100*100=10,000), and a thousand frames; that should be long enough to find the steady-state operation. You might need a longer test run if you use separate machines.
When it starts, it tells you it's waiting for clients to connect.
Code:
./server 127.0.0.1 8000 100 100 1000
In another terminal (machine), start the 10,000 clients. In Linux, this is not a problem. Using a Bash shell:
Now, I have a pretty powerful (but not top-of-the-line) desktop workstation, an AMD Athlon X4 640 with four cores at 3 GHz, with about 6 GB of RAM (about 5.5 free for applications; I'm running Linux, of course). Running the above takes about 51 seconds wall-clock time, and I get about 20 frames per second update rate.
Code:
( X=100 Y=100 N=100 ;
for ((y=0; y<Y; y++)); do
for ((x=0; x<X; x++)); do
./client 127.0.0.1 8000 $x $y $N &
done ;
done ;
wait )
This means I actually sent 20×100×100 = 200,000 packets per second, each UDP packet having a 16-byte payload. While none of the packets hit my networking hardware, the packets are routed and filtered as normal; the Linux kernel doesn't take too many shortcuts with the loopback interface.
Changing the PAYLOAD macro to 64 in both programs simulates an 8x8 grayscale chunk per client, thus an 800x800 pixel map. For this, I get a hair less, somewhat over 19 frames per second. For 256-byte payloads, I still get about 16 frames per second; that's distributing a 1600x1600-pixel grayscale map to 10,000 different clients, each owning a 16x16 chunk.
Does this mean you can expect the same when using real networking hardware?
To me, this means the server side is no issue. You don't even need any fancy tricks, all you need is a relatively powerful machine. If you only need updates on the order of once per second per client, something as simple as a Cubieboard (running Linux) might suffice as the server end.
Arduino Ethernet shields should have no issue with the per-client data rate; it's just a few packets per second. Whether you can do the processing you need on an Arduino -- remember, a chunk of your RAM will be consumed by the copy of the packet, and Arduinos have very little RAM to go by --, depends entirely on what you do with it.
Based on my personal experience, I'd say you should have no difficulty implementing this on a local area network, if at least the switches closest to the server are gigabit, to keep the latencies on the server side to a minimum. (Bandwidth is not a problem, but the latencies might be. Also, the switches' bandwidth is not that important, but the switching rate, in packets per second, is. Note that you definitely do not want to use routers, just simple unmanaged switches, for the client devices. Assuming you really intend to build a LAN for 10,000 devices.)
(Wireless connection to the clients might work, if you get the base stations to not interfere with each other, and you use ones that support enough concurrent clients, say a thousand or so. For 10,000 clients you'd need an 11-port gigabit switch (unmanaged, cheap), and ten WiFi base stations capable of supporting a thousand concurrent clients. Most do not; they're limited to a very small number of devices, due to very small internal tables etc. The bandwidth or collisions at the last leg are not an issue; the bandwidth is small, see below, and most data flows from base station to client device, so collisions should be rare.)
On the public internet, you'll most likely lose at least an occasional packet: the 10,000 packets per second at 16-byte payload translates to about 440,000 bytes per second (since a UDP/IPv4 packet has 28 bytes of header overhead: 20 bytes of IPv4 header plus 8 bytes of UDP header, not counting link-layer framing), or about 3.5 Mbit/s. And that's just for 1 "frame" per second, i.e. one packet per client from the server.