We used DeepSeek and ChatGPT to generate the measurement code below.
1. Time cost of a system call:
/**
* System Call Overhead Measurement
* Measures the time cost of a zero-byte read system call
* Uses clock_gettime with MONOTONIC clock for precise timing
* Binds process to single CPU for consistent measurements
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <sched.h>
#include <sys/syscall.h>
#include <fcntl.h>
#define ITERATIONS 1000000
/*
 * Pin the calling process to the given CPU core so all measurements
 * run on one core (avoids cross-core migration noise).
 * Exits the process on failure.
 */
void set_cpu_affinity(int cpu_id) {
    cpu_set_t mask;

    CPU_ZERO(&mask);
    CPU_SET(cpu_id, &mask);

    if (sched_setaffinity(0, sizeof(mask), &mask) != 0) {
        perror("sched_setaffinity failed");
        exit(EXIT_FAILURE);
    }

    printf("Process bound to CPU %d\n", cpu_id);
}
/*
 * Return the current CLOCK_MONOTONIC time as a single nanosecond count.
 * Exits the process if the clock cannot be read.
 */
static inline long long get_timestamp_ns() {
    struct timespec now;

    if (clock_gettime(CLOCK_MONOTONIC, &now) != 0) {
        perror("clock_gettime failed");
        exit(EXIT_FAILURE);
    }

    return 1000000000LL * (long long)now.tv_sec + now.tv_nsec;
}
/*
 * Entry point: measures the average cost of a zero-byte read() system call.
 *
 * Fixes over the original:
 *  - The loop is timed once as a whole instead of bracketing every call
 *    with two clock_gettime() invocations; per-call bracketing folds the
 *    timer's own overhead into every sample and inflates the result.
 *  - read() return values are checked instead of silently ignored.
 */
int main() {
    int pipefd[2];   // pipefd[0] is the fd used for the measured reads
    char buffer[1];  // never actually written to: all reads are 0 bytes

    printf("=== System Call Overhead Measurement ===\n");
    printf("Measuring zero-byte read system call cost\n");
    printf("Iterations: %d\n\n", ITERATIONS);

    // Bind to CPU 0 for consistent measurements
    set_cpu_affinity(0);

    // Create a pipe for read system calls
    if (pipe(pipefd) == -1) {
        perror("pipe creation failed");
        exit(EXIT_FAILURE);
    }
    // Close the write end; a zero-byte read returns immediately anyway
    close(pipefd[1]);

    printf("Starting measurement...\n");

    // Warm up (helps with CPU frequency scaling and cache effects)
    for (int i = 0; i < 1000; i++) {
        (void)read(pipefd[0], buffer, 0);
    }

    // Time the entire batch with one start/stop pair so the timer's own
    // cost is amortized over ITERATIONS calls instead of added to each.
    long long start_time = get_timestamp_ns();
    for (int i = 0; i < ITERATIONS; i++) {
        // POSIX: a read of 0 bytes returns 0 on success
        if (read(pipefd[0], buffer, 0) != 0) {
            perror("read failed");
            exit(EXIT_FAILURE);
        }
    }
    long long end_time = get_timestamp_ns();

    close(pipefd[0]);

    // Calculate results
    long long total_time = end_time - start_time;
    double avg_time_ns = (double)total_time / ITERATIONS;

    printf("\n=== Results ===\n");
    printf("Total elapsed time: %.2f microseconds\n", total_time / 1000.0);
    printf("Average system call time: %.2f nanoseconds\n", avg_time_ns);
    printf("Average system call time: %.2f microseconds\n", avg_time_ns / 1000.0);
    return 0;
}
Output on my server:
=== System Call Overhead Measurement ===
Measuring zero-byte read system call cost
Iterations: 1000000
Process bound to CPU 0
Starting measurement...
=== Results ===
Total elapsed time: 192011.73 microseconds
Average system call time: 192.01 nanoseconds
Average system call time: 0.19 microseconds
2. Time cost of a context switch
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
#define ITERATIONS 100000
/*
 * Pin the calling process to CPU 0 so parent and child are forced to
 * share one core, making the pipe ping-pong an actual context switch.
 * On failure only a diagnostic is printed; execution continues.
 */
void bind_to_cpu() {
    cpu_set_t mask;

    CPU_ZERO(&mask);
    CPU_SET(0, &mask);

    if (sched_setaffinity(0, sizeof(mask), &mask) == -1) {
        perror("sched_setaffinity");
    }
}
/*
 * Entry point: measures context-switch cost by ping-ponging one byte
 * between parent and child over a pair of pipes on a shared CPU.
 *
 * Fixes over the original:
 *  - Each process closes the pipe ends it does not use (no fd leak, and
 *    a blocked peer sees EOF instead of hanging if the other side dies).
 *  - The parent reaps the child with waitpid() instead of leaving a
 *    zombie, and closes its pipe fds when done.
 */
int main() {
    bind_to_cpu();

    int pipe1[2]; // parent -> child
    int pipe2[2]; // child -> parent
    if (pipe(pipe1) < 0 || pipe(pipe2) < 0) {
        perror("pipe");
        return 1;
    }

    pid_t pid = fork();
    if (pid < 0) {
        perror("fork");
        return 1;
    } else if (pid == 0) {
        // Child process: reads from pipe1, echoes on pipe2
        close(pipe1[1]); // unused write end
        close(pipe2[0]); // unused read end

        char msg;
        for (int i = 0; i < ITERATIONS; i++) {
            // read from parent
            if (read(pipe1[0], &msg, 1) != 1) {
                perror("child read");
                exit(1);
            }
            // write back to parent
            if (write(pipe2[1], &msg, 1) != 1) {
                perror("child write");
                exit(1);
            }
        }
        close(pipe1[0]);
        close(pipe2[1]);
        exit(0);
    } else {
        // Parent process: writes to pipe1, reads the echo on pipe2
        close(pipe1[0]); // unused read end
        close(pipe2[1]); // unused write end

        char msg = 'x';
        struct timespec start, end;
        long long total_ns = 0;

        for (int i = 0; i < ITERATIONS; i++) {
            clock_gettime(CLOCK_MONOTONIC, &start);
            // write to child
            if (write(pipe1[1], &msg, 1) != 1) {
                perror("parent write");
                return 1;
            }
            // read back from child
            if (read(pipe2[0], &msg, 1) != 1) {
                perror("parent read");
                return 1;
            }
            clock_gettime(CLOCK_MONOTONIC, &end);

            long long ns = (end.tv_sec - start.tv_sec) * 1000000000LL +
                           (end.tv_nsec - start.tv_nsec);
            total_ns += ns;
        }

        printf("Average round-trip time per context switch via pipe: %.2f ns\n",
               (double)total_ns / ITERATIONS / 2); // divide by 2 for single context switch

        close(pipe1[1]);
        close(pipe2[0]);
        waitpid(pid, NULL, 0); // reap the child; avoid leaving a zombie
    }
    return 0;
}
Output:
Average round-trip time per context switch via pipe: 2903.81 ns