Slices

Slices are a ubiquitous feature in newer programming languages like Go, Rust, and Zig. But until rather recently I found them mysterious. They seemed to be a solution to a problem that I’d never before considered. Yet they’re important enough to be “baked” into the languages mentioned above, with their own special syntax and support. What exactly are they, and what are they good for?

Here’s how the canonical introductions to Go and Rust describe them:

An array has a fixed size. A slice, on the other hand, is a dynamically-sized, flexible view into the elements of an array. In practice, slices are much more common than arrays.
- A Tour of Go

Slices let you reference a contiguous sequence of elements in a collection rather than the whole collection. A slice is a kind of reference, so it does not have ownership.
- The Rust Programming Language

Let’s have a go at modeling this in C:

/*
 * A "view" into a string. ptr points to the first byte of the
 * string, and len is the total number of bytes.
 */
struct string_slice {
        char *ptr;
        size_t len;
};

Suppose the string “Hello, world” lives somewhere in memory. We can construct a slice of the substring "world" like so:

char *str = "Hello, world";
struct string_slice world_slice = {
        .ptr = str + 7,
        .len = 5
};

Here’s what’s going on:

  0   1   2   3   4   5   6   7
+---+---+---+---+---+---+---+---+---+---+---+---+
| H | e | l | l | o | , |   | w | o | r | l | d |
+---+---+---+---+---+---+---+---+---+---+---+---+
  ^                           ^
  |                           |
 str                   world_slice.ptr = str + 7

                            [=== === === === ===]
							  1   2   3   4   5
                             world_slice.len = 5

What can we do with slices? We can print them:

void string_slice_debug(struct string_slice slice) {
        putchar('"');
        for (int i = 0; i < slice.len; i += 1) {
                putchar(*(slice.ptr + i));
        }
        putchar('"');
}
// string_slice_debug(world_slice)
// =>
// "world"

compare them:

bool string_slice_eq(struct string_slice s1, struct string_slice s2) {
        if (s1.len != s2.len) {
                return false;
        }
        for (int i = 0; i < s1.len; i += 1) {
                if (*(s1.ptr + i) != *(s2.ptr + i)) {
                        return false;
                }
        }
        return true;
}

order them, re-slice them, and so much more. And the beautiful thing is that none of this requires allocating any additional memory for the substrings we’re interested in.

Actually, that’s not quite true. After all, we need to store the struct string_slices somewhere. The difference is that a struct string_slice takes 16 bytes to store, regardless of the length of the substring being sliced.

Because of this, you’ll often hear it said that slices have “value semantics”: they have a fixed size, are typically immutable, are cheap to copy, and therefore behave more like integers and booleans than complex data structures such as Rust’s Vec.

Example: Splitting a String

To illustrate just how useful these string slices are, let’s write a program that splits its input string on some separator character, allowing us to iterate over the resulting substrings. Using our shiny struct string_slice type, we can do so without allocating on the heap at all.

Here’s the idea: the function split_string doesn’t return an array of strings or anything of the sort; instead it returns a structure that maintains some “iterator state”. In particular, it stores:

struct split_iter {
        char *ptr;
        char split_char;
        bool done;
};

/*
 * Split string on all occurrences of split_char. Returns an
 * "iterator" that yields string slices.
 */
struct split_iter split_string(char *string, char split_char) {
        struct split_iter iter = {
                .ptr = string,
                .split_char = split_char,
                .done = false,
        };
        return iter;
}

To actually get at the resulting substrings, we’ll write a split_iter_next function that returns either the next string slice, or a value indicating that we’re done. Specifically, it returns a struct string_slice_option (which mimics Rust’s Option<&str>):

enum option_kind {
        OPTION_NONE,
        OPTION_SOME,
};

struct string_slice_option {
        enum option_kind kind;
        struct string_slice slice;
};

The real work is accomplished by split_iter_next. This function scans the string, starting at the struct split_iter’s ptr, and stopping when it encounters either the split_char or the end of the string. It then constructs a slice to return to the caller, and updates the iterator’s internal state appropriately.

struct string_slice_option split_iter_next(struct split_iter *iter) {
        struct string_slice_option option;
        if (iter->done) {
                option.kind = OPTION_NONE;
                return option;
        }

		// Advance the end pointer until we reach the end of
		// the string or the split character.
        char *end_ptr = iter->ptr;
        while (*end_ptr != 0 && *end_ptr != iter->split_char) {
                end_ptr += 1;
        }

        struct string_slice slice = {
                .ptr = iter->ptr,
                .len = end_ptr - iter->ptr,
        };

		// Update the iterator's pointer and done state.
        if (*end_ptr == 0) {
                iter->ptr = end_ptr;
                iter->done = true;
        } else {
                iter->ptr = end_ptr + 1;
        }

        option.kind = OPTION_SOME;
        option.slice = slice;

        return option;
}

Here it is in action:

char *str = "foo bar baz quux";
struct split_iter iter = split_string(str, ' ');
while (1) {
        struct string_slice_option next = split_iter_next(&iter);
        if (next.kind == OPTION_SOME) {
                string_slice_debug(next.slice);
                printf("\n");
        } else {
                break;
        }
}
// =>
// "foo"
// "bar"
// "baz"
// "quux"

Beautiful! We could continue the exercise, writing similar “adapters” to map over the slices, filter the results, drop or take some, accumulate a value, or collect them into some data structure.

But event this simple exercise helped me understand and better appreciate the following roughly equivalent Rust code:

let str = "foo bar baz quux";
let mut iter = str.split(' ');
while let Some(part) = iter.next() {
    println!("{:?}", part);
}

Speaking of Rust…

Bonus: Lifetimes and Ownership

As the Rust Book points out above, a slice is a kind of reference. It only makes sense in the context of the region of memory that it refers to. There are two potential pitfalls here:

Here’s a C program illustrating the first pitfall:

char str[12] = "first second";
// Split the string.
struct split_iter iter = split_string(str, ' ');
// Mutate the underlying memory.
str[0] = 'F';
string_slice_debug(split_iter_next(&iter).slice);
// =>
// "First"
//  ^ Not what we expected!

We expected to see "first" but instead got "First" because the original string was mutated under our noses.

Rust’s ownership system prevents this. For example, this won’t compile:

let mut text = String::from("first second");
let mut iter = text.split(' ');
unsafe {
    text.as_bytes_mut()[0] = b'F';
}
println!("{:?}", iter.next());
// =>
// error: cannot borrow `text` as mutable because it is also borrowed as immutable

Where does the immutable borrow of text occur? In the return value of split, actually. As in our example, this function returns a structure that hangs onto a pointer to (somewhere in) the underlying text.

Here’s the second pitfall in action:

struct split_iter return_from_fn(void) {
        char buffer[10];
        printf("Enter up to 10 characters:\n");
        fgets(buffer, sizeof(buffer), stdin);
        return split_string(buffer, ' ');
}

int main(void) {
        struct split_iter iter = return_from_fn();
        string_slice_debug(split_iter_next(&iter).slice);
}
// =>
// Enter up to 10 characters:
// foo bar
// "" <- nothing

In this case, the character buffer buffer is allocated in the stack frame of return_from_fn. Because that stack frame is cleaned up when the function returns, the pointer held by iter is no longer meaningful.

Once again, Rust prevents this kind of mistake:

use std::io::{self, BufRead};

fn return_slice<'a>() -> &'a str {
    let stdin = io::stdin();
    let mut line = String::new();
	//             ^^^^^^^^^^^ Slightly different from our C example
	// since line is heap-allocated. But it's nonetheless deallocated
	// when the function returns.
    stdin
        .lock()
        .read_line(&mut line)
        .expect("failed to read line");
    return &line;
	//     ^^^^^ Rust catches this: the data this refers to gets
	// deallocated when this function returns.
}

fn main() {
    let _slice = return_slice();
}
// error: cannot return reference to local variable `line`

As anyone who’s test-driven the language knows, rustc is a stern master. Hopefully these examples show that it has ample reason for being so.

Other Applications

Slices aren’t limited to strings. Nor is this idea of constructing a “view” of an object limited to slices and arrays. For example, the Python package numpy represents its n-dimensional arrays as special “views” of underlying buffers.

Additional Example

As a fun exercise I decided to try my hand at transcribing this Rust program to C:

fn main() {
    let xs: [i32; 12] = [0, 4, 3, 4, 1, 2, 3, 6, 1, 2, 2, 6];

    for sum in xs.windows(3).map(|part| part.iter().sum::<i32>()) {
        println!("{}", sum);
    }
}

Here’s what I ended up with:

#include <stdio.h>

struct int_slice {
        int *ptr;
        size_t len;
};

void int_slice_debug(struct int_slice slice) {
        printf("{ ");
        for (int i = 0; i < slice.len; i += 1) {
                printf("%d, ", *(slice.ptr + i));
        }
        printf("}");
}

struct window_iter {
        struct int_slice slice;
        size_t pos;
        size_t win_size;
};

struct window_iter window(struct int_slice slice, size_t win_size) {
        struct window_iter iter = {
                .slice = slice,
                .pos = 0,
                .win_size = win_size,
        };
        return iter;
}

enum option_kind {
        OPTION_NONE,
        OPTION_SOME,
};

struct window_iter_option {
        enum option_kind kind;
        struct int_slice slice;
};

struct window_iter_option window_iter_next(struct window_iter *iter) {
        struct window_iter_option option;
        if (iter->pos > iter->slice.len - iter->win_size) {
                option.kind = OPTION_NONE;
                return option;
        }

        struct int_slice slice = {
                .ptr = iter->slice.ptr + iter->pos,
                .len = iter->win_size,
        };
        iter->pos += 1;

        option.kind = OPTION_SOME;
        option.slice = slice;
        return option;
}

struct window_iter_sum_iter {
        struct window_iter iter;
};

int sum_slice(struct int_slice slice) {
        int sum = 0;
        for (int i = 0; i < slice.len; i += 1) {
                sum += *(slice.ptr + i);
        }
        return sum;
}

struct window_iter_sum_iter sum_windows(struct window_iter win_iter) {
        struct window_iter_sum_iter iter = {
                .iter = win_iter,
        };
        return iter;
}

struct window_iter_sum_iter_option {
        enum option_kind kind;
        int sum;
};

struct window_iter_sum_iter_option window_iter_sum_iter_next(struct window_iter_sum_iter *iter) {
        struct window_iter_sum_iter_option option;
        struct window_iter_option next = window_iter_next(&(iter->iter));
        if (next.kind == OPTION_NONE) {
                option.kind = OPTION_NONE;
                return option;
        }

        int sum = sum_slice(next.slice);
        option.kind = OPTION_SOME;
        option.sum = sum;
        return option;
}

int main(void) {
        // An array of ints.
        int xs[] = { 0, 4, 3, 4, 1, 2, 3, 6, 1, 2, 2, 6 };
        // A SLICE viewing the entire xs array.
        struct int_slice xs_slice = {
                .ptr = xs,
                .len = 12,
        };

        // Two "chained" iterators:
        // - The first iterator yields slices produced by sliding a windows of
        // length 3 over the input slice.
        // - The second iterator yields the sum of each slice.
        struct window_iter_sum_iter iter = sum_windows(window(xs_slice, 3));
        while (1) {
                struct window_iter_sum_iter_option next = window_iter_sum_iter_next(&iter);
                if (next.kind == OPTION_SOME) {
                        printf("%d\n", next.sum);
                } else {
                        break;
                }
        }

        return 0;
}

My C transcription is significantly longer because methods like windows , map, and sum are already defined in the Rust language. But length aside, Rust is so much more ergonomic for writing programs in this style.