aboutsummaryrefslogtreecommitdiffstats
path: root/core.lv2/serd-0.1.0.c
diff options
context:
space:
mode:
Diffstat (limited to 'core.lv2/serd-0.1.0.c')
-rw-r--r--core.lv2/serd-0.1.0.c2700
1 files changed, 2700 insertions, 0 deletions
diff --git a/core.lv2/serd-0.1.0.c b/core.lv2/serd-0.1.0.c
new file mode 100644
index 0000000..413fedb
--- /dev/null
+++ b/core.lv2/serd-0.1.0.c
@@ -0,0 +1,2700 @@
+/*
+ Copyright 2011 David Robillard <http://drobilla.net>
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef SERD_INTERNAL_H
+#define SERD_INTERNAL_H
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "serd/serd.h"
+
+/**
+ * A dynamic stack in memory.
+ *
+ * Positions in the stack are byte offsets from buf. serd_stack_new() starts
+ * size at SERD_STACK_BOTTOM rather than 0, and serd_stack_push() may
+ * realloc() buf, so offsets (not pointers) stay valid across pushes.
+ */
+typedef struct {
+ uint8_t* buf; ///< Stack memory
+ size_t buf_size; ///< Allocated size of buf (>= size)
+ size_t size; ///< Conceptual size of stack in buf
+} SerdStack;
+
+/**
+ * An offset to start the stack at. Note 0 is reserved for NULL.
+ *
+ * A new stack begins with size equal to this value, so offset 0 is never
+ * handed out by serd_stack_push() and can be used to mean "nothing".
+ */
+#define SERD_STACK_BOTTOM sizeof(void*)
+
+/**
+ * Create a stack backed by @a size bytes of heap memory.
+ *
+ * The malloc() result is stored unchecked.  The logical size starts at
+ * SERD_STACK_BOTTOM so that offset 0 is never a valid position.
+ */
+static inline SerdStack
+serd_stack_new(size_t size)
+{
+    // Field order: buf, buf_size, size
+    SerdStack fresh = { malloc(size), size, SERD_STACK_BOTTOM };
+    return fresh;
+}
+
+/** Return true iff nothing lies above SERD_STACK_BOTTOM in @a stack. */
+static inline bool
+serd_stack_is_empty(SerdStack* stack)
+{
+    const size_t used = stack->size;
+    return !(used > SERD_STACK_BOTTOM);
+}
+
+/** Release the memory of @a stack and reset every field to zero/NULL. */
+static inline void
+serd_stack_free(SerdStack* stack)
+{
+    uint8_t* const memory = stack->buf;
+    stack->size     = 0;
+    stack->buf_size = 0;
+    stack->buf      = NULL;
+    free(memory);
+}
+
+/**
+ * Reserve @a n_bytes at the top of @a stack.
+ *
+ * The buffer is grown by repeated doubling until it can hold the new size, so
+ * a single push larger than the current capacity (or a push onto a stack
+ * whose capacity is 0) is still fully backed by memory.
+ *
+ * @return Pointer to the first reserved byte.  The pointer is invalidated by
+ * the next push, because the buffer may be moved by realloc().
+ *
+ * As elsewhere in this file, the realloc() result is used unchecked.
+ */
+static inline uint8_t*
+serd_stack_push(SerdStack* stack, size_t n_bytes)
+{
+    const size_t new_size = stack->size + n_bytes;
+    if (stack->buf_size < new_size) {
+        size_t grown = stack->buf_size;
+        while (grown < new_size) {
+            const size_t doubled = grown * 2;
+            // Doubling 0, or overflowing, makes no progress: use exact size
+            grown = (doubled > grown) ? doubled : new_size;
+        }
+        stack->buf      = realloc(stack->buf, grown);
+        stack->buf_size = grown;
+    }
+    uint8_t* const ret = (stack->buf + stack->size);
+    stack->size = new_size;
+    return ret;
+}
+
+/** Discard the top @a n_bytes of @a stack (asserts that many are present). */
+static inline void
+serd_stack_pop(SerdStack* stack, size_t n_bytes)
+{
+    assert(stack->size >= n_bytes);
+    const size_t remaining = stack->size - n_bytes;
+    stack->size = remaining;
+}
+
+/** Return true iff min <= @a c <= max (both bounds inclusive). */
+static inline bool
+in_range(const uint8_t c, const uint8_t min, const uint8_t max)
+{
+    if (c < min) {
+        return false;
+    }
+    return !(c > max);
+}
+
+/** RFC2234: ALPHA := %x41-5A / %x61-7A ; A-Z / a-z */
+static inline bool
+is_alpha(const uint8_t c)
+{
+    if (in_range(c, 'A', 'Z')) {
+        return true;  // Upper case
+    }
+    return in_range(c, 'a', 'z');  // Lower case, or not a letter
+}
+
+/** RFC2234: DIGIT ::= %x30-39 ; 0-9 */
+static inline bool
+is_digit(const uint8_t c)
+{
+    return (c >= '0') && (c <= '9');
+}
+
+/** UTF-8 strlen.
+ * @return Length of @a utf8 in characters.
+ * @param utf8 A null-terminated UTF-8 string.
+ * @param out_n_bytes (Output) If non-NULL, set to the size of @a utf8 in
+ * bytes, counting the terminating null byte.
+ */
+static inline size_t
+serd_strlen(const uint8_t* utf8, size_t* out_n_bytes)
+{
+    const uint8_t* p     = utf8;
+    size_t         count = 0;
+    while (*p) {
+        // Bytes of the form 10xxxxxx continue a character; all others start one
+        if ((*p & 0xC0) != 0x80) {
+            ++count;
+        }
+        ++p;
+    }
+    if (out_n_bytes) {
+        *out_n_bytes = (size_t)(p - utf8) + 1;
+    }
+    return count;
+}
+
+#endif // SERD_INTERNAL_H
+
+/**
+ * @file env.c
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+typedef struct { // One environment entry: a prefix name and the URI stored for it
+ SerdNode name; ///< Prefix name (a copy made by serd_env_add)
+ SerdNode uri; ///< URI for the prefix (a copy made by serd_env_add)
+} SerdPrefix;
+
+struct SerdEnvImpl { // Storage allocated by serd_env_new() and released by serd_env_free()
+ SerdPrefix* prefixes; ///< Heap array of entries, NULL until the first entry is added
+ size_t n_prefixes; ///< Number of entries in prefixes
+};
+
+/** Allocate an environment with no prefixes (malloc() result unchecked). */
+SERD_API
+SerdEnv
+serd_env_new()
+{
+    SerdEnv created = malloc(sizeof(struct SerdEnvImpl));
+    created->n_prefixes = 0;
+    created->prefixes   = NULL;
+    return created;
+}
+
+/** Free every stored name/URI node, the prefix array, and @a env itself. */
+SERD_API
+void
+serd_env_free(SerdEnv env)
+{
+    size_t i = 0;
+    while (i < env->n_prefixes) {
+        SerdPrefix* const entry = &env->prefixes[i];
+        serd_node_free(&entry->name);
+        serd_node_free(&entry->uri);
+        ++i;
+    }
+    free(env->prefixes);
+    free(env);
+}
+
+/**
+ * Look up the entry whose name is exactly the @a name_len bytes at @a name.
+ *
+ * @a name_len excludes any terminator; stored names count theirs in n_bytes,
+ * hence the `+ 1' in the size comparison.
+ *
+ * @return The matching entry, or NULL if there is none.
+ */
+static inline SerdPrefix*
+serd_env_find(SerdEnv env,
+              const uint8_t* name,
+              size_t name_len)
+{
+    for (size_t i = 0; i < env->n_prefixes; ++i) {
+        SerdPrefix* const entry = &env->prefixes[i];
+        if (entry->name.n_bytes != name_len + 1) {
+            continue;  // Different length, cannot match
+        }
+        if (memcmp(entry->name.buf, name, name_len) == 0) {
+            return entry;
+        }
+    }
+    return NULL;
+}
+
+/**
+ * Bind prefix @a name to @a uri in @a env.
+ *
+ * If @a name is already bound, its URI is replaced; otherwise a new entry
+ * holding copies of both nodes is appended.
+ *
+ * The lookup uses the byte length of the name (n_bytes without the
+ * terminator), which is what serd_env_find() compares against; the character
+ * count differs for names containing multi-byte UTF-8.
+ *
+ * If the prefix array cannot be grown, @a env is left unchanged.
+ */
+SERD_API
+void
+serd_env_add(SerdEnv env,
+             const SerdNode* name,
+             const SerdNode* uri)
+{
+    assert(name && uri);
+    const size_t name_len = name->n_bytes ? name->n_bytes - 1 : 0;
+    SerdPrefix* const prefix = serd_env_find(env, name->buf, name_len);
+    if (prefix) {
+        serd_node_free(&prefix->uri);
+        prefix->uri = serd_node_copy(uri);
+        return;
+    }
+
+    // Grow through a temporary so the old array survives a failed realloc
+    SerdPrefix* const grown = realloc(
+        env->prefixes, (env->n_prefixes + 1) * sizeof(SerdPrefix));
+    if (!grown) {
+        return;
+    }
+    env->prefixes = grown;
+    env->prefixes[env->n_prefixes].name = serd_node_copy(name);
+    env->prefixes[env->n_prefixes].uri  = serd_node_copy(uri);
+    ++env->n_prefixes;
+}
+
+/**
+ * Find the first stored prefix whose URI is a leading substring of @a uri.
+ *
+ * On success @a prefix_name receives a shallow copy of the stored name node
+ * and @a suffix points at the remainder of @a uri inside uri->buf.
+ *
+ * @return true if a prefix matched, false otherwise.
+ */
+SERD_API
+bool
+serd_env_qualify(const SerdEnv env,
+                 const SerdNode* uri,
+                 SerdNode* prefix_name,
+                 SerdChunk* suffix)
+{
+    for (size_t i = 0; i < env->n_prefixes; ++i) {
+        const SerdPrefix* const entry       = &env->prefixes[i];
+        const size_t            prefix_size = entry->uri.n_bytes;
+        if (uri->n_bytes < prefix_size) {
+            continue;  // URI is shorter than this prefix
+        }
+        if (strncmp((const char*)uri->buf,
+                    (const char*)entry->uri.buf,
+                    prefix_size - 1) != 0) {
+            continue;
+        }
+        *prefix_name = entry->name;
+        suffix->buf  = uri->buf + prefix_size - 1;
+        suffix->len  = uri->n_bytes - prefix_size;
+        return true;
+    }
+    return false;
+}
+
+/**
+ * Split @a qname at its first `:' and look the left part up in @a env.
+ *
+ * On success @a uri_prefix refers to the stored URI (without terminator) and
+ * @a uri_suffix to the bytes of @a qname after the colon (without terminator).
+ *
+ * @return false if @a qname has no colon or the prefix is unknown.
+ */
+SERD_API
+bool
+serd_env_expand(const SerdEnv env,
+                const SerdNode* qname,
+                SerdChunk* uri_prefix,
+                SerdChunk* uri_suffix)
+{
+    const uint8_t* const colon = memchr(qname->buf, ':', qname->n_bytes);
+    if (!colon) {
+        return false; // Illegal qname
+    }
+
+    const size_t name_len = colon - qname->buf;
+    const SerdPrefix* const found = serd_env_find(env, qname->buf, name_len);
+    if (!found) {
+        return false;
+    }
+
+    uri_prefix->buf = found->uri.buf;
+    uri_prefix->len = found->uri.n_bytes - 1;
+    uri_suffix->buf = colon + 1;
+    // Bytes after the colon, minus the colon itself and the terminator
+    uri_suffix->len = qname->n_bytes - name_len - 2;
+    return true;
+}
+
+/** Call @a func with @a handle and each stored (name, uri) pair, in order. */
+SERD_API
+void
+serd_env_foreach(const SerdEnv env,
+                 SerdPrefixSink func,
+                 void* handle)
+{
+    size_t i = 0;
+    while (i < env->n_prefixes) {
+        const SerdPrefix* const entry = &env->prefixes[i];
+        func(handle, &entry->name, &entry->uri);
+        ++i;
+    }
+}
+
+/**
+ * @file node.c
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+
+/**
+ * Wrap the null-terminated string @a buf in a node of the given @a type.
+ *
+ * The node refers to @a buf directly; nothing is copied.
+ */
+SERD_API
+SerdNode
+serd_node_from_string(SerdType type, const uint8_t* buf)
+{
+    size_t       n_bytes;
+    const size_t n_chars = serd_strlen(buf, &n_bytes);
+    const SerdNode node  = { type, n_bytes, n_chars, buf };
+    return node;
+}
+
+/**
+ * Duplicate @a node, including a freshly malloc()ed copy of its n_bytes of
+ * string data (allocation result unchecked).
+ */
+SERD_API
+SerdNode
+serd_node_copy(const SerdNode* node)
+{
+    const size_t   size      = node->n_bytes;
+    uint8_t* const duplicate = malloc(size);
+    memcpy(duplicate, node->buf, size);
+
+    SerdNode copy = *node;
+    copy.buf = duplicate;
+    return copy;
+}
+
+/**
+ * Compute an upper bound on the serialised length of @a uri.
+ *
+ * Each non-empty component contributes its length plus its delimiters; 2 is
+ * always added for the authority `//' prefix, even when authority.len is 0.
+ */
+static size_t
+serd_uri_string_length(const SerdURI* uri)
+{
+    // Base path, plus the unconditional 2 for the authority `//'
+    size_t len = 2 + uri->path_base.len;
+
+#define ADD_LEN(field, n_delims) \
+ if ((field).len) { len += (field).len + (n_delims); }
+
+    ADD_LEN(uri->scheme, 1);     // + trailing `:'
+    ADD_LEN(uri->authority, 2);  // + leading `//'
+    ADD_LEN(uri->path, 1);       // + possible leading `/'
+    ADD_LEN(uri->query, 1);      // + leading `?'
+    ADD_LEN(uri->fragment, 1);   // + leading `#'
+
+    return len;
+}
+
+/**
+ * Sink that appends @a len bytes of @a buf at a write cursor.
+ *
+ * @a stream is the address of a `uint8_t*' cursor, which is advanced past the
+ * copied bytes.  No terminator is written.
+ *
+ * @return @a len.
+ */
+static size_t
+string_sink(const void* buf, size_t len, void* stream)
+{
+    uint8_t** const cursor = (uint8_t**)stream;
+    uint8_t* const  target = *cursor;
+    memcpy(target, buf, len);
+    *cursor = target + len;
+    return len;
+}
+
+/** Same as serd_node_new_uri_from_string(), applied to uri_node->buf. */
+SERD_API
+SerdNode
+serd_node_new_uri_from_node(const SerdNode* uri_node,
+                            const SerdURI* base,
+                            SerdURI* out)
+{
+    const uint8_t* const str = uri_node->buf;
+    return serd_node_new_uri_from_string(str, base, out);
+}
+
+/**
+ * Build a URI node from the string @a str, resolved against @a base.
+ *
+ * An empty @a str yields the base URI itself.
+ *
+ * @return The new node, or SERD_NODE_NULL if @a str cannot be parsed.
+ */
+SERD_API
+SerdNode
+serd_node_new_uri_from_string(const uint8_t* str,
+                              const SerdURI* base,
+                              SerdURI* out)
+{
+    if (str[0] == '\0') {
+        return serd_node_new_uri(base, NULL, out); // Empty URI => Base URI
+    }
+
+    SerdURI parsed;
+    if (!serd_uri_parse(str, &parsed)) {
+        return SERD_NODE_NULL;
+    }
+    return serd_node_new_uri(&parsed, base, out); // Resolve/Serialise
+}
+
+/**
+ * Serialise @a uri (resolved against @a base when @a base is non-NULL) into
+ * a newly allocated URI node.
+ *
+ * The serialised string is parsed again into @a out, so that the fields of
+ * @a out refer to the node's own buffer.
+ *
+ * @return The new node, whose buffer is heap-allocated, or SERD_NODE_NULL if
+ * the serialised string fails to parse (the buffer is freed in that case).
+ */
+SERD_API
+SerdNode
+serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out)
+{
+    SerdURI abs_uri = *uri;
+    if (base) {
+        serd_uri_resolve(uri, base, &abs_uri);
+    }
+
+    // len is an upper bound; the real length is known after serialising
+    const size_t len = serd_uri_string_length(&abs_uri);
+    uint8_t* buf = malloc(len + 1);
+
+    SerdNode node = { SERD_URI, len + 1, len, buf }; // FIXME: UTF-8
+
+    uint8_t* ptr = buf;
+    const size_t actual_len = serd_uri_serialise(&abs_uri, string_sink, &ptr);
+
+    buf[actual_len] = '\0';
+    node.n_bytes = actual_len + 1;
+    node.n_chars = actual_len;
+
+    // FIXME: double parse
+    if (!serd_uri_parse(buf, out)) {
+        fprintf(stderr, "error parsing URI\n");
+        free(buf); // The node is not returned, so nobody else can free this
+        return SERD_NODE_NULL;
+    }
+
+    return node;
+}
+
+/** Free the string buffer of @a node; the node struct itself is untouched. */
+SERD_API
+void
+serd_node_free(SerdNode* node)
+{
+    uint8_t* const owned = (uint8_t*)node->buf;
+    free(owned);
+}
+
+/**
+ * @file reader.c
+ */
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#define NS_XSD "http://www.w3.org/2001/XMLSchema#" // Namespace of the XSD_* datatype URIs built in read_number()
+#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" // Namespace used for rdf:type in read_verb()
+
+#define TRY_THROW(exp) if (!(exp)) goto except; // If exp is false, jump to the `except' label of the calling function
+#define TRY_RET(exp) if (!(exp)) return 0; // If exp is false, return 0 from the calling function
+
+#define STACK_PAGE_SIZE 4096 // NOTE(review): not referenced in the part of the file visible here
+#define READ_BUF_LEN 4096 // Number of bytes page() requests from fread() per call
+
+typedef struct { // Input position printed by error()
+ const uint8_t* filename; ///< Name printed with %s in error messages
+ unsigned line; ///< Incremented by eat_byte() on each '\n'
+ unsigned col; ///< Reset to 0 on '\n', otherwise incremented per byte eaten
+} Cursor;
+
+typedef uint32_t uchar; // Return type of the name-character readers; 0 means no character was read
+
+typedef size_t Ref; // Byte offset of a SerdString in the reader's stack (see deref()); 0 means none
+
+typedef struct { // Reader-internal node whose strings live on the reader's stack
+ SerdType type; ///< Node type passed through to the public SerdNode
+ Ref value; ///< String value (0 if the node is null)
+ Ref datatype; ///< Datatype URI string, or 0
+ Ref lang; ///< Language tag string, or 0
+} Node;
+
+typedef struct { // Graph/subject/predicate in effect, as passed to emit_statement()
+ const Node* graph; ///< Graph node, may be NULL (emit_statement then passes SERD_NODE_NULL)
+ const Node* subject; ///< Subject node; read_blank() emits a statement only when this is non-NULL
+ const Node* predicate; ///< Predicate node
+} ReadContext;
+
+/**
+ * Measured UTF-8 string.
+ *
+ * Header stored on the reader's stack immediately before the bytes of the
+ * string; push_string() creates one and push_byte() extends it in place.
+ */
+typedef struct {
+ size_t n_bytes; ///< Size in bytes including trailing null byte
+ size_t n_chars; ///< Length in characters
+ uint8_t buf[]; ///< Buffer
+} SerdString;
+
+static const Node INTERNAL_NODE_NULL = { 0, 0, 0, 0 }; // Node with type 0 and no value, datatype or language
+
+struct SerdReaderImpl { // State of one reader
+ void* handle; ///< Passed as the first argument to statement_sink
+ SerdBaseSink base_sink;
+ SerdPrefixSink prefix_sink;
+ SerdStatementSink statement_sink; ///< Called by emit_statement() for each statement
+ SerdEndSink end_sink;
+ Node rdf_type;
+ Node rdf_first; ///< pop_string() never pops this node's value
+ Node rdf_rest; ///< pop_string() never pops this node's value
+ Node rdf_nil; ///< pop_string() never pops this node's value
+ FILE* fd; ///< Stream that page() reads from
+ SerdStack stack; ///< Holds every SerdString addressed by a Ref
+ Cursor cur; ///< Position reported by error()
+ uint8_t* buf;
+ const uint8_t* blank_prefix; ///< Prefix for generated blank IDs; blank_id() uses "genid" if NULL
+ unsigned next_id; ///< Counter appended to generated blank IDs
+ int err;
+ uint8_t* read_buf; ///< Input bytes; page() refills it from fd
+ int32_t read_head; ///< Offset into read_buf
+ bool from_file; ///< True iff reading from @ref fd
+ bool eof; ///< Set when page() reads nothing or the next byte is '\0'
+#ifdef SUIL_STACK_CHECK
+ Ref* alloc_stack; ///< Stack of push offsets
+ size_t n_allocs; ///< Number of stack pushes
+#endif
+};
+
+struct SerdReadStateImpl { // NOTE(review): not used in the part of the file visible here; member roles below are inferred from names — confirm
+ SerdEnv env; ///< Prefix environment (presumably the prefixes seen so far)
+ SerdNode base_uri_node; ///< Presumably the node holding the base URI string
+ SerdURI base_uri; ///< Presumably the parsed form of base_uri_node
+};
+
+typedef enum { // Result of the read_*character functions; SERD_SUCCESS is 0 so callers loop on `!status'
+ SERD_SUCCESS = 0, ///< Completed successfully
+ SERD_FAILURE = 1, ///< Non-fatal failure
+ SERD_ERROR = 2, ///< Fatal error
+} SerdStatus;
+
+/**
+ * Print a message to stderr, prefixed with "error:" and the reader's current
+ * file name, line and column.
+ *
+ * @param fmt printf-style format for the remaining arguments.
+ * @return Always 0, so callers can write `return error(...)' to fail.
+ */
+static inline int
+error(SerdReader reader, const char* fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    fprintf(stderr, "error: %s:%u:%u: ",
+            (const char*)reader->cur.filename,
+            reader->cur.line, reader->cur.col);
+    vfprintf(stderr, fmt, args);
+    va_end(args); // Every va_start must be paired with va_end
+    return 0;
+}
+
+/** Bundle the four fields into an internal Node. */
+static Node
+make_node(SerdType type, Ref value, Ref datatype, Ref lang)
+{
+    Node node;
+    node.type     = type;
+    node.value    = value;
+    node.datatype = datatype;
+    node.lang     = lang;
+    return node;
+}
+
+/**
+ * Refill read_buf from the reader's file and rewind read_head to 0.
+ *
+ * A short read is terminated with a null byte.  If nothing could be read the
+ * buffer is made empty and eof is set.
+ *
+ * @return true if at least one byte was read.
+ */
+static inline bool
+page(SerdReader reader)
+{
+    assert(reader->from_file);
+    reader->read_head = 0;
+    const size_t n_read = fread(reader->read_buf, 1, READ_BUF_LEN, reader->fd);
+    if (n_read > 0) {
+        if (n_read < READ_BUF_LEN) {
+            reader->read_buf[n_read] = '\0';
+        }
+        return true;
+    }
+
+    reader->read_buf[0] = '\0';
+    reader->eof         = true;
+    return false;
+}
+/**
+ * Copy the next @a n input bytes into @a pre without consuming them.
+ *
+ * When reading from a file and the lookahead runs past READ_BUF_LEN, the next
+ * page is loaded and the bytes already peeked are copied to just below the
+ * start of read_buf, with read_head made negative to point at them.
+ *
+ * @return false if a null byte is reached (it is still stored in @a pre) or
+ * if page() fails, in which case the remaining entries of @a pre are not
+ * written; true if all @a n bytes were copied.
+ */
+static inline bool
+peek_string(SerdReader reader, uint8_t* pre, int n)
+{
+ uint8_t* ptr = reader->read_buf + reader->read_head;
+ for (int i = 0; i < n; ++i) {
+ if (reader->from_file && (reader->read_head + i >= READ_BUF_LEN)) { // Lookahead crosses the end of the buffer
+ if (!page(reader)) {
+ return false;
+ }
+ ptr = reader->read_buf; // Continue peeking from the start of the new page
+ reader->read_head = -i; // Negative: the i bytes already peeked are placed before read_buf[0]
+ memcpy(reader->read_buf + reader->read_head, pre, i); // NOTE(review): writes below read_buf; presumably it is allocated with headroom — confirm at the allocation site
+ assert(reader->read_buf[reader->read_head] == pre[0]);
+ }
+ if ((pre[i] = *ptr++) == '\0') { // The byte is stored before the end-of-input test
+ return false;
+ }
+ }
+ return true;
+}
+
+/** Return the byte at read_head without consuming it. */
+static inline uint8_t
+peek_byte(SerdReader reader)
+{
+    const uint8_t* const input = reader->read_buf;
+    return input[reader->read_head];
+}
+
+/**
+ * Consume one input byte, which is expected to equal @a byte.
+ *
+ * The cursor is advanced first (a new line on '\n', otherwise one column).
+ * When reading from a file the next page is loaded once read_head reaches
+ * READ_BUF_LEN, and eof is set if the byte now at read_head is '\0'.
+ *
+ * @return The consumed byte, or 0 if it differs from @a byte (an error is
+ * printed) or if the next page cannot be loaded.
+ */
+static inline uint8_t
+eat_byte(SerdReader reader, const uint8_t byte)
+{
+    const uint8_t c = peek_byte(reader);
+    ++reader->read_head;
+    if (c == '\n') {
+        ++reader->cur.line;
+        reader->cur.col = 0;
+    } else {
+        ++reader->cur.col;
+    }
+
+    if (c != byte) {
+        return error(reader, "expected `%c', not `%c'\n", byte, c);
+    }
+
+    const bool at_buffer_end = (reader->read_head == READ_BUF_LEN);
+    if (reader->from_file && at_buffer_end) {
+        if (!page(reader)) {
+            return 0;
+        }
+        assert(reader->read_head < READ_BUF_LEN);
+    }
+
+    if (reader->read_buf[reader->read_head] == '\0') {
+        reader->eof = true;
+    }
+    return c;
+}
+
+/** Consume the first @a n bytes of @a str from the input, one at a time. */
+static inline void
+eat_string(SerdReader reader, const char* str, unsigned n)
+{
+    const uint8_t* const bytes = (const uint8_t*)str;
+    unsigned i = 0;
+    while (i < n) {
+        eat_byte(reader, bytes[i]);
+        ++i;
+    }
+}
+
+#ifdef SUIL_STACK_CHECK
+/** Return true iff @a ref is the most recently recorded push offset. */
+static inline bool
+stack_is_top_string(SerdReader reader, Ref ref)
+{
+    const size_t top = reader->n_allocs - 1;
+    return reader->alloc_stack[top] == ref;
+}
+#endif
+
+/** Round @a size up to the next multiple of 8. */
+static inline intptr_t
+pad_size(intptr_t size)
+{
+    const intptr_t bumped = size + 7;
+    return bumped & (~7);
+}
+
+// Make a new string from a non-UTF-8 C string (internal use only).
+// n_bytes counts the terminator of c_str; the character count is recorded as
+// n_bytes - 1.  Returns the offset of the new SerdString in the stack.
+static Ref
+push_string(SerdReader reader, const char* c_str, size_t n_bytes)
+{
+    // Align strings to 64-bits (assuming malloc/realloc are aligned to 64-bits)
+    const size_t old_size = reader->stack.size;
+    const size_t aligned  = pad_size((intptr_t)old_size);
+    const size_t pad      = aligned - old_size;
+
+    uint8_t* const base = serd_stack_push(
+        &reader->stack, pad + sizeof(SerdString) + n_bytes);
+    uint8_t* const mem = base + pad;
+
+    SerdString* const str = (SerdString*)mem;
+    str->n_chars = n_bytes - 1;
+    str->n_bytes = n_bytes;
+    memcpy(str->buf, c_str, n_bytes);
+#ifdef SUIL_STACK_CHECK
+    reader->alloc_stack = realloc(reader->alloc_stack,
+                                  sizeof(uint8_t*) * (++reader->n_allocs));
+    reader->alloc_stack[reader->n_allocs - 1] = (mem - reader->stack.buf);
+#endif
+    return mem - reader->stack.buf;
+}
+
+/** Convert a stack offset to a string pointer; offset 0 yields NULL. */
+static inline SerdString*
+deref(SerdReader reader, const Ref ref)
+{
+    return ref ? (SerdString*)(reader->stack.buf + ref) : NULL;
+}
+
+/**
+ * Append byte @a c to the string at @a ref, keeping it null-terminated.
+ *
+ * One more byte is reserved on the stack, so @a ref is expected to be the
+ * topmost string (asserted when SUIL_STACK_CHECK is defined).  The character
+ * count grows only when @a c is not a UTF-8 continuation byte.
+ */
+static inline void
+push_byte(SerdReader reader, Ref ref, const uint8_t c)
+{
+#ifdef SUIL_STACK_CHECK
+    assert(stack_is_top_string(reader, ref));
+#endif
+    serd_stack_push(&reader->stack, 1);
+    SerdString* const str = deref(reader, ref);
+
+    // The old terminator sits at n_bytes - 1; c replaces it
+    const size_t old_end = str->n_bytes - 1;
+    ++str->n_bytes;
+    if ((c & 0xC0) != 0x80) {
+        // Does not start with `10', start of a new character
+        ++str->n_chars;
+    }
+    assert(str->n_bytes > str->n_chars);
+    str->buf[old_end]     = c;
+    str->buf[old_end + 1] = '\0';
+}
+
+/**
+ * Pop the string at @a ref from the reader's stack.
+ *
+ * Does nothing for a 0 ref, or for the values of the reader's rdf_nil,
+ * rdf_first and rdf_rest nodes.  The stack shrinks by the string's n_bytes.
+ */
+static void
+pop_string(SerdReader reader, Ref ref)
+{
+    if (!ref) {
+        return;
+    }
+    const bool is_kept = (ref == reader->rdf_nil.value
+                          || ref == reader->rdf_first.value
+                          || ref == reader->rdf_rest.value);
+    if (is_kept) {
+        return;
+    }
+#ifdef SUIL_STACK_CHECK
+    if (!stack_is_top_string(reader, ref)) {
+        fprintf(stderr, "attempt to pop non-top string %s\n",
+                deref(reader, ref)->buf);
+        fprintf(stderr, "top: %s\n",
+                deref(reader, reader->alloc_stack[reader->n_allocs - 1])->buf);
+    }
+    assert(stack_is_top_string(reader, ref));
+    --reader->n_allocs;
+#endif
+    serd_stack_pop(&reader->stack, deref(reader, ref)->n_bytes);
+}
+
+/**
+ * Build a public SerdNode of @a type that views the stack string at @a ref.
+ *
+ * @return SERD_NODE_NULL when @a ref is 0.
+ */
+static inline SerdNode
+public_node_from_ref(SerdReader reader, SerdType type, Ref ref)
+{
+    if (ref) {
+        const SerdString* const str = deref(reader, ref);
+        const SerdNode view = { type, str->n_bytes, str->n_chars, str->buf };
+        return view;
+    }
+    return SERD_NODE_NULL;
+}
+
+/** Build a public SerdNode from an internal Node's type and value. */
+static inline SerdNode
+public_node(SerdReader reader, const Node* private)
+{
+    const SerdType type  = private->type;
+    const Ref      value = private->value;
+    return public_node_from_ref(reader, type, value);
+}
+
+
+/**
+ * Pass one statement to the reader's statement sink.
+ *
+ * @a g may be NULL, in which case the graph is SERD_NODE_NULL.  The object's
+ * datatype is passed as a SERD_URI node and its language as a SERD_LITERAL
+ * node (each SERD_NODE_NULL if absent).
+ *
+ * @return Whatever the statement sink returns.
+ */
+static inline bool
+emit_statement(SerdReader reader,
+               const Node* g, const Node* s, const Node* p, const Node* o)
+{
+    assert(s->value && p->value && o->value);
+
+    SerdNode graph = SERD_NODE_NULL;
+    if (g) {
+        graph = public_node(reader, g);
+    }
+    const SerdNode subject   = public_node(reader, s);
+    const SerdNode predicate = public_node(reader, p);
+    const SerdNode object    = public_node(reader, o);
+    const SerdNode datatype  = public_node_from_ref(reader, SERD_URI, o->datatype);
+    const SerdNode lang      = public_node_from_ref(reader, SERD_LITERAL, o->lang);
+
+    return reader->statement_sink(
+        reader->handle, &graph, &subject, &predicate, &object, &datatype, &lang);
+}
+
+static bool read_collection(SerdReader reader, ReadContext ctx, Node* dest); // Forward declaration; the definition is not in the part of the file visible here
+static bool read_predicateObjectList(SerdReader reader, ReadContext ctx); // Forward declaration; the definition is not in the part of the file visible here
+
+// [40] hex ::= [#x30-#x39] | [#x41-#x46]
+// Consumes and returns one digit 0-9 or upper-case A-F; otherwise reports an
+// error and returns 0 without consuming anything.
+static inline uint8_t
+read_hex(SerdReader reader)
+{
+    const uint8_t c = peek_byte(reader);
+    const bool is_hex = in_range(c, '0', '9') || in_range(c, 'A', 'F');
+    if (!is_hex) {
+        return error(reader, "illegal hexadecimal digit `%c'\n", c);
+    }
+    return eat_byte(reader, c);
+}
+
+/**
+ * Read @a length hexadecimal digits and append the code point they denote to
+ * the string at @a dest, encoded as UTF-8.
+ *
+ * @param length Number of digits to read (callers pass 4 or 8; at most 8).
+ * @return false if a digit is missing or illegal (read_hex() has already
+ * reported it), if @a length is too large, or if the value is 0x200000 or
+ * above; true once the bytes have been pushed.
+ */
+static inline bool
+read_hex_escape(SerdReader reader, unsigned length, Ref dest)
+{
+    uint8_t buf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+    if (length >= sizeof(buf)) {
+        return false; // Would not leave room for the terminator
+    }
+    for (unsigned i = 0; i < length; ++i) {
+        buf[i] = read_hex(reader);
+        if (!buf[i]) {
+            return false; // Not a hex digit: do not convert a partial value
+        }
+    }
+
+    // At most 8 validated hex digits, so the value always fits in 32 bits
+    uint32_t c = (uint32_t)strtoul((const char*)buf, NULL, 16);
+
+    unsigned size = 0;
+    if (c < 0x00000080) {
+        size = 1;
+    } else if (c < 0x00000800) {
+        size = 2;
+    } else if (c < 0x00010000) {
+        size = 3;
+    } else if (c < 0x00200000) {
+        size = 4;
+    } else {
+        return false;
+    }
+
+    // Build output in buf
+    // (Note # of bytes = # of leading 1 bits in first byte)
+    switch (size) {
+    case 4:
+        buf[3] = 0x80 | (uint8_t)(c & 0x3F);
+        c >>= 6;
+        c |= (16 << 12); // set bit 4
+        /* fallthrough */
+    case 3:
+        buf[2] = 0x80 | (uint8_t)(c & 0x3F);
+        c >>= 6;
+        c |= (32 << 6); // set bit 5
+        /* fallthrough */
+    case 2:
+        buf[1] = 0x80 | (uint8_t)(c & 0x3F);
+        c >>= 6;
+        c |= 0xC0; // set bits 6 and 7
+        /* fallthrough */
+    case 1:
+        buf[0] = (uint8_t)c;
+    }
+
+    for (unsigned i = 0; i < size; ++i) {
+        push_byte(reader, dest, buf[i]);
+    }
+    return true;
+}
+
+/**
+ * Read the part of an escape after the backslash: a second backslash, `u'
+ * plus 4 hex digits, or `U' plus 8 hex digits.  The result is appended to
+ * @a dest.
+ *
+ * @return false if the next byte starts none of these, or the hex is bad.
+ */
+static inline bool
+read_character_escape(SerdReader reader, Ref dest)
+{
+    const uint8_t c = peek_byte(reader);
+    if (c == '\\') {
+        push_byte(reader, dest, eat_byte(reader, '\\'));
+        return true;
+    }
+    if (c == 'u') {
+        eat_byte(reader, 'u');
+        return read_hex_escape(reader, 4, dest);
+    }
+    if (c == 'U') {
+        eat_byte(reader, 'U');
+        return read_hex_escape(reader, 8, dest);
+    }
+    return false;
+}
+
+/**
+ * Like read_character_escape(), but also accepts `t', `n' and `r', which
+ * append a tab, line feed and carriage return respectively.
+ */
+static inline bool
+read_echaracter_escape(SerdReader reader, Ref dest)
+{
+    const uint8_t c = peek_byte(reader);
+    uint8_t unescaped;
+    if (c == 't') {
+        unescaped = '\t';
+    } else if (c == 'n') {
+        unescaped = '\n';
+    } else if (c == 'r') {
+        unescaped = '\r';
+    } else {
+        return read_character_escape(reader, dest);
+    }
+    eat_byte(reader, c);
+    push_byte(reader, dest, unescaped);
+    return true;
+}
+
+/** Like read_echaracter_escape(), but also accepts an escaped `"'. */
+static inline bool
+read_scharacter_escape(SerdReader reader, Ref dest)
+{
+    if (peek_byte(reader) != '"') {
+        return read_echaracter_escape(reader, dest);
+    }
+    push_byte(reader, dest, eat_byte(reader, '"'));
+    return true;
+}
+
+/** Like read_echaracter_escape(), but also accepts an escaped `>'. */
+static inline bool
+read_ucharacter_escape(SerdReader reader, Ref dest)
+{
+    if (peek_byte(reader) != '>') {
+        return read_echaracter_escape(reader, dest);
+    }
+    push_byte(reader, dest, eat_byte(reader, '>'));
+    return true;
+}
+
+// [38] character ::= '\u' hex hex hex hex
+// | '\U' hex hex hex hex hex hex hex hex
+// | '\\'
+// | [#x20-#x5B] | [#x5D-#x10FFFF]
+//
+// Reads one unescaped character (1 to 4 bytes of UTF-8) and appends it to
+// dest.  Returns SERD_ERROR, after printing a message, at end of input, on a
+// control character, on an invalid lead byte, or when a multi-byte sequence
+// is not followed by the required continuation bytes (so a truncated
+// sequence can never swallow the terminating null byte).
+static inline SerdStatus
+read_character(SerdReader reader, Ref dest)
+{
+    const uint8_t c = peek_byte(reader);
+    assert(c != '\\'); // Only called from methods that handle escapes first
+    if (c == '\0') {
+        error(reader, "unexpected end of file\n");
+        return SERD_ERROR;
+    } else if (c < 0x20) { // ASCII control character
+        error(reader, "unexpected control character\n");
+        return SERD_ERROR;
+    } else if (c <= 0x7E) { // Printable ASCII
+        push_byte(reader, dest, eat_byte(reader, c));
+        return SERD_SUCCESS;
+    }
+
+    // Wide UTF-8 character: the lead byte gives the sequence length
+    unsigned size = 1;
+    if ((c & 0xE0) == 0xC0) { // Starts with `110'
+        size = 2;
+    } else if ((c & 0xF0) == 0xE0) { // Starts with `1110'
+        size = 3;
+    } else if ((c & 0xF8) == 0xF0) { // Starts with `11110'
+        size = 4;
+    } else {
+        error(reader, "invalid character\n");
+        return SERD_ERROR;
+    }
+
+    push_byte(reader, dest, eat_byte(reader, c));
+    for (unsigned i = 1; i < size; ++i) {
+        const uint8_t b = peek_byte(reader);
+        if ((b & 0xC0) != 0x80) { // Every following byte must be `10xxxxxx'
+            error(reader, "invalid character\n");
+            return SERD_ERROR;
+        }
+        push_byte(reader, dest, eat_byte(reader, b));
+    }
+    return SERD_SUCCESS;
+}
+
+// [39] echaracter ::= character | '\t' | '\n' | '\r'
+//
+// Reads one character or backslash escape and appends it to dest.
+// Returns SERD_SUCCESS, or SERD_ERROR (after printing a message) on an
+// illegal escape or an illegal character.
+static inline SerdStatus
+read_echaracter(SerdReader reader, Ref dest)
+{
+    if (peek_byte(reader) != '\\') {
+        return read_character(reader, dest);
+    }
+
+    eat_byte(reader, '\\');
+    // The escape is appended to dest, not to the value of the next byte
+    if (read_echaracter_escape(reader, dest)) {
+        return SERD_SUCCESS;
+    }
+    error(reader, "illegal escape `\\%c'\n", peek_byte(reader));
+    return SERD_ERROR;
+}
+
+// [43] lcharacter ::= echaracter | '\"' | #x9 | #xA | #xD
+//
+// Reads one character of a long string into dest.  Returns SERD_FAILURE after
+// consuming a closing `"""', SERD_ERROR on an illegal escape or character,
+// and SERD_SUCCESS otherwise.
+static inline SerdStatus
+read_lcharacter(SerdReader reader, Ref dest)
+{
+    const uint8_t c = peek_byte(reader);
+
+    if (c == '"') {
+        uint8_t pre[3];
+        peek_string(reader, pre, 3);
+        if (pre[1] == '\"' && pre[2] == '\"') {
+            // Three quotes in a row end the string
+            eat_byte(reader, '\"');
+            eat_byte(reader, '\"');
+            eat_byte(reader, '\"');
+            return SERD_FAILURE;
+        }
+        push_byte(reader, dest, eat_byte(reader, '"'));
+        return SERD_SUCCESS;
+    }
+
+    if (c == '\\') {
+        eat_byte(reader, '\\');
+        if (read_scharacter_escape(reader, dest)) {
+            return SERD_SUCCESS;
+        }
+        error(reader, "illegal escape `\\%c'\n", peek_byte(reader));
+        return SERD_ERROR;
+    }
+
+    if (c == 0x9 || c == 0xA || c == 0xD) {
+        push_byte(reader, dest, eat_byte(reader, c));
+        return SERD_SUCCESS;
+    }
+
+    return read_echaracter(reader, dest);
+}
+
+// [42] scharacter ::= ( echaracter - #x22 ) | '\"'
+//
+// Reads one character of a short string into dest.  Returns SERD_FAILURE,
+// consuming nothing, at an unescaped `"'; SERD_ERROR on an illegal escape or
+// character; SERD_SUCCESS otherwise.
+static inline SerdStatus
+read_scharacter(SerdReader reader, Ref dest)
+{
+    const uint8_t c = peek_byte(reader);
+    if (c == '\"') {
+        return SERD_FAILURE;
+    }
+    if (c != '\\') {
+        return read_character(reader, dest);
+    }
+
+    eat_byte(reader, '\\');
+    if (!read_scharacter_escape(reader, dest)) {
+        error(reader, "illegal escape `\\%c'\n", peek_byte(reader));
+        return SERD_ERROR;
+    }
+    return SERD_SUCCESS;
+}
+
+// Spec: [41] ucharacter ::= ( character - #x3E ) | '\>'
+// Impl: [41] ucharacter ::= ( echaracter - #x3E ) | '\>'
+//
+// Reads one character of a URI reference into dest.  Returns SERD_FAILURE,
+// consuming nothing, at an unescaped `>'; SERD_ERROR on an illegal escape or
+// character; SERD_SUCCESS otherwise.
+static inline SerdStatus
+read_ucharacter(SerdReader reader, Ref dest)
+{
+    const uint8_t c = peek_byte(reader);
+    switch (c) {
+    case '\\':
+        eat_byte(reader, '\\');
+        if (read_ucharacter_escape(reader, dest)) {
+            return SERD_SUCCESS;
+        } else {
+            // error() returns 0, which is SERD_SUCCESS, so its result must
+            // not be returned from here
+            error(reader, "illegal escape `\\%c'\n", peek_byte(reader));
+            return SERD_ERROR;
+        }
+    case '>':
+        return SERD_FAILURE;
+    default:
+        return read_character(reader, dest);
+    }
+}
+
+// [10] comment ::= '#' ( [^#xA #xD] )*
+//
+// Consumes a `#' and everything up to, but not including, the next line feed
+// or carriage return.  Also stops at a null byte, which marks the end of the
+// input, so a comment on the last line without a newline cannot run past the
+// end of the read buffer.
+static void
+read_comment(SerdReader reader)
+{
+    eat_byte(reader, '#');
+    uint8_t c;
+    while (((c = peek_byte(reader)) != 0xA) && (c != 0xD) && (c != '\0')) {
+        eat_byte(reader, c);
+    }
+}
+
+// [24] ws ::= #x9 | #xA | #xD | #x20 | comment
+//
+// Consumes one whitespace byte or one whole comment.  Returns false, consuming
+// nothing, if the next byte starts neither.
+static inline bool
+read_ws(SerdReader reader)
+{
+    const uint8_t c = peek_byte(reader);
+    if (c == '#') {
+        read_comment(reader);
+        return true;
+    }
+    if (c == 0x9 || c == 0xA || c == 0xD || c == 0x20) {
+        eat_byte(reader, c);
+        return true;
+    }
+    return false;
+}
+
+/** Consume any amount of whitespace and comments (possibly none). */
+static inline void
+read_ws_star(SerdReader reader)
+{
+    bool more;
+    do {
+        more = read_ws(reader);
+    } while (more);
+}
+
+/** Consume at least one whitespace item; return false if there is none. */
+static inline bool
+read_ws_plus(SerdReader reader)
+{
+    if (!read_ws(reader)) {
+        return 0;
+    }
+    read_ws_star(reader);
+    return true;
+}
+
+// [37] longString ::= #x22 #x22 #x22 lcharacter* #x22 #x22 #x22
+//
+// Returns the new string, or 0 (with the string popped) on error.
+static Ref
+read_longString(SerdReader reader)
+{
+    eat_string(reader, "\"\"\"", 3);
+    const Ref str = push_string(reader, "", 1);
+
+    // read_lcharacter() reports SERD_FAILURE once it has eaten the closing quotes
+    SerdStatus st = SERD_SUCCESS;
+    while (st == SERD_SUCCESS) {
+        st = read_lcharacter(reader, str);
+    }
+    if (st == SERD_ERROR) {
+        pop_string(reader, str);
+        return 0;
+    }
+    return str;
+}
+
+// [36] string ::= #x22 scharacter* #x22
+//
+// Returns the new string, or 0 (with the string popped) on error.
+static Ref
+read_string(SerdReader reader)
+{
+    eat_byte(reader, '\"');
+    const Ref str = push_string(reader, "", 1);
+
+    // read_scharacter() reports SERD_FAILURE at the closing quote
+    SerdStatus st = SERD_SUCCESS;
+    while (st == SERD_SUCCESS) {
+        st = read_scharacter(reader, str);
+    }
+    if (st == SERD_ERROR) {
+        pop_string(reader, str);
+        return 0;
+    }
+    eat_byte(reader, '\"');
+    return str;
+}
+
+// [35] quotedString ::= string | longString
+//
+// Peeks at the next three bytes: three quotes start a long string, anything
+// else a short one.
+static Ref
+read_quotedString(SerdReader reader)
+{
+    uint8_t pre[3];
+    peek_string(reader, pre, 3);
+    assert(pre[0] == '\"');
+
+    // pre[2] is only examined when pre[1] is a quote
+    if (pre[1] == '\"' && pre[2] == '\"') {
+        return read_longString(reader);
+    }
+    return read_string(reader);
+}
+
+// [34] relativeURI ::= ucharacter*
+//
+// Reads up to, but not including, the closing `>'.  Returns the new string,
+// or 0 (with the string popped) on error.
+static inline Ref
+read_relativeURI(SerdReader reader)
+{
+    const Ref str = push_string(reader, "", 1);
+
+    SerdStatus st = SERD_SUCCESS;
+    while (st == SERD_SUCCESS) {
+        st = read_ucharacter(reader, str);
+    }
+    if (st == SERD_ERROR) {
+        pop_string(reader, str);
+        return 0;
+    }
+    return str;
+}
+
+// [30] nameStartChar ::= [A-Z] | "_" | [a-z]
+// | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D]
+// | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF]
+// | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
+//
+// Only `_' and ASCII letters are accepted here.  Returns the consumed byte,
+// or 0 without consuming anything (an error is printed if required is true).
+static inline uchar
+read_nameStartChar(SerdReader reader, bool required)
+{
+    const uint8_t c = peek_byte(reader);
+    if (c == '_' || is_alpha(c)) {
+        return eat_byte(reader, c);
+    }
+    if (required) {
+        error(reader, "illegal character `%c'\n", c);
+    }
+    return 0;
+}
+
+// [31] nameChar ::= nameStartChar | '-' | [0-9]
+// | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
+//
+// Returns the consumed byte, or 0 without consuming anything.
+static inline uchar
+read_nameChar(SerdReader reader)
+{
+    uchar c = read_nameStartChar(reader, false);
+    if (c) {
+        return c;
+    }
+
+    c = peek_byte(reader);
+    if (c == '-' || c == 0xB7 || (c >= '0' && c <= '9')) {
+        return eat_byte(reader, c);
+    }
+    // TODO: 0x300-0x036F | 0x203F-0x2040
+    return 0;
+}
+
+// [33] prefixName ::= ( nameStartChar - '_' ) nameChar*
+//
+// Returns the new string, or 0 if the input does not start a prefix name (an
+// error is printed only for a leading `_').
+static Ref
+read_prefixName(SerdReader reader)
+{
+    if (peek_byte(reader) == '_') {
+        error(reader, "unexpected `_'\n");
+        return 0;
+    }
+
+    uint8_t c = read_nameStartChar(reader, false);
+    if (!c) {
+        return 0;
+    }
+
+    const Ref str = push_string(reader, "", 1);
+    do {
+        push_byte(reader, str, c);
+    } while ((c = read_nameChar(reader)) != 0);
+    return str;
+}
+
+// [32] name ::= nameStartChar nameChar*
+//
+// Appends the name to the existing string dest.  Returns dest, or 0 if the
+// input does not start a name (errors are printed if required is true).
+static Ref
+read_name(SerdReader reader, Ref dest, bool required)
+{
+    uchar c = read_nameStartChar(reader, required);
+    if (!c) {
+        if (required) {
+            error(reader, "illegal character at start of name\n");
+        }
+        return 0;
+    }
+    while (c) {
+        push_byte(reader, dest, c);
+        c = read_nameChar(reader);
+    }
+    return dest;
+}
+
+// [29] language ::= [a-z]+ ('-' [a-z0-9]+ )*
+//
+// Reads a language tag with any number of `-' separated subtags, as the
+// grammar allows.  Returns the new string, or 0 (after printing an error) if
+// the input does not start with a lower-case letter.
+static Ref
+read_language(SerdReader reader)
+{
+    const uint8_t start = peek_byte(reader);
+    if (!in_range(start, 'a', 'z')) {
+        error(reader, "unexpected `%c'\n", start);
+        return 0;
+    }
+    Ref str = push_string(reader, "", 1);
+    push_byte(reader, str, eat_byte(reader, start));
+    uint8_t c;
+    while ((c = peek_byte(reader)) && in_range(c, 'a', 'z')) {
+        push_byte(reader, str, eat_byte(reader, c));
+    }
+    // Zero or more subtags, not just one
+    while (peek_byte(reader) == '-') {
+        push_byte(reader, str, eat_byte(reader, '-'));
+        while ((c = peek_byte(reader)) && (
+                   in_range(c, 'a', 'z') || in_range(c, '0', '9'))) {
+            push_byte(reader, str, eat_byte(reader, c));
+        }
+    }
+    return str;
+}
+
+// [28] uriref ::= '<' relativeURI '>'
+//
+// Returns the string between the angle brackets, or 0 on error.
+static Ref
+read_uriref(SerdReader reader)
+{
+    if (!eat_byte(reader, '<')) {
+        return 0;
+    }
+    const Ref str = read_relativeURI(reader);
+    if (!str || !eat_byte(reader, '>')) {
+        pop_string(reader, str);
+        return 0;
+    }
+    return str;
+}
+
+// [27] qname ::= prefixName? ':' name?
+//
+// Returns one string holding the prefix, the colon and the optional name, or
+// 0 (with the string popped) if the colon is missing.
+static Ref
+read_qname(SerdReader reader)
+{
+    Ref prefix = read_prefixName(reader);
+    if (!prefix) {
+        prefix = push_string(reader, "", 1);
+    }
+    if (!eat_byte(reader, ':')) {
+        pop_string(reader, prefix);
+        return 0;
+    }
+    push_byte(reader, prefix, ':');
+
+    // The name, if any, is appended to the same string
+    const Ref with_name = read_name(reader, prefix, false);
+    return with_name ? with_name : prefix;
+}
+
+/**
+ * Append a run of decimal digits from the input to the string @a str.
+ *
+ * @param at_least_one If true, the run must not be empty.
+ * @return false (after printing an error) if a required digit is missing.
+ */
+static bool
+read_0_9(SerdReader reader, Ref str, bool at_least_one)
+{
+    uint8_t c = peek_byte(reader);
+    if (at_least_one) {
+        if (!is_digit(c)) {
+            return error(reader, "expected digit\n");
+        }
+        push_byte(reader, str, eat_byte(reader, c));
+    }
+    for (c = peek_byte(reader); is_digit(c); c = peek_byte(reader)) {
+        push_byte(reader, str, eat_byte(reader, c));
+    }
+    return true;
+}
+
+// [19] exponent ::= [eE] ('-' | '+')? [0-9]+
+// [18] decimal ::= ( '-' | '+' )? ( [0-9]+ '.' [0-9]*
+// | '.' ([0-9])+
+// | ([0-9])+ )
+// [17] double ::= ( '-' | '+' )? ( [0-9]+ '.' [0-9]* exponent
+// | '.' ([0-9])+ exponent
+// | ([0-9])+ exponent )
+// [16] integer ::= ( '-' | '+' ) ? [0-9]+
+//
+// Reads a numeric literal into *dest as a SERD_LITERAL whose datatype is
+// xsd:double (exponent present), xsd:decimal (decimal point present) or
+// xsd:integer.  Returns false, with the partial string popped, if a required
+// digit is missing, including the digits of the exponent.
+static bool
+read_number(SerdReader reader, Node* dest)
+{
+    #define XSD_DECIMAL NS_XSD "decimal"
+    #define XSD_DOUBLE NS_XSD "double"
+    #define XSD_INTEGER NS_XSD "integer"
+    Ref str = push_string(reader, "", 1);
+    uint8_t c = peek_byte(reader);
+    bool has_decimal = false;
+    Ref datatype = 0;
+    if (c == '-' || c == '+') {
+        push_byte(reader, str, eat_byte(reader, c));
+    }
+    if ((c = peek_byte(reader)) == '.') {
+        has_decimal = true;
+        // decimal case 2 (e.g. '.0' or `-.0' or `+.0')
+        push_byte(reader, str, eat_byte(reader, c));
+        TRY_THROW(read_0_9(reader, str, true));
+    } else {
+        // all other cases ::= ( '-' | '+' ) [0-9]+ ( . )? ( [0-9]+ )? ...
+        TRY_THROW(read_0_9(reader, str, true));
+        if ((c = peek_byte(reader)) == '.') {
+            has_decimal = true;
+            push_byte(reader, str, eat_byte(reader, c));
+            TRY_THROW(read_0_9(reader, str, false));
+        }
+    }
+    c = peek_byte(reader);
+    if (c == 'e' || c == 'E') {
+        // double
+        push_byte(reader, str, eat_byte(reader, c));
+        switch ((c = peek_byte(reader))) {
+        case '+': case '-':
+            push_byte(reader, str, eat_byte(reader, c));
+            /* fallthrough */
+        default: break;
+        }
+        // The exponent needs at least one digit; datatype is still 0 here,
+        // so the cleanup below only pops str
+        TRY_THROW(read_0_9(reader, str, true));
+        datatype = push_string(reader, XSD_DOUBLE, strlen(XSD_DOUBLE) + 1);
+    } else if (has_decimal) {
+        datatype = push_string(reader, XSD_DECIMAL, strlen(XSD_DECIMAL) + 1);
+    } else {
+        datatype = push_string(reader, XSD_INTEGER, strlen(XSD_INTEGER) + 1);
+    }
+    *dest = make_node(SERD_LITERAL, str, datatype, 0);
+    assert(dest->value);
+    return true;
+except:
+    pop_string(reader, datatype);
+    pop_string(reader, str);
+    return false;
+}
+
+// [25] resource ::= uriref | qname
+//
+// A leading `<' selects a URI reference; anything else is read as a qname.
+// Returns false if the chosen reader produced no string.
+static bool
+read_resource(SerdReader reader, Node* dest)
+{
+    if (peek_byte(reader) == '<') {
+        *dest = make_node(SERD_URI, read_uriref(reader), 0, 0);
+    } else {
+        *dest = make_node(SERD_CURIE, read_qname(reader), 0, 0);
+    }
+    return (dest->value != 0);
+}
+
+// [14] literal ::= quotedString ( '@' language )? | datatypeString
+// | integer | double | decimal | boolean
+//
+// Numbers are delegated to read_number().  A quoted string may be followed by
+// `^^' and a datatype resource, or by `@' and a language tag.  Returns false
+// on error; if the datatype or language fails, the string is popped first.
+static bool
+read_literal(SerdReader reader, Node* dest)
+{
+    const uint8_t c = peek_byte(reader);
+    if (c == '-' || c == '+' || c == '.' || is_digit(c)) {
+        return read_number(reader, dest);
+    }
+    if (c != '\"') {
+        return error(reader, "Unknown literal type\n");
+    }
+
+    const Ref str = read_quotedString(reader);
+    if (!str) {
+        return false;
+    }
+
+    Node datatype = INTERNAL_NODE_NULL;
+    Ref  lang     = 0;
+    const uint8_t next = peek_byte(reader);
+    if (next == '^') {
+        eat_byte(reader, '^');
+        eat_byte(reader, '^');
+        if (!read_resource(reader, &datatype)) {
+            pop_string(reader, str);
+            return false;
+        }
+    } else if (next == '@') {
+        eat_byte(reader, '@');
+        lang = read_language(reader);
+        if (!lang) {
+            pop_string(reader, str);
+            return false;
+        }
+    }
+
+    *dest = make_node(SERD_LITERAL, str, datatype.value, lang);
+    return true;
+}
+
+// [12] predicate ::= resource
+// A predicate is read exactly like any other resource.
+static bool
+read_predicate(SerdReader reader, Node* dest)
+{
+    const bool ok = read_resource(reader, dest);
+    return ok;
+}
+
+// [9] verb ::= predicate | 'a'
+// The keyword `a' (followed by whitespace) is shorthand for rdf:type;
+// anything else is read as an ordinary predicate.
+static bool
+read_verb(SerdReader reader, Node* dest)
+{
+    uint8_t ahead[2];
+    peek_string(reader, ahead, 2);
+
+    // The second byte is only inspected when the first one is `a'
+    if (ahead[0] == 'a'
+        && (ahead[1] == 0x9 || ahead[1] == 0xA
+            || ahead[1] == 0xD || ahead[1] == 0x20)) {
+        eat_byte(reader, 'a');
+        const Ref rdf_type = push_string(reader, NS_RDF "type", 48);
+        *dest = make_node(SERD_URI, rdf_type, 0, 0);
+        return true;
+    }
+
+    return read_predicate(reader, dest);
+}
+
+// [26] nodeID ::= '_:' name
+// Consumes the `_:' marker, then reads the name into a freshly pushed
+// empty string and returns whatever read_name returns.
+static Ref
+read_nodeID(SerdReader reader)
+{
+    eat_byte(reader, '_');
+    eat_byte(reader, ':');
+
+    const Ref name = push_string(reader, "", 1);
+    return read_name(reader, name, true);
+}
+
+/**
+ * Generate a fresh blank node ID and push it onto the reader's stack.
+ *
+ * The ID is reader->blank_prefix (or "genid" when no prefix is set)
+ * followed by the reader's next_id counter, which this call increments.
+ *
+ * @return The Ref returned by push_string for the new ID, or 0 if the ID
+ * could not be formatted or memory for a long ID could not be allocated.
+ */
+static Ref
+blank_id(SerdReader reader)
+{
+    const char* prefix = reader->blank_prefix
+        ? (const char*)reader->blank_prefix
+        : "genid";
+    const unsigned id = reader->next_id++;
+
+    // Common case: the whole ID fits in a small stack buffer
+    char      small[32];
+    const int len = snprintf(small, sizeof(small), "%s%u", prefix, id);
+    if (len < 0) {
+        return 0;
+    }
+    if ((size_t)len < sizeof(small)) {
+        return push_string(reader, small, len + 1);
+    }
+
+    // Long prefix: snprintf truncated the output but still returned the
+    // full length, so pushing len + 1 bytes from `small' would read past
+    // its end.  Format again into a buffer of exactly the right size.
+    char* big = (char*)malloc((size_t)len + 1);
+    if (!big) {
+        return 0;
+    }
+    snprintf(big, (size_t)len + 1, "%s%u", prefix, id);
+    const Ref ref = push_string(reader, big, len + 1);
+    free(big);
+    return ref;
+}
+
+// Spec: [21] blank ::= nodeID | '[]'
+// | '[' predicateObjectList ']' | collection
+// Impl: [21] blank ::= nodeID | '[ ws* ]'
+// | '[' ws* predicateObjectList ws* ']' | collection
+//
+// Reads a blank node into dest, dispatching on the next byte:
+//   `_'  a labelled node (`_:name'); nothing is emitted here.
+//   `['  an anonymous node with a generated ID; when ctx.subject is set the
+//        statement (ctx.subject, ctx.predicate, dest) is emitted before the
+//        nested predicateObjectList is read, and end_sink (if any) is called
+//        after the closing `]'.
+//   `('  a collection; when ctx.subject is set the statement linking to it
+//        is emitted after the collection has been read.
+// Any other byte is reported through error().
+// NOTE(review): TRY_RET is defined outside this view; presumably it returns
+// false from this function when its argument is false -- confirm.
+static bool
+read_blank(SerdReader reader, ReadContext ctx, Node* dest)
+{
+ switch (peek_byte(reader)) {
+ case '_':
+ // NOTE(review): returns true even if read_nodeID yields a zero Ref
+ *dest = make_node(SERD_BLANK_ID, read_nodeID(reader), 0, 0);
+ return true;
+ case '[':
+ eat_byte(reader, '[');
+ read_ws_star(reader);
+ if (peek_byte(reader) == ']') {
+ // Empty `[]': a plain blank node with a generated ID
+ eat_byte(reader, ']');
+ *dest = make_node(SERD_BLANK_ID, blank_id(reader), 0, 0);
+ if (ctx.subject) {
+ TRY_RET(emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, dest));
+ }
+ return true;
+ }
+ *dest = make_node(SERD_ANON_BEGIN, blank_id(reader), 0, 0);
+ if (ctx.subject) {
+ // Emitted as SERD_ANON_BEGIN, then retyped to SERD_ANON for later use
+ TRY_RET(emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, dest));
+ dest->type = SERD_ANON;
+ }
+ // The new node becomes the subject of the nested statements
+ ctx.subject = dest;
+ // NOTE(review): the results of read_predicateObjectList and of
+ // eat_byte(']') are not checked here
+ read_predicateObjectList(reader, ctx);
+ read_ws_star(reader);
+ eat_byte(reader, ']');
+ if (reader->end_sink) {
+ const SerdNode end = public_node(reader, dest);
+ reader->end_sink(reader->handle, &end);
+ }
+ return true;
+ case '(':
+ if (read_collection(reader, ctx, dest)) {
+ if (ctx.subject) {
+ TRY_RET(emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, dest));
+ }
+ return true;
+ }
+ return false;
+ default:
+ return error(reader, "illegal blank node\n");
+ }
+}
+
+/** Return true iff c is whitespace, NUL, or one of `#', `.', `;'. */
+inline static bool
+is_object_end(const uint8_t c)
+{
+    return c == '\0'
+        || c == 0x9 || c == 0xA || c == 0xD || c == 0x20
+        || c == '#' || c == '.' || c == ';';
+}
+
+// [13] object ::= resource | blank | literal
+// Recurses, calling statement_sink for every statement encountered.
+// Leaves stack in original calling state (i.e. pops everything it pushes).
+//
+// Dispatches on the next byte to read_blank, read_resource or read_literal;
+// any other byte is checked against the keywords `true' and `false' and is
+// otherwise read as a qname.  When an object was read and `emit' is still
+// set, the statement (ctx.subject, ctx.predicate, object) is emitted.
+// Returns false at NUL or `)', or when reading or emitting fails.
+// NOTE(review): TRY_THROW is defined outside this view; presumably it jumps
+// to `except' when its argument is false -- confirm.
+static bool
+read_object(SerdReader reader, ReadContext ctx)
+{
+ static const char* const XSD_BOOLEAN = NS_XSD "boolean";
+ static const size_t XSD_BOOLEAN_LEN = 40;
+
+ uint8_t pre[6];
+ bool ret = false;
+ bool emit = (ctx.subject != 0);
+ Node o = INTERNAL_NODE_NULL;
+ const uint8_t c = peek_byte(reader);
+ switch (c) {
+ case '\0':
+ case ')':
+ return false;
+ case '[': case '(':
+ // read_blank emits the linking statement itself for these forms
+ emit = false;
+ // fall through
+ case '_':
+ TRY_THROW(ret = read_blank(reader, ctx, &o));
+ break;
+ case '<': case ':':
+ TRY_THROW(ret = read_resource(reader, &o));
+ break;
+ case '\"': case '+': case '-':
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ TRY_THROW(ret = read_literal(reader, &o));
+ break;
+ case '.':
+ TRY_THROW(ret = read_literal(reader, &o));
+ break;
+ default:
+ /* Either a boolean literal, or a qname.
+ Unfortunately there is no way to distinguish these without
+ readahead, since `true' or `false' could be the start of a qname.
+ */
+ peek_string(reader, pre, 6);
+ if (!memcmp(pre, "true", 4) && is_object_end(pre[4])) {
+ eat_string(reader, "true", 4);
+ const Ref value = push_string(reader, "true", 5);
+ const Ref datatype = push_string(reader, XSD_BOOLEAN, XSD_BOOLEAN_LEN + 1);
+ o = make_node(SERD_LITERAL, value, datatype, 0);
+ } else if (!memcmp(pre, "false", 5) && is_object_end(pre[5])) {
+ eat_string(reader, "false", 5);
+ const Ref value = push_string(reader, "false", 6);
+ const Ref datatype = push_string(reader, XSD_BOOLEAN, XSD_BOOLEAN_LEN + 1);
+ o = make_node(SERD_LITERAL, value, datatype, 0);
+ } else if (!is_object_end(c)) {
+ o = make_node(SERD_CURIE, read_qname(reader), 0, 0);
+ }
+ // Success iff some node value was produced above
+ ret = o.value;
+ }
+
+ if (ret && emit) {
+ assert(o.value);
+ ret = emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, &o);
+ }
+
+except:
+ // Reached on success as well as failure: release the object's strings
+ pop_string(reader, o.lang);
+ pop_string(reader, o.datatype);
+ pop_string(reader, o.value);
+ return ret;
+}
+
+// Spec: [8] objectList ::= object ( ',' object )*
+// Impl: [8] objectList ::= object ( ws* ',' ws* object )*
+// Reads one object, then further objects for as long as a `,' follows
+// (optionally surrounded by whitespace).
+static bool
+read_objectList(SerdReader reader, ReadContext ctx)
+{
+    TRY_RET(read_object(reader, ctx));
+    for (read_ws_star(reader);
+         peek_byte(reader) == ',';
+         read_ws_star(reader)) {
+        eat_byte(reader, ',');
+        read_ws_star(reader);
+        TRY_RET(read_object(reader, ctx));
+    }
+    return true;
+}
+
+// Spec: [7] predicateObjectList ::= verb objectList
+// (';' verb objectList)* (';')?
+// Impl: [7] predicateObjectList ::= verb ws+ objectList
+// (ws* ';' ws* verb ws+ objectList)* (';')?
+//
+// Reads `verb objectList' groups separated by `;' for the subject in ctx.
+// Each verb is stored in a local node that read_objectList sees through
+// ctx.predicate, and its string is popped once its object list is done.
+// Returns false immediately if the reader is already at EOF.
+// NOTE(review): TRY_RET / TRY_THROW are defined outside this view;
+// presumably they return false / jump to `except' on a false argument.
+static bool
+read_predicateObjectList(SerdReader reader, ReadContext ctx)
+{
+ if (reader->eof) {
+ return false;
+ }
+ Node predicate = INTERNAL_NODE_NULL;
+ TRY_RET(read_verb(reader, &predicate));
+ TRY_THROW(read_ws_plus(reader));
+ ctx.predicate = &predicate;
+ TRY_THROW(read_objectList(reader, ctx));
+ pop_string(reader, predicate.value);
+ // Cleared so the pop at `except' does not release it a second time
+ predicate.value = 0;
+ read_ws_star(reader);
+ while (peek_byte(reader) == ';') {
+ eat_byte(reader, ';');
+ read_ws_star(reader);
+ switch (peek_byte(reader)) {
+ case '.': case ']':
+ // Trailing `;' before the end of the statement or anonymous node
+ return true;
+ default:
+ TRY_THROW(read_verb(reader, &predicate));
+ ctx.predicate = &predicate;
+ TRY_THROW(read_ws_plus(reader));
+ TRY_THROW(read_objectList(reader, ctx));
+ pop_string(reader, predicate.value);
+ predicate.value = 0;
+ read_ws_star(reader);
+ }
+ }
+ return true;
+except:
+ pop_string(reader, predicate.value);
+ return false;
+}
+
+/**
+ * Recursive helper for read_collection.
+ *
+ * Called with ctx.subject set to the most recently emitted list cell.
+ * At `)' the list is terminated with (subject, rdf:rest, rdf:nil);
+ * otherwise a new cell is generated, linked from the previous one with
+ * rdf:rest, given the next object as its rdf:first, and the rest of the
+ * collection is read recursively.
+ *
+ * @return true iff the remainder of the collection, up to and including
+ * the closing `)', was read and emitted successfully.
+ */
+static bool
+read_collection_rec(SerdReader reader, ReadContext ctx)
+{
+    read_ws_star(reader);
+    if (peek_byte(reader) == ')') {
+        // End of collection.  This is success: read_collection returns
+        // this result directly, so reporting false here would make every
+        // one-element collection look like a parse failure.
+        eat_byte(reader, ')');
+        return emit_statement(reader, ctx.graph, ctx.subject,
+                              &reader->rdf_rest, &reader->rdf_nil);
+    }
+
+    const Node rest = make_node(SERD_BLANK_ID, blank_id(reader), 0, 0);
+    bool       ok   = emit_statement(reader, ctx.graph, ctx.subject,
+                                     &reader->rdf_rest, &rest);
+    if (ok) {
+        ctx.subject   = &rest;
+        ctx.predicate = &reader->rdf_first;
+        // Propagate failures from deeper in the list as well
+        ok = read_object(reader, ctx) && read_collection_rec(reader, ctx);
+    }
+
+    // Release the cell ID on every path, including early failure
+    pop_string(reader, rest.value);
+    return ok;
+}
+
+// [22] itemList ::= object+
+// [23] collection ::= '(' itemList? ')'
+// An empty collection yields rdf:nil.  Otherwise dest becomes a generated
+// blank node holding the first item, and the remaining items are read by
+// read_collection_rec.
+static bool
+read_collection(SerdReader reader, ReadContext ctx, Node* dest)
+{
+    TRY_RET(eat_byte(reader, '('));
+    read_ws_star(reader);
+
+    const bool is_empty = (peek_byte(reader) == ')');
+    if (is_empty) {
+        eat_byte(reader, ')');
+        *dest = reader->rdf_nil;
+        return true;
+    }
+
+    *dest         = make_node(SERD_BLANK_ID, blank_id(reader), 0, 0);
+    ctx.subject   = dest;
+    ctx.predicate = &reader->rdf_first;
+    if (read_object(reader, ctx)) {
+        // ctx is passed by value, so its subject is still dest here
+        return read_collection_rec(reader, ctx);
+    }
+
+    return error(reader, "unexpected end of collection\n");
+}
+
+// [11] subject ::= resource | blank
+// Returns the subject node; its value stays zero if nothing was read.
+static Node
+read_subject(SerdReader reader, ReadContext ctx)
+{
+    Node          node = INTERNAL_NODE_NULL;
+    const uint8_t next = peek_byte(reader);
+    if (next == '[' || next == '(' || next == '_') {
+        read_blank(reader, ctx, &node);
+    } else {
+        read_resource(reader, &node);
+    }
+    return node;
+}
+
+// Spec: [6] triples ::= subject predicateObjectList
+// Impl: [6] triples ::= subject ws+ predicateObjectList
+//
+// Reads a subject, mandatory whitespace, then the subject's
+// predicateObjectList.  Returns true iff all three were read.  The
+// subject's string is popped on every path once a subject has been read,
+// including when the whitespace after it is missing.
+static bool
+read_triples(SerdReader reader, ReadContext ctx)
+{
+    const Node subject = read_subject(reader, ctx);
+    if (subject.value == 0) {
+        return false;
+    }
+
+    ctx.subject = &subject;
+    const bool ret = read_ws_plus(reader)
+        && read_predicateObjectList(reader, ctx);
+
+    pop_string(reader, subject.value);
+    return ret;
+}
+
+// [5] base ::= '@base' ws+ uriref
+// Reads the new base URI and passes it to the reader's base_sink.
+static bool
+read_base(SerdReader reader)
+{
+    // `@' is already eaten in read_directive
+    eat_string(reader, "base", 4);
+    TRY_RET(read_ws_plus(reader));
+
+    Ref base_ref;
+    TRY_RET(base_ref = read_uriref(reader));
+
+    const SerdNode base_node = public_node_from_ref(reader, SERD_URI, base_ref);
+    reader->base_sink(reader->handle, &base_node);
+    pop_string(reader, base_ref);
+    return true;
+}
+
+// Spec: [4] prefixID ::= '@prefix' ws+ prefixName? ':' uriref
+// Impl: [4] prefixID ::= '@prefix' ws+ prefixName? ':' ws* uriref
+//
+// Reads a prefix definition and passes the name and URI to the reader's
+// prefix_sink, whose result becomes this function's return value.  A
+// missing prefix name is replaced by an empty string.
+// NOTE(review): TRY_RET / TRY_THROW are defined outside this view;
+// presumably they return false / jump to `except' on a false argument.
+static bool
+read_prefixID(SerdReader reader)
+{
+ // `@' is already eaten in read_directive
+ eat_string(reader, "prefix", 6);
+ TRY_RET(read_ws_plus(reader));
+ bool ret = false;
+ Ref name = read_prefixName(reader);
+ if (!name) {
+ name = push_string(reader, "", 1);
+ }
+ TRY_THROW(eat_byte(reader, ':') == ':');
+ read_ws_star(reader);
+ Ref uri = 0;
+ TRY_THROW(uri = read_uriref(reader));
+ const SerdNode name_node = public_node_from_ref(reader, SERD_LITERAL, name);
+ const SerdNode uri_node = public_node_from_ref(reader, SERD_URI, uri);
+ ret = reader->prefix_sink(reader->handle, &name_node, &uri_node);
+ pop_string(reader, uri);
+except:
+ // Reached on success as well as failure: release the prefix name
+ pop_string(reader, name);
+ return ret;
+}
+
+// [3] directive ::= prefixID | base
+// Consumes the leading `@' and dispatches on the first keyword letter.
+static bool
+read_directive(SerdReader reader)
+{
+    eat_byte(reader, '@');
+
+    const uint8_t first = peek_byte(reader);
+    if (first == 'b') {
+        return read_base(reader);
+    }
+    if (first == 'p') {
+        return read_prefixID(reader);
+    }
+    return error(reader, "illegal directive\n");
+}
+
+// Spec: [1] statement ::= directive '.' | triples '.' | ws+
+// Impl: [1] statement ::= directive ws* '.' | triples ws* '.' | ws+
+// Skips leading whitespace, reads one directive or one set of triples,
+// then expects the terminating `.'.  Reaching EOF after the leading
+// whitespace counts as success.
+static bool
+read_statement(SerdReader reader)
+{
+    ReadContext ctx = { 0, 0, 0 };
+
+    read_ws_star(reader);
+    if (reader->eof) {
+        return true;
+    }
+
+    if (peek_byte(reader) == '@') {
+        TRY_RET(read_directive(reader));
+    } else {
+        TRY_RET(read_triples(reader, ctx));
+    }
+
+    read_ws_star(reader);
+    return eat_byte(reader, '.');
+}
+
+// [1] turtleDoc ::= statement
+// Reads statements until the reader reports EOF or a statement fails.
+static bool
+read_turtleDoc(SerdReader reader)
+{
+    for (;;) {
+        if (reader->eof) {
+            return true;
+        }
+        TRY_RET(read_statement(reader));
+    }
+}
+
+/**
+ * Allocate and initialise a new reader.
+ *
+ * The sinks and handle are stored for use while reading.  The rdf:first,
+ * rdf:rest and rdf:nil URIs are pushed onto the reader's stack here and
+ * popped again by serd_reader_free.  `syntax' is not used by this
+ * function.
+ *
+ * @return The new reader, or NULL if it could not be allocated.
+ */
+SERD_API
+SerdReader
+serd_reader_new(SerdSyntax        syntax,
+                void*             handle,
+                SerdBaseSink      base_sink,
+                SerdPrefixSink    prefix_sink,
+                SerdStatementSink statement_sink,
+                SerdEndSink       end_sink)
+{
+    const Cursor cur = { NULL, 0, 0 };
+    SerdReader   me  = malloc(sizeof(struct SerdReaderImpl));
+    if (!me) {
+        return NULL;
+    }
+
+    me->handle         = handle;
+    me->base_sink      = base_sink;
+    me->prefix_sink    = prefix_sink;
+    me->statement_sink = statement_sink;
+    me->end_sink       = end_sink;
+    me->fd             = 0;
+    me->stack          = serd_stack_new(STACK_PAGE_SIZE);
+    me->cur            = cur;
+    me->blank_prefix   = NULL;
+    me->next_id        = 1;
+    me->read_buf       = 0;
+    me->read_head      = 0;
+    me->from_file      = false;  // Set properly by the read functions
+    me->eof            = false;
+#ifdef SERD_STACK_CHECK
+    me->alloc_stack = 0;
+    me->n_allocs    = 0;
+#endif
+
+#define RDF_FIRST NS_RDF "first"
+#define RDF_REST NS_RDF "rest"
+#define RDF_NIL NS_RDF "nil"
+    // sizeof a string literal includes its terminating NUL, which keeps
+    // these lengths in step with the literals themselves
+    me->rdf_first = make_node(
+        SERD_URI, push_string(me, RDF_FIRST, sizeof(RDF_FIRST)), 0, 0);
+    me->rdf_rest = make_node(
+        SERD_URI, push_string(me, RDF_REST, sizeof(RDF_REST)), 0, 0);
+    me->rdf_nil = make_node(
+        SERD_URI, push_string(me, RDF_NIL, sizeof(RDF_NIL)), 0, 0);
+
+    return me;
+}
+
+/**
+ * Free a reader created with serd_reader_new.
+ *
+ * Pops the three RDF URIs pushed at construction (in reverse order), then
+ * releases the stack buffer and the reader itself.
+ */
+SERD_API
+void
+serd_reader_free(SerdReader reader)
+{
+    pop_string(reader, reader->rdf_nil.value);
+    pop_string(reader, reader->rdf_rest.value);
+    pop_string(reader, reader->rdf_first.value);
+
+#ifdef SERD_STACK_CHECK
+    free(reader->alloc_stack);
+#endif
+    free(reader->stack.buf);
+    free(reader);
+}
+
+/**
+ * Set the prefix used for generated blank node IDs.
+ *
+ * Only the pointer is stored; the string itself is not copied.
+ */
+SERD_API
+void
+serd_reader_set_blank_prefix(SerdReader     reader,
+                             const uint8_t* prefix)
+{
+    SerdReader const me = reader;
+    me->blank_prefix = prefix;
+}
+
+/**
+ * Read a Turtle document from an open file.
+ *
+ * @param me   Reader to use.
+ * @param file Open stream to read from.
+ * @param name Name stored in the reader's cursor.
+ * @return false if the read buffer could not be allocated; otherwise true
+ * if the first page could not be loaded, else the result of reading the
+ * document.
+ */
+SERD_API
+bool
+serd_reader_read_file(SerdReader me, FILE* file, const uint8_t* name)
+{
+    /* Two zeroed pages are allocated and reading starts in the second.
+       Occasionally peek_string will move the read_head to before this
+       point when readahead causes a page fault.
+    */
+    uint8_t* const pages = (uint8_t*)calloc(2, READ_BUF_LEN);
+    if (!pages) {
+        return false;
+    }
+
+    const Cursor cur = { name, 1, 1 };
+    me->fd        = file;
+    me->read_buf  = pages + READ_BUF_LEN;
+    me->read_head = 0;
+    me->cur       = cur;
+    me->from_file = true;
+    me->eof       = false;
+
+    const bool ret = !page(me) || read_turtleDoc(me);
+
+    free(pages);
+    me->fd       = 0;
+    me->read_buf = NULL;
+    return ret;
+}
+
+/**
+ * Read a Turtle document from a string.
+ *
+ * The string is read in place; it is not copied.
+ *
+ * @param me   Reader to use.
+ * @param utf8 Text to read.
+ * @return The result of reading the document.
+ */
+SERD_API
+bool
+serd_reader_read_string(SerdReader me, const uint8_t* utf8)
+{
+    const Cursor cur = { (const uint8_t*)"(string)", 1, 1 };
+
+    me->read_buf  = (uint8_t*)utf8;
+    me->read_head = 0;
+    me->cur       = cur;
+    me->from_file = false;
+    // Reset EOF as serd_reader_read_file does, so that a reader which has
+    // already reached the end of an earlier input can be used again
+    me->eof       = false;
+
+    const bool ret = read_turtleDoc(me);
+
+    me->read_buf = NULL;
+    return ret;
+}
+
+/**
+ * Allocate a new read state.
+ *
+ * @param env          Environment stored in the state (not copied).
+ * @param base_uri_str Initial base URI string, turned into the state's
+ *                     base URI node by serd_node_new_uri_from_string.
+ * @return The new state, or NULL if it could not be allocated.
+ */
+SERD_API
+SerdReadState
+serd_read_state_new(SerdEnv        env,
+                    const uint8_t* base_uri_str)
+{
+    SerdReadState state = malloc(sizeof(struct SerdReadStateImpl));
+    if (!state) {
+        return NULL;
+    }
+
+    SerdURI base_base_uri = SERD_URI_NULL;
+    state->env           = env;
+    state->base_uri_node = serd_node_new_uri_from_string(
+        base_uri_str, &base_base_uri, &state->base_uri);
+    return state;
+}
+
+/** Free a read state together with the base URI node it holds. */
+SERD_API
+void
+serd_read_state_free(SerdReadState state)
+{
+    SerdNode* const base_node = &state->base_uri_node;
+    serd_node_free(base_node);
+    free(state);
+}
+
+/**
+ * Expand a node to a full URI node.
+ *
+ * A CURIE is expanded through the state's environment into a newly
+ * allocated string.  A URI is passed to serd_node_new_uri_from_node
+ * together with the state's base URI.
+ *
+ * @return The new node, or SERD_NODE_NULL if `node' is neither a CURIE
+ * nor a URI, if the CURIE's prefix cannot be expanded, or if memory for
+ * the expansion cannot be allocated.
+ */
+SERD_API
+SerdNode
+serd_read_state_expand(SerdReadState   state,
+                       const SerdNode* node)
+{
+    if (node->type == SERD_CURIE) {
+        SerdChunk prefix;
+        SerdChunk suffix;
+        if (!serd_env_expand(state->env, node, &prefix, &suffix)) {
+            // The chunks must not be read when expansion failed
+            return SERD_NODE_NULL;
+        }
+
+        const size_t len = prefix.len + suffix.len;
+        uint8_t*     buf = (uint8_t*)malloc(len + 1);
+        if (!buf) {
+            return SERD_NODE_NULL;
+        }
+
+        // Chunks are length-delimited, so copy by length rather than
+        // relying on NUL termination
+        memcpy(buf, prefix.buf, prefix.len);
+        memcpy(buf + prefix.len, suffix.buf, suffix.len);
+        buf[len] = '\0';
+
+        SerdNode ret = { SERD_URI,
+                         len + 1,
+                         len, // FIXME: UTF-8
+                         buf };
+        return ret;
+    } else if (node->type == SERD_URI) {
+        SerdURI ignored;
+        return serd_node_new_uri_from_node(node, &state->base_uri, &ignored);
+    } else {
+        return SERD_NODE_NULL;
+    }
+}
+
+/**
+ * Get the current base URI.
+ *
+ * Copies the parsed base URI to `out' and returns the base URI node held
+ * by the state.
+ */
+SERD_API
+SerdNode
+serd_read_state_get_base_uri(SerdReadState state,
+                             SerdURI*      out)
+{
+    const SerdNode base_node = state->base_uri_node;
+    *out = state->base_uri;
+    return base_node;
+}
+
+/**
+ * Set the base URI.
+ *
+ * The new URI is resolved against the current base URI.  On success the
+ * old base URI node is freed and replaced; on failure the state is left
+ * unchanged and false is returned.
+ */
+SERD_API
+bool
+serd_read_state_set_base_uri(SerdReadState   state,
+                             const SerdNode* uri_node)
+{
+    SerdURI  resolved_uri;
+    SerdNode resolved_node = serd_node_new_uri_from_node(
+        uri_node, &state->base_uri, &resolved_uri);
+
+    if (!resolved_node.buf) {
+        return false;
+    }
+
+    // Swap in the newly resolved base URI
+    serd_node_free(&state->base_uri_node);
+    state->base_uri_node = resolved_node;
+    state->base_uri      = resolved_uri;
+    return true;
+}
+
+/**
+ * Define a namespace prefix.
+ *
+ * A URI with a scheme is added to the environment unchanged.  Any other
+ * URI is first resolved against the current base URI; false is returned
+ * if that resolution yields no node.
+ */
+SERD_API
+bool
+serd_read_state_set_prefix(SerdReadState   state,
+                           const SerdNode* name,
+                           const SerdNode* uri_node)
+{
+    if (!serd_uri_string_has_scheme(uri_node->buf)) {
+        // Relative URI: resolve it before storing it
+        SerdURI  resolved_uri;
+        SerdNode resolved_node = serd_node_new_uri_from_node(
+            uri_node, &state->base_uri, &resolved_uri);
+        if (!resolved_node.buf) {
+            return false;
+        }
+
+        serd_env_add(state->env, name, &resolved_node);
+        serd_node_free(&resolved_node);
+        return true;
+    }
+
+    // Absolute URI: store it unchanged
+    serd_env_add(state->env, name, uri_node);
+    return true;
+}
+
+
+/**
+ * @file uri.c
+ */
+
+/** @file uri.c */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+// #define URI_DEBUG 1
+
+/**
+ * Return true iff `utf8' starts with a URI scheme followed by `:'.
+ *
+ * RFC3986: scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ */
+SERD_API
+bool
+serd_uri_string_has_scheme(const uint8_t* utf8)
+{
+    if (!is_alpha(utf8[0])) {
+        return false; // Invalid scheme initial character, URI is relative
+    }
+
+    const uint8_t* p = utf8 + 1;
+    while (*p != '\0') {
+        const uint8_t c = *p++;
+        if (c == ':') {
+            return true; // End of scheme
+        }
+        const bool is_scheme_punct = (c == '+' || c == '-' || c == '.');
+        if (!is_scheme_punct && !is_alpha(c) && !is_digit(c)) {
+            return false; // Invalid scheme character
+        }
+    }
+
+    // Reached the end of the string without finding `:'
+    return false;
+}
+
+#ifdef URI_DEBUG
+/**
+ * Print every component of `uri' that has a buffer to `file', one
+ * "name = value" line per component.  Only built with URI_DEBUG.
+ */
+static void
+serd_uri_dump(const SerdURI* uri, FILE* file)
+{
+#define PRINT_PART(range, name) \
+    if ((range).buf) { \
+        fprintf(file, " " name " = "); \
+        fwrite((range).buf, 1, (range).len, file); \
+        fprintf(file, "\n"); \
+    }
+
+    PRINT_PART(uri->scheme, "scheme");
+    PRINT_PART(uri->authority, "authority");
+    PRINT_PART(uri->path_base, "path_base");
+    PRINT_PART(uri->path, "path");
+    PRINT_PART(uri->query, "query");
+    PRINT_PART(uri->fragment, "fragment");
+}
+#endif
+
+/**
+ * Split a URI string into its components.
+ *
+ * No data is copied: each component set in `uri' points into `utf8'
+ * and records a length.  Components that are absent keep the values from
+ * SERD_URI_NULL.  The fragment component includes its leading `#', while
+ * the query component starts after its `?'.
+ *
+ * @return true on every path visible here.
+ */
+SERD_API
+bool
+serd_uri_parse(const uint8_t* utf8, SerdURI* uri)
+{
+ *uri = SERD_URI_NULL;
+ assert(uri->path_base.buf == NULL);
+ assert(uri->path_base.len == 0);
+ assert(uri->authority.len == 0);
+
+ const uint8_t* ptr = utf8;
+
+ /* See http://tools.ietf.org/html/rfc3986#section-3
+ URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ */
+
+ /* S3.1: scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
+ if (is_alpha(*ptr)) {
+ for (uint8_t c = *++ptr; true; c = *++ptr) {
+ switch (c) {
+ case '\0': case '/': case '?': case '#':
+ ptr = utf8;
+ goto path; // Relative URI (starts with path by definition)
+ case ':':
+ uri->scheme.buf = utf8;
+ uri->scheme.len = (ptr++) - utf8;
+ goto maybe_authority; // URI with scheme
+ case '+': case '-': case '.':
+ continue;
+ default:
+ // NOTE(review): a character that is not valid in a scheme is not
+ // rejected; the scan simply moves on to the next character
+ if (is_alpha(c) || is_digit(c)) {
+ continue;
+ }
+ }
+ }
+ }
+
+ /* S3.2: The authority component is preceded by a double slash ("//")
+ and is terminated by the next slash ("/"), question mark ("?"),
+ or number sign ("#") character, or by the end of the URI.
+ */
+maybe_authority:
+ if (*ptr == '/' && *(ptr + 1) == '/') {
+ ptr += 2;
+ uri->authority.buf = ptr;
+ assert(uri->authority.len == 0);
+ for (uint8_t c = *ptr; (c = *ptr) != '\0'; ++ptr) {
+ switch (c) {
+ case '/': goto path;
+ case '?': goto query;
+ case '#': goto fragment;
+ default:
+ ++uri->authority.len;
+ }
+ }
+ }
+
+ /* RFC3986 S3.3: The path is terminated by the first question mark ("?")
+ or number sign ("#") character, or by the end of the URI.
+ */
+path:
+ // An empty path leaves uri->path.buf unset
+ switch (*ptr) {
+ case '?': goto query;
+ case '#': goto fragment;
+ case '\0': goto end;
+ default: break;
+ }
+ uri->path.buf = ptr;
+ uri->path.len = 0;
+ for (uint8_t c = *ptr; (c = *ptr) != '\0'; ++ptr) {
+ switch (c) {
+ case '?': goto query;
+ case '#': goto fragment;
+ default:
+ ++uri->path.len;
+ }
+ }
+
+ /* RFC3986 S3.4: The query component is indicated by the first question
+ mark ("?") character and terminated by a number sign ("#") character
+ or by the end of the URI.
+ */
+query:
+ if (*ptr == '?') {
+ uri->query.buf = ++ptr;
+ for (uint8_t c = *ptr; (c = *ptr) != '\0'; ++ptr) {
+ switch (c) {
+ case '#':
+ goto fragment;
+ default:
+ ++uri->query.len;
+ }
+ }
+ }
+
+ /* RFC3986 S3.5: A fragment identifier component is indicated by the
+ presence of a number sign ("#") character and terminated by the end
+ of the URI.
+ */
+fragment:
+ if (*ptr == '#') {
+ // The buffer starts at the `#' itself, which is counted in the length
+ uri->fragment.buf = ptr;
+ while (*ptr++ != '\0') {
+ ++uri->fragment.len;
+ }
+ }
+
+end:
+ #ifdef URI_DEBUG
+ fprintf(stderr, "PARSE URI <%s>\n", utf8);
+ serd_uri_dump(uri, stderr);
+ fprintf(stderr, "\n");
+ #endif
+
+ return true;
+}
+
+/**
+ * Resolve the reference `r' against `base', storing the result in `t'.
+ *
+ * The base path is not merged here; it is stored in t->path_base, next
+ * to the reference path in t->path, whenever the result depends on it.
+ */
+SERD_API
+void
+serd_uri_resolve(const SerdURI* r, const SerdURI* base, SerdURI* t)
+{
+    // See http://tools.ietf.org/html/rfc3986#section-5.2.2
+
+    t->path_base.buf = NULL;
+    t->path_base.len = 0;
+    if (r->scheme.len) {
+        // The reference has a scheme of its own: use it unchanged
+        *t = *r;
+    } else {
+        if (r->authority.len) {
+            t->authority = r->authority;
+            t->path      = r->path;
+            t->query     = r->query;
+        } else {
+            t->path = r->path;
+            if (r->path.len) {
+                // Only a path that is not rooted needs the base path
+                if (r->path.buf[0] != '/') {
+                    t->path_base = base->path;
+                }
+                t->query = r->query;
+            } else {
+                // No path: keep the base path, and the base query too
+                // unless the reference has a query of its own
+                t->path_base = base->path;
+                t->query     = r->query.len ? r->query : base->query;
+            }
+            t->authority = base->authority;
+        }
+        t->scheme   = base->scheme;
+        t->fragment = r->fragment;
+    }
+
+    #ifdef URI_DEBUG
+    fprintf(stderr, "RESOLVE URI\nBASE:\n");
+    serd_uri_dump(base, stderr);
+    fprintf(stderr, "URI:\n");
+    serd_uri_dump(r, stderr);
+    fprintf(stderr, "RESULT:\n");
+    serd_uri_dump(t, stderr);
+    fprintf(stderr, "\n");
+    #endif
+}
+
+/**
+ * Write `uri' as a string to `sink'.
+ *
+ * When uri->path_base is set, the base path and the relative path are
+ * merged on the fly, dropping leading dot components of the relative
+ * path.  When there is no relative path at all, the base path is written
+ * in full.
+ *
+ * @param uri    URI to write.
+ * @param sink   Called as sink(buf, len, stream) for each piece of output.
+ * @param stream Opaque pointer passed through to `sink'.
+ * @return The number of bytes passed to `sink'.
+ */
+SERD_API
+size_t
+serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream)
+{
+    // See http://tools.ietf.org/html/rfc3986#section-5.3
+
+    size_t write_size = 0;
+#define WRITE(buf, len) \
+    write_size += len; \
+    if (len) { \
+        sink((const uint8_t*)buf, len, stream); \
+    }
+#define WRITE_CHAR(c) WRITE(&(c), 1)
+#define WRITE_COMPONENT(prefix, field, suffix) \
+    if ((field).len) { \
+        for (const uint8_t* c = (const uint8_t*)prefix; *c != '\0'; ++c) { \
+            WRITE(c, 1); \
+        } \
+        WRITE((field).buf, (field).len); \
+        for (const uint8_t* c = (const uint8_t*)suffix; *c != '\0'; ++c) { \
+            WRITE(c, 1); \
+        } \
+    }
+
+    WRITE_COMPONENT("", uri->scheme, ":");
+    if (uri->authority.buf) {
+        WRITE("//", 2);
+        WRITE(uri->authority.buf, uri->authority.len);
+    }
+    if (uri->path_base.len) {
+        if (!uri->path.buf) {
+            // No relative path (empty, query-only or fragment-only
+            // reference): the result path is the whole base path.
+            // See http://tools.ietf.org/html/rfc3986#section-5.2.2
+            WRITE_COMPONENT("", uri->path_base, "");
+        } else {
+            /* Merge paths, removing dot components.
+               See http://tools.ietf.org/html/rfc3986#section-5.2.3
+            */
+            const uint8_t* begin = uri->path.buf;
+            const uint8_t* end   = begin;
+            size_t         up    = 1;
+            if (begin) {
+                // Count and skip leading dot components
+                end = uri->path.buf + uri->path.len;
+                for (bool done = false; !done && (begin < end);) {
+                    switch (begin[0]) {
+                    case '.':
+                        switch (begin[1]) {
+                        case '/':
+                            begin += 2; // Chop leading "./"
+                            break;
+                        case '.':
+                            ++up;
+                            switch (begin[2]) {
+                            case '/':
+                                begin += 3; // Chop leading "../"
+                                break;
+                            default:
+                                begin += 2; // Chop leading ".."
+                            }
+                            break;
+                        default:
+                            ++begin; // Chop leading "."
+                        }
+                        break;
+                    case '/':
+                        if (begin[1] == '/') {
+                            ++begin; // Replace leading "//" with "/"
+                            break;
+                        } // else fall through
+                    default:
+                        done = true; // Finished chopping dot components
+                    }
+                }
+
+                if (uri->path.buf && uri->path_base.buf) {
+                    // Find the up'th last slash
+                    const uint8_t* base_last = uri->path_base.buf + uri->path_base.len - 1;
+                    do {
+                        if (*base_last == '/') {
+                            --up;
+                        }
+                    } while (up > 0 && (--base_last > uri->path_base.buf));
+
+                    // Write base URI prefix
+                    const size_t base_len = base_last - uri->path_base.buf + 1;
+                    WRITE(uri->path_base.buf, base_len);
+
+                } else {
+                    // Relative path is just query or fragment, append it to full base URI
+                    WRITE_COMPONENT("", uri->path_base, "");
+                }
+
+                // Write URI suffix
+                WRITE(begin, end - begin);
+            }
+        }
+    } else {
+        WRITE_COMPONENT("", uri->path, "");
+    }
+    WRITE_COMPONENT("?", uri->query, "");
+    if (uri->fragment.buf) {
+        // Note uri->fragment.buf includes the leading `#'
+        WRITE_COMPONENT("", uri->fragment, "");
+    }
+    return write_size;
+}
+
+/**
+ * @file writer.c
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+#define NS_XSD "http://www.w3.org/2001/XMLSchema#"
+
+/** The graph, subject and predicate a writer is currently positioned at. */
+typedef struct {
+ SerdNode graph;
+ SerdNode subject;
+ SerdNode predicate;
+} WriteContext;
+
+/** A context in which all three nodes are zeroed. */
+static const WriteContext WRITE_CONTEXT_NULL = {
+ { 0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}
+};
+
+/** Writer state; created by serd_writer_new. */
+struct SerdWriterImpl {
+ SerdSyntax syntax; // Output syntax (compared against SERD_NTRIPLES / SERD_TURTLE)
+ SerdStyle style; // Flags tested with SERD_STYLE_ASCII, _CURIED and _RESOLVED
+ SerdEnv env; // Environment used to expand and qualify names
+ SerdURI base_uri; // Base URI that relative URIs are resolved against
+ SerdStack anon_stack; // Saved WriteContexts, one per open anonymous node
+ SerdSink sink; // Output function, called as sink(buf, len, stream)
+ void* stream; // Opaque pointer passed to sink
+ WriteContext context; // Context of the statement written most recently
+ unsigned indent; // Number of tabs written after each newline
+};
+
+/**
+ * The kind of text passed to write_text.  In the code visible here,
+ * write_text only distinguishes WRITE_STRING, which may be written as raw
+ * UTF-8, from the other values.
+ */
+typedef enum {
+ WRITE_NORMAL,
+ WRITE_URI,
+ WRITE_STRING
+} TextContext;
+
+/** Return the most recently pushed context on the anonymous node stack. */
+static inline WriteContext*
+anon_stack_top(SerdWriter writer)
+{
+    assert(!serd_stack_is_empty(&writer->anon_stack));
+    uint8_t* const stack_end = writer->anon_stack.buf + writer->anon_stack.size;
+    return (WriteContext*)(stack_end - sizeof(WriteContext));
+}
+
+/**
+ * Write text to the writer's sink, escaping it as needed.
+ *
+ * Backslash, newline, carriage return and tab are written as two-character
+ * escapes, as is `"' when it is the terminator.  Any other byte equal to
+ * the terminator is written as a \uXXXX escape.  Printable ASCII is
+ * written unchanged.  Every other character is written as raw UTF-8 when
+ * `ctx' is WRITE_STRING and the ASCII style is off, and as a \uXXXX or
+ * \UXXXXXXXX escape otherwise.
+ *
+ * @param writer     Writer providing the sink and style.
+ * @param ctx        Kind of text being written.
+ * @param utf8       Text to write.
+ * @param n_bytes    Number of bytes of `utf8' to write.
+ * @param terminator Delimiter that must not appear unescaped.
+ * @return false if the text contains an invalid UTF-8 lead byte or a
+ * multi-byte sequence cut off by the end of the input, otherwise true.
+ */
+static bool
+write_text(SerdWriter writer, TextContext ctx,
+           const uint8_t* utf8, size_t n_bytes, uint8_t terminator)
+{
+    // Room for the longest escape, "\UXXXXXXXX", plus its terminating NUL
+    char escape[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+    for (size_t i = 0; i < n_bytes;) {
+        uint8_t in = utf8[i++];
+        switch (in) {
+        case '\\': writer->sink("\\\\", 2, writer->stream); continue;
+        case '\n': writer->sink("\\n", 2, writer->stream); continue;
+        case '\r': writer->sink("\\r", 2, writer->stream); continue;
+        case '\t': writer->sink("\\t", 2, writer->stream); continue;
+        case '"':
+            if (terminator == '"') {
+                writer->sink("\\\"", 2, writer->stream);
+                continue;
+            } // else fall-through
+        default: break;
+        }
+
+        if (in == terminator) {
+            snprintf(escape, 7, "\\u%04X", terminator);
+            writer->sink(escape, 6, writer->stream);
+            continue;
+        }
+
+        uint32_t c    = 0;
+        size_t   size = 0;
+        if ((in & 0x80) == 0) { // Starts with `0'
+            size = 1;
+            c = in & 0x7F;
+            if (in_range(in, 0x20, 0x7E)) { // Printable ASCII
+                writer->sink(&in, 1, writer->stream);
+                continue;
+            }
+        } else if ((in & 0xE0) == 0xC0) { // Starts with `110'
+            size = 2;
+            c = in & 0x1F;
+        } else if ((in & 0xF0) == 0xE0) { // Starts with `1110'
+            size = 3;
+            c = in & 0x0F;
+        } else if ((in & 0xF8) == 0xF0) { // Starts with `11110'
+            size = 4;
+            c = in & 0x07;
+        } else {
+            fprintf(stderr, "invalid UTF-8 at offset %zu: %X\n", i, in);
+            return false;
+        }
+
+        // The lead byte is at i - 1; refuse a sequence whose continuation
+        // bytes would lie beyond the end of the input
+        if ((i - 1) + size > n_bytes) {
+            fprintf(stderr, "truncated UTF-8 at offset %zu\n", i - 1);
+            return false;
+        }
+
+        if (ctx == WRITE_STRING && !(writer->style & SERD_STYLE_ASCII)) {
+            // Write UTF-8 character directly to UTF-8 output
+            // TODO: Scan to next escape and write entire range at once
+            writer->sink(utf8 + i - 1, size, writer->stream);
+            i += size - 1;
+            continue;
+        }
+
+#define READ_BYTE() do { \
+        assert(i < n_bytes); \
+        in = utf8[i++] & 0x3f; \
+        c <<= 6; \
+        c |= in; \
+    } while (0)
+
+        // Accumulate the continuation bytes (intentional fall-through)
+        switch (size) {
+        case 4: READ_BYTE();
+        case 3: READ_BYTE();
+        case 2: READ_BYTE();
+        }
+
+        if (c <= 0xFFFF) {
+            // Up to and including U+FFFF fits the four-digit form
+            snprintf(escape, 7, "\\u%04X", c);
+            writer->sink(escape, 6, writer->stream);
+        } else {
+            snprintf(escape, 11, "\\U%08X", c);
+            writer->sink(escape, 10, writer->stream);
+        }
+    }
+    return true;
+}
+
+/**
+ * Write a delimiter, then a newline and the current indentation.
+ *
+ * A `\n' delimiter writes only the line break.  `[' is written as it is;
+ * every other delimiter is preceded by a space.
+ */
+static void
+serd_writer_write_delim(SerdWriter writer, const uint8_t delim)
+{
+    if (delim != '\n') {
+        if (delim != '[') {
+            writer->sink(" ", 1, writer->stream);
+        }
+        writer->sink(&delim, 1, writer->stream);
+    }
+
+    writer->sink("\n", 1, writer->stream);
+
+    unsigned level = 0;
+    while (level < writer->indent) {
+        writer->sink("\t", 1, writer->stream);
+        ++level;
+    }
+}
+
+/**
+ * Write a single node to the writer's sink in the writer's syntax.
+ *
+ * @param writer   Writer to write with.
+ * @param node     Node to write.
+ * @param datatype Datatype of `node'; only used for literals, may be NULL.
+ * @param lang     Language of `node'; only used for literals, may be NULL.
+ * @return false for a SERD_NOTHING node, or for a CURIE whose prefix
+ * cannot be expanded when writing NTriples; otherwise true.
+ */
+static bool
+write_node(SerdWriter writer,
+ const SerdNode* node,
+ const SerdNode* datatype,
+ const SerdNode* lang)
+{
+ SerdChunk uri_prefix;
+ SerdChunk uri_suffix;
+ switch (node->type) {
+ case SERD_NOTHING:
+ return false;
+ case SERD_ANON_BEGIN:
+ if (writer->syntax != SERD_NTRIPLES) {
+ // Open `[' and save the current context until the node is ended
+ ++writer->indent;
+ serd_writer_write_delim(writer, '[');
+ WriteContext* ctx = (WriteContext*)serd_stack_push(
+ &writer->anon_stack, sizeof(WriteContext));
+ *ctx = writer->context;
+ writer->context.subject = *node;
+ writer->context.predicate = SERD_NODE_NULL;
+ break;
+ }
+ // NTriples: falls through (via SERD_ANON) to be written as a blank ID
+ case SERD_ANON:
+ if (writer->syntax != SERD_NTRIPLES) {
+ break;
+ } // else fall through
+ case SERD_BLANK_ID:
+ writer->sink("_:", 2, writer->stream);
+ writer->sink(node->buf, node->n_bytes - 1, writer->stream);
+ break;
+ case SERD_CURIE:
+ switch (writer->syntax) {
+ case SERD_NTRIPLES:
+ // Written here as a full URI built from the expanded prefix and suffix
+ if (!serd_env_expand(writer->env, node, &uri_prefix, &uri_suffix)) {
+ fprintf(stderr, "error: undefined namespace prefix `%s'\n", node->buf);
+ return false;
+ }
+ writer->sink("<", 1, writer->stream);
+ write_text(writer, WRITE_URI, uri_prefix.buf, uri_prefix.len, '>');
+ write_text(writer, WRITE_URI, uri_suffix.buf, uri_suffix.len, '>');
+ writer->sink(">", 1, writer->stream);
+ break;
+ case SERD_TURTLE:
+ writer->sink(node->buf, node->n_bytes - 1, writer->stream);
+ }
+ break;
+ case SERD_LITERAL:
+ if (writer->syntax == SERD_TURTLE && datatype && datatype->buf) {
+ // Booleans, decimals and integers are written bare in Turtle
+ // TODO: compare against NS_XSD prefix once
+ if (!strcmp((const char*)datatype->buf, NS_XSD "boolean")
+ || !strcmp((const char*)datatype->buf, NS_XSD "decimal")
+ || !strcmp((const char*)datatype->buf, NS_XSD "integer")) {
+ writer->sink(node->buf, node->n_bytes - 1, writer->stream);
+ break;
+ }
+ }
+ writer->sink("\"", 1, writer->stream);
+ write_text(writer, WRITE_STRING, node->buf, node->n_bytes - 1, '"');
+ writer->sink("\"", 1, writer->stream);
+ // A language tag takes precedence over a datatype
+ if (lang && lang->buf) {
+ writer->sink("@", 1, writer->stream);
+ writer->sink(lang->buf, lang->n_bytes - 1, writer->stream);
+ } else if (datatype && datatype->buf) {
+ writer->sink("^^", 2, writer->stream);
+ write_node(writer, datatype, NULL, NULL);
+ }
+ break;
+ case SERD_URI:
+ if ((writer->syntax == SERD_TURTLE)
+ && !strcmp((const char*)node->buf, NS_RDF "type")) {
+ writer->sink("a", 1, writer->stream);
+ return true;
+ } else if ((writer->style & SERD_STYLE_CURIED)
+ && serd_uri_string_has_scheme(node->buf)) {
+ SerdNode prefix;
+ SerdChunk suffix;
+ if (serd_env_qualify(writer->env, node, &prefix, &suffix)) {
+ write_text(writer, WRITE_URI, prefix.buf, prefix.n_bytes - 1, '>');
+ writer->sink(":", 1, writer->stream);
+ write_text(writer, WRITE_URI, suffix.buf, suffix.len, '>');
+ return true;
+ }
+ } else if ((writer->style & SERD_STYLE_RESOLVED)
+ && !serd_uri_string_has_scheme(node->buf)) {
+ SerdURI uri;
+ if (serd_uri_parse(node->buf, &uri)) {
+ SerdURI abs_uri;
+ serd_uri_resolve(&uri, &writer->base_uri, &abs_uri);
+ writer->sink("<", 1, writer->stream);
+ serd_uri_serialise(&abs_uri, writer->sink, writer->stream);
+ writer->sink(">", 1, writer->stream);
+ return true;
+ }
+ }
+ // None of the shortened forms applied: write the URI as it is
+ writer->sink("<", 1, writer->stream);
+ write_text(writer, WRITE_URI, node->buf, node->n_bytes - 1, '>');
+ writer->sink(">", 1, writer->stream);
+ return true;
+ }
+ return true;
+}
+
+/**
+ * Write one statement.
+ *
+ * In NTriples the statement is written in full on a line of its own.
+ * Otherwise it is abbreviated against the writer's current context: a
+ * repeated subject and predicate continue with `,', a repeated subject
+ * alone continues with `;', and a new subject starts a new description.
+ * Subjects and predicates are compared by buffer pointer, not by content.
+ *
+ * @return false if writing the object fails in NTriples, otherwise true.
+ */
+SERD_API
+bool
+serd_writer_write_statement(SerdWriter writer,
+ const SerdNode* graph,
+ const SerdNode* subject,
+ const SerdNode* predicate,
+ const SerdNode* object,
+ const SerdNode* object_datatype,
+ const SerdNode* object_lang)
+{
+ assert(subject && predicate && object);
+ switch (writer->syntax) {
+ case SERD_NTRIPLES:
+ // NOTE(review): only the result of writing the object is checked
+ write_node(writer, subject, NULL, NULL);
+ writer->sink(" ", 1, writer->stream);
+ write_node(writer, predicate, NULL, NULL);
+ writer->sink(" ", 1, writer->stream);
+ if (!write_node(writer, object, object_datatype, object_lang)) {
+ return false;
+ }
+ writer->sink(" .\n", 3, writer->stream);
+ return true;
+ case SERD_TURTLE:
+ break;
+ }
+ if (subject->buf == writer->context.subject.buf) {
+ if (predicate->buf == writer->context.predicate.buf) { // Abbreviate S P
+ ++writer->indent;
+ serd_writer_write_delim(writer, ',');
+ write_node(writer, object, object_datatype, object_lang);
+ --writer->indent;
+ } else { // Abbreviate S
+ if (writer->context.predicate.buf) {
+ serd_writer_write_delim(writer, ';');
+ } else {
+ ++writer->indent;
+ serd_writer_write_delim(writer, '\n');
+ }
+ write_node(writer, predicate, NULL, NULL);
+ writer->context.predicate = *predicate;
+ writer->sink(" ", 1, writer->stream);
+ write_node(writer, object, object_datatype, object_lang);
+ }
+ } else {
+ // New subject: finish the previous description first, if there is one
+ if (writer->context.subject.buf) {
+ if (writer->indent > 0) {
+ --writer->indent;
+ }
+ if (serd_stack_is_empty(&writer->anon_stack)) {
+ serd_writer_write_delim(writer, '.');
+ serd_writer_write_delim(writer, '\n');
+ }
+ }
+
+ if (subject->type == SERD_ANON_BEGIN) {
+ // Anonymous subject: open `[' and save the current context
+ writer->sink("[ ", 2, writer->stream);
+ ++writer->indent;
+ WriteContext* ctx = (WriteContext*)serd_stack_push(
+ &writer->anon_stack, sizeof(WriteContext));
+ *ctx = writer->context;
+ } else {
+ write_node(writer, subject, NULL, NULL);
+ ++writer->indent;
+ if (subject->type != SERD_ANON_BEGIN && subject->type != SERD_ANON) {
+ serd_writer_write_delim(writer, '\n');
+ }
+ }
+
+ writer->context.subject = *subject;
+ writer->context.predicate = SERD_NODE_NULL;
+
+ write_node(writer, predicate, NULL, NULL);
+ writer->context.predicate = *predicate;
+ writer->sink(" ", 1, writer->stream);
+
+ write_node(writer, object, object_datatype, object_lang);
+ }
+
+ // Remember this statement so that the next one can be abbreviated
+ const WriteContext new_context = { graph ? *graph : SERD_NODE_NULL,
+ *subject,
+ *predicate };
+ writer->context = new_context;
+ return true;
+}
+
+/**
+ * Close the anonymous node most recently opened on `writer`.
+ *
+ * Nothing is written for N-Triples output.  For other syntaxes the indent
+ * level is reduced, a '\n' delimiter and a closing "]" are emitted, and the
+ * write context at the top of the anonymous-node stack is restored and
+ * popped.  If the restored context has no subject, `node` becomes the
+ * current subject.
+ *
+ * @param writer Writer to operate on.
+ * @param node   The anonymous node being ended.
+ * @return false (after printing a message to stderr) if no anonymous node
+ *         is open, true otherwise.
+ */
+SERD_API
+bool
+serd_writer_end_anon(SerdWriter      writer,
+                     const SerdNode* node)
+{
+    if (writer->syntax != SERD_NTRIPLES) {
+        if (serd_stack_is_empty(&writer->anon_stack)) {
+            fprintf(stderr, "unexpected end of anonymous node\n");
+            return false;
+        }
+        assert(writer->indent > 0);
+        --writer->indent;
+        serd_writer_write_delim(writer, '\n');
+        writer->sink("]", 1, writer->stream);
+        // Restore the saved context before discarding its stack slot
+        writer->context = *anon_stack_top(writer);
+        serd_stack_pop(&writer->anon_stack, sizeof(WriteContext));
+        if (writer->context.subject.buf == NULL) {
+            // End of anonymous subject: it becomes the current subject
+            writer->context.subject = *node;
+        }
+    }
+    return true;
+}
+
+/**
+ * Terminate any statement still pending on `writer`.
+ *
+ * If the current context records a subject, " .\n" is sent to the sink and
+ * the subject's buffer pointer is cleared, so a second call writes nothing.
+ *
+ * @param writer Writer to finish.
+ */
+SERD_API
+void
+serd_writer_finish(SerdWriter writer)
+{
+    if (!writer->context.subject.buf) {
+        return;  // No subject recorded, nothing to terminate
+    }
+    writer->sink(" .\n", 3, writer->stream);
+    writer->context.subject.buf = NULL;
+}
+
+/**
+ * Allocate and initialise a new writer.
+ *
+ * @param syntax   Output syntax stored on the writer.
+ * @param style    Output style stored on the writer.
+ * @param env      Environment stored on the writer.
+ * @param base_uri Initial base URI, copied by value; may be NULL, in which
+ *                 case SERD_URI_NULL is used.
+ * @param sink     Function called to emit output bytes.
+ * @param stream   Opaque handle passed to every `sink` call.
+ * @return The new writer, or NULL if memory allocation fails.
+ */
+SERD_API
+SerdWriter
+serd_writer_new(SerdSyntax     syntax,
+                SerdStyle      style,
+                SerdEnv        env,
+                const SerdURI* base_uri,
+                SerdSink       sink,
+                void*          stream)
+{
+    SerdWriter writer = malloc(sizeof(*writer));
+    if (!writer) {
+        return NULL;  // Out of memory: report failure instead of crashing
+    }
+    const WriteContext context = WRITE_CONTEXT_NULL;
+    writer->syntax     = syntax;
+    writer->style      = style;
+    writer->env        = env;
+    writer->base_uri   = base_uri ? *base_uri : SERD_URI_NULL;
+    writer->anon_stack = serd_stack_new(sizeof(WriteContext));
+    writer->sink       = sink;
+    writer->stream     = stream;
+    writer->context    = context;
+    writer->indent     = 0;
+    return writer;
+}
+
+/**
+ * Set the base URI of `writer`.
+ *
+ * The URI is copied into the writer.  Unless the syntax is N-Triples, any
+ * statement in progress is first terminated with " .\n\n" and then an
+ * "@base <...> ." directive is written.  The write context is reset to
+ * WRITE_CONTEXT_NULL in every case.
+ *
+ * @param writer Writer to update.
+ * @param uri    New base URI; dereferenced unconditionally.
+ */
+SERD_API
+void
+serd_writer_set_base_uri(SerdWriter     writer,
+                         const SerdURI* uri)
+{
+    const bool emit_directive = (writer->syntax != SERD_NTRIPLES);
+    writer->base_uri = *uri;
+    if (emit_directive) {
+        const bool in_statement = (writer->context.graph.buf
+                                   || writer->context.subject.buf);
+        if (in_statement) {
+            // Close the open statement before starting the directive
+            writer->sink(" .\n\n", 4, writer->stream);
+            writer->context = WRITE_CONTEXT_NULL;
+        }
+        writer->sink("@base <", 7, writer->stream);
+        serd_uri_serialise(uri, writer->sink, writer->stream);
+        writer->sink("> .\n", 4, writer->stream);
+    }
+    writer->context = WRITE_CONTEXT_NULL;
+}
+
+/**
+ * Write a namespace prefix declaration.
+ *
+ * Unless the syntax is N-Triples, any statement in progress is first
+ * terminated with " .\n\n" and then "@prefix name: <uri> ." is written.
+ * The name is sent to the sink as-is; the URI goes through write_text().
+ * The write context is reset to WRITE_CONTEXT_NULL in every case.
+ *
+ * @param writer Writer to emit the directive on.
+ * @param name   Prefix name; `n_bytes - 1` bytes of `buf` are written.
+ * @param uri    Namespace URI; `n_bytes - 1` bytes of `buf` are written.
+ * @return Always true.
+ */
+SERD_API
+bool
+serd_writer_set_prefix(SerdWriter      writer,
+                       const SerdNode* name,
+                       const SerdNode* uri)
+{
+    const bool emit_directive = (writer->syntax != SERD_NTRIPLES);
+    if (emit_directive) {
+        const bool in_statement = (writer->context.graph.buf
+                                   || writer->context.subject.buf);
+        if (in_statement) {
+            // Close the open statement before starting the directive
+            writer->sink(" .\n\n", 4, writer->stream);
+            writer->context = WRITE_CONTEXT_NULL;
+        }
+        writer->sink("@prefix ", 8, writer->stream);
+        writer->sink(name->buf, name->n_bytes - 1, writer->stream);
+        writer->sink(": <", 3, writer->stream);
+        write_text(writer, WRITE_URI, uri->buf, uri->n_bytes - 1, '>');
+        writer->sink("> .\n", 4, writer->stream);
+    }
+    writer->context = WRITE_CONTEXT_NULL;
+    return true;
+}
+
+/**
+ * Finish and destroy a writer.
+ *
+ * Calls serd_writer_finish() on the writer, then releases the
+ * anonymous-node stack and the writer itself.  Passing NULL is a no-op,
+ * mirroring free().
+ *
+ * @param writer Writer to destroy, or NULL.
+ */
+SERD_API
+void
+serd_writer_free(SerdWriter writer)
+{
+    if (!writer) {
+        return;  // Nothing to release
+    }
+    serd_writer_finish(writer);
+    serd_stack_free(&writer->anon_stack);
+    free(writer);
+}