From dc2943a4ace97bbadcc27e006b461f650e0a4cf2 Mon Sep 17 00:00:00 2001 From: David Robillard Date: Wed, 2 Mar 2011 07:20:25 +0000 Subject: Port lv2config to C. --- core.lv2/lv2config | 180 ---- core.lv2/lv2config.c | 379 +++++++ core.lv2/lv2config.py | 1 - core.lv2/serd-0.1.0.c | 2700 +++++++++++++++++++++++++++++++++++++++++++++++++ core.lv2/serd-0.1.0.h | 634 ++++++++++++ core.lv2/wscript | 51 +- 6 files changed, 3758 insertions(+), 187 deletions(-) delete mode 100755 core.lv2/lv2config create mode 100644 core.lv2/lv2config.c delete mode 120000 core.lv2/lv2config.py create mode 100644 core.lv2/serd-0.1.0.c create mode 100644 core.lv2/serd-0.1.0.h (limited to 'core.lv2') diff --git a/core.lv2/lv2config b/core.lv2/lv2config deleted file mode 100755 index 590656c..0000000 --- a/core.lv2/lv2config +++ /dev/null @@ -1,180 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -"""A program (and Python module) to generate a tree of symlinks to LV2 -extension bundles, where the path of the symlink corresponds to the URI of -the extension. This allows including extension headers in code without using -the bundle name. Including extension headers in this way is much better, -since there is no dependency on the (meaningless and non-persistent) bundle -name in the code using the header. - -For example, after running lv2config (and setting the compiler include -path appropriately), LV2 headers could be included like so: - -#include "lv2/lv2plug.in/ns/lv2core/lv2.h" -#include "lv2/lv2plug.in/ns/ext/event/event.h" -#include "lv2/example.org/foo/foo.h" - -Where the initial "lv2" is arbitrary; in this case lv2config's output -directory was "lv2", and that directory's parent was added to the compiler -include search path. It is a good idea to use such a prefix directory so -domain names do not conflict with anything else in the include path. 
-""" - -from __future__ import print_function - -__authors__ = 'David Robillard' -__license = 'GNU GPL v3 or later ' -__contact__ = 'devel@lists.lv2plug.in' -__date__ = '2010-10-05' - -import errno -import glob -import os -import stat -import sys - -redland = True - -try: - import RDF # Attempt to import Redland -except: - try: - import rdflib # Attempt to import RDFLib - redland = False - except: - print("""Failed to import `RDF' (Redland) or `rdflib'. -(Please install either package, likely `python-librdf' or `python-rdflib')""",file=sys.stderr) - sys.exit(1) - -def rdf_namespace(uri): - "Create a new RDF namespace" - if redland: - return RDF.NS(uri) - else: - return rdflib.Namespace(uri) - -def rdf_load(uri): - "Load an RDF model" - if redland: - model = RDF.Model() - parser = RDF.Parser(name="turtle") - parser.parse_into_model(model, uri) - else: - model = rdflib.ConjunctiveGraph() - model.parse(uri, format="n3") - return model - -def rdf_find_type(model, rdf_type): - "Return a list of the URIs of all resources in model with a given type" - if redland: - results = model.find_statements(RDF.Statement(None, rdf.type, rdf_type)) - ret = [] - for r in results: - ret.append(str(r.subject.uri)) - return ret - else: - results = model.triples([None, rdf.type, rdf_type]) - ret = [] - for r in results: - ret.append(r[0]) - return ret - -rdf = rdf_namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') -lv2 = rdf_namespace('http://lv2plug.in/ns/lv2core#') - -def lv2_path(): - "Return the LV2 search path (LV2_PATH in the environment, or a default)." 
- if 'LV2_PATH' in os.environ: - return os.environ['LV2_PATH'] - else: - ret = '/usr/lib/lv2' + os.pathsep + '/usr/local/lib/lv2' - print('LV2_PATH unset, using default', ret) - return ret - -def __mkdir_p(path): - "Equivalent of UNIX mkdir -p" - try: - os.makedirs(path) - except OSError, e: - if e.errno == errno.EEXIST: - pass - else: - raise - -def build_tree(search_path, outdir): - """Build a directory tree under outdir containing symlinks to all LV2 - extensions found in search_path, such that the symlink paths correspond to - the extension URIs.""" - if os.path.basename(outdir) != 'lv2': - print >> sys.stderr, "lv2config: output dir must be named `lv2'" - sys.exit(1) - - if os.access(outdir, os.F_OK) and not os.access(outdir, os.W_OK): - print ("lv2config: cannot build `%s': Permission denied" % outdir, file=sys.stderr) - sys.exit(1) - - for dir in search_path.split(os.pathsep): - if not os.access(dir, os.F_OK): - continue - - print('Building includes in %s for %s/*.lv2' % (outdir, dir)) - for bundle in glob.glob(os.path.join(dir, '*.lv2')): - # Load manifest into model - manifest = rdf_load('file://' + os.path.join(bundle, 'manifest.ttl')) - - # Query extension URI - specs = rdf_find_type(manifest, lv2.Specification) - for ext_uri in specs: - ext_path = os.path.normpath(ext_uri[ext_uri.find(':') + 1:].lstrip('/')) - ext_dir = os.path.join(outdir, ext_path) - - # Make parent directories - __mkdir_p(os.path.dirname(ext_dir)) - - # Remove existing symlink if necessary - if os.access(ext_dir, os.F_OK): - mode = os.lstat(ext_dir)[stat.ST_MODE] - if stat.S_ISLNK(mode): - os.remove(ext_dir) - else: - raise Exception(ext_dir + " exists and is not a link") - - # Make symlink to bundle directory - os.symlink(bundle, ext_dir) - -def __usage(): - script = os.path.basename(sys.argv[0]) - print("""Usage: %(script)s - Build the default system lv2 include directories, - /usr/include/lv2 and /usr/local/include/lv2 - -Usage: %(script)s INCLUDEDIR - Build an lv2 include 
directory tree at INCLUDEDIR - for all extensions found in $LV2_PATH. - -Usage: %(script)s BUNDLESDIR INCLUDEDIR - Build an lv2 include directory tree at INCLUDEDIR - for all extensions found in bundles under BUNDLESDIR. -""" % {'script' : script}) - -if __name__ == "__main__": - args = sys.argv[1:] - - if len(args) == 0: - build_tree('/usr/local/lib/lv2', '/usr/local/include/lv2') - build_tree('/usr/lib/lv2', '/usr/include/lv2') - build_tree('/Library/Audio/Plug-Ins/LV2', '/Developer/Headers/lv2') - - elif '--help' in args or '-h' in args: - __usage() - - elif len(args) == 1: - build_tree(lv2_path(), args[0]) - - elif len(args) == 2: - build_tree(args[0], args[1]) - - else: - __usage() - sys.exit(1) diff --git a/core.lv2/lv2config.c b/core.lv2/lv2config.c new file mode 100644 index 0000000..7674c14 --- /dev/null +++ b/core.lv2/lv2config.c @@ -0,0 +1,379 @@ +/* + Copyright 2011 David Robillard + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#define _XOPEN_SOURCE 500 + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "serd-0.1.0.h" + +#include "lv2-config.h" + +#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +#define NS_LV2 "http://lv2plug.in/ns/lv2core#" + +typedef struct _Spec { + SerdNode uri; + SerdNode manifest; +} Spec; + +typedef struct { + SerdReader reader; + SerdReadState state; + const uint8_t* current_file; + Spec* specs; + size_t n_specs; +} World; + +/* Append a discovered specification to world->specs. */ +void +add_spec(World* world, + SerdNode* uri, + const uint8_t* manifest) +{ + world->specs = realloc(world->specs, sizeof(Spec) * (world->n_specs + 1)); + world->specs[world->n_specs].uri = *uri; + world->specs[world->n_specs].manifest = serd_node_from_string( + SERD_URI, (const uint8_t*)strdup((const char*)manifest)); + ++world->n_specs; +} + +/** Free world->specs. */ +void +free_specs(World* world) +{ + for (size_t i = 0; i < world->n_specs; ++i) { + Spec* spec = &world->specs[i]; + serd_node_free(&spec->uri); + serd_node_free(&spec->manifest); + } + free(world->specs); + world->specs = NULL; + world->n_specs = 0; +} + +/** Reader @base directive handler. */ +bool +on_base(void* handle, + const SerdNode* uri_node) +{ + World* const world = (World*)handle; + return serd_read_state_set_base_uri(world->state, uri_node); +} + +/** Reader @prefix directive handler. 
*/ +static bool +on_prefix(void* handle, + const SerdNode* name, + const SerdNode* uri_node) +{ + World* const world = (World*)handle; + return serd_read_state_set_prefix(world->state, name, uri_node); +} + +/** Reader statement handler. */ +static bool +on_statement(void* handle, + const SerdNode* graph, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* object_datatype, + const SerdNode* object_lang) +{ + World* world = (World*)handle; + SerdReadState state = world->state; + SerdNode abs_s = serd_read_state_expand(state, subject); + SerdNode abs_p = serd_read_state_expand(state, predicate); + SerdNode abs_o = serd_read_state_expand(state, object); + + if (abs_s.buf && abs_p.buf && abs_o.buf + && !strcmp((const char*)abs_p.buf, NS_RDF "type") + && !strcmp((const char*)abs_o.buf, NS_LV2 "Specification")) { + add_spec(world, &abs_s, world->current_file); + } else { + serd_node_free(&abs_s); + } + serd_node_free(&abs_p); + serd_node_free(&abs_o); + return true; +} + +/** Add any specifications found in a manifest.ttl to world->specs. */ +static void +scan_manifest(World* world, const char* uri) +{ + SerdEnv env = serd_env_new(); + + world->state = serd_read_state_new(env, (const uint8_t*)uri); + + const char* const path = uri + strlen("file://"); + FILE* fd = fopen(path, "r"); + if (fd) { + world->current_file = (const uint8_t*)uri; + if (!serd_reader_read_file(world->reader, fd, (const uint8_t*)uri)) { + fprintf(stderr, "lv2config: error reading <%s>\n", path); + } + world->current_file = NULL; + fclose(fd); + } else { + fprintf(stderr, "lv2config: failed to open <%s>\n", path); + } + + serd_read_state_free(world->state); + serd_env_free(env); + world->state = NULL; +} + +/** Expand variables, ~, etc. in path. */ +static char* +expand(const char* path) +{ + char* ret = NULL; + wordexp_t p; + + wordexp(path, &p, 0); + if (p.we_wordc == 0) { + /* Literal directory path (e.g. 
no variables or ~) */ + ret = strdup(path); + } else if (p.we_wordc == 1) { + /* Directory path expands (e.g. contains ~ or $FOO) */ + ret = strdup(p.we_wordv[0]); + } else { + /* Multiple expansions in a single directory path? */ + fprintf(stderr, "lv2config: malformed path `%s' ignored\n", path); + } + + wordfree(&p); + return ret; +} + +/** Scan all bundles in path (i.e. scan all path/foo.lv2/manifest.ttl). */ +void +scan_dir(World* world, const char* path) +{ + char* full_path = expand(path); + if (!full_path) { + return; + } + + DIR* dir = opendir(full_path); + if (!dir) { + free(full_path); + return; + } + + struct dirent* file; + while ((file = readdir(dir))) { + if (!strcmp(file->d_name, ".") || !strcmp(file->d_name, "..")) { + continue; + } + + char* uri = malloc(strlen("file://") + + strlen(full_path) + 1 + + strlen(file->d_name) + 1 + + strlen("manifest.ttl") + 1); + + sprintf(uri, "file://%s/%s/manifest.ttl", + full_path, file->d_name); + + scan_manifest(world, uri); + free(uri); + } + + closedir(dir); + free(full_path); +} + +/** Create all parent directories of dir_path, but not dir_path itself. */ +int +mkdir_parents(const char* dir_path) +{ + char* path = strdup(dir_path); + const size_t path_len = strlen(path); + size_t last_sep = 0; + for (size_t i = 1; i <= path_len; ++i) { + if (path[i] == LV2CORE_DIR_SEP[0]) { + path[i] = '\0'; + if (mkdir(path, 0755) && errno != EEXIST) { + fprintf(stderr, "lv2config: Failed to create %s (%s)\n", + path, strerror(errno)); + free(path); + return 1; + } + path[i] = LV2CORE_DIR_SEP[0]; + last_sep = i; + } + } + + free(path); + return 0; +} + +/** Return the output include dir based on path (prepend DESTDIR). 
*/ +char* +output_dir(const char* path) +{ + char* destdir = getenv("DESTDIR"); + if (destdir) { + size_t len = strlen(destdir) + strlen(path); + char* ret = malloc(len + 1); + snprintf(ret, len + 1, "%s%s", destdir, path); + return ret; + } else { + return strdup(path); + } +} + +/** Build an LV2 include tree in dest for all bunles in lv2_path. */ +void +build_tree(World* world, const char* lv2_path, const char* dest) +{ + free_specs(world); + + /* Call scan_dir for each component of lv2_path, + which will build world->specs (a linked list of struct Spec). + */ + while (lv2_path[0] != '\0') { + const char* const sep = strchr(lv2_path, LV2CORE_PATH_SEP[0]); + if (sep) { + const size_t dir_len = sep - lv2_path; + char* const dir = malloc(dir_len + 1); + memcpy(dir, lv2_path, dir_len); + dir[dir_len] = '\0'; + scan_dir(world, dir); + free(dir); + lv2_path += dir_len + 1; + } else { + scan_dir(world, lv2_path); + lv2_path = "\0"; + } + } + + /* TODO: Check revisions */ + + /* Make a link in the include tree for each specification bundle. 
*/ + for (size_t i = 0; i < world->n_specs; ++i) { + Spec* spec = &world->specs[i]; + const char* path = strchr((const char*)spec->uri.buf, ':'); + if (!path) { + fprintf(stderr, "lv2config: Invalid URI <%s>\n", spec->uri.buf); + continue; + } + for (++path; (path[0] == '/' && path[0] != '\0'); ++path) {} + + const char* bundle_uri = (const char*)spec->manifest.buf; + char* bundle_path = strdup(bundle_uri + strlen("file://")); + char* last_sep = strrchr(bundle_path, LV2CORE_DIR_SEP[0]); + if (last_sep) { + *(last_sep + 1) = '\0'; + } + + char* full_dest = output_dir(dest); + size_t len = strlen(full_dest) + 1 + strlen(path); + char* rel_inc_path = malloc(len + 1); + snprintf(rel_inc_path, len + 1, "%s/%s", full_dest, path); + free(full_dest); + + char* inc_path = expand(rel_inc_path); + free(rel_inc_path); + printf("%s => %s\n", inc_path, bundle_path); + + if (!mkdir_parents(inc_path)) { + if (!access(inc_path, F_OK) && unlink(inc_path)) { + fprintf(stderr, "lv2config: Failed to remove %s (%s)\n", + inc_path, strerror(errno)); + free(inc_path); + free(bundle_path); + continue; + } + + if (symlink(bundle_path, inc_path)) { + fprintf(stderr, "lv2config: Failed to create link (%s)\n", + strerror(errno)); + } + + free(inc_path); + free(bundle_path); + } + } +} + +int +usage(const char* name, bool error) +{ + FILE* out = (error ? stderr : stdout); + fprintf(out, "Usage: %s\n", name); + fprintf(out, "Build the default system LV2 include directories.\n\n"); + fprintf(out, "Usage: %s INCLUDE_DIR\n", name); + fprintf(out, "Build an LV2 include directory tree at INCLUDE_DIR\n"); + fprintf(out, "for all extensions found in $LV2_PATH.\n\n"); + fprintf(out, "Usage: %s INCLUDE_DIR BUNDLES_DIR\n", name); + fprintf(out, "Build an lv2 include directory tree at INCLUDE_DIR\n"); + fprintf(out, "for all extensions found in bundles under BUNDLES_DIR.\n"); + return (error ? 
EXIT_FAILURE : EXIT_SUCCESS); +} + +int +main(int argc, char** argv) +{ + World world = { NULL, NULL, NULL, NULL, 0 }; + world.reader = serd_reader_new( + SERD_TURTLE, &world, on_base, on_prefix, on_statement, NULL); + + if (argc == 1) { + /* lv2_config */ + build_tree(&world, "/usr/local/lib/lv2", "/usr/local/include/lv2"); + build_tree(&world, "/usr/lib/lv2", "/usr/include/lv2"); + } else if (argv[1][0] == '-') { + return usage(argv[0], false); + } else if (argc == 2) { + /* lv2_config INCLUDE_DIR */ + const char* lv2_path = getenv("LV2_PATH"); + if (!lv2_path) { + lv2_path = LV2CORE_DEFAULT_LV2_PATH; + } + build_tree(&world, lv2_path, argv[1]); + } else if (argc == 3) { + /* lv2_config INCLUDE_DIR BUNDLES_DIR */ + build_tree(&world, argv[2], argv[1]); + } else { + return usage(argv[0], true); + } + + free_specs(&world); + serd_reader_free(world.reader); + + return 0; +} diff --git a/core.lv2/lv2config.py b/core.lv2/lv2config.py deleted file mode 120000 index 7d77275..0000000 --- a/core.lv2/lv2config.py +++ /dev/null @@ -1 +0,0 @@ -lv2config \ No newline at end of file diff --git a/core.lv2/serd-0.1.0.c b/core.lv2/serd-0.1.0.c new file mode 100644 index 0000000..413fedb --- /dev/null +++ b/core.lv2/serd-0.1.0.c @@ -0,0 +1,2700 @@ +/* + Copyright 2011 David Robillard + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SERD_INTERNAL_H +#define SERD_INTERNAL_H + +#include +#include + +#include "serd/serd.h" + +/** A dynamic stack in memory. */ +typedef struct { + uint8_t* buf; ///< Stack memory + size_t buf_size; ///< Allocated size of buf (>= size) + size_t size; ///< Conceptual size of stack in buf +} SerdStack; + +/** An offset to start the stack at. Note 0 is reserved for NULL. */ +#define SERD_STACK_BOTTOM sizeof(void*) + +static inline SerdStack +serd_stack_new(size_t size) +{ + SerdStack stack; + stack.buf = malloc(size); + stack.buf_size = size; + stack.size = SERD_STACK_BOTTOM; + return stack; +} + +static inline bool +serd_stack_is_empty(SerdStack* stack) +{ + return stack->size <= SERD_STACK_BOTTOM; +} + +static inline void +serd_stack_free(SerdStack* stack) +{ + free(stack->buf); + stack->buf = NULL; + stack->buf_size = 0; + stack->size = 0; +} + +static inline uint8_t* +serd_stack_push(SerdStack* stack, size_t n_bytes) +{ + const size_t new_size = stack->size + n_bytes; + if (stack->buf_size < new_size) { + stack->buf_size *= 2; + stack->buf = realloc(stack->buf, stack->buf_size); + } + uint8_t* const ret = (stack->buf + stack->size); + stack->size = new_size; + return ret; +} + +static inline void +serd_stack_pop(SerdStack* stack, size_t n_bytes) +{ + assert(stack->size >= n_bytes); + stack->size -= n_bytes; +} + +/** Return true if @a c lies within [min...max] (inclusive) */ +static inline bool +in_range(const uint8_t c, const uint8_t min, const uint8_t max) 
+{ + return (c >= min && c <= max); +} + +/** RFC2234: ALPHA := %x41-5A / %x61-7A ; A-Z / a-z */ +static inline bool +is_alpha(const uint8_t c) +{ + return in_range(c, 'A', 'Z') || in_range(c, 'a', 'z'); +} + +/** RFC2234: DIGIT ::= %x30-39 ; 0-9 */ +static inline bool +is_digit(const uint8_t c) +{ + return in_range(c, '0', '9'); +} + +/** UTF-8 strlen. + * @return Lengh of @a utf8 in characters. + * @param utf8 A null-terminated UTF-8 string. + * @param out_n_bytes (Output) Set to the size of @a utf8 in bytes. + */ +static inline size_t +serd_strlen(const uint8_t* utf8, size_t* out_n_bytes) +{ + size_t n_chars = 0; + size_t i = 0; + for (; utf8[i]; ++i) { + if ((utf8[i] & 0xC0) != 0x80) { + // Does not start with `10', start of a new character + ++n_chars; + } + } + if (out_n_bytes) { + *out_n_bytes = i + 1; + } + return n_chars; +} + +#endif // SERD_INTERNAL_H + +/** + * @file env.c + */ + +#include +#include +#include +#include + + +typedef struct { + SerdNode name; + SerdNode uri; +} SerdPrefix; + +struct SerdEnvImpl { + SerdPrefix* prefixes; + size_t n_prefixes; +}; + +SERD_API +SerdEnv +serd_env_new() +{ + SerdEnv env = malloc(sizeof(struct SerdEnvImpl)); + env->prefixes = NULL; + env->n_prefixes = 0; + return env; +} + +SERD_API +void +serd_env_free(SerdEnv env) +{ + for (size_t i = 0; i < env->n_prefixes; ++i) { + serd_node_free(&env->prefixes[i].name); + serd_node_free(&env->prefixes[i].uri); + } + free(env->prefixes); + free(env); +} + +static inline SerdPrefix* +serd_env_find(SerdEnv env, + const uint8_t* name, + size_t name_len) +{ + for (size_t i = 0; i < env->n_prefixes; ++i) { + const SerdNode* const prefix_name = &env->prefixes[i].name; + if (prefix_name->n_bytes == name_len + 1) { + if (!memcmp(prefix_name->buf, name, name_len)) { + return &env->prefixes[i]; + } + } + } + return NULL; +} + +SERD_API +void +serd_env_add(SerdEnv env, + const SerdNode* name, + const SerdNode* uri) +{ + assert(name && uri); + SerdPrefix* const prefix = 
serd_env_find(env, name->buf, name->n_chars); + if (prefix) { + serd_node_free(&prefix->uri); + prefix->uri = serd_node_copy(uri); + } else { + env->prefixes = realloc(env->prefixes, + (++env->n_prefixes) * sizeof(SerdPrefix)); + env->prefixes[env->n_prefixes - 1].name = serd_node_copy(name); + env->prefixes[env->n_prefixes - 1].uri = serd_node_copy(uri); + } +} + +SERD_API +bool +serd_env_qualify(const SerdEnv env, + const SerdNode* uri, + SerdNode* prefix_name, + SerdChunk* suffix) +{ + for (size_t i = 0; i < env->n_prefixes; ++i) { + const SerdNode* const prefix_uri = &env->prefixes[i].uri; + if (uri->n_bytes >= prefix_uri->n_bytes) { + if (!strncmp((const char*)uri->buf, + (const char*)prefix_uri->buf, + prefix_uri->n_bytes - 1)) { + *prefix_name = env->prefixes[i].name; + suffix->buf = uri->buf + prefix_uri->n_bytes - 1; + suffix->len = uri->n_bytes - prefix_uri->n_bytes; + return true; + } + } + } + return false; +} + +SERD_API +bool +serd_env_expand(const SerdEnv env, + const SerdNode* qname, + SerdChunk* uri_prefix, + SerdChunk* uri_suffix) +{ + const uint8_t* const colon = memchr(qname->buf, ':', qname->n_bytes); + if (!colon) { + return false; // Illegal qname + } + + const size_t name_len = colon - qname->buf; + const SerdPrefix* const prefix = serd_env_find(env, qname->buf, name_len); + if (prefix) { + uri_prefix->buf = prefix->uri.buf; + uri_prefix->len = prefix->uri.n_bytes - 1; + uri_suffix->buf = colon + 1; + uri_suffix->len = qname->n_bytes - (colon - qname->buf) - 2; + return true; + } + return false; +} + +SERD_API +void +serd_env_foreach(const SerdEnv env, + SerdPrefixSink func, + void* handle) +{ + for (size_t i = 0; i < env->n_prefixes; ++i) { + func(handle, + &env->prefixes[i].name, + &env->prefixes[i].uri); + } +} + +/** + * @file node.c + */ + +#include +#include + + +SERD_API +SerdNode +serd_node_from_string(SerdType type, const uint8_t* buf) +{ + size_t buf_n_bytes; + const size_t buf_n_chars = serd_strlen(buf, &buf_n_bytes); + SerdNode 
ret = { type, buf_n_bytes, buf_n_chars, buf }; + return ret; +} + +SERD_API +SerdNode +serd_node_copy(const SerdNode* node) +{ + SerdNode copy = *node; + uint8_t* buf = malloc(copy.n_bytes); + memcpy(buf, node->buf, copy.n_bytes); + copy.buf = buf; + return copy; +} + +static size_t +serd_uri_string_length(const SerdURI* uri) +{ + size_t len = uri->path_base.len; + +#define ADD_LEN(field, n_delims) \ + if ((field).len) { len += (field).len + (n_delims); } + + ADD_LEN(uri->path, 1); // + possible leading `/' + ADD_LEN(uri->scheme, 1); // + trailing `:' + ADD_LEN(uri->authority, 2); // + leading `//' + ADD_LEN(uri->query, 1); // + leading `?' + ADD_LEN(uri->fragment, 1); // + leading `#' + + // Add 2 for authority // prefix (added even though authority.len = 0) + return len + 2; // + 2 for authority // +} + +static size_t +string_sink(const void* buf, size_t len, void* stream) +{ + uint8_t** ptr = (uint8_t**)stream; + memcpy(*ptr, buf, len); + *ptr += len; + return len; +} + +SERD_API +SerdNode +serd_node_new_uri_from_node(const SerdNode* uri_node, + const SerdURI* base, + SerdURI* out) +{ + return serd_node_new_uri_from_string(uri_node->buf, base, out); +} + +SERD_API +SerdNode +serd_node_new_uri_from_string(const uint8_t* str, + const SerdURI* base, + SerdURI* out) +{ + if (str[0] == '\0') { + return serd_node_new_uri(base, NULL, out); // Empty URI => Base URI + } else { + SerdURI uri; + if (serd_uri_parse(str, &uri)) { + return serd_node_new_uri(&uri, base, out); // Resolve/Serialise + } + } + return SERD_NODE_NULL; +} + +SERD_API +SerdNode +serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out) +{ + SerdURI abs_uri = *uri; + if (base) { + serd_uri_resolve(uri, base, &abs_uri); + } + + const size_t len = serd_uri_string_length(&abs_uri); + uint8_t* buf = malloc(len + 1); + + SerdNode node = { SERD_URI, len + 1, len, buf }; // FIXME: UTF-8 + + uint8_t* ptr = buf; + const size_t actual_len = serd_uri_serialise(&abs_uri, string_sink, &ptr); + + 
buf[actual_len] = '\0'; + node.n_bytes = actual_len + 1; + node.n_chars = actual_len; + + // FIXME: double parse + if (!serd_uri_parse(buf, out)) { + fprintf(stderr, "error parsing URI\n"); + return SERD_NODE_NULL; + } + + return node; +} + +SERD_API +void +serd_node_free(SerdNode* node) +{ + free((uint8_t*)node->buf); +} + +/** + * @file reader.c + */ + +#include +#include +#include +#include +#include +#include +#include + + +#define NS_XSD "http://www.w3.org/2001/XMLSchema#" +#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + +#define TRY_THROW(exp) if (!(exp)) goto except; +#define TRY_RET(exp) if (!(exp)) return 0; + +#define STACK_PAGE_SIZE 4096 +#define READ_BUF_LEN 4096 + +typedef struct { + const uint8_t* filename; + unsigned line; + unsigned col; +} Cursor; + +typedef uint32_t uchar; + +typedef size_t Ref; + +typedef struct { + SerdType type; + Ref value; + Ref datatype; + Ref lang; +} Node; + +typedef struct { + const Node* graph; + const Node* subject; + const Node* predicate; +} ReadContext; + +/** Measured UTF-8 string. 
*/ +typedef struct { + size_t n_bytes; ///< Size in bytes including trailing null byte + size_t n_chars; ///< Length in characters + uint8_t buf[]; ///< Buffer +} SerdString; + +static const Node INTERNAL_NODE_NULL = { 0, 0, 0, 0 }; + +struct SerdReaderImpl { + void* handle; + SerdBaseSink base_sink; + SerdPrefixSink prefix_sink; + SerdStatementSink statement_sink; + SerdEndSink end_sink; + Node rdf_type; + Node rdf_first; + Node rdf_rest; + Node rdf_nil; + FILE* fd; + SerdStack stack; + Cursor cur; + uint8_t* buf; + const uint8_t* blank_prefix; + unsigned next_id; + int err; + uint8_t* read_buf; + int32_t read_head; ///< Offset into read_buf + bool from_file; ///< True iff reading from @ref fd + bool eof; +#ifdef SUIL_STACK_CHECK + Ref* alloc_stack; ///< Stack of push offsets + size_t n_allocs; ///< Number of stack pushes +#endif +}; + +struct SerdReadStateImpl { + SerdEnv env; + SerdNode base_uri_node; + SerdURI base_uri; +}; + +typedef enum { + SERD_SUCCESS = 0, ///< Completed successfully + SERD_FAILURE = 1, ///< Non-fatal failure + SERD_ERROR = 2, ///< Fatal error +} SerdStatus; + +static inline int +error(SerdReader reader, const char* fmt, ...) 
+{ + va_list args; + va_start(args, fmt); + fprintf(stderr, "error: %s:%u:%u: ", + reader->cur.filename, reader->cur.line, reader->cur.col); + vfprintf(stderr, fmt, args); + return 0; +} + +static Node +make_node(SerdType type, Ref value, Ref datatype, Ref lang) +{ + const Node ret = { type, value, datatype, lang }; + return ret; +} + +static inline bool +page(SerdReader reader) +{ + assert(reader->from_file); + reader->read_head = 0; + const size_t n_read = fread(reader->read_buf, 1, READ_BUF_LEN, reader->fd); + if (n_read == 0) { + reader->read_buf[0] = '\0'; + reader->eof = true; + return false; + } else if (n_read < READ_BUF_LEN) { + reader->read_buf[n_read] = '\0'; + } + return true; +} + +static inline bool +peek_string(SerdReader reader, uint8_t* pre, int n) +{ + uint8_t* ptr = reader->read_buf + reader->read_head; + for (int i = 0; i < n; ++i) { + if (reader->from_file && (reader->read_head + i >= READ_BUF_LEN)) { + if (!page(reader)) { + return false; + } + ptr = reader->read_buf; + reader->read_head = -i; + memcpy(reader->read_buf + reader->read_head, pre, i); + assert(reader->read_buf[reader->read_head] == pre[0]); + } + if ((pre[i] = *ptr++) == '\0') { + return false; + } + } + return true; +} + +static inline uint8_t +peek_byte(SerdReader reader) +{ + return reader->read_buf[reader->read_head]; +} + +static inline uint8_t +eat_byte(SerdReader reader, const uint8_t byte) +{ + const uint8_t c = peek_byte(reader); + ++reader->read_head; + switch (c) { + case '\n': ++reader->cur.line; reader->cur.col = 0; break; + default: ++reader->cur.col; + } + + if (c != byte) { + return error(reader, "expected `%c', not `%c'\n", byte, c); + } + if (reader->from_file && (reader->read_head == READ_BUF_LEN)) { + TRY_RET(page(reader)); + assert(reader->read_head < READ_BUF_LEN); + } + if (reader->read_buf[reader->read_head] == '\0') { + reader->eof = true; + } + return c; +} + +static inline void +eat_string(SerdReader reader, const char* str, unsigned n) +{ + for 
(unsigned i = 0; i < n; ++i) { + eat_byte(reader, ((const uint8_t*)str)[i]); + } +} + +#ifdef SUIL_STACK_CHECK +static inline bool +stack_is_top_string(SerdReader reader, Ref ref) +{ + return ref == reader->alloc_stack[reader->n_allocs - 1]; +} +#endif + +static inline intptr_t +pad_size(intptr_t size) +{ + return (size + 7) & (~7); +} + +// Make a new string from a non-UTF-8 C string (internal use only) +static Ref +push_string(SerdReader reader, const char* c_str, size_t n_bytes) +{ + // Align strings to 64-bits (assuming malloc/realloc are aligned to 64-bits) + const size_t stack_size = pad_size((intptr_t)reader->stack.size); + const size_t pad = stack_size - reader->stack.size; + uint8_t* mem = serd_stack_push( + &reader->stack, pad + sizeof(SerdString) + n_bytes) + pad; + SerdString* const str = (SerdString*)mem; + str->n_bytes = n_bytes; + str->n_chars = n_bytes - 1; + memcpy(str->buf, c_str, n_bytes); +#ifdef SUIL_STACK_CHECK + reader->alloc_stack = realloc(reader->alloc_stack, + sizeof(uint8_t*) * (++reader->n_allocs)); + reader->alloc_stack[reader->n_allocs - 1] = (mem - reader->stack.buf); +#endif + return (uint8_t*)str - reader->stack.buf; +} + +static inline SerdString* +deref(SerdReader reader, const Ref ref) +{ + if (ref) { + return (SerdString*)(reader->stack.buf + ref); + } + return NULL; +} + +static inline void +push_byte(SerdReader reader, Ref ref, const uint8_t c) +{ + #ifdef SUIL_STACK_CHECK + assert(stack_is_top_string(reader, ref)); + #endif + serd_stack_push(&reader->stack, 1); + SerdString* const str = deref(reader, ref); + ++str->n_bytes; + if ((c & 0xC0) != 0x80) { + // Does not start with `10', start of a new character + ++str->n_chars; + } + assert(str->n_bytes > str->n_chars); + str->buf[str->n_bytes - 2] = c; + str->buf[str->n_bytes - 1] = '\0'; +} + +static void +pop_string(SerdReader reader, Ref ref) +{ + if (ref) { + if (ref == reader->rdf_nil.value + || ref == reader->rdf_first.value + || ref == reader->rdf_rest.value) { + return; 
+ } + #ifdef SUIL_STACK_CHECK + if (!stack_is_top_string(reader, ref)) { + fprintf(stderr, "attempt to pop non-top string %s\n", + deref(reader, ref)->buf); + fprintf(stderr, "top: %s\n", + deref(reader, reader->alloc_stack[reader->n_allocs - 1])->buf); + } + assert(stack_is_top_string(reader, ref)); + --reader->n_allocs; + #endif + serd_stack_pop(&reader->stack, deref(reader, ref)->n_bytes); + } +} + +static inline SerdNode +public_node_from_ref(SerdReader reader, SerdType type, Ref ref) +{ + if (!ref) { + return SERD_NODE_NULL; + } + const SerdString* str = deref(reader, ref); + const SerdNode public = { type, str->n_bytes, str->n_chars, str->buf }; + return public; +} + +static inline SerdNode +public_node(SerdReader reader, const Node* private) +{ + return public_node_from_ref(reader, private->type, private->value); +} + + +static inline bool +emit_statement(SerdReader reader, + const Node* g, const Node* s, const Node* p, const Node* o) +{ + assert(s->value && p->value && o->value); + const SerdNode graph = g ? 
public_node(reader, g) : SERD_NODE_NULL; + const SerdNode subject = public_node(reader, s); + const SerdNode predicate = public_node(reader, p); + const SerdNode object = public_node(reader, o); + const SerdNode object_datatype = public_node_from_ref(reader, SERD_URI, o->datatype); + const SerdNode object_lang = public_node_from_ref(reader, SERD_LITERAL, o->lang); + return reader->statement_sink(reader->handle, + &graph, + &subject, + &predicate, + &object, + &object_datatype, + &object_lang); +} + +static bool read_collection(SerdReader reader, ReadContext ctx, Node* dest); +static bool read_predicateObjectList(SerdReader reader, ReadContext ctx); + +// [40] hex ::= [#x30-#x39] | [#x41-#x46] +static inline uint8_t +read_hex(SerdReader reader) +{ + const uint8_t c = peek_byte(reader); + if (in_range(c, 0x30, 0x39) || in_range(c, 0x41, 0x46)) { + return eat_byte(reader, c); + } else { + return error(reader, "illegal hexadecimal digit `%c'\n", c); + } +} + +static inline bool +read_hex_escape(SerdReader reader, unsigned length, Ref dest) +{ + uint8_t buf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + for (unsigned i = 0; i < length; ++i) { + buf[i] = read_hex(reader); + } + + uint32_t c; + sscanf((const char*)buf, "%X", &c); + + unsigned size = 0; + if (c < 0x00000080) { + size = 1; + } else if (c < 0x00000800) { + size = 2; + } else if (c < 0x00010000) { + size = 3; + } else if (c < 0x00200000) { + size = 4; + } else { + return false; + } + + // Build output in buf + // (Note # of bytes = # of leading 1 bits in first byte) + switch (size) { + case 4: + buf[3] = 0x80 | (uint8_t)(c & 0x3F); + c >>= 6; + c |= (16 << 12); // set bit 4 + case 3: + buf[2] = 0x80 | (uint8_t)(c & 0x3F); + c >>= 6; + c |= (32 << 6); // set bit 5 + case 2: + buf[1] = 0x80 | (uint8_t)(c & 0x3F); + c >>= 6; + c |= 0xC0; // set bits 6 and 7 + case 1: + buf[0] = (uint8_t)c; + } + + for (unsigned i = 0; i < size; ++i) { + push_byte(reader, dest, buf[i]); + } + return true; +} + +static inline bool 
+read_character_escape(SerdReader reader, Ref dest) +{ + switch (peek_byte(reader)) { + case '\\': + push_byte(reader, dest, eat_byte(reader, '\\')); + return true; + case 'u': + eat_byte(reader, 'u'); + return read_hex_escape(reader, 4, dest); + case 'U': + eat_byte(reader, 'U'); + return read_hex_escape(reader, 8, dest); + default: + return false; + } +} + +static inline bool +read_echaracter_escape(SerdReader reader, Ref dest) +{ + switch (peek_byte(reader)) { + case 't': + eat_byte(reader, 't'); + push_byte(reader, dest, '\t'); + return true; + case 'n': + eat_byte(reader, 'n'); + push_byte(reader, dest, '\n'); + return true; + case 'r': + eat_byte(reader, 'r'); + push_byte(reader, dest, '\r'); + return true; + default: + return read_character_escape(reader, dest); + } +} + +static inline bool +read_scharacter_escape(SerdReader reader, Ref dest) +{ + switch (peek_byte(reader)) { + case '"': + push_byte(reader, dest, eat_byte(reader, '"')); + return true; + default: + return read_echaracter_escape(reader, dest); + } +} + +static inline bool +read_ucharacter_escape(SerdReader reader, Ref dest) +{ + switch (peek_byte(reader)) { + case '>': + push_byte(reader, dest, eat_byte(reader, '>')); + return true; + default: + return read_echaracter_escape(reader, dest); + } +} + +// [38] character ::= '\u' hex hex hex hex +// | '\U' hex hex hex hex hex hex hex hex +// | '\\' +// | [#x20-#x5B] | [#x5D-#x10FFFF] +static inline SerdStatus +read_character(SerdReader reader, Ref dest) +{ + const uint8_t c = peek_byte(reader); + assert(c != '\\'); // Only called from methods that handle escapes first + switch (c) { + case '\0': + error(reader, "unexpected end of file\n", peek_byte(reader)); + return SERD_ERROR; + default: + if (c < 0x20) { // ASCII control character + error(reader, "unexpected control character\n"); + return SERD_ERROR; + } else if (c <= 0x7E) { // Printable ASCII + push_byte(reader, dest, eat_byte(reader, c)); + return SERD_SUCCESS; + } else { // Wide UTF-8 
character + unsigned size = 1; + if ((c & 0xE0) == 0xC0) { // Starts with `110' + size = 2; + } else if ((c & 0xF0) == 0xE0) { // Starts with `1110' + size = 3; + } else if ((c & 0xF8) == 0xF0) { // Starts with `11110' + size = 4; + } else { + error(reader, "invalid character\n"); + return SERD_ERROR; + } + for (unsigned i = 0; i < size; ++i) { + push_byte(reader, dest, eat_byte(reader, peek_byte(reader))); + } + return SERD_SUCCESS; + } + } +} + +// [39] echaracter ::= character | '\t' | '\n' | '\r' +static inline SerdStatus +read_echaracter(SerdReader reader, Ref dest) +{ + uint8_t c = peek_byte(reader); + switch (c) { + case '\\': + eat_byte(reader, '\\'); + if (read_echaracter_escape(reader, peek_byte(reader))) { + return SERD_SUCCESS; + } else { + error(reader, "illegal escape `\\%c'\n", peek_byte(reader)); + return SERD_ERROR; + } + default: + return read_character(reader, dest); + } +} + +// [43] lcharacter ::= echaracter | '\"' | #x9 | #xA | #xD +static inline SerdStatus +read_lcharacter(SerdReader reader, Ref dest) +{ + const uint8_t c = peek_byte(reader); + uint8_t pre[3]; + switch (c) { + case '"': + peek_string(reader, pre, 3); + if (pre[1] == '\"' && pre[2] == '\"') { + eat_byte(reader, '\"'); + eat_byte(reader, '\"'); + eat_byte(reader, '\"'); + return SERD_FAILURE; + } else { + push_byte(reader, dest, eat_byte(reader, '"')); + return SERD_SUCCESS; + } + case '\\': + eat_byte(reader, '\\'); + if (read_scharacter_escape(reader, dest)) { + return SERD_SUCCESS; + } else { + error(reader, "illegal escape `\\%c'\n", peek_byte(reader)); + return SERD_ERROR; + } + case 0x9: case 0xA: case 0xD: + push_byte(reader, dest, eat_byte(reader, c)); + return SERD_SUCCESS; + default: + return read_echaracter(reader, dest); + } +} + +// [42] scharacter ::= ( echaracter - #x22 ) | '\"' +static inline SerdStatus +read_scharacter(SerdReader reader, Ref dest) +{ + uint8_t c = peek_byte(reader); + switch (c) { + case '\\': + eat_byte(reader, '\\'); + if 
(read_scharacter_escape(reader, dest)) { + return SERD_SUCCESS; + } else { + error(reader, "illegal escape `\\%c'\n", peek_byte(reader)); + return SERD_ERROR; + } + case '\"': + return SERD_FAILURE; + default: + return read_character(reader, dest); + } +} + +// Spec: [41] ucharacter ::= ( character - #x3E ) | '\>' +// Impl: [41] ucharacter ::= ( echaracter - #x3E ) | '\>' +static inline SerdStatus +read_ucharacter(SerdReader reader, Ref dest) +{ + const uint8_t c = peek_byte(reader); + switch (c) { + case '\\': + eat_byte(reader, '\\'); + if (read_ucharacter_escape(reader, dest)) { + return SERD_SUCCESS; + } else { + return error(reader, "illegal escape `\\%c'\n", peek_byte(reader)); + } + case '>': + return SERD_FAILURE; + default: + return read_character(reader, dest); + } +} + +// [10] comment ::= '#' ( [^#xA #xD] )* +static void +read_comment(SerdReader reader) +{ + eat_byte(reader, '#'); + uint8_t c; + while (((c = peek_byte(reader)) != 0xA) && (c != 0xD)) { + eat_byte(reader, c); + } +} + +// [24] ws ::= #x9 | #xA | #xD | #x20 | comment +static inline bool +read_ws(SerdReader reader) +{ + const uint8_t c = peek_byte(reader); + switch (c) { + case 0x9: case 0xA: case 0xD: case 0x20: + eat_byte(reader, c); + return true; + case '#': + read_comment(reader); + return true; + default: + return false; + } +} + +static inline void +read_ws_star(SerdReader reader) +{ + while (read_ws(reader)) {} +} + +static inline bool +read_ws_plus(SerdReader reader) +{ + TRY_RET(read_ws(reader)); + read_ws_star(reader); + return true; +} + +// [37] longSerdString ::= #x22 #x22 #x22 lcharacter* #x22 #x22 #x22 +static Ref +read_longString(SerdReader reader) +{ + eat_string(reader, "\"\"\"", 3); + Ref str = push_string(reader, "", 1); + SerdStatus st; + while (!(st = read_lcharacter(reader, str))) {} + if (st != SERD_ERROR) { + return str; + } + pop_string(reader, str); + return 0; +} + +// [36] string ::= #x22 scharacter* #x22 +static Ref +read_string(SerdReader reader) +{ + 
eat_byte(reader, '\"'); + Ref str = push_string(reader, "", 1); + SerdStatus st; + while (!(st = read_scharacter(reader, str))) {} + if (st != SERD_ERROR) { + eat_byte(reader, '\"'); + return str; + } + pop_string(reader, str); + return 0; +} + +// [35] quotedString ::= string | longSerdString +static Ref +read_quotedString(SerdReader reader) +{ + uint8_t pre[3]; + peek_string(reader, pre, 3); + assert(pre[0] == '\"'); + switch (pre[1]) { + case '\"': + if (pre[2] == '\"') + return read_longString(reader); + else + return read_string(reader); + default: + return read_string(reader); + } +} + +// [34] relativeURI ::= ucharacter* +static inline Ref +read_relativeURI(SerdReader reader) +{ + Ref str = push_string(reader, "", 1); + SerdStatus st; + while (!(st = read_ucharacter(reader, str))) {} + if (st != SERD_ERROR) { + return str; + } + pop_string(reader, str); + return 0; +} + +// [30] nameStartChar ::= [A-Z] | "_" | [a-z] +// | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] +// | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] +// | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] +static inline uchar +read_nameStartChar(SerdReader reader, bool required) +{ + const uint8_t c = peek_byte(reader); + if (c == '_' || is_alpha(c)) { + return eat_byte(reader, c); + } else { + if (required) { + error(reader, "illegal character `%c'\n", c); + } + return 0; + } +} + +// [31] nameChar ::= nameStartChar | '-' | [0-9] +// | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] +static inline uchar +read_nameChar(SerdReader reader) +{ + uchar c = read_nameStartChar(reader, false); + if (c) + return c; + + switch ((c = peek_byte(reader))) { + case '-': case 0xB7: case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return eat_byte(reader, c); + default: + // TODO: 0x300-0x036F | 0x203F-0x2040 + return 0; + } + return 0; +} + +// [33] prefixName ::= ( nameStartChar - '_' ) 
nameChar* +static Ref +read_prefixName(SerdReader reader) +{ + uint8_t c = peek_byte(reader); + if (c == '_') { + error(reader, "unexpected `_'\n"); + return 0; + } + TRY_RET(c = read_nameStartChar(reader, false)); + Ref str = push_string(reader, "", 1); + push_byte(reader, str, c); + while ((c = read_nameChar(reader)) != 0) { + push_byte(reader, str, c); + } + return str; +} + +// [32] name ::= nameStartChar nameChar* +static Ref +read_name(SerdReader reader, Ref dest, bool required) +{ + uchar c = read_nameStartChar(reader, required); + if (!c) { + if (required) { + error(reader, "illegal character at start of name\n"); + } + return 0; + } + do { + push_byte(reader, dest, c); + } while ((c = read_nameChar(reader)) != 0); + return dest; +} + +// [29] language ::= [a-z]+ ('-' [a-z0-9]+ )* +static Ref +read_language(SerdReader reader) +{ + const uint8_t start = peek_byte(reader); + if (!in_range(start, 'a', 'z')) { + error(reader, "unexpected `%c'\n", start); + return 0; + } + Ref str = push_string(reader, "", 1); + push_byte(reader, str, eat_byte(reader, start)); + uint8_t c; + while ((c = peek_byte(reader)) && in_range(c, 'a', 'z')) { + push_byte(reader, str, eat_byte(reader, c)); + } + if (peek_byte(reader) == '-') { + push_byte(reader, str, eat_byte(reader, '-')); + while ((c = peek_byte(reader)) && ( + in_range(c, 'a', 'z') || in_range(c, '0', '9'))) { + push_byte(reader, str, eat_byte(reader, c)); + } + } + return str; +} + +// [28] uriref ::= '<' relativeURI '>' +static Ref +read_uriref(SerdReader reader) +{ + TRY_RET(eat_byte(reader, '<')); + Ref const str = read_relativeURI(reader); + if (str && eat_byte(reader, '>')) { + return str; + } + pop_string(reader, str); + return 0; +} + +// [27] qname ::= prefixName? ':' name? 
+static Ref +read_qname(SerdReader reader) +{ + Ref prefix = read_prefixName(reader); + if (!prefix) { + prefix = push_string(reader, "", 1); + } + TRY_THROW(eat_byte(reader, ':')); + push_byte(reader, prefix, ':'); + Ref str = read_name(reader, prefix, false); + return str ? str : prefix; +except: + pop_string(reader, prefix); + return 0; +} + +static bool +read_0_9(SerdReader reader, Ref str, bool at_least_one) +{ + uint8_t c; + if (at_least_one) { + if (!is_digit((c = peek_byte(reader)))) { + return error(reader, "expected digit\n"); + } + push_byte(reader, str, eat_byte(reader, c)); + } + while (is_digit((c = peek_byte(reader)))) { + push_byte(reader, str, eat_byte(reader, c)); + } + return true; +} + +// [19] exponent ::= [eE] ('-' | '+')? [0-9]+ +// [18] decimal ::= ( '-' | '+' )? ( [0-9]+ '.' [0-9]* +// | '.' ([0-9])+ +// | ([0-9])+ ) +// [17] double ::= ( '-' | '+' )? ( [0-9]+ '.' [0-9]* exponent +// | '.' ([0-9])+ exponent +// | ([0-9])+ exponent ) +// [16] integer ::= ( '-' | '+' ) ? [0-9]+ +static bool +read_number(SerdReader reader, Node* dest) +{ + #define XSD_DECIMAL NS_XSD "decimal" + #define XSD_DOUBLE NS_XSD "double" + #define XSD_INTEGER NS_XSD "integer" + Ref str = push_string(reader, "", 1); + uint8_t c = peek_byte(reader); + bool has_decimal = false; + Ref datatype = 0; + if (c == '-' || c == '+') { + push_byte(reader, str, eat_byte(reader, c)); + } + if ((c = peek_byte(reader)) == '.') { + has_decimal = true; + // decimal case 2 (e.g. '.0' or `-.0' or `+.0') + push_byte(reader, str, eat_byte(reader, c)); + TRY_THROW(read_0_9(reader, str, true)); + } else { + // all other cases ::= ( '-' | '+' ) [0-9]+ ( . )? ( [0-9]+ )? ... 
+ TRY_THROW(read_0_9(reader, str, true)); + if ((c = peek_byte(reader)) == '.') { + has_decimal = true; + push_byte(reader, str, eat_byte(reader, c)); + TRY_THROW(read_0_9(reader, str, false)); + } + } + c = peek_byte(reader); + if (c == 'e' || c == 'E') { + // double + push_byte(reader, str, eat_byte(reader, c)); + switch ((c = peek_byte(reader))) { + case '+': case '-': + push_byte(reader, str, eat_byte(reader, c)); + default: break; + } + read_0_9(reader, str, true); + datatype = push_string(reader, XSD_DOUBLE, strlen(XSD_DOUBLE) + 1); + } else if (has_decimal) { + datatype = push_string(reader, XSD_DECIMAL, strlen(XSD_DECIMAL) + 1); + } else { + datatype = push_string(reader, XSD_INTEGER, strlen(XSD_INTEGER) + 1); + } + *dest = make_node(SERD_LITERAL, str, datatype, 0); + assert(dest->value); + return true; +except: + pop_string(reader, datatype); + pop_string(reader, str); + return false; +} + +// [25] resource ::= uriref | qname +static bool +read_resource(SerdReader reader, Node* dest) +{ + switch (peek_byte(reader)) { + case '<': + *dest = make_node(SERD_URI, read_uriref(reader), 0, 0); + break; + default: + *dest = make_node(SERD_CURIE, read_qname(reader), 0, 0); + } + return (dest->value != 0); +} + +// [14] literal ::= quotedString ( '@' language )? | datatypeSerdString +// | integer | double | decimal | boolean +static bool +read_literal(SerdReader reader, Node* dest) +{ + Ref str = 0; + Node datatype = INTERNAL_NODE_NULL; + const uint8_t c = peek_byte(reader); + if (c == '-' || c == '+' || c == '.' 
|| is_digit(c)) { + return read_number(reader, dest); + } else if (c == '\"') { + str = read_quotedString(reader); + if (!str) { + return false; + } + + Ref lang = 0; + switch (peek_byte(reader)) { + case '^': + eat_byte(reader, '^'); + eat_byte(reader, '^'); + TRY_THROW(read_resource(reader, &datatype)); + break; + case '@': + eat_byte(reader, '@'); + TRY_THROW(lang = read_language(reader)); + } + *dest = make_node(SERD_LITERAL, str, datatype.value, lang); + } else { + return error(reader, "Unknown literal type\n"); + } + return true; +except: + pop_string(reader, str); + return false; +} + +// [12] predicate ::= resource +static bool +read_predicate(SerdReader reader, Node* dest) +{ + return read_resource(reader, dest); +} + +// [9] verb ::= predicate | 'a' +static bool +read_verb(SerdReader reader, Node* dest) +{ + uint8_t pre[2]; + peek_string(reader, pre, 2); + switch (pre[0]) { + case 'a': + switch (pre[1]) { + case 0x9: case 0xA: case 0xD: case 0x20: + eat_byte(reader, 'a'); + *dest = make_node(SERD_URI, + push_string(reader, NS_RDF "type", 48), 0, 0); + return true; + default: break; // fall through + } + default: + return read_predicate(reader, dest); + } +} + +// [26] nodeID ::= '_:' name +static Ref +read_nodeID(SerdReader reader) +{ + eat_byte(reader, '_'); + eat_byte(reader, ':'); + Ref str = push_string(reader, "", 1); + return read_name(reader, str, true); +} + +static Ref +blank_id(SerdReader reader) +{ + const char* prefix = reader->blank_prefix + ? 
(const char*)reader->blank_prefix + : "genid"; + char str[32]; // FIXME: ensure length of reader->blank_prefix is OK + const int len = snprintf(str, sizeof(str), "%s%u", + prefix, reader->next_id++); + return push_string(reader, str, len + 1); +} + +// Spec: [21] blank ::= nodeID | '[]' +// | '[' predicateObjectList ']' | collection +// Impl: [21] blank ::= nodeID | '[ ws* ]' +// | '[' ws* predicateObjectList ws* ']' | collection +static bool +read_blank(SerdReader reader, ReadContext ctx, Node* dest) +{ + switch (peek_byte(reader)) { + case '_': + *dest = make_node(SERD_BLANK_ID, read_nodeID(reader), 0, 0); + return true; + case '[': + eat_byte(reader, '['); + read_ws_star(reader); + if (peek_byte(reader) == ']') { + eat_byte(reader, ']'); + *dest = make_node(SERD_BLANK_ID, blank_id(reader), 0, 0); + if (ctx.subject) { + TRY_RET(emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, dest)); + } + return true; + } + *dest = make_node(SERD_ANON_BEGIN, blank_id(reader), 0, 0); + if (ctx.subject) { + TRY_RET(emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, dest)); + dest->type = SERD_ANON; + } + ctx.subject = dest; + read_predicateObjectList(reader, ctx); + read_ws_star(reader); + eat_byte(reader, ']'); + if (reader->end_sink) { + const SerdNode end = public_node(reader, dest); + reader->end_sink(reader->handle, &end); + } + return true; + case '(': + if (read_collection(reader, ctx, dest)) { + if (ctx.subject) { + TRY_RET(emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, dest)); + } + return true; + } + return false; + default: + return error(reader, "illegal blank node\n"); + } +} + +inline static bool +is_object_end(const uint8_t c) +{ + switch (c) { + case 0x9: case 0xA: case 0xD: case 0x20: case '\0': + case '#': case '.': case ';': + return true; + default: + return false; + } +} + +// [13] object ::= resource | blank | literal +// Recurses, calling statement_sink for every statement encountered. 
+// Leaves stack in original calling state (i.e. pops everything it pushes). +static bool +read_object(SerdReader reader, ReadContext ctx) +{ + static const char* const XSD_BOOLEAN = NS_XSD "boolean"; + static const size_t XSD_BOOLEAN_LEN = 40; + + uint8_t pre[6]; + bool ret = false; + bool emit = (ctx.subject != 0); + Node o = INTERNAL_NODE_NULL; + const uint8_t c = peek_byte(reader); + switch (c) { + case '\0': + case ')': + return false; + case '[': case '(': + emit = false; + // fall through + case '_': + TRY_THROW(ret = read_blank(reader, ctx, &o)); + break; + case '<': case ':': + TRY_THROW(ret = read_resource(reader, &o)); + break; + case '\"': case '+': case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + TRY_THROW(ret = read_literal(reader, &o)); + break; + case '.': + TRY_THROW(ret = read_literal(reader, &o)); + break; + default: + /* Either a boolean literal, or a qname. + Unfortunately there is no way to distinguish these without + readahead, since `true' or `false' could be the start of a qname. 
+ */ + peek_string(reader, pre, 6); + if (!memcmp(pre, "true", 4) && is_object_end(pre[4])) { + eat_string(reader, "true", 4); + const Ref value = push_string(reader, "true", 5); + const Ref datatype = push_string(reader, XSD_BOOLEAN, XSD_BOOLEAN_LEN + 1); + o = make_node(SERD_LITERAL, value, datatype, 0); + } else if (!memcmp(pre, "false", 5) && is_object_end(pre[5])) { + eat_string(reader, "false", 5); + const Ref value = push_string(reader, "false", 6); + const Ref datatype = push_string(reader, XSD_BOOLEAN, XSD_BOOLEAN_LEN + 1); + o = make_node(SERD_LITERAL, value, datatype, 0); + } else if (!is_object_end(c)) { + o = make_node(SERD_CURIE, read_qname(reader), 0, 0); + } + ret = o.value; + } + + if (ret && emit) { + assert(o.value); + ret = emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, &o); + } + +except: + pop_string(reader, o.lang); + pop_string(reader, o.datatype); + pop_string(reader, o.value); + return ret; +} + +// Spec: [8] objectList ::= object ( ',' object )* +// Impl: [8] objectList ::= object ( ws* ',' ws* object )* +static bool +read_objectList(SerdReader reader, ReadContext ctx) +{ + TRY_RET(read_object(reader, ctx)); + read_ws_star(reader); + while (peek_byte(reader) == ',') { + eat_byte(reader, ','); + read_ws_star(reader); + TRY_RET(read_object(reader, ctx)); + read_ws_star(reader); + } + return true; +} + +// Spec: [7] predicateObjectList ::= verb objectList +// (';' verb objectList)* (';')? +// Impl: [7] predicateObjectList ::= verb ws+ objectList +// (ws* ';' ws* verb ws+ objectList)* (';')? 
+static bool +read_predicateObjectList(SerdReader reader, ReadContext ctx) +{ + if (reader->eof) { + return false; + } + Node predicate = INTERNAL_NODE_NULL; + TRY_RET(read_verb(reader, &predicate)); + TRY_THROW(read_ws_plus(reader)); + ctx.predicate = &predicate; + TRY_THROW(read_objectList(reader, ctx)); + pop_string(reader, predicate.value); + predicate.value = 0; + read_ws_star(reader); + while (peek_byte(reader) == ';') { + eat_byte(reader, ';'); + read_ws_star(reader); + switch (peek_byte(reader)) { + case '.': case ']': + return true; + default: + TRY_THROW(read_verb(reader, &predicate)); + ctx.predicate = &predicate; + TRY_THROW(read_ws_plus(reader)); + TRY_THROW(read_objectList(reader, ctx)); + pop_string(reader, predicate.value); + predicate.value = 0; + read_ws_star(reader); + } + } + return true; +except: + pop_string(reader, predicate.value); + return false; +} + +/** Recursive helper for read_collection. */ +static bool +read_collection_rec(SerdReader reader, ReadContext ctx) +{ + read_ws_star(reader); + if (peek_byte(reader) == ')') { + eat_byte(reader, ')'); + TRY_RET(emit_statement(reader, NULL, ctx.subject, + &reader->rdf_rest, &reader->rdf_nil)); + return false; + } else { + const Node rest = make_node(SERD_BLANK_ID, blank_id(reader), 0, 0); + TRY_RET(emit_statement(reader, ctx.graph, ctx.subject, &reader->rdf_rest, &rest)); + ctx.subject = &rest; + ctx.predicate = &reader->rdf_first; + if (read_object(reader, ctx)) { + read_collection_rec(reader, ctx); + pop_string(reader, rest.value); + return true; + } else { + pop_string(reader, rest.value); + return false; + } + } +} + +// [22] itemList ::= object+ +// [23] collection ::= '(' itemList? 
')' +static bool +read_collection(SerdReader reader, ReadContext ctx, Node* dest) +{ + TRY_RET(eat_byte(reader, '(')); + read_ws_star(reader); + if (peek_byte(reader) == ')') { // Empty collection + eat_byte(reader, ')'); + *dest = reader->rdf_nil; + return true; + } + + *dest = make_node(SERD_BLANK_ID, blank_id(reader), 0, 0); + ctx.subject = dest; + ctx.predicate = &reader->rdf_first; + if (!read_object(reader, ctx)) { + return error(reader, "unexpected end of collection\n"); + } + + ctx.subject = dest; + return read_collection_rec(reader, ctx); +} + +// [11] subject ::= resource | blank +static Node +read_subject(SerdReader reader, ReadContext ctx) +{ + Node subject = INTERNAL_NODE_NULL; + switch (peek_byte(reader)) { + case '[': case '(': case '_': + read_blank(reader, ctx, &subject); + break; + default: + read_resource(reader, &subject); + } + return subject; +} + +// Spec: [6] triples ::= subject predicateObjectList +// Impl: [6] triples ::= subject ws+ predicateObjectList +static bool +read_triples(SerdReader reader, ReadContext ctx) +{ + const Node subject = read_subject(reader, ctx); + bool ret = false; + if (subject.value != 0) { + ctx.subject = &subject; + TRY_RET(read_ws_plus(reader)); + ret = read_predicateObjectList(reader, ctx); + pop_string(reader, subject.value); + } + ctx.subject = ctx.predicate = 0; + return ret; +} + +// [5] base ::= '@base' ws+ uriref +static bool +read_base(SerdReader reader) +{ + // `@' is already eaten in read_directive + eat_string(reader, "base", 4); + TRY_RET(read_ws_plus(reader)); + Ref uri; + TRY_RET(uri = read_uriref(reader)); + const SerdNode uri_node = public_node_from_ref(reader, SERD_URI, uri); + reader->base_sink(reader->handle, &uri_node); + pop_string(reader, uri); + return true; +} + +// Spec: [4] prefixID ::= '@prefix' ws+ prefixName? ':' uriref +// Impl: [4] prefixID ::= '@prefix' ws+ prefixName? 
':' ws* uriref +static bool +read_prefixID(SerdReader reader) +{ + // `@' is already eaten in read_directive + eat_string(reader, "prefix", 6); + TRY_RET(read_ws_plus(reader)); + bool ret = false; + Ref name = read_prefixName(reader); + if (!name) { + name = push_string(reader, "", 1); + } + TRY_THROW(eat_byte(reader, ':') == ':'); + read_ws_star(reader); + Ref uri = 0; + TRY_THROW(uri = read_uriref(reader)); + const SerdNode name_node = public_node_from_ref(reader, SERD_LITERAL, name); + const SerdNode uri_node = public_node_from_ref(reader, SERD_URI, uri); + ret = reader->prefix_sink(reader->handle, &name_node, &uri_node); + pop_string(reader, uri); +except: + pop_string(reader, name); + return ret; +} + +// [3] directive ::= prefixID | base +static bool +read_directive(SerdReader reader) +{ + eat_byte(reader, '@'); + switch (peek_byte(reader)) { + case 'b': + return read_base(reader); + case 'p': + return read_prefixID(reader); + default: + return error(reader, "illegal directive\n"); + } +} + +// Spec: [1] statement ::= directive '.' | triples '.' | ws+ +// Impl: [1] statement ::= directive ws* '.' | triples ws* '.' 
| ws+ +static bool +read_statement(SerdReader reader) +{ + ReadContext ctx = { 0, 0, 0 }; + read_ws_star(reader); + if (reader->eof) { + return true; + } + switch (peek_byte(reader)) { + case '@': + TRY_RET(read_directive(reader)); + break; + default: + TRY_RET(read_triples(reader, ctx)); + break; + } + read_ws_star(reader); + return eat_byte(reader, '.'); +} + +// [1] turtleDoc ::= statement +static bool +read_turtleDoc(SerdReader reader) +{ + while (!reader->eof) { + TRY_RET(read_statement(reader)); + } + return true; +} + +SERD_API +SerdReader +serd_reader_new(SerdSyntax syntax, + void* handle, + SerdBaseSink base_sink, + SerdPrefixSink prefix_sink, + SerdStatementSink statement_sink, + SerdEndSink end_sink) +{ + const Cursor cur = { NULL, 0, 0 }; + SerdReader me = malloc(sizeof(struct SerdReaderImpl)); + me->handle = handle; + me->base_sink = base_sink; + me->prefix_sink = prefix_sink; + me->statement_sink = statement_sink; + me->end_sink = end_sink; + me->fd = 0; + me->stack = serd_stack_new(STACK_PAGE_SIZE); + me->cur = cur; + me->blank_prefix = NULL; + me->next_id = 1; + me->read_buf = 0; + me->read_head = 0; + me->eof = false; +#ifdef SERD_STACK_CHECK + me->alloc_stack = 0; + me->n_allocs = 0; +#endif + +#define RDF_FIRST NS_RDF "first" +#define RDF_REST NS_RDF "rest" +#define RDF_NIL NS_RDF "nil" + me->rdf_first = make_node(SERD_URI, push_string(me, RDF_FIRST, 49), 0, 0); + me->rdf_rest = make_node(SERD_URI, push_string(me, RDF_REST, 48), 0, 0); + me->rdf_nil = make_node(SERD_URI, push_string(me, RDF_NIL, 47), 0, 0); + + return me; +} + +SERD_API +void +serd_reader_free(SerdReader reader) +{ + SerdReader const me = (SerdReader)reader; + pop_string(me, me->rdf_nil.value); + pop_string(me, me->rdf_rest.value); + pop_string(me, me->rdf_first.value); + +#ifdef SERD_STACK_CHECK + free(me->alloc_stack); +#endif + free(me->stack.buf); + free(me); +} + +SERD_API +void +serd_reader_set_blank_prefix(SerdReader reader, + const uint8_t* prefix) +{ + 
reader->blank_prefix = prefix; +} + +SERD_API +bool +serd_reader_read_file(SerdReader me, FILE* file, const uint8_t* name) +{ + const Cursor cur = { name, 1, 1 }; + me->fd = file; + me->read_buf = (uint8_t*)malloc(READ_BUF_LEN * 2); + me->read_head = 0; + me->cur = cur; + me->from_file = true; + me->eof = false; + + /* Read into the second page of the buffer. Occasionally peek_string + will move the read_head to before this point when readahead causes + a page fault. + */ + memset(me->read_buf, '\0', READ_BUF_LEN * 2); + me->read_buf += READ_BUF_LEN; + + const bool ret = !page(me) || read_turtleDoc(me); + + free(me->read_buf - READ_BUF_LEN); + me->fd = 0; + me->read_buf = NULL; + return ret; +} + +SERD_API +bool +serd_reader_read_string(SerdReader me, const uint8_t* utf8) +{ + const Cursor cur = { (const uint8_t*)"(string)", 1, 1 }; + + me->read_buf = (uint8_t*)utf8; + me->read_head = 0; + me->cur = cur; + me->from_file = false; + + const bool ret = read_turtleDoc(me); + + me->read_buf = NULL; + return ret; +} + +SERD_API +SerdReadState +serd_read_state_new(SerdEnv env, + const uint8_t* base_uri_str) +{ + SerdReadState state = malloc(sizeof(struct SerdReadStateImpl)); + SerdURI base_base_uri = SERD_URI_NULL; + state->env = env; + state->base_uri_node = serd_node_new_uri_from_string( + base_uri_str, &base_base_uri, &state->base_uri); + return state; +} + +SERD_API +void +serd_read_state_free(SerdReadState state) +{ + serd_node_free(&state->base_uri_node); + free(state); +} + +SERD_API +SerdNode +serd_read_state_expand(SerdReadState state, + const SerdNode* node) +{ + if (node->type == SERD_CURIE) { + SerdChunk prefix; + SerdChunk suffix; + serd_env_expand(state->env, node, &prefix, &suffix); + SerdNode ret = { SERD_URI, + prefix.len + suffix.len + 1, + prefix.len + suffix.len, // FIXME: UTF-8 + NULL }; + ret.buf = malloc(ret.n_bytes); + snprintf((char*)ret.buf, ret.n_bytes, "%s%s", prefix.buf, suffix.buf); + return ret; + } else if (node->type == SERD_URI) { + 
SerdURI ignored; + return serd_node_new_uri_from_node(node, &state->base_uri, &ignored); + } else { + return SERD_NODE_NULL; + } +} + +SERD_API +SerdNode +serd_read_state_get_base_uri(SerdReadState state, + SerdURI* out) +{ + *out = state->base_uri; + return state->base_uri_node; +} + +SERD_API +bool +serd_read_state_set_base_uri(SerdReadState state, + const SerdNode* uri_node) +{ + // Resolve base URI and create a new node and URI for it + SerdURI base_uri; + SerdNode base_uri_node = serd_node_new_uri_from_node( + uri_node, &state->base_uri, &base_uri); + + if (base_uri_node.buf) { + // Replace the current base URI + serd_node_free(&state->base_uri_node); + state->base_uri_node = base_uri_node; + state->base_uri = base_uri; + return true; + } + return false; +} + +SERD_API +bool +serd_read_state_set_prefix(SerdReadState state, + const SerdNode* name, + const SerdNode* uri_node) +{ + if (serd_uri_string_has_scheme(uri_node->buf)) { + // Set prefix to absolute URI + serd_env_add(state->env, name, uri_node); + return true; + } else { + // Resolve relative URI and create a new node and URI for it + SerdURI abs_uri; + SerdNode abs_uri_node = serd_node_new_uri_from_node( + uri_node, &state->base_uri, &abs_uri); + + if (!abs_uri_node.buf) { + return false; + } + + // Set prefix to resolved (absolute) URI + serd_env_add(state->env, name, &abs_uri_node); + serd_node_free(&abs_uri_node); + return true; + } + return false; +} + + +/** + * @file uri.c + */ + +/** @file uri.c */ + +#include +#include +#include + + +// #define URI_DEBUG 1 + +SERD_API +bool +serd_uri_string_has_scheme(const uint8_t* utf8) +{ + // RFC3986: scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." 
#ifdef URI_DEBUG
/**
   Print every component of `uri` that has a buffer to `file`, one
   "name = value" line per component (debugging aid, URI_DEBUG only).

   The original ignored `file` and always wrote to stderr; the callers in
   this file pass stderr, so their output is unchanged.
*/
static void
serd_uri_dump(const SerdURI* uri, FILE* file)
{
#define PRINT_PART(range, name) \
    if ((range).buf) { \
        fprintf(file, " " name " = "); \
        fwrite((range).buf, 1, (range).len, file); \
        fprintf(file, "\n"); \
    }

    PRINT_PART(uri->scheme,    "scheme");
    PRINT_PART(uri->authority, "authority");
    PRINT_PART(uri->path_base, "path_base");
    PRINT_PART(uri->path,      "path");
    PRINT_PART(uri->query,     "query");
    PRINT_PART(uri->fragment,  "fragment");
}
#endif
+ */ +maybe_authority: + if (*ptr == '/' && *(ptr + 1) == '/') { + ptr += 2; + uri->authority.buf = ptr; + assert(uri->authority.len == 0); + for (uint8_t c = *ptr; (c = *ptr) != '\0'; ++ptr) { + switch (c) { + case '/': goto path; + case '?': goto query; + case '#': goto fragment; + default: + ++uri->authority.len; + } + } + } + + /* RFC3986 S3.3: The path is terminated by the first question mark ("?") + or number sign ("#") character, or by the end of the URI. + */ +path: + switch (*ptr) { + case '?': goto query; + case '#': goto fragment; + case '\0': goto end; + default: break; + } + uri->path.buf = ptr; + uri->path.len = 0; + for (uint8_t c = *ptr; (c = *ptr) != '\0'; ++ptr) { + switch (c) { + case '?': goto query; + case '#': goto fragment; + default: + ++uri->path.len; + } + } + + /* RFC3986 S3.4: The query component is indicated by the first question + mark ("?") character and terminated by a number sign ("#") character + or by the end of the URI. + */ +query: + if (*ptr == '?') { + uri->query.buf = ++ptr; + for (uint8_t c = *ptr; (c = *ptr) != '\0'; ++ptr) { + switch (c) { + case '#': + goto fragment; + default: + ++uri->query.len; + } + } + } + + /* RFC3986 S3.5: A fragment identifier component is indicated by the + presence of a number sign ("#") character and terminated by the end + of the URI. 
+ */ +fragment: + if (*ptr == '#') { + uri->fragment.buf = ptr; + while (*ptr++ != '\0') { + ++uri->fragment.len; + } + } + +end: + #ifdef URI_DEBUG + fprintf(stderr, "PARSE URI <%s>\n", utf8); + serd_uri_dump(uri, stderr); + fprintf(stderr, "\n"); + #endif + + return true; +} + +SERD_API +void +serd_uri_resolve(const SerdURI* r, const SerdURI* base, SerdURI* t) +{ + // See http://tools.ietf.org/html/rfc3986#section-5.2.2 + + t->path_base.buf = NULL; + t->path_base.len = 0; + if (r->scheme.len) { + *t = *r; + } else { + if (r->authority.len) { + t->authority = r->authority; + t->path = r->path; + t->query = r->query; + } else { + t->path = r->path; + if (!r->path.len) { + t->path_base = base->path; + if (r->query.len) { + t->query = r->query; + } else { + t->query = base->query; + } + } else { + if (r->path.buf[0] != '/') { + t->path_base = base->path; + } + t->query = r->query; + } + t->authority = base->authority; + } + t->scheme = base->scheme; + t->fragment = r->fragment; + } + + #ifdef URI_DEBUG + fprintf(stderr, "RESOLVE URI\nBASE:\n"); + serd_uri_dump(base, stderr); + fprintf(stderr, "URI:\n"); + serd_uri_dump(r, stderr); + fprintf(stderr, "RESULT:\n"); + serd_uri_dump(t, stderr); + fprintf(stderr, "\n"); + #endif +} + +SERD_API +size_t +serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream) +{ + // See http://tools.ietf.org/html/rfc3986#section-5.3 + + size_t write_size = 0; +#define WRITE(buf, len) \ + write_size += len; \ + if (len) { \ + sink((const uint8_t*)buf, len, stream); \ + } +#define WRITE_CHAR(c) WRITE(&(c), 1) +#define WRITE_COMPONENT(prefix, field, suffix) \ + if ((field).len) { \ + for (const uint8_t* c = (const uint8_t*)prefix; *c != '\0'; ++c) { \ + WRITE(c, 1); \ + } \ + WRITE((field).buf, (field).len); \ + for (const uint8_t* c = (const uint8_t*)suffix; *c != '\0'; ++c) { \ + WRITE(c, 1); \ + } \ + } + + WRITE_COMPONENT("", uri->scheme, ":"); + if (uri->authority.buf) { + WRITE("//", 2); + WRITE(uri->authority.buf, 
uri->authority.len); + } + if (uri->path_base.len) { + if (!uri->path.buf && (uri->fragment.buf || uri->query.buf)) { + WRITE_COMPONENT("", uri->path_base, ""); + } else { + /* Merge paths, removing dot components. + See http://tools.ietf.org/html/rfc3986#section-5.2.3 + */ + const uint8_t* begin = uri->path.buf; + const uint8_t* end = begin; + size_t up = 1; + if (begin) { + // Count and skip leading dot components + end = uri->path.buf + uri->path.len; + for (bool done = false; !done && (begin < end);) { + switch (begin[0]) { + case '.': + switch (begin[1]) { + case '/': + begin += 2; // Chop leading "./" + break; + case '.': + ++up; + switch (begin[2]) { + case '/': + begin += 3; // Chop lading "../" + break; + default: + begin += 2; // Chop leading ".." + } + break; + default: + ++begin; // Chop leading "." + } + break; + case '/': + if (begin[1] == '/') { + ++begin; // Replace leading "//" with "/" + break; + } // else fall through + default: + done = true; // Finished chopping dot components + } + } + + if (uri->path.buf && uri->path_base.buf) { + // Find the up'th last slash + const uint8_t* base_last = uri->path_base.buf + uri->path_base.len - 1; + do { + if (*base_last == '/') { + --up; + } + } while (up > 0 && (--base_last > uri->path_base.buf)); + + // Write base URI prefix + const size_t base_len = base_last - uri->path_base.buf + 1; + WRITE(uri->path_base.buf, base_len); + + } else { + // Relative path is just query or fragment, append it to full base URI + WRITE_COMPONENT("", uri->path_base, ""); + } + + // Write URI suffix + WRITE(begin, end - begin); + } + } + } else { + WRITE_COMPONENT("", uri->path, ""); + } + WRITE_COMPONENT("?", uri->query, ""); + if (uri->fragment.buf) { + // Note uri->fragment.buf includes the leading `#' + WRITE_COMPONENT("", uri->fragment, ""); + } + return write_size; +} + +/** + * @file writer.c + */ + +#include +#include +#include +#include + + +#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +#define NS_XSD 
"http://www.w3.org/2001/XMLSchema#" + +typedef struct { + SerdNode graph; + SerdNode subject; + SerdNode predicate; +} WriteContext; + +static const WriteContext WRITE_CONTEXT_NULL = { + { 0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0} +}; + +struct SerdWriterImpl { + SerdSyntax syntax; + SerdStyle style; + SerdEnv env; + SerdURI base_uri; + SerdStack anon_stack; + SerdSink sink; + void* stream; + WriteContext context; + unsigned indent; +}; + +typedef enum { + WRITE_NORMAL, + WRITE_URI, + WRITE_STRING +} TextContext; + +static inline WriteContext* +anon_stack_top(SerdWriter writer) +{ + assert(!serd_stack_is_empty(&writer->anon_stack)); + return (WriteContext*)(writer->anon_stack.buf + + writer->anon_stack.size - sizeof(WriteContext)); +} + +static bool +write_text(SerdWriter writer, TextContext ctx, + const uint8_t* utf8, size_t n_bytes, uint8_t terminator) +{ + char escape[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + for (size_t i = 0; i < n_bytes;) { + uint8_t in = utf8[i++]; + switch (in) { + case '\\': writer->sink("\\\\", 2, writer->stream); continue; + case '\n': writer->sink("\\n", 2, writer->stream); continue; + case '\r': writer->sink("\\r", 2, writer->stream); continue; + case '\t': writer->sink("\\t", 2, writer->stream); continue; + case '"': + if (terminator == '"') { + writer->sink("\\\"", 2, writer->stream); + continue; + } // else fall-through + default: break; + } + + if (in == terminator) { + snprintf(escape, 7, "\\u%04X", terminator); + writer->sink(escape, 6, writer->stream); + continue; + } + + uint32_t c = 0; + size_t size = 0; + if ((in & 0x80) == 0) { // Starts with `0' + size = 1; + c = in & 0x7F; + if (in_range(in, 0x20, 0x7E)) { // Printable ASCII + writer->sink(&in, 1, writer->stream); + continue; + } + } else if ((in & 0xE0) == 0xC0) { // Starts with `110' + size = 2; + c = in & 0x1F; + } else if ((in & 0xF0) == 0xE0) { // Starts with `1110' + size = 3; + c = in & 0x0F; + } else if ((in & 0xF8) == 0xF0) { // Starts with `11110' + size = 4; + c = 
in & 0x07; + } else { + fprintf(stderr, "invalid UTF-8 at offset %zu: %X\n", i, in); + return false; + } + + if (ctx == WRITE_STRING && !(writer->style & SERD_STYLE_ASCII)) { + // Write UTF-8 character directly to UTF-8 output + // TODO: Scan to next escape and write entire range at once + writer->sink(utf8 + i - 1, size, writer->stream); + i += size - 1; + continue; + } + +#define READ_BYTE() do { \ + assert(i < n_bytes); \ + in = utf8[i++] & 0x3f; \ + c <<= 6; \ + c |= in; \ + } while (0) + + switch (size) { + case 4: READ_BYTE(); + case 3: READ_BYTE(); + case 2: READ_BYTE(); + } + + if (c < 0xFFFF) { + snprintf(escape, 7, "\\u%04X", c); + writer->sink(escape, 6, writer->stream); + } else { + snprintf(escape, 11, "\\U%08X", c); + writer->sink(escape, 10, writer->stream); + } + } + return true; +} + +static void +serd_writer_write_delim(SerdWriter writer, const uint8_t delim) +{ + switch (delim) { + case '\n': + break; + default: + writer->sink(" ", 1, writer->stream); + case '[': + writer->sink(&delim, 1, writer->stream); + } + writer->sink("\n", 1, writer->stream); + for (unsigned i = 0; i < writer->indent; ++i) { + writer->sink("\t", 1, writer->stream); + } +} + +static bool +write_node(SerdWriter writer, + const SerdNode* node, + const SerdNode* datatype, + const SerdNode* lang) +{ + SerdChunk uri_prefix; + SerdChunk uri_suffix; + switch (node->type) { + case SERD_NOTHING: + return false; + case SERD_ANON_BEGIN: + if (writer->syntax != SERD_NTRIPLES) { + ++writer->indent; + serd_writer_write_delim(writer, '['); + WriteContext* ctx = (WriteContext*)serd_stack_push( + &writer->anon_stack, sizeof(WriteContext)); + *ctx = writer->context; + writer->context.subject = *node; + writer->context.predicate = SERD_NODE_NULL; + break; + } + case SERD_ANON: + if (writer->syntax != SERD_NTRIPLES) { + break; + } // else fall through + case SERD_BLANK_ID: + writer->sink("_:", 2, writer->stream); + writer->sink(node->buf, node->n_bytes - 1, writer->stream); + break; + case 
SERD_CURIE: + switch (writer->syntax) { + case SERD_NTRIPLES: + if (!serd_env_expand(writer->env, node, &uri_prefix, &uri_suffix)) { + fprintf(stderr, "error: undefined namespace prefix `%s'\n", node->buf); + return false; + } + writer->sink("<", 1, writer->stream); + write_text(writer, WRITE_URI, uri_prefix.buf, uri_prefix.len, '>'); + write_text(writer, WRITE_URI, uri_suffix.buf, uri_suffix.len, '>'); + writer->sink(">", 1, writer->stream); + break; + case SERD_TURTLE: + writer->sink(node->buf, node->n_bytes - 1, writer->stream); + } + break; + case SERD_LITERAL: + if (writer->syntax == SERD_TURTLE && datatype && datatype->buf) { + // TODO: compare against NS_XSD prefix once + if (!strcmp((const char*)datatype->buf, NS_XSD "boolean") + || !strcmp((const char*)datatype->buf, NS_XSD "decimal") + || !strcmp((const char*)datatype->buf, NS_XSD "integer")) { + writer->sink(node->buf, node->n_bytes - 1, writer->stream); + break; + } + } + writer->sink("\"", 1, writer->stream); + write_text(writer, WRITE_STRING, node->buf, node->n_bytes - 1, '"'); + writer->sink("\"", 1, writer->stream); + if (lang && lang->buf) { + writer->sink("@", 1, writer->stream); + writer->sink(lang->buf, lang->n_bytes - 1, writer->stream); + } else if (datatype && datatype->buf) { + writer->sink("^^", 2, writer->stream); + write_node(writer, datatype, NULL, NULL); + } + break; + case SERD_URI: + if ((writer->syntax == SERD_TURTLE) + && !strcmp((const char*)node->buf, NS_RDF "type")) { + writer->sink("a", 1, writer->stream); + return true; + } else if ((writer->style & SERD_STYLE_CURIED) + && serd_uri_string_has_scheme(node->buf)) { + SerdNode prefix; + SerdChunk suffix; + if (serd_env_qualify(writer->env, node, &prefix, &suffix)) { + write_text(writer, WRITE_URI, prefix.buf, prefix.n_bytes - 1, '>'); + writer->sink(":", 1, writer->stream); + write_text(writer, WRITE_URI, suffix.buf, suffix.len, '>'); + return true; + } + } else if ((writer->style & SERD_STYLE_RESOLVED) + && 
!serd_uri_string_has_scheme(node->buf)) { + SerdURI uri; + if (serd_uri_parse(node->buf, &uri)) { + SerdURI abs_uri; + serd_uri_resolve(&uri, &writer->base_uri, &abs_uri); + writer->sink("<", 1, writer->stream); + serd_uri_serialise(&abs_uri, writer->sink, writer->stream); + writer->sink(">", 1, writer->stream); + return true; + } + } + writer->sink("<", 1, writer->stream); + write_text(writer, WRITE_URI, node->buf, node->n_bytes - 1, '>'); + writer->sink(">", 1, writer->stream); + return true; + } + return true; +} + +SERD_API +bool +serd_writer_write_statement(SerdWriter writer, + const SerdNode* graph, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* object_datatype, + const SerdNode* object_lang) +{ + assert(subject && predicate && object); + switch (writer->syntax) { + case SERD_NTRIPLES: + write_node(writer, subject, NULL, NULL); + writer->sink(" ", 1, writer->stream); + write_node(writer, predicate, NULL, NULL); + writer->sink(" ", 1, writer->stream); + if (!write_node(writer, object, object_datatype, object_lang)) { + return false; + } + writer->sink(" .\n", 3, writer->stream); + return true; + case SERD_TURTLE: + break; + } + if (subject->buf == writer->context.subject.buf) { + if (predicate->buf == writer->context.predicate.buf) { // Abbreviate S P + ++writer->indent; + serd_writer_write_delim(writer, ','); + write_node(writer, object, object_datatype, object_lang); + --writer->indent; + } else { // Abbreviate S + if (writer->context.predicate.buf) { + serd_writer_write_delim(writer, ';'); + } else { + ++writer->indent; + serd_writer_write_delim(writer, '\n'); + } + write_node(writer, predicate, NULL, NULL); + writer->context.predicate = *predicate; + writer->sink(" ", 1, writer->stream); + write_node(writer, object, object_datatype, object_lang); + } + } else { + if (writer->context.subject.buf) { + if (writer->indent > 0) { + --writer->indent; + } + if (serd_stack_is_empty(&writer->anon_stack)) { + 
serd_writer_write_delim(writer, '.'); + serd_writer_write_delim(writer, '\n'); + } + } + + if (subject->type == SERD_ANON_BEGIN) { + writer->sink("[ ", 2, writer->stream); + ++writer->indent; + WriteContext* ctx = (WriteContext*)serd_stack_push( + &writer->anon_stack, sizeof(WriteContext)); + *ctx = writer->context; + } else { + write_node(writer, subject, NULL, NULL); + ++writer->indent; + if (subject->type != SERD_ANON_BEGIN && subject->type != SERD_ANON) { + serd_writer_write_delim(writer, '\n'); + } + } + + writer->context.subject = *subject; + writer->context.predicate = SERD_NODE_NULL; + + write_node(writer, predicate, NULL, NULL); + writer->context.predicate = *predicate; + writer->sink(" ", 1, writer->stream); + + write_node(writer, object, object_datatype, object_lang); + } + + const WriteContext new_context = { graph ? *graph : SERD_NODE_NULL, + *subject, + *predicate }; + writer->context = new_context; + return true; +} + +SERD_API +bool +serd_writer_end_anon(SerdWriter writer, + const SerdNode* node) +{ + if (writer->syntax == SERD_NTRIPLES) { + return true; + } + if (serd_stack_is_empty(&writer->anon_stack)) { + fprintf(stderr, "unexpected end of anonymous node\n"); + return false; + } + assert(writer->indent > 0); + --writer->indent; + serd_writer_write_delim(writer, '\n'); + writer->sink("]", 1, writer->stream); + writer->context = *anon_stack_top(writer); + serd_stack_pop(&writer->anon_stack, sizeof(WriteContext)); + if (!writer->context.subject.buf) { // End of anonymous subject + writer->context.subject = *node; + } + return true; +} + +SERD_API +void +serd_writer_finish(SerdWriter writer) +{ + if (writer->context.subject.buf) { + writer->sink(" .\n", 3, writer->stream); + writer->context.subject.buf = NULL; + } +} + +SERD_API +SerdWriter +serd_writer_new(SerdSyntax syntax, + SerdStyle style, + SerdEnv env, + const SerdURI* base_uri, + SerdSink sink, + void* stream) +{ + const WriteContext context = WRITE_CONTEXT_NULL; + SerdWriter writer = 
malloc(sizeof(struct SerdWriterImpl)); + writer->syntax = syntax; + writer->style = style; + writer->env = env; + writer->base_uri = base_uri ? *base_uri : SERD_URI_NULL; + writer->anon_stack = serd_stack_new(sizeof(WriteContext)); + writer->sink = sink; + writer->stream = stream; + writer->context = context; + writer->indent = 0; + return writer; +} + +SERD_API +void +serd_writer_set_base_uri(SerdWriter writer, + const SerdURI* uri) +{ + writer->base_uri = *uri; + if (writer->syntax != SERD_NTRIPLES) { + if (writer->context.graph.buf || writer->context.subject.buf) { + writer->sink(" .\n\n", 4, writer->stream); + writer->context = WRITE_CONTEXT_NULL; + } + writer->sink("@base <", 7, writer->stream); + serd_uri_serialise(uri, writer->sink, writer->stream); + writer->sink("> .\n", 4, writer->stream); + } + writer->context = WRITE_CONTEXT_NULL; +} + +SERD_API +bool +serd_writer_set_prefix(SerdWriter writer, + const SerdNode* name, + const SerdNode* uri) +{ + if (writer->syntax != SERD_NTRIPLES) { + if (writer->context.graph.buf || writer->context.subject.buf) { + writer->sink(" .\n\n", 4, writer->stream); + writer->context = WRITE_CONTEXT_NULL; + } + writer->sink("@prefix ", 8, writer->stream); + writer->sink(name->buf, name->n_bytes - 1, writer->stream); + writer->sink(": <", 3, writer->stream); + write_text(writer, WRITE_URI, uri->buf, uri->n_bytes - 1, '>'); + writer->sink("> .\n", 4, writer->stream); + } + writer->context = WRITE_CONTEXT_NULL; + return true; +} + +SERD_API +void +serd_writer_free(SerdWriter writer) +{ + SerdWriter const me = (SerdWriter)writer; + serd_writer_finish(me); + serd_stack_free(&writer->anon_stack); + free(me); +} diff --git a/core.lv2/serd-0.1.0.h b/core.lv2/serd-0.1.0.h new file mode 100644 index 0000000..56511b1 --- /dev/null +++ b/core.lv2/serd-0.1.0.h @@ -0,0 +1,634 @@ +/* + Copyright 2011 David Robillard + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following 
conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/** + @file serd.h API for Serd, a lightweight RDF syntax library. +*/ + +#ifndef SERD_SERD_H +#define SERD_SERD_H + +#include +#include +#include +#include + +#ifdef SERD_SHARED + #if defined _WIN32 || defined __CYGWIN__ + #define SERD_LIB_IMPORT __declspec(dllimport) + #define SERD_LIB_EXPORT __declspec(dllexport) + #else + #define SERD_LIB_IMPORT __attribute__ ((visibility("default"))) + #define SERD_LIB_EXPORT __attribute__ ((visibility("default"))) + #endif + #ifdef SERD_INTERNAL + #define SERD_API SERD_LIB_EXPORT + #else + #define SERD_API SERD_LIB_IMPORT + #endif +#else + #define SERD_API +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + @defgroup serd Serd + A lightweight RDF syntax library. + @{ +*/ + +/** + Environment (namespace prefixes). + + A SerdEnv represents a set of namespace prefixes, and is used to resolve + CURIEs to full URIs. 
+*/ +typedef struct SerdEnvImpl* SerdEnv; + +/** + RDF reader. + + A SerdReader parses RDF by reading some syntax and calling user-provided + sink functions as input is read (much like an XML SAX parser). +*/ +typedef struct SerdReaderImpl* SerdReader; + +/** + Read state. + + This represents state (context) necessary for fully resolving URIs during a + read (i.e. the base URI and namespace prefixes). It is implemented + separately from SerdReader so the reader can avoid the overhead in cases + where this information is unnecessary (e.g. streaming reserialisation). +*/ +typedef struct SerdReadStateImpl* SerdReadState; + +/** + RDF writer. + + A SerdWriter provides a number of functions to allow writing RDF syntax out + to some stream. These functions are deliberately compatible with the sink + functions used by SerdReader, so a reader can be directly connected to a + writer to re-serialise a document. +*/ +typedef struct SerdWriterImpl* SerdWriter; + +/** + RDF syntax type. +*/ +typedef enum { + /** + Turtle - Terse RDF Triple Language (UTF-8). + @see Turtle + */ + SERD_TURTLE = 1, + + /** + NTriples - Line-based RDF triples (ASCII). + @see NTriples + */ + SERD_NTRIPLES = 2 +} SerdSyntax; + +/** + Type of a syntactic RDF node. + + This is more precise than the type of an abstract RDF node. An abstract node + is either a resource, literal, or blank. In syntax there are two ways to + refer to both a resource (by URI or CURIE) and a blank (by ID or + anonymously). + + Serd represents all nodes as an unquoted UTF-8 string "value" associated + with a @ref SerdType, which is precise enough to preserve the syntactic + information required for streaming abbreviation. A non-abbreviating sink may + simply consider @ref SERD_ANON_BEGIN and @ref SERD_ANON equivalent to + @ref SERD_BLANK_ID. +*/ +typedef enum { + /** + The type of a nonexistent node. + + This type is occasionally useful, but is never emitted by the reader. + */ + SERD_NOTHING = 0, + + /** + Literal value. 
+ + A literal optionally has either an associated language, or an associated + datatype (not both). + */ + SERD_LITERAL = 1, + + /** + URI (absolute or relative). + + Value is an unquoted URI string, which is either a relative reference + with respect to the current base URI, or an absolute URI. A URI is an ID + with universal scope. + @see RFC3986. + */ + SERD_URI = 2, + + /** + CURIE, a shortened URI. + + Value is an unquoted CURIE string relative to the current environment, + e.g. "rdf:type". + @see CURIE Syntax 1.0 + */ + SERD_CURIE = 3, + + /** + A blank node ID. + + Value is a blank node ID, e.g. "id3", which is valid only in this + serialisation. + @see Turtle + nodeID + */ + SERD_BLANK_ID = 4, + + /** + The first reference to an anonymous (inlined) blank node. + + Value is identical to a @ref SERD_BLANK_ID value (i.e. this type may be + safely considered equivalent to @ref SERD_BLANK_ID). + */ + SERD_ANON_BEGIN = 5, + + /** + An anonymous blank node. + + Value is identical to a @ref SERD_BLANK_ID value (i.e. this type may be + safely considered equivalent to @ref SERD_BLANK_ID). + */ + SERD_ANON = 6 +} SerdType; + +/** + @name SerdURI + @{ +*/ + +/** + An unterminated string fragment. +*/ +typedef struct { + const uint8_t* buf; /**< Start of chunk */ + size_t len; /**< Length of chunk in bytes */ +} SerdChunk; + +/** + A parsed URI. + + This struct directly refers to chunks in other strings, it does not own any + memory itself. Thus, URIs can be parsed and/or resolved against a base URI + in-place without allocating memory. +*/ +typedef struct { + SerdChunk scheme; /**< Scheme */ + SerdChunk authority; /**< Authority */ + SerdChunk path_base; /**< Path prefix if relative */ + SerdChunk path; /**< Path suffix */ + SerdChunk query; /**< Query */ + SerdChunk fragment; /**< Fragment */ +} SerdURI; + +static const SerdURI SERD_URI_NULL = {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}}; + +/** + Return true iff @a utf8 starts with a valid URI scheme. 
+*/ +SERD_API +bool +serd_uri_string_has_scheme(const uint8_t* utf8); + +/** + Parse @a utf8, writing result to @a out. +*/ +SERD_API +bool +serd_uri_parse(const uint8_t* utf8, SerdURI* out); + +/** + Set @a out to @a uri resolved against @a base. +*/ +SERD_API +void +serd_uri_resolve(const SerdURI* uri, const SerdURI* base, SerdURI* out); + +/** + Sink function for raw string output. +*/ +typedef size_t (*SerdSink)(const void* buf, size_t len, void* stream); + +/** + Serialise @a uri with a series of calls to @a sink. +*/ +SERD_API +size_t +serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream); + +/** + @} + @name SerdNode + @{ +*/ + +/** + A syntactic RDF node. +*/ +typedef struct { + SerdType type; + size_t n_bytes; /**< Size in bytes (including null) */ + size_t n_chars; /**< Length in characters */ + const uint8_t* buf; /**< Buffer */ +} SerdNode; + +static const SerdNode SERD_NODE_NULL = { SERD_NOTHING, 0, 0, 0 }; + +/** + Make a (shallow) node from @a str. + + This measures, but does not copy, @a str. No memory is allocated. +*/ +SERD_API +SerdNode +serd_node_from_string(SerdType type, const uint8_t* str); + +/** + Make a deep copy of @a node. + + @return a node that the caller must free with @ref serd_node_free. +*/ +SERD_API +SerdNode +serd_node_copy(const SerdNode* node); + +/** + Simple wrapper for serd_node_new_uri to resolve a URI node. +*/ +SERD_API +SerdNode +serd_node_new_uri_from_node(const SerdNode* uri_node, + const SerdURI* base, + SerdURI* out); + +/** + Simple wrapper for serd_node_new_uri to resolve a URI string. +*/ +SERD_API +SerdNode +serd_node_new_uri_from_string(const uint8_t* str, + const SerdURI* base, + SerdURI* out); + +/** + Create a new node by serialising @a uri into a new string. + + @param uri The URI to parse and serialise. + + @param base Base URI to resolve @a uri against (or NULL for no resolution). + + @param out Set to the parsing of the new URI (i.e. points only to + memory owned by the new returned node). 
+*/ +SERD_API +SerdNode +serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out); + +/** + Free any data owned by @a node. + + Note that if @a node is itself dynamically allocated (which is not the case + for nodes created internally by serd), it will not be freed. +*/ +SERD_API +void +serd_node_free(SerdNode* node); + +/** + @} + @name Handlers + @{ +*/ + +/** + Sink (callback) for base URI changes. + + Called whenever the base URI of the serialisation changes. +*/ +typedef bool (*SerdBaseSink)(void* handle, + const SerdNode* uri); + +/** + Sink (callback) for namespace definitions. + + Called whenever a prefix is defined in the serialisation. +*/ +typedef bool (*SerdPrefixSink)(void* handle, + const SerdNode* name, + const SerdNode* uri); + +/** + Sink (callback) for statements. + + Called for every RDF statement in the serialisation. +*/ +typedef bool (*SerdStatementSink)(void* handle, + const SerdNode* graph, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* object_datatype, + const SerdNode* object_lang); + +/** + Sink (callback) for anonymous node end markers. + + This is called to indicate that the anonymous node with the given + @a value will no longer be referred to by any future statements + (i.e. the anonymous serialisation of the node is finished). +*/ +typedef bool (*SerdEndSink)(void* handle, + const SerdNode* node); + +/** + @} + @name SerdEnv + @{ +*/ + +/** + Create a new environment. +*/ +SERD_API +SerdEnv +serd_env_new(); + +/** + Free @a ns. +*/ +SERD_API +void +serd_env_free(SerdEnv env); + +/** + Add namespace @a uri to @a ns using prefix @a name. +*/ +SERD_API +void +serd_env_add(SerdEnv env, + const SerdNode* name, + const SerdNode* uri); + +/** + Qualify @a into a CURIE if possible. +*/ +SERD_API +bool +serd_env_qualify(const SerdEnv env, + const SerdNode* uri, + SerdNode* prefix, + SerdChunk* suffix); + +/** + Expand @a curie. 
+*/ +SERD_API +bool +serd_env_expand(const SerdEnv env, + const SerdNode* curie, + SerdChunk* uri_prefix, + SerdChunk* uri_suffix); + +/** + Call @a func for each prefix defined in @a env. +*/ +SERD_API +void +serd_env_foreach(const SerdEnv env, + SerdPrefixSink func, + void* handle); + +/** + @} + @name SerdReader + @{ +*/ + +/** + Create a new RDF reader. +*/ +SERD_API +SerdReader +serd_reader_new(SerdSyntax syntax, + void* handle, + SerdBaseSink base_sink, + SerdPrefixSink prefix_sink, + SerdStatementSink statement_sink, + SerdEndSink end_sink); + +/** + Set a prefix to be added to all blank node identifiers. + + This is useful when multiple files are to be parsed into the same output + (e.g. a store, or other files). Since Serd preserves blank node IDs, this + could cause conflicts where two non-equivalent blank nodes are merged, + resulting in corrupt data. By setting a unique blank node prefix for each + parsed file, this can be avoided, while preserving blank node names. +*/ +SERD_API +void +serd_reader_set_blank_prefix(SerdReader reader, + const uint8_t* prefix); + +/** + Read @a file. +*/ +SERD_API +bool +serd_reader_read_file(SerdReader reader, + FILE* file, + const uint8_t* name); + +/** + Read @a utf8. +*/ +SERD_API +bool +serd_reader_read_string(SerdReader me, const uint8_t* utf8); + +/** + Free @a reader. +*/ +SERD_API +void +serd_reader_free(SerdReader reader); + +/** + Create a new read state with the given initial base URI and environment. + + A reference to @a env will be kept, and @a env will be modified as the + state is modified. +*/ +SERD_API +SerdReadState +serd_read_state_new(SerdEnv env, + const uint8_t* base_uri_str); + +/** + Free @a state. +*/ +SERD_API +void +serd_read_state_free(SerdReadState state); + +/** + Expand @a node to a full URI. + + @param node A CURIE or URI node to expand and/or resolve. +*/ +SERD_API +SerdNode +serd_read_state_expand(SerdReadState state, + const SerdNode* node); + +/** + Get the current base URI. 
+*/ +SERD_API +SerdNode +serd_read_state_get_base_uri(SerdReadState state, + SerdURI* out); + +/** + Set the current base URI. +*/ +SERD_API +bool +serd_read_state_set_base_uri(SerdReadState state, + const SerdNode* uri_node); + +/** + Set a namespace prefix. +*/ +SERD_API +bool +serd_read_state_set_prefix(SerdReadState state, + const SerdNode* name, + const SerdNode* uri_node); + +/** + @} + @name SerdWriter + @{ +*/ + +typedef enum { + SERD_STYLE_ABBREVIATED = 1, /**< Abbreviate triples when possible. */ + SERD_STYLE_ASCII = 1 << 1, /**< Escape all non-ASCII characters. */ + SERD_STYLE_RESOLVED = 1 << 2, /**< Resolve relative URIs against base. */ + SERD_STYLE_CURIED = 1 << 3 /**< Shorted URIs into CURIEs. */ +} SerdStyle; + +/** + Create a new RDF writer. +*/ +SERD_API +SerdWriter +serd_writer_new(SerdSyntax syntax, + SerdStyle style, + SerdEnv env, + const SerdURI* base_uri, + SerdSink sink, + void* stream); + +/** + Free @a writer. +*/ +SERD_API +void +serd_writer_free(SerdWriter writer); + +/** + Set the current output base URI (and emit directive if applicable). +*/ +SERD_API +void +serd_writer_set_base_uri(SerdWriter writer, + const SerdURI* uri); + +/** + Set a namespace prefix (and emit directive if applicable). +*/ +SERD_API +bool +serd_writer_set_prefix(SerdWriter writer, + const SerdNode* name, + const SerdNode* uri); + +/** + Write a statement. +*/ +SERD_API +bool +serd_writer_write_statement(SerdWriter writer, + const SerdNode* graph, + const SerdNode* subject, + const SerdNode* predicate, + const SerdNode* object, + const SerdNode* object_datatype, + const SerdNode* object_lang); + +/** + Mark the end of an anonymous node's description. +*/ +SERD_API +bool +serd_writer_end_anon(SerdWriter writer, + const SerdNode* node); + +/** + Finish a write. 
+*/ +SERD_API +void +serd_writer_finish(SerdWriter writer); + +/** + @} + @} +*/ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* SERD_SERD_H */ diff --git a/core.lv2/wscript b/core.lv2/wscript index 358a5b0..872dfb9 100644 --- a/core.lv2/wscript +++ b/core.lv2/wscript @@ -15,12 +15,48 @@ top = '.' out = 'build' def options(opt): - opt.add_option('--bundle-only', action='store_true', default=False, dest='bundle_only', - help="Only install LV2 bundle (not header or pkg-config file)") + opt.add_option('--default-lv2-path', type='string', default='', + dest='default_lv2_path', + help="Default LV2 path to use if $LV2_PATH is unset") + opt.add_option('--bundle-only', action='store_true', default=False, + dest='bundle_only', + help="Only install LV2 bundle (not header or pkg-config file)") autowaf.set_options(opt) def configure(conf): autowaf.configure(conf) + lv2core_path_sep = ':' + lv2core_dir_sep = '/' + if sys.platform == 'win32': + lv2core_path_sep = ';' + lv2core_dir_sep = '\\' + + autowaf.define(conf, 'LV2CORE_PATH_SEP', lv2core_path_sep) + autowaf.define(conf, 'LV2CORE_DIR_SEP', lv2core_dir_sep) + + if Options.options.default_lv2_path == '': + if Options.platform == 'darwin': + Options.options.default_lv2_path = lv2core_path_sep.join([ + '~/Library/Audio/Plug-Ins/LV2', + '~/.lv2', + '/usr/local/lib/lv2', + '/usr/lib/lv2', + '/Library/Audio/Plug-Ins/LV2']) + elif Options.platform == 'haiku': + Options.options.default_lv2_path = lv2core_path_sep.join([ + '~/.lv2', + '/boot/common/add-ons/lv2']) + elif Options.platform == 'win32': + Options.options.default_lv2_path = 'C:\\Program Files\\LV2' + else: + Options.options.default_lv2_path = lv2core_path_sep.join([ + '~/.lv2', + '/usr/%s/lv2' % conf.env['LIBDIRNAME'], + '/usr/local/%s/lv2' % conf.env['LIBDIRNAME']]) + + autowaf.define(conf, 'LV2CORE_DEFAULT_LV2_PATH', Options.options.default_lv2_path) + + conf.write_config_header('lv2-config.h', remove=False) def build(bld): # Header "library" @@ -40,10 
+76,13 @@ def build(bld): # Bundle (data) bld.install_files('${LV2DIR}/lv2core.lv2', 'lv2.ttl manifest.ttl') - # lv2config - bld.install_files('${BINDIR}', 'lv2config', chmod=0755) + obj = bld(features = 'c cprogram') + obj.source = 'lv2config.c serd-0.1.0.c' + obj.target = 'lv2config' + obj.install_path = '${BINDIR}' + obj.cflags = '-std=c99' def dist(): - import Scripting - Scripting.g_gz = 'gz' + import Scripting + Scripting.g_gz = 'gz' Scripting.dist() -- cgit v1.2.1