diff options
| author | David Robillard <d@drobilla.net> | 2011-03-02 07:20:25 +0000 | 
|---|---|---|
| committer | David Robillard <d@drobilla.net> | 2011-03-02 07:20:25 +0000 | 
| commit | dc2943a4ace97bbadcc27e006b461f650e0a4cf2 (patch) | |
| tree | 6dd4a9dcd746d375cf7fd05453dd59a6a6109e04 | |
| parent | b9cbae66b9d5daede2b8f1793278b876a3455f8b (diff) | |
| download | lv2-dc2943a4ace97bbadcc27e006b461f650e0a4cf2.tar.xz | |
Port lv2config to C.
| -rwxr-xr-x | core.lv2/lv2config | 180 | ||||
| -rw-r--r-- | core.lv2/lv2config.c | 379 | ||||
| l--------- | core.lv2/lv2config.py | 1 | ||||
| -rw-r--r-- | core.lv2/serd-0.1.0.c | 2700 | ||||
| -rw-r--r-- | core.lv2/serd-0.1.0.h | 634 | ||||
| -rw-r--r-- | core.lv2/wscript | 51 | 
6 files changed, 3758 insertions, 187 deletions
| diff --git a/core.lv2/lv2config b/core.lv2/lv2config deleted file mode 100755 index 590656c..0000000 --- a/core.lv2/lv2config +++ /dev/null @@ -1,180 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -"""A program (and Python module) to generate a tree of symlinks to LV2 -extension bundles, where the path of the symlink corresponds to the URI of -the extension.  This allows including extension headers in code without using -the bundle name.  Including extension headers in this way is much better, -since there is no dependency on the (meaningless and non-persistent) bundle -name in the code using the header. - -For example, after running lv2config (and setting the compiler include -path appropriately), LV2 headers could be included like so: - -#include "lv2/lv2plug.in/ns/lv2core/lv2.h" -#include "lv2/lv2plug.in/ns/ext/event/event.h" -#include "lv2/example.org/foo/foo.h" - -Where the initial "lv2" is arbitrary; in this case lv2config's output -directory was "lv2", and that directory's parent was added to the compiler -include search path.  It is a good idea to use such a prefix directory so -domain names do not conflict with anything else in the include path. -""" - -from __future__ import print_function - -__authors__ = 'David Robillard' -__license   = 'GNU GPL v3 or later <http://www.gnu.org/licenses/gpl.html>' -__contact__ = 'devel@lists.lv2plug.in' -__date__    = '2010-10-05' - -import errno -import glob -import os -import stat -import sys - -redland = True - -try: -    import RDF # Attempt to import Redland -except: -    try: -        import rdflib # Attempt to import RDFLib -        redland = False -    except: -        print("""Failed to import `RDF' (Redland) or `rdflib'. 
-(Please install either package, likely `python-librdf' or `python-rdflib')""",file=sys.stderr) -        sys.exit(1) - -def rdf_namespace(uri): -    "Create a new RDF namespace" -    if redland: -        return RDF.NS(uri) -    else: -        return rdflib.Namespace(uri) - -def rdf_load(uri): -    "Load an RDF model" -    if redland: -        model = RDF.Model() -        parser = RDF.Parser(name="turtle") -        parser.parse_into_model(model, uri) -    else: -        model = rdflib.ConjunctiveGraph() -        model.parse(uri, format="n3") -    return model - -def rdf_find_type(model, rdf_type): -    "Return a list of the URIs of all resources in model with a given type" -    if redland: -        results = model.find_statements(RDF.Statement(None, rdf.type, rdf_type)) -        ret = [] -        for r in results: -            ret.append(str(r.subject.uri)) -        return ret -    else: -        results = model.triples([None, rdf.type, rdf_type]) -        ret = [] -        for r in results: -            ret.append(r[0]) -        return ret - -rdf = rdf_namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') -lv2 = rdf_namespace('http://lv2plug.in/ns/lv2core#') - -def lv2_path(): -    "Return the LV2 search path (LV2_PATH in the environment, or a default)." 
-    if 'LV2_PATH' in os.environ: -        return os.environ['LV2_PATH'] -    else: -        ret = '/usr/lib/lv2' + os.pathsep + '/usr/local/lib/lv2' -        print('LV2_PATH unset, using default', ret) -        return ret - -def __mkdir_p(path): -    "Equivalent of UNIX mkdir -p" -    try: -        os.makedirs(path) -    except OSError, e: -        if e.errno == errno.EEXIST: -            pass -        else: -            raise - -def build_tree(search_path, outdir): -    """Build a directory tree under outdir containing symlinks to all LV2 -    extensions found in search_path, such that the symlink paths correspond to -    the extension URIs.""" -    if os.path.basename(outdir) != 'lv2': -        print >> sys.stderr, "lv2config: output dir must be named `lv2'" -        sys.exit(1) -         -    if os.access(outdir, os.F_OK) and not os.access(outdir, os.W_OK): -        print ("lv2config: cannot build `%s': Permission denied" % outdir, file=sys.stderr) -        sys.exit(1) - -    for dir in search_path.split(os.pathsep): -        if not os.access(dir, os.F_OK): -            continue - -        print('Building includes in %s for %s/*.lv2' % (outdir, dir)) -        for bundle in glob.glob(os.path.join(dir, '*.lv2')): -            # Load manifest into model -            manifest = rdf_load('file://' + os.path.join(bundle, 'manifest.ttl')) - -            # Query extension URI -            specs = rdf_find_type(manifest, lv2.Specification) -            for ext_uri in specs: -                ext_path   = os.path.normpath(ext_uri[ext_uri.find(':') + 1:].lstrip('/')) -                ext_dir    = os.path.join(outdir, ext_path) -     -                # Make parent directories -                __mkdir_p(os.path.dirname(ext_dir)) -     -                # Remove existing symlink if necessary -                if os.access(ext_dir, os.F_OK): -                    mode = os.lstat(ext_dir)[stat.ST_MODE] -                    if stat.S_ISLNK(mode): -                        
os.remove(ext_dir) -                    else: -                        raise Exception(ext_dir + " exists and is not a link") -     -                # Make symlink to bundle directory -                os.symlink(bundle, ext_dir) -             -def __usage(): -    script = os.path.basename(sys.argv[0]) -    print("""Usage: %(script)s -    Build the default system lv2 include directories, -    /usr/include/lv2 and /usr/local/include/lv2 - -Usage: %(script)s INCLUDEDIR -    Build an lv2 include directory tree at INCLUDEDIR -    for all extensions found in $LV2_PATH. - -Usage: %(script)s BUNDLESDIR INCLUDEDIR -    Build an lv2 include directory tree at INCLUDEDIR -    for all extensions found in bundles under BUNDLESDIR. -""" % {'script' : script}) - -if __name__ == "__main__": -    args = sys.argv[1:] - -    if len(args) == 0: -        build_tree('/usr/local/lib/lv2',          '/usr/local/include/lv2') -        build_tree('/usr/lib/lv2',                '/usr/include/lv2') -        build_tree('/Library/Audio/Plug-Ins/LV2', '/Developer/Headers/lv2') - -    elif '--help' in args or '-h' in args: -        __usage() - -    elif len(args) == 1: -        build_tree(lv2_path(), args[0]) - -    elif len(args) == 2: -        build_tree(args[0], args[1]) -         -    else: -        __usage() -        sys.exit(1) diff --git a/core.lv2/lv2config.c b/core.lv2/lv2config.c new file mode 100644 index 0000000..7674c14 --- /dev/null +++ b/core.lv2/lv2config.c @@ -0,0 +1,379 @@ +/* +  Copyright 2011 David Robillard <http://drobilla.net> + +  Redistribution and use in source and binary forms, with or without +  modification, are permitted provided that the following conditions are met: + +  1. Redistributions of source code must retain the above copyright notice, +  this list of conditions and the following disclaimer. + +  2. 
Redistributions in binary form must reproduce the above copyright +  notice, this list of conditions and the following disclaimer in the +  documentation and/or other materials provided with the distribution. + +  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, +  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY +  AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +  AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +  OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +  THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#define _XOPEN_SOURCE 500 + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <dirent.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <wordexp.h> + +#include "serd-0.1.0.h" + +#include "lv2-config.h" + +#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +#define NS_LV2 "http://lv2plug.in/ns/lv2core#" + +typedef struct _Spec { +	SerdNode      uri; +	SerdNode      manifest; +} Spec; + +typedef struct { +	SerdReader     reader; +	SerdReadState  state; +	const uint8_t* current_file; +	Spec*          specs; +	size_t         n_specs; +} World; + +/* Append a discovered specification to world->specs. 
*/ +void +add_spec(World*         world, +         SerdNode*      uri, +         const uint8_t* manifest) +{ +	world->specs = realloc(world->specs, sizeof(Spec) * (world->n_specs + 1)); +	world->specs[world->n_specs].uri = *uri; +	world->specs[world->n_specs].manifest = serd_node_from_string( +		SERD_URI, (const uint8_t*)strdup((const char*)manifest)); +	++world->n_specs; +} + +/** Free world->specs. */ +void +free_specs(World* world) +{ +	for (size_t i = 0; i < world->n_specs; ++i) { +		Spec* spec = &world->specs[i]; +		serd_node_free(&spec->uri); +		serd_node_free(&spec->manifest); +	} +	free(world->specs); +	world->specs   = NULL; +	world->n_specs = 0; +} + +/** Reader @base directive handler. */ +bool +on_base(void*           handle, +        const SerdNode* uri_node) +{ +	World* const world = (World*)handle; +	return serd_read_state_set_base_uri(world->state, uri_node); +} + +/** Reader @prefix directive handler. */ +static bool +on_prefix(void*           handle, +          const SerdNode* name, +          const SerdNode* uri_node) +{ +	World* const world = (World*)handle; +	return serd_read_state_set_prefix(world->state, name, uri_node); +} + +/** Reader statement handler. 
*/ +static bool +on_statement(void*           handle, +             const SerdNode* graph, +             const SerdNode* subject, +             const SerdNode* predicate, +             const SerdNode* object, +             const SerdNode* object_datatype, +             const SerdNode* object_lang) +{ +	World*        world = (World*)handle; +	SerdReadState state = world->state; +	SerdNode      abs_s = serd_read_state_expand(state, subject); +	SerdNode      abs_p = serd_read_state_expand(state, predicate); +	SerdNode      abs_o = serd_read_state_expand(state, object); + +	if (abs_s.buf && abs_p.buf && abs_o.buf +	    && !strcmp((const char*)abs_p.buf, NS_RDF "type") +	    && !strcmp((const char*)abs_o.buf, NS_LV2 "Specification")) { +		add_spec(world, &abs_s, world->current_file); +	} else { +		serd_node_free(&abs_s); +	} +	serd_node_free(&abs_p); +	serd_node_free(&abs_o); +	return true; +} + +/** Add any specifications found in a manifest.ttl to world->specs. */ +static void +scan_manifest(World* world, const char* uri) +{ +	SerdEnv env = serd_env_new(); + +	world->state = serd_read_state_new(env, (const uint8_t*)uri); + +	const char* const path = uri + strlen("file://"); +	FILE*             fd   = fopen(path, "r"); +	if (fd) { +		world->current_file = (const uint8_t*)uri; +		if (!serd_reader_read_file(world->reader, fd, (const uint8_t*)uri)) { +			fprintf(stderr, "lv2config: error reading <%s>\n", path); +		} +		world->current_file = NULL; +		fclose(fd); +	} else { +		fprintf(stderr, "lv2config: failed to open <%s>\n", path); +	} + +	serd_read_state_free(world->state); +	serd_env_free(env); +	world->state = NULL; +} + +/** Expand variables, ~, etc. in path. */ +static char* +expand(const char* path) +{ +	char*     ret = NULL; +	wordexp_t p; + +	wordexp(path, &p, 0); +	if (p.we_wordc == 0) { +		/* Literal directory path (e.g. no variables or ~) */ +		ret = strdup(path); +	} else if (p.we_wordc == 1) { +		/* Directory path expands (e.g. 
contains ~ or $FOO) */ +		ret = strdup(p.we_wordv[0]); +	} else { +		/* Multiple expansions in a single directory path? */ +		fprintf(stderr, "lv2config: malformed path `%s' ignored\n", path); +	} + +	wordfree(&p); +	return ret; +} + +/** Scan all bundles in path (i.e. scan all path/foo.lv2/manifest.ttl). */ +void +scan_dir(World* world, const char* path) +{ +	char* full_path = expand(path); +	if (!full_path) { +		return; +	} + +	DIR* dir = opendir(full_path); +	if (!dir) { +		free(full_path); +		return; +	} + +	struct dirent* file; +	while ((file = readdir(dir))) { +		if (!strcmp(file->d_name, ".") || !strcmp(file->d_name, "..")) { +			continue; +		} + +		char* uri = malloc(strlen("file://") +		                   + strlen(full_path) + 1 +		                   + strlen(file->d_name) + 1 +		                   + strlen("manifest.ttl") + 1); + +		sprintf(uri, "file://%s/%s/manifest.ttl", +		        full_path, file->d_name); + +		scan_manifest(world, uri); +		free(uri); +	} + +	closedir(dir); +	free(full_path); +} + +/** Create all parent directories of dir_path, but not dir_path itself. */ +int +mkdir_parents(const char* dir_path) +{ +	char*        path     = strdup(dir_path); +	const size_t path_len = strlen(path); +	size_t       last_sep = 0; +	for (size_t i = 1; i <= path_len; ++i) { +		if (path[i] == LV2CORE_DIR_SEP[0]) { +			path[i] = '\0'; +			if (mkdir(path, 0755) && errno != EEXIST) { +				fprintf(stderr, "lv2config: Failed to create %s (%s)\n", +				        path, strerror(errno)); +				free(path); +				return 1; +			} +			path[i] = LV2CORE_DIR_SEP[0]; +			last_sep = i; +		} +	} + +	free(path); +	return 0; +} + +/** Return the output include dir based on path (prepend DESTDIR). 
*/ +char* +output_dir(const char* path) +{ +	char* destdir = getenv("DESTDIR"); +	if (destdir) { +		size_t len = strlen(destdir) + strlen(path); +		char*  ret = malloc(len + 1); +		snprintf(ret, len + 1, "%s%s", destdir, path); +		return ret; +	} else { +		return strdup(path); +	} +} + +/** Build an LV2 include tree in dest for all bunles in lv2_path. */ +void +build_tree(World* world, const char* lv2_path, const char* dest) +{ +	free_specs(world); + +	/* Call scan_dir for each component of lv2_path, +	   which will build world->specs (a linked list of struct Spec). +	*/ +	while (lv2_path[0] != '\0') { +		const char* const sep = strchr(lv2_path, LV2CORE_PATH_SEP[0]); +		if (sep) { +			const size_t dir_len = sep - lv2_path; +			char* const  dir     = malloc(dir_len + 1); +			memcpy(dir, lv2_path, dir_len); +			dir[dir_len] = '\0'; +			scan_dir(world, dir); +			free(dir); +			lv2_path += dir_len + 1; +		} else { +			scan_dir(world, lv2_path); +			lv2_path = "\0"; +		} +	} + +	/* TODO: Check revisions */ + +	/* Make a link in the include tree for each specification bundle. 
*/ +	for (size_t i = 0; i < world->n_specs; ++i) { +		Spec*       spec = &world->specs[i]; +		const char* path = strchr((const char*)spec->uri.buf, ':'); +		if (!path) { +			fprintf(stderr, "lv2config: Invalid URI <%s>\n", spec->uri.buf); +			continue; +		} +		for (++path; (path[0] == '/' && path[0] != '\0'); ++path) {} + +		const char* bundle_uri  = (const char*)spec->manifest.buf; +		char*       bundle_path = strdup(bundle_uri + strlen("file://")); +		char*       last_sep    = strrchr(bundle_path, LV2CORE_DIR_SEP[0]); +		if (last_sep) { +			*(last_sep + 1) = '\0'; +		} + +		char*  full_dest    = output_dir(dest); +		size_t len          = strlen(full_dest) + 1 + strlen(path); +		char*  rel_inc_path = malloc(len + 1); +		snprintf(rel_inc_path, len + 1, "%s/%s", full_dest, path); +		free(full_dest); + +		char* inc_path = expand(rel_inc_path); +		free(rel_inc_path); +		printf("%s => %s\n", inc_path, bundle_path); + +		if (!mkdir_parents(inc_path)) { +			if (!access(inc_path, F_OK) && unlink(inc_path)) { +				fprintf(stderr, "lv2config: Failed to remove %s (%s)\n", +				        inc_path, strerror(errno)); +				free(inc_path); +				free(bundle_path); +				continue; +			} + +			if (symlink(bundle_path, inc_path)) { +				fprintf(stderr, "lv2config: Failed to create link (%s)\n", +				        strerror(errno)); +			} + +			free(inc_path); +			free(bundle_path); +		} +	} +} + +int +usage(const char* name, bool error) +{ +	FILE* out = (error ? stderr : stdout); +	fprintf(out, "Usage: %s\n", name); +	fprintf(out, "Build the default system LV2 include directories.\n\n"); +	fprintf(out, "Usage: %s INCLUDE_DIR\n", name); +	fprintf(out, "Build an LV2 include directory tree at INCLUDE_DIR\n"); +	fprintf(out, "for all extensions found in $LV2_PATH.\n\n"); +	fprintf(out, "Usage: %s INCLUDE_DIR BUNDLES_DIR\n", name); +	fprintf(out, "Build an lv2 include directory tree at INCLUDE_DIR\n"); +	fprintf(out, "for all extensions found in bundles under BUNDLES_DIR.\n"); +	return (error ? 
EXIT_FAILURE : EXIT_SUCCESS); +} + +int +main(int argc, char** argv) +{ +	World world = { NULL, NULL, NULL, NULL, 0 }; +	world.reader = serd_reader_new( +		SERD_TURTLE, &world, on_base, on_prefix, on_statement, NULL); + +	if (argc == 1) { +		/* lv2_config */ +		build_tree(&world, "/usr/local/lib/lv2", "/usr/local/include/lv2"); +		build_tree(&world, "/usr/lib/lv2",       "/usr/include/lv2"); +	} else if (argv[1][0] == '-') { +		return usage(argv[0], false); +	} else if (argc == 2) { +		/* lv2_config INCLUDE_DIR */ +		const char* lv2_path = getenv("LV2_PATH"); +		if (!lv2_path) { +			lv2_path = LV2CORE_DEFAULT_LV2_PATH; +		} +		build_tree(&world, lv2_path, argv[1]); +	} else if (argc == 3) { +		/* lv2_config INCLUDE_DIR BUNDLES_DIR */ +		build_tree(&world, argv[2], argv[1]); +	} else { +		return usage(argv[0], true); +	} + +	free_specs(&world); +	serd_reader_free(world.reader); + +	return 0; +} diff --git a/core.lv2/lv2config.py b/core.lv2/lv2config.py deleted file mode 120000 index 7d77275..0000000 --- a/core.lv2/lv2config.py +++ /dev/null @@ -1 +0,0 @@ -lv2config
\ No newline at end of file diff --git a/core.lv2/serd-0.1.0.c b/core.lv2/serd-0.1.0.c new file mode 100644 index 0000000..413fedb --- /dev/null +++ b/core.lv2/serd-0.1.0.c @@ -0,0 +1,2700 @@ +/* +  Copyright 2011 David Robillard <http://drobilla.net> + +  Redistribution and use in source and binary forms, with or without +  modification, are permitted provided that the following conditions are met: + +  1. Redistributions of source code must retain the above copyright notice, +     this list of conditions and the following disclaimer. + +  2. Redistributions in binary form must reproduce the above copyright +     notice, this list of conditions and the following disclaimer in the +     documentation and/or other materials provided with the distribution. + +  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, +  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY +  AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +  AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +  OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +  THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SERD_INTERNAL_H +#define SERD_INTERNAL_H + +#include <assert.h> +#include <stdlib.h> + +#include "serd/serd.h" + +/** A dynamic stack in memory. */ +typedef struct { +	uint8_t* buf;       ///< Stack memory +	size_t   buf_size;  ///< Allocated size of buf (>= size) +	size_t   size;      ///< Conceptual size of stack in buf +} SerdStack; + +/** An offset to start the stack at. Note 0 is reserved for NULL. 
*/ +#define SERD_STACK_BOTTOM sizeof(void*) + +static inline SerdStack +serd_stack_new(size_t size) +{ +	SerdStack stack; +	stack.buf       = malloc(size); +	stack.buf_size  = size; +	stack.size      = SERD_STACK_BOTTOM; +	return stack; +} + +static inline bool +serd_stack_is_empty(SerdStack* stack) +{ +	return stack->size <= SERD_STACK_BOTTOM; +} + +static inline void +serd_stack_free(SerdStack* stack) +{ +	free(stack->buf); +	stack->buf      = NULL; +	stack->buf_size = 0; +	stack->size     = 0; +} + +static inline uint8_t* +serd_stack_push(SerdStack* stack, size_t n_bytes) +{ +	const size_t new_size = stack->size + n_bytes; +	if (stack->buf_size < new_size) { +		stack->buf_size *= 2; +		stack->buf = realloc(stack->buf, stack->buf_size); +	} +	uint8_t* const ret = (stack->buf + stack->size); +	stack->size = new_size; +	return ret; +} + +static inline void +serd_stack_pop(SerdStack* stack, size_t n_bytes) +{ +	assert(stack->size >= n_bytes); +	stack->size -= n_bytes; +} + +/** Return true if @a c lies within [min...max] (inclusive) */ +static inline bool +in_range(const uint8_t c, const uint8_t min, const uint8_t max) +{ +	return (c >= min && c <= max); +} + +/** RFC2234: ALPHA := %x41-5A / %x61-7A  ; A-Z / a-z */ +static inline bool +is_alpha(const uint8_t c) +{ +	return in_range(c, 'A', 'Z') || in_range(c, 'a', 'z'); +} + +/** RFC2234: DIGIT ::= %x30-39  ; 0-9 */ +static inline bool +is_digit(const uint8_t c) +{ +	return in_range(c, '0', '9'); +} + +/** UTF-8 strlen. + * @return Lengh of @a utf8 in characters. + * @param utf8 A null-terminated UTF-8 string. + * @param out_n_bytes (Output) Set to the size of @a utf8 in bytes. 
+ */ +static inline size_t +serd_strlen(const uint8_t* utf8, size_t* out_n_bytes) +{ +	size_t n_chars = 0; +	size_t i       = 0; +	for (; utf8[i]; ++i) { +		if ((utf8[i] & 0xC0) != 0x80) { +			// Does not start with `10', start of a new character +			++n_chars; +		} +	} +	if (out_n_bytes) { +		*out_n_bytes = i + 1; +	} +	return n_chars; +} + +#endif  // SERD_INTERNAL_H + +/** + * @file env.c + */ + +#include <assert.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + + +typedef struct { +	SerdNode name; +	SerdNode uri; +} SerdPrefix; + +struct SerdEnvImpl { +	SerdPrefix* prefixes; +	size_t      n_prefixes; +}; + +SERD_API +SerdEnv +serd_env_new() +{ +	SerdEnv env = malloc(sizeof(struct SerdEnvImpl)); +	env->prefixes   = NULL; +	env->n_prefixes = 0; +	return env; +} + +SERD_API +void +serd_env_free(SerdEnv env) +{ +	for (size_t i = 0; i < env->n_prefixes; ++i) { +		serd_node_free(&env->prefixes[i].name); +		serd_node_free(&env->prefixes[i].uri); +	} +	free(env->prefixes); +	free(env); +} + +static inline SerdPrefix* +serd_env_find(SerdEnv        env, +              const uint8_t* name, +              size_t         name_len) +{ +	for (size_t i = 0; i < env->n_prefixes; ++i) { +		const SerdNode* const prefix_name = &env->prefixes[i].name; +		if (prefix_name->n_bytes == name_len + 1) { +			if (!memcmp(prefix_name->buf, name, name_len)) { +				return &env->prefixes[i]; +			} +		} +	} +	return NULL; +} + +SERD_API +void +serd_env_add(SerdEnv         env, +             const SerdNode* name, +             const SerdNode* uri) +{ +	assert(name && uri); +	SerdPrefix* const prefix = serd_env_find(env, name->buf, name->n_chars); +	if (prefix) { +		serd_node_free(&prefix->uri); +		prefix->uri = serd_node_copy(uri); +	} else { +		env->prefixes = realloc(env->prefixes, +		                        (++env->n_prefixes) * sizeof(SerdPrefix)); +		env->prefixes[env->n_prefixes - 1].name = serd_node_copy(name); +		env->prefixes[env->n_prefixes - 1].uri  = 
serd_node_copy(uri); +	} +} + +SERD_API +bool +serd_env_qualify(const SerdEnv   env, +                 const SerdNode* uri, +                 SerdNode*       prefix_name, +                 SerdChunk*      suffix) +{ +	for (size_t i = 0; i < env->n_prefixes; ++i) { +		const SerdNode* const prefix_uri = &env->prefixes[i].uri; +		if (uri->n_bytes >= prefix_uri->n_bytes) { +			if (!strncmp((const char*)uri->buf, +			             (const char*)prefix_uri->buf, +			             prefix_uri->n_bytes - 1)) { +				*prefix_name = env->prefixes[i].name; +				suffix->buf = uri->buf + prefix_uri->n_bytes - 1; +				suffix->len = uri->n_bytes - prefix_uri->n_bytes; +				return true; +			} +		} +	} +	return false; +} + +SERD_API +bool +serd_env_expand(const SerdEnv   env, +                const SerdNode* qname, +                SerdChunk*      uri_prefix, +                SerdChunk*      uri_suffix) +{ +	const uint8_t* const colon = memchr(qname->buf, ':', qname->n_bytes); +	if (!colon) { +		return false;  // Illegal qname +	} + +	const size_t            name_len = colon - qname->buf; +	const SerdPrefix* const prefix   = serd_env_find(env, qname->buf, name_len); +	if (prefix) { +		uri_prefix->buf = prefix->uri.buf; +		uri_prefix->len = prefix->uri.n_bytes - 1; +		uri_suffix->buf = colon + 1; +		uri_suffix->len = qname->n_bytes - (colon - qname->buf) - 2; +		return true; +	} +	return false; +} + +SERD_API +void +serd_env_foreach(const SerdEnv  env, +                 SerdPrefixSink func, +                 void*          handle) +{ +	for (size_t i = 0; i < env->n_prefixes; ++i) { +		func(handle, +		     &env->prefixes[i].name, +		     &env->prefixes[i].uri); +	} +} + +/** + * @file node.c + */ + +#include <stdlib.h> +#include <string.h> + + +SERD_API +SerdNode +serd_node_from_string(SerdType type, const uint8_t* buf) +{ +	size_t buf_n_bytes; +	const size_t buf_n_chars = serd_strlen(buf, &buf_n_bytes); +	SerdNode ret = { type, buf_n_bytes, buf_n_chars, buf }; +	return ret; +} + 
+SERD_API +SerdNode +serd_node_copy(const SerdNode* node) +{ +	SerdNode copy = *node; +	uint8_t* buf  = malloc(copy.n_bytes); +	memcpy(buf, node->buf, copy.n_bytes); +	copy.buf = buf; +	return copy; +} + +static size_t +serd_uri_string_length(const SerdURI* uri) +{ +	size_t len = uri->path_base.len; + +#define ADD_LEN(field, n_delims) \ +	if ((field).len) { len += (field).len + (n_delims); } + +	ADD_LEN(uri->path,      1);  // + possible leading `/' +	ADD_LEN(uri->scheme,    1);  // + trailing `:' +	ADD_LEN(uri->authority, 2);  // + leading `//' +	ADD_LEN(uri->query,     1);  // + leading `?' +	ADD_LEN(uri->fragment,  1);  // + leading `#' + +	// Add 2 for authority // prefix (added even though authority.len = 0) +	return len + 2; // + 2 for authority // +} + +static size_t +string_sink(const void* buf, size_t len, void* stream) +{ +	uint8_t** ptr = (uint8_t**)stream; +	memcpy(*ptr, buf, len); +	*ptr += len; +	return len; +} + +SERD_API +SerdNode +serd_node_new_uri_from_node(const SerdNode* uri_node, +                            const SerdURI*  base, +                            SerdURI*        out) +{ +	return serd_node_new_uri_from_string(uri_node->buf, base, out); +} + +SERD_API +SerdNode +serd_node_new_uri_from_string(const uint8_t* str, +                              const SerdURI* base, +                              SerdURI*       out) +{ +	if (str[0] == '\0') { +		return serd_node_new_uri(base, NULL, out);  // Empty URI => Base URI +	} else { +		SerdURI uri; +		if (serd_uri_parse(str, &uri)) { +			return serd_node_new_uri(&uri, base, out);  // Resolve/Serialise +		} +	} +	return SERD_NODE_NULL; +} + +SERD_API +SerdNode +serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out) +{ +	SerdURI abs_uri = *uri; +	if (base) { +		serd_uri_resolve(uri, base, &abs_uri); +	} +		 +	const size_t len = serd_uri_string_length(&abs_uri); +	uint8_t*     buf = malloc(len + 1); + +	SerdNode node = { SERD_URI, len + 1, len, buf };  // FIXME: UTF-8 + +	uint8_t*   
  ptr        = buf; +	const size_t actual_len = serd_uri_serialise(&abs_uri, string_sink, &ptr); + +	buf[actual_len] = '\0'; +	node.n_bytes    = actual_len + 1; +	node.n_chars    = actual_len; + +	// FIXME: double parse +	if (!serd_uri_parse(buf, out)) { +		fprintf(stderr, "error parsing URI\n"); +		return SERD_NODE_NULL; +	} + +	return node; +} + +SERD_API +void +serd_node_free(SerdNode* node) +{ +	free((uint8_t*)node->buf); +} + +/** + * @file reader.c + */ + +#include <assert.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + +#define NS_XSD "http://www.w3.org/2001/XMLSchema#" +#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + +#define TRY_THROW(exp) if (!(exp)) goto except; +#define TRY_RET(exp)   if (!(exp)) return 0; + +#define STACK_PAGE_SIZE 4096 +#define READ_BUF_LEN    4096 + +typedef struct { +	const uint8_t* filename; +	unsigned       line; +	unsigned       col; +} Cursor; + +typedef uint32_t uchar; + +typedef size_t Ref; + +typedef struct { +	SerdType type; +	Ref      value; +	Ref      datatype; +	Ref      lang; +} Node; + +typedef struct { +	const Node* graph; +	const Node* subject; +	const Node* predicate; +} ReadContext; + +/** Measured UTF-8 string. 
*/ +typedef struct { +	size_t  n_bytes;  ///< Size in bytes including trailing null byte +	size_t  n_chars;  ///< Length in characters +	uint8_t buf[];    ///< Buffer +} SerdString; + +static const Node INTERNAL_NODE_NULL = { 0, 0, 0, 0 }; + +struct SerdReaderImpl { +	void*             handle; +	SerdBaseSink      base_sink; +	SerdPrefixSink    prefix_sink; +	SerdStatementSink statement_sink; +	SerdEndSink       end_sink; +	Node              rdf_type; +	Node              rdf_first; +	Node              rdf_rest; +	Node              rdf_nil; +	FILE*             fd; +	SerdStack         stack; +	Cursor            cur; +	uint8_t*          buf; +	const uint8_t*    blank_prefix; +	unsigned          next_id; +	int               err; +	uint8_t*          read_buf; +	int32_t           read_head;    ///< Offset into read_buf +	bool              from_file;    ///< True iff reading from @ref fd +	bool              eof; +#ifdef SUIL_STACK_CHECK +	Ref*              alloc_stack;  ///< Stack of push offsets +	size_t            n_allocs;     ///< Number of stack pushes +#endif +}; + +struct SerdReadStateImpl { +	SerdEnv  env; +	SerdNode base_uri_node; +	SerdURI  base_uri; +}; + +typedef enum { +	SERD_SUCCESS = 0,  ///< Completed successfully +	SERD_FAILURE = 1,  ///< Non-fatal failure +	SERD_ERROR   = 2,  ///< Fatal error +} SerdStatus; + +static inline int +error(SerdReader reader, const char* fmt, ...) 
+{ +	va_list args; +	va_start(args, fmt); +	fprintf(stderr, "error: %s:%u:%u: ", +	        reader->cur.filename, reader->cur.line, reader->cur.col); +	vfprintf(stderr, fmt, args); +	return 0; +} + +static Node +make_node(SerdType type, Ref value, Ref datatype, Ref lang) +{ +	const Node ret = { type, value, datatype, lang }; +	return ret; +} + +static inline bool +page(SerdReader reader) +{ +	assert(reader->from_file); +	reader->read_head = 0; +	const size_t n_read = fread(reader->read_buf, 1, READ_BUF_LEN, reader->fd); +	if (n_read == 0) { +		reader->read_buf[0] = '\0'; +		reader->eof = true; +		return false; +	} else if (n_read < READ_BUF_LEN) { +		reader->read_buf[n_read] = '\0'; +	} +	return true; +} + +static inline bool +peek_string(SerdReader reader, uint8_t* pre, int n) +{ +	uint8_t* ptr = reader->read_buf + reader->read_head; +	for (int i = 0; i < n; ++i) { +		if (reader->from_file && (reader->read_head + i >= READ_BUF_LEN)) { +			if (!page(reader)) { +				return false; +			} +			ptr = reader->read_buf; +			reader->read_head = -i; +			memcpy(reader->read_buf + reader->read_head, pre, i); +			assert(reader->read_buf[reader->read_head] == pre[0]); +		} +		if ((pre[i] = *ptr++) == '\0') { +			return false; +		} +	} +	return true; +} + +static inline uint8_t +peek_byte(SerdReader reader) +{ +	return reader->read_buf[reader->read_head]; +} + +static inline uint8_t +eat_byte(SerdReader reader, const uint8_t byte) +{ +	const uint8_t c = peek_byte(reader); +	++reader->read_head; +	switch (c) { +	case '\n': ++reader->cur.line; reader->cur.col = 0; break; +	default:   ++reader->cur.col; +	} + +	if (c != byte) { +		return error(reader, "expected `%c', not `%c'\n", byte, c); +	} +	if (reader->from_file && (reader->read_head == READ_BUF_LEN)) { +		TRY_RET(page(reader)); +		assert(reader->read_head < READ_BUF_LEN); +	} +	if (reader->read_buf[reader->read_head] == '\0') { +		reader->eof = true; +	} +	return c; +} + +static inline void +eat_string(SerdReader reader, const 
char* str, unsigned n) +{ +	for (unsigned i = 0; i < n; ++i) { +		eat_byte(reader, ((const uint8_t*)str)[i]); +	} +} + +#ifdef SUIL_STACK_CHECK +static inline bool +stack_is_top_string(SerdReader reader, Ref ref) +{ +	return ref == reader->alloc_stack[reader->n_allocs - 1]; +} +#endif + +static inline intptr_t +pad_size(intptr_t size) +{ +	return (size + 7) & (~7); +} + +// Make a new string from a non-UTF-8 C string (internal use only) +static Ref +push_string(SerdReader reader, const char* c_str, size_t n_bytes) +{ +	// Align strings to 64-bits (assuming malloc/realloc are aligned to 64-bits) +	const size_t stack_size = pad_size((intptr_t)reader->stack.size); +	const size_t pad        = stack_size - reader->stack.size; +	uint8_t*     mem        = serd_stack_push( +		&reader->stack, pad + sizeof(SerdString) + n_bytes) + pad; +	SerdString* const str = (SerdString*)mem; +	str->n_bytes = n_bytes; +	str->n_chars = n_bytes - 1; +	memcpy(str->buf, c_str, n_bytes); +#ifdef SUIL_STACK_CHECK +	reader->alloc_stack = realloc(reader->alloc_stack, +	                              sizeof(uint8_t*) * (++reader->n_allocs)); +	reader->alloc_stack[reader->n_allocs - 1] = (mem - reader->stack.buf); +#endif +	return (uint8_t*)str - reader->stack.buf; +} + +static inline SerdString* +deref(SerdReader reader, const Ref ref) +{ +	if (ref) { +		return (SerdString*)(reader->stack.buf + ref); +	} +	return NULL; +} + +static inline void +push_byte(SerdReader reader, Ref ref, const uint8_t c) +{ +	#ifdef SUIL_STACK_CHECK +	assert(stack_is_top_string(reader, ref)); +	#endif +	serd_stack_push(&reader->stack, 1); +	SerdString* const str = deref(reader, ref); +	++str->n_bytes; +	if ((c & 0xC0) != 0x80) { +		// Does not start with `10', start of a new character +		++str->n_chars; +	} +	assert(str->n_bytes > str->n_chars); +	str->buf[str->n_bytes - 2] = c; +	str->buf[str->n_bytes - 1] = '\0'; +} + +static void +pop_string(SerdReader reader, Ref ref) +{ +	if (ref) { +		if (ref == 
reader->rdf_nil.value +		    || ref == reader->rdf_first.value +		    || ref == reader->rdf_rest.value) { +			return; +		} +		#ifdef SUIL_STACK_CHECK +		if (!stack_is_top_string(reader, ref)) { +			fprintf(stderr, "attempt to pop non-top string %s\n", +			        deref(reader, ref)->buf); +			fprintf(stderr, "top: %s\n", +			        deref(reader, reader->alloc_stack[reader->n_allocs - 1])->buf); +		} +		assert(stack_is_top_string(reader, ref)); +		--reader->n_allocs; +		#endif +		serd_stack_pop(&reader->stack, deref(reader, ref)->n_bytes); +	} +} + +static inline SerdNode +public_node_from_ref(SerdReader reader, SerdType type, Ref ref) +{ +	if (!ref) { +		return SERD_NODE_NULL; +	} +	const SerdString* str    = deref(reader, ref); +	const SerdNode    public = { type, str->n_bytes, str->n_chars, str->buf }; +	return public; +} + +static inline SerdNode +public_node(SerdReader reader, const Node* private) +{ +	return public_node_from_ref(reader, private->type, private->value); +} + +	 +static inline bool +emit_statement(SerdReader reader, +               const Node* g, const Node* s, const Node* p, const Node* o) +{ +	assert(s->value && p->value && o->value); +	const SerdNode graph           = g ? 
public_node(reader, g) : SERD_NODE_NULL; +	const SerdNode subject         = public_node(reader, s); +	const SerdNode predicate       = public_node(reader, p); +	const SerdNode object          = public_node(reader, o); +	const SerdNode object_datatype = public_node_from_ref(reader, SERD_URI, o->datatype); +	const SerdNode object_lang     = public_node_from_ref(reader, SERD_LITERAL, o->lang); +	return reader->statement_sink(reader->handle, +	                              &graph, +	                              &subject, +	                              &predicate, +	                              &object, +	                              &object_datatype, +	                              &object_lang); +} + +static bool read_collection(SerdReader reader, ReadContext ctx, Node* dest); +static bool read_predicateObjectList(SerdReader reader, ReadContext ctx); + +// [40]	hex	::=	[#x30-#x39] | [#x41-#x46] +static inline uint8_t +read_hex(SerdReader reader) +{ +	const uint8_t c = peek_byte(reader); +	if (in_range(c, 0x30, 0x39) || in_range(c, 0x41, 0x46)) { +		return eat_byte(reader, c); +	} else { +		return error(reader, "illegal hexadecimal digit `%c'\n", c); +	} +} + +static inline bool +read_hex_escape(SerdReader reader, unsigned length, Ref dest) +{ +	uint8_t buf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; +	for (unsigned i = 0; i < length; ++i) { +		buf[i] = read_hex(reader); +	} + +	uint32_t c; +	sscanf((const char*)buf, "%X", &c); + +	unsigned size = 0; +	if (c < 0x00000080) { +		size = 1; +	} else if (c < 0x00000800) { +		size = 2; +	} else if (c < 0x00010000) { +		size = 3; +	} else if (c < 0x00200000) { +		size = 4; +	} else { +		return false; +	} + +	// Build output in buf +	// (Note # of bytes = # of leading 1 bits in first byte) +	switch (size) { +	case 4: +		buf[3] = 0x80 | (uint8_t)(c & 0x3F); +		c >>= 6; +		c |= (16 << 12);  // set bit 4 +	case 3: +		buf[2] = 0x80 | (uint8_t)(c & 0x3F); +		c >>= 6; +		c |= (32 << 6);  // set bit 5 +	case 2: +		buf[1] = 0x80 | 
(uint8_t)(c & 0x3F); +		c >>= 6; +		c |= 0xC0;  // set bits 6 and 7 +	case 1: +		buf[0] = (uint8_t)c; +	} + +	for (unsigned i = 0; i < size; ++i) { +		push_byte(reader, dest, buf[i]); +	} +	return true; +} + +static inline bool +read_character_escape(SerdReader reader, Ref dest) +{ +	switch (peek_byte(reader)) { +	case '\\': +		push_byte(reader, dest, eat_byte(reader, '\\')); +		return true; +	case 'u': +		eat_byte(reader, 'u'); +		return read_hex_escape(reader, 4, dest); +	case 'U': +		eat_byte(reader, 'U'); +		return read_hex_escape(reader, 8, dest); +	default: +		return false; +	} +} + +static inline bool +read_echaracter_escape(SerdReader reader, Ref dest) +{ +	switch (peek_byte(reader)) { +	case 't': +		eat_byte(reader, 't'); +		push_byte(reader, dest, '\t'); +		return true; +	case 'n': +		eat_byte(reader, 'n'); +		push_byte(reader, dest, '\n'); +		return true; +	case 'r': +		eat_byte(reader, 'r'); +		push_byte(reader, dest, '\r'); +		return true; +	default: +		return read_character_escape(reader, dest); +	} +} + +static inline bool +read_scharacter_escape(SerdReader reader, Ref dest) +{ +	switch (peek_byte(reader)) { +	case '"': +		push_byte(reader, dest, eat_byte(reader, '"')); +		return true; +	default: +		return read_echaracter_escape(reader, dest); +	} +} + +static inline bool +read_ucharacter_escape(SerdReader reader, Ref dest) +{ +	switch (peek_byte(reader)) { +	case '>': +		push_byte(reader, dest, eat_byte(reader, '>')); +		return true; +	default: +		return read_echaracter_escape(reader, dest); +	} +} + +// [38] character ::= '\u' hex hex hex hex +//    | '\U' hex hex hex hex hex hex hex hex +//    | '\\' +//    | [#x20-#x5B] | [#x5D-#x10FFFF] +static inline SerdStatus +read_character(SerdReader reader, Ref dest) +{ +	const uint8_t c = peek_byte(reader); +	assert(c != '\\');  // Only called from methods that handle escapes first +	switch (c) { +	case '\0': +		error(reader, "unexpected end of file\n", peek_byte(reader)); +		return SERD_ERROR; +	default: 
+		if (c < 0x20) {  // ASCII control character +			error(reader, "unexpected control character\n"); +			return SERD_ERROR; +		} else if (c <= 0x7E) {  // Printable ASCII +			push_byte(reader, dest, eat_byte(reader, c)); +			return SERD_SUCCESS; +		} else {  // Wide UTF-8 character +			unsigned size = 1; +			if ((c & 0xE0) == 0xC0) {  // Starts with `110' +				size = 2; +			} else if ((c & 0xF0) == 0xE0) {  // Starts with `1110' +				size = 3; +			} else if ((c & 0xF8) == 0xF0) {  // Starts with `11110' +				size = 4; +			} else { +				error(reader, "invalid character\n"); +				return SERD_ERROR; +			} +			for (unsigned i = 0; i < size; ++i) { +				push_byte(reader, dest, eat_byte(reader, peek_byte(reader))); +			} +			return SERD_SUCCESS; +		} +	} +} + +// [39] echaracter ::= character | '\t' | '\n' | '\r' +static inline SerdStatus +read_echaracter(SerdReader reader, Ref dest) +{ +	uint8_t c = peek_byte(reader); +	switch (c) { +	case '\\': +		eat_byte(reader, '\\'); +		if (read_echaracter_escape(reader, peek_byte(reader))) { +			return SERD_SUCCESS; +		} else { +			error(reader, "illegal escape `\\%c'\n", peek_byte(reader)); +			return SERD_ERROR; +		} +	default: +		return read_character(reader, dest); +	} +} + +// [43] lcharacter ::= echaracter | '\"' | #x9 | #xA | #xD +static inline SerdStatus +read_lcharacter(SerdReader reader, Ref dest) +{ +	const uint8_t c = peek_byte(reader); +	uint8_t       pre[3]; +	switch (c) { +	case '"': +		peek_string(reader, pre, 3); +		if (pre[1] == '\"' && pre[2] == '\"') { +			eat_byte(reader, '\"'); +			eat_byte(reader, '\"'); +			eat_byte(reader, '\"'); +			return SERD_FAILURE; +		} else { +			push_byte(reader, dest, eat_byte(reader, '"')); +			return SERD_SUCCESS; +		} +	case '\\': +		eat_byte(reader, '\\'); +		if (read_scharacter_escape(reader, dest)) { +			return SERD_SUCCESS; +		} else { +			error(reader, "illegal escape `\\%c'\n", peek_byte(reader)); +			return SERD_ERROR; +		} +	case 0x9: case 0xA: case 0xD: +		
push_byte(reader, dest, eat_byte(reader, c)); +		return SERD_SUCCESS; +	default: +		return read_echaracter(reader, dest); +	} +} + +// [42] scharacter ::= ( echaracter - #x22 ) | '\"' +static inline SerdStatus +read_scharacter(SerdReader reader, Ref dest) +{ +	uint8_t c = peek_byte(reader); +	switch (c) { +	case '\\': +		eat_byte(reader, '\\'); +		if (read_scharacter_escape(reader, dest)) { +			return SERD_SUCCESS; +		} else { +			error(reader, "illegal escape `\\%c'\n", peek_byte(reader)); +			return SERD_ERROR; +		} +	case '\"': +		return SERD_FAILURE; +	default: +		return read_character(reader, dest); +	} +} + +// Spec: [41] ucharacter ::= ( character - #x3E ) | '\>' +// Impl: [41] ucharacter ::= ( echaracter - #x3E ) | '\>' +static inline SerdStatus +read_ucharacter(SerdReader reader, Ref dest) +{ +	const uint8_t c = peek_byte(reader); +	switch (c) { +	case '\\': +		eat_byte(reader, '\\'); +		if (read_ucharacter_escape(reader, dest)) { +			return SERD_SUCCESS; +		} else { +			return error(reader, "illegal escape `\\%c'\n", peek_byte(reader)); +		} +	case '>': +		return SERD_FAILURE; +	default: +		return read_character(reader, dest); +	} +} + +// [10] comment ::= '#' ( [^#xA #xD] )* +static void +read_comment(SerdReader reader) +{ +	eat_byte(reader, '#'); +	uint8_t c; +	while (((c = peek_byte(reader)) != 0xA) && (c != 0xD)) { +		eat_byte(reader, c); +	} +} + +// [24] ws ::= #x9 | #xA | #xD | #x20 | comment +static inline bool +read_ws(SerdReader reader) +{ +	const uint8_t c = peek_byte(reader); +	switch (c) { +	case 0x9: case 0xA: case 0xD: case 0x20: +		eat_byte(reader, c); +		return true; +	case '#': +		read_comment(reader); +		return true; +	default: +		return false; +	} +} + +static inline void +read_ws_star(SerdReader reader) +{ +	while (read_ws(reader)) {} +} + +static inline bool +read_ws_plus(SerdReader reader) +{ +	TRY_RET(read_ws(reader)); +	read_ws_star(reader); +	return true; +} + +// [37] longSerdString ::= #x22 #x22 #x22 lcharacter* #x22 #x22 #x22 
+static Ref +read_longString(SerdReader reader) +{ +	eat_string(reader, "\"\"\"", 3); +	Ref        str = push_string(reader, "", 1); +	SerdStatus st; +	while (!(st = read_lcharacter(reader, str))) {} +	if (st != SERD_ERROR) { +		return str; +	} +	pop_string(reader, str); +	return 0; +} + +// [36] string ::= #x22 scharacter* #x22 +static Ref +read_string(SerdReader reader) +{ +	eat_byte(reader, '\"'); +	Ref        str = push_string(reader, "", 1); +	SerdStatus st; +	while (!(st = read_scharacter(reader, str))) {} +	if (st != SERD_ERROR) { +		eat_byte(reader, '\"'); +		return str; +	} +	pop_string(reader, str); +	return 0; +} + +// [35] quotedString ::= string | longSerdString +static Ref +read_quotedString(SerdReader reader) +{ +	uint8_t pre[3]; +	peek_string(reader, pre, 3); +	assert(pre[0] == '\"'); +	switch (pre[1]) { +	case '\"': +		if (pre[2] == '\"') +			return read_longString(reader); +		else +			return read_string(reader); +	default: +		return read_string(reader); +	} +} + +// [34] relativeURI ::= ucharacter* +static inline Ref +read_relativeURI(SerdReader reader) +{ +	Ref str = push_string(reader, "", 1); +	SerdStatus st; +	while (!(st = read_ucharacter(reader, str))) {} +	if (st != SERD_ERROR) { +		return str; +	} +	pop_string(reader, str); +	return 0; +} + +// [30] nameStartChar ::= [A-Z] | "_" | [a-z] +//    | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] +//    | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] +//    | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] +static inline uchar +read_nameStartChar(SerdReader reader, bool required) +{ +	const uint8_t c = peek_byte(reader); +	if (c == '_' || is_alpha(c)) { +		return eat_byte(reader, c); +	} else { +		if (required) { +			error(reader, "illegal character `%c'\n", c); +		} +		return 0; +	} +} + +// [31] nameChar ::= nameStartChar | '-' | [0-9] +//    | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] +static inline uchar 
+read_nameChar(SerdReader reader) +{ +	uchar c = read_nameStartChar(reader, false); +	if (c) +		return c; + +	switch ((c = peek_byte(reader))) { +	case '-': case 0xB7: case '0': case '1': case '2': case '3': case '4': +	case '5': case '6': case '7': case '8': case '9': +		return eat_byte(reader, c); +	default: +		// TODO: 0x300-0x036F | 0x203F-0x2040 +		return 0; +	} +	return 0; +} + +// [33] prefixName ::= ( nameStartChar - '_' ) nameChar* +static Ref +read_prefixName(SerdReader reader) +{ +	uint8_t c = peek_byte(reader); +	if (c == '_') { +		error(reader, "unexpected `_'\n"); +		return 0; +	} +	TRY_RET(c = read_nameStartChar(reader, false)); +	Ref str = push_string(reader, "", 1); +	push_byte(reader, str, c); +	while ((c = read_nameChar(reader)) != 0) { +		push_byte(reader, str, c); +	} +	return str; +} + +// [32] name ::= nameStartChar nameChar* +static Ref +read_name(SerdReader reader, Ref dest, bool required) +{ +	uchar c = read_nameStartChar(reader, required); +	if (!c) { +		if (required) { +			error(reader, "illegal character at start of name\n"); +		} +		return 0; +	} +	do { +		push_byte(reader, dest, c); +	} while ((c = read_nameChar(reader)) != 0); +	return dest; +} + +// [29] language ::= [a-z]+ ('-' [a-z0-9]+ )* +static Ref +read_language(SerdReader reader) +{ +	const uint8_t start = peek_byte(reader); +	if (!in_range(start, 'a', 'z')) { +		error(reader, "unexpected `%c'\n", start); +		return 0; +	} +	Ref str = push_string(reader, "", 1); +	push_byte(reader, str, eat_byte(reader, start)); +	uint8_t c; +	while ((c = peek_byte(reader)) && in_range(c, 'a', 'z')) { +		push_byte(reader, str, eat_byte(reader, c)); +	} +	if (peek_byte(reader) == '-') { +		push_byte(reader, str, eat_byte(reader, '-')); +		while ((c = peek_byte(reader)) && ( +			       in_range(c, 'a', 'z') || in_range(c, '0', '9'))) { +			push_byte(reader, str, eat_byte(reader, c)); +		} +	} +	return str; +} + +// [28] uriref ::= '<' relativeURI '>' +static Ref +read_uriref(SerdReader reader) 
+{ +	TRY_RET(eat_byte(reader, '<')); +	Ref const str = read_relativeURI(reader); +	if (str && eat_byte(reader, '>')) { +		return str; +	} +	pop_string(reader, str); +	return 0; +} + +// [27] qname ::= prefixName? ':' name? +static Ref +read_qname(SerdReader reader) +{ +	Ref prefix = read_prefixName(reader); +	if (!prefix) { +		prefix = push_string(reader, "", 1); +	} +	TRY_THROW(eat_byte(reader, ':')); +	push_byte(reader, prefix, ':'); +	Ref str = read_name(reader, prefix, false); +	return str ? str : prefix; +except: +	pop_string(reader, prefix); +	return 0; +} + +static bool +read_0_9(SerdReader reader, Ref str, bool at_least_one) +{ +	uint8_t c; +	if (at_least_one) { +		if (!is_digit((c = peek_byte(reader)))) { +			return error(reader, "expected digit\n"); +		} +		push_byte(reader, str, eat_byte(reader, c)); +	} +	while (is_digit((c = peek_byte(reader)))) { +		push_byte(reader, str, eat_byte(reader, c)); +	} +	return true; +} + +// [19] exponent ::= [eE] ('-' | '+')? [0-9]+ +// [18] decimal ::= ( '-' | '+' )? ( [0-9]+ '.' [0-9]* +//                                  | '.' ([0-9])+ +//                                  | ([0-9])+ ) +// [17] double  ::= ( '-' | '+' )? ( [0-9]+ '.' [0-9]* exponent +//                                  | '.' ([0-9])+ exponent +//                                  | ([0-9])+ exponent ) +// [16] integer ::= ( '-' | '+' ) ? [0-9]+ +static bool +read_number(SerdReader reader, Node* dest) +{ +	#define XSD_DECIMAL NS_XSD "decimal" +	#define XSD_DOUBLE  NS_XSD "double" +	#define XSD_INTEGER NS_XSD "integer" +	Ref     str         = push_string(reader, "", 1); +	uint8_t c           = peek_byte(reader); +	bool    has_decimal = false; +	Ref     datatype    = 0; +	if (c == '-' || c == '+') { +		push_byte(reader, str, eat_byte(reader, c)); +	} +	if ((c = peek_byte(reader)) == '.') { +		has_decimal = true; +		// decimal case 2 (e.g. 
'.0' or `-.0' or `+.0') +		push_byte(reader, str, eat_byte(reader, c)); +		TRY_THROW(read_0_9(reader, str, true)); +	} else { +		// all other cases ::= ( '-' | '+' ) [0-9]+ ( . )? ( [0-9]+ )? ... +		TRY_THROW(read_0_9(reader, str, true)); +		if ((c = peek_byte(reader)) == '.') { +			has_decimal = true; +			push_byte(reader, str, eat_byte(reader, c)); +			TRY_THROW(read_0_9(reader, str, false)); +		} +	} +	c = peek_byte(reader); +	if (c == 'e' || c == 'E') { +		// double +		push_byte(reader, str, eat_byte(reader, c)); +		switch ((c = peek_byte(reader))) { +		case '+': case '-': +			push_byte(reader, str, eat_byte(reader, c)); +		default: break; +		} +		read_0_9(reader, str, true); +		datatype = push_string(reader, XSD_DOUBLE, strlen(XSD_DOUBLE) + 1); +	} else if (has_decimal) { +		datatype = push_string(reader, XSD_DECIMAL, strlen(XSD_DECIMAL) + 1); +	} else { +		datatype = push_string(reader, XSD_INTEGER, strlen(XSD_INTEGER) + 1); +	} +	*dest = make_node(SERD_LITERAL, str, datatype, 0); +	assert(dest->value); +	return true; +except: +	pop_string(reader, datatype); +	pop_string(reader, str); +	return false; +} + +// [25] resource ::= uriref | qname +static bool +read_resource(SerdReader reader, Node* dest) +{ +	switch (peek_byte(reader)) { +	case '<': +		*dest = make_node(SERD_URI, read_uriref(reader), 0, 0); +		break; +	default: +		*dest = make_node(SERD_CURIE, read_qname(reader), 0, 0); +	} +	return (dest->value != 0); +} + +// [14] literal ::= quotedString ( '@' language )? | datatypeSerdString +//    | integer | double | decimal | boolean +static bool +read_literal(SerdReader reader, Node* dest) +{ +	Ref           str      = 0; +	Node          datatype = INTERNAL_NODE_NULL; +	const uint8_t c        = peek_byte(reader); +	if (c == '-' || c == '+' || c == '.' 
|| is_digit(c)) { +		return read_number(reader, dest); +	} else if (c == '\"') { +		str = read_quotedString(reader); +		if (!str) { +			return false; +		} + +		Ref lang = 0; +		switch (peek_byte(reader)) { +		case '^': +			eat_byte(reader, '^'); +			eat_byte(reader, '^'); +			TRY_THROW(read_resource(reader, &datatype)); +			break; +		case '@': +			eat_byte(reader, '@'); +			TRY_THROW(lang = read_language(reader)); +		} +		*dest = make_node(SERD_LITERAL, str, datatype.value, lang); +	} else { +		return error(reader, "Unknown literal type\n"); +	} +	return true; +except: +	pop_string(reader, str); +	return false; +} + +// [12] predicate ::= resource +static bool +read_predicate(SerdReader reader, Node* dest) +{ +	return read_resource(reader, dest); +} + +// [9] verb ::= predicate | 'a' +static bool +read_verb(SerdReader reader, Node* dest) +{ +	uint8_t pre[2]; +	peek_string(reader, pre, 2); +	switch (pre[0]) { +	case 'a': +		switch (pre[1]) { +		case 0x9: case 0xA: case 0xD: case 0x20: +			eat_byte(reader, 'a'); +			*dest = make_node(SERD_URI, +			                  push_string(reader, NS_RDF "type", 48), 0, 0); +			return true; +		default: break;  // fall through +		} +	default: +		return read_predicate(reader, dest); +	} +} + +// [26] nodeID ::= '_:' name +static Ref +read_nodeID(SerdReader reader) +{ +	eat_byte(reader, '_'); +	eat_byte(reader, ':'); +	Ref str = push_string(reader, "", 1); +	return read_name(reader, str, true); +} + +static Ref +blank_id(SerdReader reader) +{ +	const char* prefix = reader->blank_prefix +		? 
(const char*)reader->blank_prefix +		: "genid"; +	char str[32];  // FIXME: ensure length of reader->blank_prefix is OK +	const int len = snprintf(str, sizeof(str), "%s%u", +	                         prefix, reader->next_id++); +	return push_string(reader, str, len + 1); +} + +// Spec: [21] blank ::= nodeID | '[]' +//          | '[' predicateObjectList ']' | collection +// Impl: [21] blank ::= nodeID | '[ ws* ]' +//          | '[' ws* predicateObjectList ws* ']' | collection +static bool +read_blank(SerdReader reader, ReadContext ctx, Node* dest) +{ +	switch (peek_byte(reader)) { +	case '_': +		*dest = make_node(SERD_BLANK_ID, read_nodeID(reader), 0, 0); +		return true; +	case '[': +		eat_byte(reader, '['); +		read_ws_star(reader); +		if (peek_byte(reader) == ']') { +			eat_byte(reader, ']'); +			*dest = make_node(SERD_BLANK_ID, blank_id(reader), 0, 0); +			if (ctx.subject) { +				TRY_RET(emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, dest)); +			} +			return true; +		} +		*dest = make_node(SERD_ANON_BEGIN, blank_id(reader), 0, 0); +		if (ctx.subject) { +			TRY_RET(emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, dest)); +			dest->type = SERD_ANON; +		} +		ctx.subject = dest; +		read_predicateObjectList(reader, ctx); +		read_ws_star(reader); +		eat_byte(reader, ']'); +		if (reader->end_sink) { +			const SerdNode end = public_node(reader, dest); +			reader->end_sink(reader->handle, &end); +		} +		return true; +	case '(': +		if (read_collection(reader, ctx, dest)) { +			if (ctx.subject) { +				TRY_RET(emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, dest)); +			} +			return true; +		} +		return false; +	default: +		return error(reader, "illegal blank node\n"); +	} +} + +inline static bool +is_object_end(const uint8_t c) +{ +	switch (c) { +	case 0x9: case 0xA: case 0xD: case 0x20: case '\0': +	case '#': case '.': case ';': +		return true; +	default: +		return false; +	} +} + +// [13] object ::= resource | blank | literal +// 
Recurses, calling statement_sink for every statement encountered. +// Leaves stack in original calling state (i.e. pops everything it pushes). +static bool +read_object(SerdReader reader, ReadContext ctx) +{ +	static const char* const XSD_BOOLEAN     = NS_XSD "boolean"; +	static const size_t      XSD_BOOLEAN_LEN = 40; + +	uint8_t       pre[6]; +	bool          ret  = false; +	bool          emit = (ctx.subject != 0); +	Node          o    = INTERNAL_NODE_NULL; +	const uint8_t c    = peek_byte(reader); +	switch (c) { +	case '\0': +	case ')': +		return false; +	case '[': case '(': +		emit = false; +		// fall through +	case '_': +		TRY_THROW(ret = read_blank(reader, ctx, &o)); +		break; +	case '<': case ':': +		TRY_THROW(ret = read_resource(reader, &o)); +		break; +	case '\"': case '+': case '-': +	case '0': case '1': case '2': case '3': case '4': +	case '5': case '6': case '7': case '8': case '9': +		TRY_THROW(ret = read_literal(reader, &o)); +		break; +	case '.': +		TRY_THROW(ret = read_literal(reader, &o)); +		break; +	default: +		/* Either a boolean literal, or a qname. +		   Unfortunately there is no way to distinguish these without +		   readahead, since `true' or `false' could be the start of a qname. 
+		*/ +		peek_string(reader, pre, 6); +		if (!memcmp(pre, "true", 4) && is_object_end(pre[4])) { +			eat_string(reader, "true", 4); +			const Ref value     = push_string(reader, "true", 5); +			const Ref datatype  = push_string(reader, XSD_BOOLEAN, XSD_BOOLEAN_LEN + 1); +			o = make_node(SERD_LITERAL, value, datatype, 0); +		} else if (!memcmp(pre, "false", 5) && is_object_end(pre[5])) { +			eat_string(reader, "false", 5); +			const Ref value     = push_string(reader, "false", 6); +			const Ref datatype  = push_string(reader, XSD_BOOLEAN, XSD_BOOLEAN_LEN + 1); +			o = make_node(SERD_LITERAL, value, datatype, 0); +		} else if (!is_object_end(c)) { +			o = make_node(SERD_CURIE, read_qname(reader), 0, 0); +		} +		ret = o.value; +	} + +	if (ret && emit) { +		assert(o.value); +		ret = emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, &o); +	} + +except: +	pop_string(reader, o.lang); +	pop_string(reader, o.datatype); +	pop_string(reader, o.value); +	return ret; +} + +// Spec: [8] objectList ::= object ( ',' object )* +// Impl: [8] objectList ::= object ( ws* ',' ws* object )* +static bool +read_objectList(SerdReader reader, ReadContext ctx) +{ +	TRY_RET(read_object(reader, ctx)); +	read_ws_star(reader); +	while (peek_byte(reader) == ',') { +		eat_byte(reader, ','); +		read_ws_star(reader); +		TRY_RET(read_object(reader, ctx)); +		read_ws_star(reader); +	} +	return true; +} + +// Spec: [7] predicateObjectList ::= verb objectList +//                                   (';' verb objectList)* (';')? +// Impl: [7] predicateObjectList ::= verb ws+ objectList +//                                   (ws* ';' ws* verb ws+ objectList)* (';')? 
+static bool +read_predicateObjectList(SerdReader reader, ReadContext ctx) +{ +	if (reader->eof) { +		return false; +	} +	Node predicate = INTERNAL_NODE_NULL; +	TRY_RET(read_verb(reader, &predicate)); +	TRY_THROW(read_ws_plus(reader)); +	ctx.predicate = &predicate; +	TRY_THROW(read_objectList(reader, ctx)); +	pop_string(reader, predicate.value); +	predicate.value = 0; +	read_ws_star(reader); +	while (peek_byte(reader) == ';') { +		eat_byte(reader, ';'); +		read_ws_star(reader); +		switch (peek_byte(reader)) { +		case '.': case ']': +			return true; +		default: +			TRY_THROW(read_verb(reader, &predicate)); +			ctx.predicate = &predicate; +			TRY_THROW(read_ws_plus(reader)); +			TRY_THROW(read_objectList(reader, ctx)); +			pop_string(reader, predicate.value); +			predicate.value = 0; +			read_ws_star(reader); +		} +	} +	return true; +except: +	pop_string(reader, predicate.value); +	return false; +} + +/** Recursive helper for read_collection. */ +static bool +read_collection_rec(SerdReader reader, ReadContext ctx) +{ +	read_ws_star(reader); +	if (peek_byte(reader) == ')') { +		eat_byte(reader, ')'); +		TRY_RET(emit_statement(reader, NULL, ctx.subject, +		                       &reader->rdf_rest, &reader->rdf_nil)); +		return false; +	} else { +		const Node rest = make_node(SERD_BLANK_ID, blank_id(reader), 0, 0); +		TRY_RET(emit_statement(reader, ctx.graph, ctx.subject, &reader->rdf_rest, &rest)); +		ctx.subject = &rest; +		ctx.predicate = &reader->rdf_first; +		if (read_object(reader, ctx)) { +			read_collection_rec(reader, ctx); +			pop_string(reader, rest.value); +			return true; +		} else { +			pop_string(reader, rest.value); +			return false; +		} +	} +} + +// [22] itemList   ::= object+ +// [23] collection ::= '(' itemList? 
')' +static bool +read_collection(SerdReader reader, ReadContext ctx, Node* dest) +{ +	TRY_RET(eat_byte(reader, '(')); +	read_ws_star(reader); +	if (peek_byte(reader) == ')') {  // Empty collection +		eat_byte(reader, ')'); +		*dest = reader->rdf_nil; +		return true; +	} + +	*dest = make_node(SERD_BLANK_ID, blank_id(reader), 0, 0); +	ctx.subject   = dest; +	ctx.predicate = &reader->rdf_first; +	if (!read_object(reader, ctx)) { +		return error(reader, "unexpected end of collection\n"); +	} + +	ctx.subject = dest; +	return read_collection_rec(reader, ctx); +} + +// [11] subject ::= resource | blank +static Node +read_subject(SerdReader reader, ReadContext ctx) +{ +	Node    subject = INTERNAL_NODE_NULL; +	switch (peek_byte(reader)) { +	case '[': case '(': case '_': +		read_blank(reader, ctx, &subject); +		break; +	default: +		read_resource(reader, &subject); +	} +	return subject; +} + +// Spec: [6] triples ::= subject predicateObjectList +// Impl: [6] triples ::= subject ws+ predicateObjectList +static bool +read_triples(SerdReader reader, ReadContext ctx) +{ +	const Node subject = read_subject(reader, ctx); +	bool       ret     = false; +	if (subject.value != 0) { +		ctx.subject = &subject; +		TRY_RET(read_ws_plus(reader)); +		ret = read_predicateObjectList(reader, ctx); +		pop_string(reader, subject.value); +	} +	ctx.subject = ctx.predicate = 0; +	return ret; +} + +// [5] base ::= '@base' ws+ uriref +static bool +read_base(SerdReader reader) +{ +	// `@' is already eaten in read_directive +	eat_string(reader, "base", 4); +	TRY_RET(read_ws_plus(reader)); +	Ref uri; +	TRY_RET(uri = read_uriref(reader)); +	const SerdNode uri_node = public_node_from_ref(reader, SERD_URI, uri); +	reader->base_sink(reader->handle, &uri_node); +	pop_string(reader, uri); +	return true; +} + +// Spec: [4] prefixID ::= '@prefix' ws+ prefixName? ':' uriref +// Impl: [4] prefixID ::= '@prefix' ws+ prefixName? 
':' ws* uriref +static bool +read_prefixID(SerdReader reader) +{ +	// `@' is already eaten in read_directive +	eat_string(reader, "prefix", 6); +	TRY_RET(read_ws_plus(reader)); +	bool ret = false; +	Ref name = read_prefixName(reader); +	if (!name) { +		name = push_string(reader, "", 1); +	} +	TRY_THROW(eat_byte(reader, ':') == ':'); +	read_ws_star(reader); +	Ref uri = 0; +	TRY_THROW(uri = read_uriref(reader)); +	const SerdNode name_node = public_node_from_ref(reader, SERD_LITERAL, name); +	const SerdNode uri_node  = public_node_from_ref(reader, SERD_URI, uri); +	ret = reader->prefix_sink(reader->handle, &name_node, &uri_node); +	pop_string(reader, uri); +except: +	pop_string(reader, name); +	return ret; +} + +// [3] directive ::= prefixID | base +static bool +read_directive(SerdReader reader) +{ +	eat_byte(reader, '@'); +	switch (peek_byte(reader)) { +	case 'b': +		return read_base(reader); +	case 'p': +		return read_prefixID(reader); +	default: +		return error(reader, "illegal directive\n"); +	} +} + +// Spec: [1] statement ::= directive '.' | triples '.' | ws+ +// Impl: [1] statement ::= directive ws* '.' | triples ws* '.' 
| ws+ +static bool +read_statement(SerdReader reader) +{ +	ReadContext ctx = { 0, 0, 0 }; +	read_ws_star(reader); +	if (reader->eof) { +		return true; +	} +	switch (peek_byte(reader)) { +	case '@': +		TRY_RET(read_directive(reader)); +		break; +	default: +		TRY_RET(read_triples(reader, ctx)); +		break; +	} +	read_ws_star(reader); +	return eat_byte(reader, '.'); +} + +// [1] turtleDoc ::= statement +static bool +read_turtleDoc(SerdReader reader) +{ +	while (!reader->eof) { +		TRY_RET(read_statement(reader)); +	} +	return true; +} + +SERD_API +SerdReader +serd_reader_new(SerdSyntax        syntax, +                void*             handle, +                SerdBaseSink      base_sink, +                SerdPrefixSink    prefix_sink, +                SerdStatementSink statement_sink, +                SerdEndSink       end_sink) +{ +	const Cursor cur = { NULL, 0, 0 }; +	SerdReader   me  = malloc(sizeof(struct SerdReaderImpl)); +	me->handle         = handle; +	me->base_sink      = base_sink; +	me->prefix_sink    = prefix_sink; +	me->statement_sink = statement_sink; +	me->end_sink       = end_sink; +	me->fd             = 0; +	me->stack          = serd_stack_new(STACK_PAGE_SIZE); +	me->cur            = cur; +	me->blank_prefix   = NULL; +	me->next_id        = 1; +	me->read_buf       = 0; +	me->read_head      = 0; +	me->eof            = false; +#ifdef SERD_STACK_CHECK +	me->alloc_stack    = 0; +	me->n_allocs       = 0; +#endif + +#define RDF_FIRST NS_RDF "first" +#define RDF_REST  NS_RDF "rest" +#define RDF_NIL   NS_RDF "nil" +	me->rdf_first = make_node(SERD_URI, push_string(me, RDF_FIRST, 49), 0, 0); +	me->rdf_rest  = make_node(SERD_URI, push_string(me, RDF_REST, 48), 0, 0); +	me->rdf_nil   = make_node(SERD_URI, push_string(me, RDF_NIL, 47), 0, 0); + +	return me; +} + +SERD_API +void +serd_reader_free(SerdReader reader) +{ +	SerdReader const me = (SerdReader)reader; +	pop_string(me, me->rdf_nil.value); +	pop_string(me, me->rdf_rest.value); +	pop_string(me, 
me->rdf_first.value);
+
+#ifdef SERD_STACK_CHECK
+	free(me->alloc_stack);
+#endif
+	free(me->stack.buf);
+	free(me);
+}
+
+// Store @a prefix as the reader's blank prefix (the pointer is kept, not copied)
+SERD_API
+void
+serd_reader_set_blank_prefix(SerdReader     reader,
+                             const uint8_t* prefix)
+{
+	reader->blank_prefix = prefix;
+}
+
+/**
+   Read a Turtle document from @a file.
+
+   A double-page read buffer is allocated for the duration of the call and
+   released before returning.  The cursor is initialised from @a name.
+
+   @return false if the read buffer can not be allocated, true if the first
+   page() call fails, otherwise the result of read_turtleDoc().
+*/
+SERD_API
+bool
+serd_reader_read_file(SerdReader me, FILE* file, const uint8_t* name)
+{
+	const Cursor cur = { name, 1, 1 };
+
+	// Allocate first so that a failure leaves the reader state untouched
+	uint8_t* const buf = (uint8_t*)malloc(READ_BUF_LEN * 2);
+	if (!buf) {
+		return false;
+	}
+
+	me->fd        = file;
+	me->read_buf  = buf;
+	me->read_head = 0;
+	me->cur       = cur;
+	me->from_file = true;
+	me->eof       = false;
+
+	/* Read into the second page of the buffer. Occasionally peek_string
+	   will move the read_head to before this point when readahead causes
+	   a page fault.
+	*/
+	memset(me->read_buf, '\0', READ_BUF_LEN * 2);
+	me->read_buf += READ_BUF_LEN;
+
+	const bool ret = !page(me) || read_turtleDoc(me);
+
+	free(me->read_buf - READ_BUF_LEN);
+	me->fd       = 0;
+	me->read_buf = NULL;
+	return ret;
+}
+
+/**
+   Read a Turtle document from the null-terminated string @a utf8.
+
+   The string is read in place (no copy is made).
+
+   @return the result of read_turtleDoc().
+*/
+SERD_API
+bool
+serd_reader_read_string(SerdReader me, const uint8_t* utf8)
+{
+	const Cursor cur = { (const uint8_t*)"(string)", 1, 1 };
+
+	me->read_buf  = (uint8_t*)utf8;
+	me->read_head = 0;
+	me->cur       = cur;
+	me->from_file = false;
+	/* Reset EOF like serd_reader_read_file does, otherwise a reader that
+	   has already reached the end of a previous input would make
+	   read_turtleDoc return immediately without parsing anything.
+	*/
+	me->eof       = false;
+
+	const bool ret = read_turtleDoc(me);
+
+	me->read_buf = NULL;
+	return ret;
+}
+
+/**
+   Create a new read state using @a env, with @a base_uri_str as the
+   initial base URI.
+
+   @return the new state, or NULL if allocation fails.
+*/
+SERD_API
+SerdReadState
+serd_read_state_new(SerdEnv        env,
+                    const uint8_t* base_uri_str)
+{
+	SerdReadState state         = malloc(sizeof(struct SerdReadStateImpl));
+	SerdURI       base_base_uri = SERD_URI_NULL;
+	if (!state) {
+		return NULL;
+	}
+	state->env           = env;
+	state->base_uri_node = serd_node_new_uri_from_string(
+		base_uri_str, &base_base_uri, &state->base_uri);
+	return state;
+}
+
+// Free @a state and its base URI node (the environment is not freed here)
+SERD_API
+void
+serd_read_state_free(SerdReadState state)
+{
+	serd_node_free(&state->base_uri_node);
+	free(state);
+}
+
+SERD_API
+SerdNode
+serd_read_state_expand(SerdReadState   state,
+                       const SerdNode* node)
+{
+	if (node->type == SERD_CURIE) {
+		
SerdChunk prefix; +		SerdChunk suffix; +		serd_env_expand(state->env, node, &prefix, &suffix); +		SerdNode ret = { SERD_URI, +		                 prefix.len + suffix.len + 1, +		                 prefix.len + suffix.len, // FIXME: UTF-8 +		                 NULL }; +		ret.buf = malloc(ret.n_bytes); +		snprintf((char*)ret.buf, ret.n_bytes, "%s%s", prefix.buf, suffix.buf); +		return ret; +	} else if (node->type == SERD_URI) { +		SerdURI ignored; +		return serd_node_new_uri_from_node(node, &state->base_uri, &ignored); +	} else { +		return SERD_NODE_NULL; +	} +} + +SERD_API +SerdNode +serd_read_state_get_base_uri(SerdReadState state, +                             SerdURI*      out) +{ +	*out = state->base_uri; +	return state->base_uri_node; +} + +SERD_API +bool +serd_read_state_set_base_uri(SerdReadState   state, +                             const SerdNode* uri_node) +{ +	// Resolve base URI and create a new node and URI for it +	SerdURI  base_uri; +	SerdNode base_uri_node = serd_node_new_uri_from_node( +		uri_node, &state->base_uri, &base_uri); + +	if (base_uri_node.buf) { +		// Replace the current base URI +		serd_node_free(&state->base_uri_node); +		state->base_uri_node = base_uri_node; +		state->base_uri      = base_uri; +		return true; +	} +	return false; +} + +SERD_API +bool +serd_read_state_set_prefix(SerdReadState   state, +                           const SerdNode* name, +                           const SerdNode* uri_node) +{ +	if (serd_uri_string_has_scheme(uri_node->buf)) { +		// Set prefix to absolute URI +		serd_env_add(state->env, name, uri_node); +		return true; +	} else { +		// Resolve relative URI and create a new node and URI for it +		SerdURI  abs_uri; +		SerdNode abs_uri_node = serd_node_new_uri_from_node( +			uri_node, &state->base_uri, &abs_uri); + +		if (!abs_uri_node.buf) { +			return false; +		} + +		// Set prefix to resolved (absolute) URI +		serd_env_add(state->env, name, &abs_uri_node); +		serd_node_free(&abs_uri_node); +		return true; +	} +	
return false; +} + + +/** + * @file uri.c + */ + +/** @file uri.c */ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + + +// #define URI_DEBUG 1 + +SERD_API +bool +serd_uri_string_has_scheme(const uint8_t* utf8) +{ +	// RFC3986: scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) +	if (!is_alpha(utf8[0])) { +		return false;  // Invalid scheme initial character, URI is relative +	} +	for (uint8_t c = *++utf8; (c = *utf8) != '\0'; ++utf8) { +		switch (c) { +		case ':': +			return true;  // End of scheme +		case '+': case '-': case '.': +			break;  // Valid scheme character, continue +		default: +			if (!is_alpha(c) && !is_digit(c)) { +				return false;  // Invalid scheme character +			} +		} +	} + +	return false; +} + +#ifdef URI_DEBUG +static void +serd_uri_dump(const SerdURI* uri, FILE* file) +{ +#define PRINT_PART(range, name) \ +	if (range.buf) { \ +		fprintf(stderr, "  " name " = "); \ +		fwrite((range).buf, 1, (range).len, stderr); \ +		fprintf(stderr, "\n"); \ +	} + +	PRINT_PART(uri->scheme,    "scheme"); +	PRINT_PART(uri->authority, "authority"); +	PRINT_PART(uri->path_base, "path_base"); +	PRINT_PART(uri->path,      "path"); +	PRINT_PART(uri->query,     "query"); +	PRINT_PART(uri->fragment,  "fragment"); +} +#endif + +SERD_API +bool +serd_uri_parse(const uint8_t* utf8, SerdURI* uri) +{ +	*uri = SERD_URI_NULL; +	assert(uri->path_base.buf == NULL); +	assert(uri->path_base.len == 0); +	assert(uri->authority.len == 0); + +	const uint8_t* ptr = utf8; + +	/* See http://tools.ietf.org/html/rfc3986#section-3 +	   URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] +	*/ + +	/* S3.1: scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." 
) */ +	if (is_alpha(*ptr)) { +		for (uint8_t c = *++ptr; true; c = *++ptr) { +			switch (c) { +			case '\0': case '/': case '?': case '#': +				ptr = utf8; +				goto path;  // Relative URI (starts with path by definition) +			case ':': +				uri->scheme.buf = utf8; +				uri->scheme.len = (ptr++) - utf8; +				goto maybe_authority;  // URI with scheme +			case '+': case '-': case '.': +				continue; +			default: +				if (is_alpha(c) || is_digit(c)) { +					continue; +				} +			} +		} +	} + +	/* S3.2: The authority component is preceded by a double slash ("//") +	   and is terminated by the next slash ("/"), question mark ("?"), +	   or number sign ("#") character, or by the end of the URI. +	*/ +maybe_authority: +	if (*ptr == '/' && *(ptr + 1) == '/') { +		ptr += 2; +		uri->authority.buf = ptr; +		assert(uri->authority.len == 0); +		for (uint8_t c = *ptr; (c = *ptr) != '\0'; ++ptr) { +			switch (c) { +			case '/': goto path; +			case '?': goto query; +			case '#': goto fragment; +			default: +				++uri->authority.len; +			} +		} +	} + +	/* RFC3986 S3.3: The path is terminated by the first question mark ("?") +	   or number sign ("#") character, or by the end of the URI. +	*/ +path: +	switch (*ptr) { +	case '?':  goto query; +	case '#':  goto fragment; +	case '\0': goto end; +	default:  break; +	} +	uri->path.buf = ptr; +	uri->path.len = 0; +	for (uint8_t c = *ptr; (c = *ptr) != '\0'; ++ptr) { +		switch (c) { +		case '?': goto query; +		case '#': goto fragment; +		default: +			++uri->path.len; +		} +	} + +	/* RFC3986 S3.4: The query component is indicated by the first question +	   mark ("?") character and terminated by a number sign ("#") character +	   or by the end of the URI. 
+	*/ +query: +	if (*ptr == '?') { +		uri->query.buf = ++ptr; +		for (uint8_t c = *ptr; (c = *ptr) != '\0'; ++ptr) { +			switch (c) { +			case '#': +				goto fragment; +			default: +				++uri->query.len; +			} +		} +	} + +	/* RFC3986 S3.5: A fragment identifier component is indicated by the +	   presence of a number sign ("#") character and terminated by the end +	   of the URI. +	*/ +fragment: +	if (*ptr == '#') { +		uri->fragment.buf = ptr; +		while (*ptr++ != '\0') { +			++uri->fragment.len; +		} +	} + +end: +	#ifdef URI_DEBUG +	fprintf(stderr, "PARSE URI <%s>\n", utf8); +	serd_uri_dump(uri, stderr); +	fprintf(stderr, "\n"); +	#endif + +	return true; +} + +SERD_API +void +serd_uri_resolve(const SerdURI* r, const SerdURI* base, SerdURI* t) +{ +	// See http://tools.ietf.org/html/rfc3986#section-5.2.2 + +	t->path_base.buf = NULL; +	t->path_base.len = 0; +	if (r->scheme.len) { +		*t = *r; +	} else { +		if (r->authority.len) { +			t->authority = r->authority; +			t->path      = r->path; +			t->query     = r->query; +		} else { +			t->path = r->path; +			if (!r->path.len) { +				t->path_base = base->path; +				if (r->query.len) { +					t->query = r->query; +				} else { +					t->query = base->query; +				} +			} else { +				if (r->path.buf[0] != '/') { +					t->path_base = base->path; +				} +				t->query = r->query; +			} +			t->authority = base->authority; +		} +		t->scheme   = base->scheme; +		t->fragment = r->fragment; +	} + +	#ifdef URI_DEBUG +	fprintf(stderr, "RESOLVE URI\nBASE:\n"); +	serd_uri_dump(base, stderr); +	fprintf(stderr, "URI:\n"); +	serd_uri_dump(r, stderr); +	fprintf(stderr, "RESULT:\n"); +	serd_uri_dump(t, stderr); +	fprintf(stderr, "\n"); +	#endif +} + +SERD_API +size_t +serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream) +{ +	// See http://tools.ietf.org/html/rfc3986#section-5.3 + +	size_t write_size = 0; +#define WRITE(buf, len) \ +	write_size += len; \ +	if (len) { \ +		sink((const uint8_t*)buf, len, stream); \ +	} +#define 
WRITE_CHAR(c) WRITE(&(c), 1) +#define WRITE_COMPONENT(prefix, field, suffix) \ +	if ((field).len) { \ +		for (const uint8_t* c = (const uint8_t*)prefix; *c != '\0'; ++c) { \ +			WRITE(c, 1); \ +		} \ +		WRITE((field).buf, (field).len); \ +		for (const uint8_t* c = (const uint8_t*)suffix; *c != '\0'; ++c) { \ +			WRITE(c, 1); \ +		} \ +	} + +	WRITE_COMPONENT("",   uri->scheme,    ":"); +	if (uri->authority.buf) { +		WRITE("//", 2); +		WRITE(uri->authority.buf, uri->authority.len); +	} +	if (uri->path_base.len) { +		if (!uri->path.buf && (uri->fragment.buf || uri->query.buf)) { +			WRITE_COMPONENT("", uri->path_base, ""); +		} else { +			/* Merge paths, removing dot components. +			   See http://tools.ietf.org/html/rfc3986#section-5.2.3 +			*/ +			const uint8_t* begin = uri->path.buf; +			const uint8_t* end   = begin; +			size_t         up        = 1; +			if (begin) { +				// Count and skip leading dot components +				end = uri->path.buf + uri->path.len; +				for (bool done = false; !done && (begin < end);) { +					switch (begin[0]) { +					case '.': +						switch (begin[1]) { +						case '/': +							begin += 2;  // Chop leading "./" +							break; +						case '.': +							++up; +							switch (begin[2]) { +							case '/': +								begin += 3;  // Chop lading "../" +								break; +							default: +								begin += 2;  // Chop leading ".." +							} +							break; +						default: +							++begin;  // Chop leading "." 
+						} +						break; +					case '/': +						if (begin[1] == '/') { +							++begin;  // Replace leading "//" with "/" +							break; +						}  // else fall through +					default: +						done = true;  // Finished chopping dot components +					} +				} + +				if (uri->path.buf && uri->path_base.buf) { +					// Find the up'th last slash +					const uint8_t* base_last = uri->path_base.buf + uri->path_base.len - 1; +					do { +						if (*base_last == '/') { +							--up; +						} +					} while (up > 0 && (--base_last > uri->path_base.buf)); + +					// Write base URI prefix +					const size_t base_len = base_last - uri->path_base.buf + 1; +					WRITE(uri->path_base.buf, base_len); + +				} else { +					// Relative path is just query or fragment, append it to full base URI +					WRITE_COMPONENT("", uri->path_base, ""); +				} + +				// Write URI suffix +				WRITE(begin, end - begin); +			} +		} +	} else { +		WRITE_COMPONENT("", uri->path, ""); +	} +	WRITE_COMPONENT("?", uri->query, ""); +	if (uri->fragment.buf) { +		// Note uri->fragment.buf includes the leading `#' +		WRITE_COMPONENT("", uri->fragment, ""); +	} +	return write_size; +} + +/** + * @file writer.c + */ + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + +#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +#define NS_XSD "http://www.w3.org/2001/XMLSchema#" + +typedef struct { +	SerdNode graph; +	SerdNode subject; +	SerdNode predicate; +} WriteContext; + +static const WriteContext WRITE_CONTEXT_NULL = { +	{ 0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0} +}; + +struct SerdWriterImpl { +	SerdSyntax   syntax; +	SerdStyle    style; +	SerdEnv      env; +	SerdURI      base_uri; +	SerdStack    anon_stack; +	SerdSink     sink; +	void*        stream; +	WriteContext context; +	unsigned     indent; +}; + +typedef enum { +	WRITE_NORMAL, +	WRITE_URI, +	WRITE_STRING +} TextContext; + +static inline WriteContext* +anon_stack_top(SerdWriter writer) +{ +	
assert(!serd_stack_is_empty(&writer->anon_stack)); +	return (WriteContext*)(writer->anon_stack.buf +	                       + writer->anon_stack.size - sizeof(WriteContext)); +} + +static bool +write_text(SerdWriter writer, TextContext ctx, +           const uint8_t* utf8, size_t n_bytes, uint8_t terminator) +{ +	char escape[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; +	for (size_t i = 0; i < n_bytes;) { +		uint8_t in = utf8[i++]; +		switch (in) { +		case '\\': writer->sink("\\\\", 2, writer->stream); continue; +		case '\n': writer->sink("\\n", 2, writer->stream);  continue; +		case '\r': writer->sink("\\r", 2, writer->stream);  continue; +		case '\t': writer->sink("\\t", 2, writer->stream);  continue; +		case '"': +			if (terminator == '"') { +				writer->sink("\\\"", 2, writer->stream); +				continue; +			}  // else fall-through +		default: break; +		} + +		if (in == terminator) { +			snprintf(escape, 7, "\\u%04X", terminator); +			writer->sink(escape, 6, writer->stream); +			continue; +		} + +		uint32_t c    = 0; +		size_t   size = 0; +		if ((in & 0x80) == 0) {  // Starts with `0' +			size = 1; +			c = in & 0x7F; +			if (in_range(in, 0x20, 0x7E)) {  // Printable ASCII +				writer->sink(&in, 1, writer->stream); +				continue; +			} +		} else if ((in & 0xE0) == 0xC0) {  // Starts with `110' +			size = 2; +			c = in & 0x1F; +		} else if ((in & 0xF0) == 0xE0) {  // Starts with `1110' +			size = 3; +			c = in & 0x0F; +		} else if ((in & 0xF8) == 0xF0) {  // Starts with `11110' +			size = 4; +			c = in & 0x07; +		} else { +			fprintf(stderr, "invalid UTF-8 at offset %zu: %X\n", i, in); +			return false; +		} + +		if (ctx == WRITE_STRING && !(writer->style & SERD_STYLE_ASCII)) { +			// Write UTF-8 character directly to UTF-8 output +			// TODO: Scan to next escape and write entire range at once +			writer->sink(utf8 + i - 1, size, writer->stream); +			i += size - 1; +			continue; +		} + +#define READ_BYTE() do { \ +			assert(i < n_bytes); \ +			in = utf8[i++] & 0x3f; \ +			
c <<= 6; \ +			c |= in; \ +		} while (0) + +		switch (size) { +		case 4: READ_BYTE(); +		case 3: READ_BYTE(); +		case 2: READ_BYTE(); +		} + +		if (c < 0xFFFF) { +			snprintf(escape, 7, "\\u%04X", c); +			writer->sink(escape, 6, writer->stream); +		} else { +			snprintf(escape, 11, "\\U%08X", c); +			writer->sink(escape, 10, writer->stream); +		} +	} +	return true; +} + +static void +serd_writer_write_delim(SerdWriter writer, const uint8_t delim) +{ +	switch (delim) { +	case '\n': +		break; +	default: +		writer->sink(" ", 1, writer->stream); +	case '[': +		writer->sink(&delim, 1, writer->stream); +	} +	writer->sink("\n", 1, writer->stream); +	for (unsigned i = 0; i < writer->indent; ++i) { +		writer->sink("\t", 1, writer->stream); +	} +} + +static bool +write_node(SerdWriter      writer, +           const SerdNode* node, +           const SerdNode* datatype, +           const SerdNode* lang) +{ +	SerdChunk uri_prefix; +	SerdChunk uri_suffix; +	switch (node->type) { +	case SERD_NOTHING: +		return false; +	case SERD_ANON_BEGIN: +		if (writer->syntax != SERD_NTRIPLES) { +			++writer->indent; +			serd_writer_write_delim(writer, '['); +			WriteContext* ctx = (WriteContext*)serd_stack_push( +				&writer->anon_stack, sizeof(WriteContext)); +			*ctx = writer->context; +			writer->context.subject   = *node; +			writer->context.predicate = SERD_NODE_NULL; +			break; +		} +	case SERD_ANON: +		if (writer->syntax != SERD_NTRIPLES) { +			break; +		}  // else fall through +	case SERD_BLANK_ID: +		writer->sink("_:", 2, writer->stream); +		writer->sink(node->buf, node->n_bytes - 1, writer->stream); +		break; +	case SERD_CURIE: +		switch (writer->syntax) { +		case SERD_NTRIPLES: +			if (!serd_env_expand(writer->env, node, &uri_prefix, &uri_suffix)) { +				fprintf(stderr, "error: undefined namespace prefix `%s'\n", node->buf); +				return false; +			} +			writer->sink("<", 1, writer->stream); +			write_text(writer, WRITE_URI, uri_prefix.buf, uri_prefix.len, '>'); +			
write_text(writer, WRITE_URI, uri_suffix.buf, uri_suffix.len, '>'); +			writer->sink(">", 1, writer->stream); +			break; +		case SERD_TURTLE: +			writer->sink(node->buf, node->n_bytes - 1, writer->stream); +		} +		break; +	case SERD_LITERAL: +		if (writer->syntax == SERD_TURTLE && datatype && datatype->buf) { +			// TODO: compare against NS_XSD prefix once +			if (!strcmp((const char*)datatype->buf,    NS_XSD "boolean") +			    || !strcmp((const char*)datatype->buf, NS_XSD "decimal") +			    || !strcmp((const char*)datatype->buf, NS_XSD "integer")) { +				writer->sink(node->buf, node->n_bytes - 1, writer->stream); +				break; +			} +		} +		writer->sink("\"", 1, writer->stream); +		write_text(writer, WRITE_STRING, node->buf, node->n_bytes - 1, '"'); +		writer->sink("\"", 1, writer->stream); +		if (lang && lang->buf) { +			writer->sink("@", 1, writer->stream); +			writer->sink(lang->buf, lang->n_bytes - 1, writer->stream); +		} else if (datatype && datatype->buf) { +			writer->sink("^^", 2, writer->stream); +			write_node(writer, datatype, NULL, NULL); +		} +		break; +	case SERD_URI: +		if ((writer->syntax == SERD_TURTLE) +		    && !strcmp((const char*)node->buf, NS_RDF "type")) { +			writer->sink("a", 1, writer->stream); +			return true; +		} else if ((writer->style & SERD_STYLE_CURIED) +		           && serd_uri_string_has_scheme(node->buf)) { +			SerdNode  prefix; +			SerdChunk suffix; +			if (serd_env_qualify(writer->env, node, &prefix, &suffix)) { +				write_text(writer, WRITE_URI, prefix.buf, prefix.n_bytes - 1, '>'); +				writer->sink(":", 1, writer->stream); +				write_text(writer, WRITE_URI, suffix.buf, suffix.len, '>'); +				return true; +			} +		} else if ((writer->style & SERD_STYLE_RESOLVED) +		           && !serd_uri_string_has_scheme(node->buf)) { +			SerdURI uri; +			if (serd_uri_parse(node->buf, &uri)) { +				SerdURI abs_uri; +				serd_uri_resolve(&uri, &writer->base_uri, &abs_uri); +				writer->sink("<", 1, writer->stream); +				
serd_uri_serialise(&abs_uri, writer->sink, writer->stream); +				writer->sink(">", 1, writer->stream); +				return true; +			} +		} +		writer->sink("<", 1, writer->stream); +		write_text(writer, WRITE_URI, node->buf, node->n_bytes - 1, '>'); +		writer->sink(">", 1, writer->stream); +		return true; +	} +	return true; +} + +SERD_API +bool +serd_writer_write_statement(SerdWriter      writer, +                            const SerdNode* graph, +                            const SerdNode* subject, +                            const SerdNode* predicate, +                            const SerdNode* object, +                            const SerdNode* object_datatype, +                            const SerdNode* object_lang) +{ +	assert(subject && predicate && object); +	switch (writer->syntax) { +	case SERD_NTRIPLES: +		write_node(writer, subject, NULL, NULL); +		writer->sink(" ", 1, writer->stream); +		write_node(writer, predicate, NULL, NULL); +		writer->sink(" ", 1, writer->stream); +		if (!write_node(writer, object, object_datatype, object_lang)) { +			return false; +		} +		writer->sink(" .\n", 3, writer->stream); +		return true; +	case SERD_TURTLE: +		break; +	} +	if (subject->buf == writer->context.subject.buf) { +		if (predicate->buf == writer->context.predicate.buf) {  // Abbreviate S P +			++writer->indent; +			serd_writer_write_delim(writer, ','); +			write_node(writer, object, object_datatype, object_lang); +			--writer->indent; +		} else {  // Abbreviate S +			if (writer->context.predicate.buf) { +				serd_writer_write_delim(writer, ';'); +			} else { +				++writer->indent; +				serd_writer_write_delim(writer, '\n'); +			} +			write_node(writer, predicate, NULL, NULL); +			writer->context.predicate = *predicate; +			writer->sink(" ", 1, writer->stream); +			write_node(writer, object, object_datatype, object_lang); +		} +	} else { +		if (writer->context.subject.buf) { +			if (writer->indent > 0) { +				--writer->indent; +			} +			if 
(serd_stack_is_empty(&writer->anon_stack)) { +				serd_writer_write_delim(writer, '.'); +				serd_writer_write_delim(writer, '\n'); +			} +		} + +		if (subject->type == SERD_ANON_BEGIN) { +			writer->sink("[ ", 2, writer->stream); +			++writer->indent; +			WriteContext* ctx = (WriteContext*)serd_stack_push( +				&writer->anon_stack, sizeof(WriteContext)); +			*ctx = writer->context; +		} else { +			write_node(writer, subject, NULL, NULL); +			++writer->indent; +			if (subject->type != SERD_ANON_BEGIN && subject->type != SERD_ANON) { +				serd_writer_write_delim(writer, '\n'); +			} +		} + +		writer->context.subject   = *subject; +		writer->context.predicate = SERD_NODE_NULL; + +		write_node(writer, predicate, NULL, NULL); +		writer->context.predicate = *predicate; +		writer->sink(" ", 1, writer->stream); + +		write_node(writer, object, object_datatype, object_lang); +	} + +	const WriteContext new_context = { graph ? *graph : SERD_NODE_NULL, +	                                   *subject, +	                                   *predicate }; +	writer->context = new_context; +	return true; +} + +SERD_API +bool +serd_writer_end_anon(SerdWriter      writer, +                     const SerdNode* node) +{ +	if (writer->syntax == SERD_NTRIPLES) { +		return true; +	} +	if (serd_stack_is_empty(&writer->anon_stack)) { +		fprintf(stderr, "unexpected end of anonymous node\n"); +		return false; +	} +	assert(writer->indent > 0); +	--writer->indent; +	serd_writer_write_delim(writer, '\n'); +	writer->sink("]", 1, writer->stream); +	writer->context = *anon_stack_top(writer); +	serd_stack_pop(&writer->anon_stack, sizeof(WriteContext)); +	if (!writer->context.subject.buf) {  // End of anonymous subject +		writer->context.subject = *node; +	} +	return true; +} + +SERD_API +void +serd_writer_finish(SerdWriter writer) +{ +	if (writer->context.subject.buf) { +		writer->sink(" .\n", 3, writer->stream); +		writer->context.subject.buf = NULL; +	} +} + +SERD_API +SerdWriter 
+serd_writer_new(SerdSyntax     syntax, +                SerdStyle      style, +                SerdEnv        env, +                const SerdURI* base_uri, +                SerdSink       sink, +                void*          stream) +{ +	const WriteContext context = WRITE_CONTEXT_NULL; +	SerdWriter         writer  = malloc(sizeof(struct SerdWriterImpl)); +	writer->syntax     = syntax; +	writer->style      = style; +	writer->env        = env; +	writer->base_uri   = base_uri ? *base_uri : SERD_URI_NULL; +	writer->anon_stack = serd_stack_new(sizeof(WriteContext)); +	writer->sink       = sink; +	writer->stream     = stream; +	writer->context    = context; +	writer->indent     = 0; +	return writer; +} + +SERD_API +void +serd_writer_set_base_uri(SerdWriter     writer, +                         const SerdURI* uri) +{ +	writer->base_uri = *uri; +	if (writer->syntax != SERD_NTRIPLES) { +		if (writer->context.graph.buf || writer->context.subject.buf) { +			writer->sink(" .\n\n", 4, writer->stream); +			writer->context = WRITE_CONTEXT_NULL; +		} +		writer->sink("@base <", 7, writer->stream); +		serd_uri_serialise(uri, writer->sink, writer->stream); +		writer->sink("> .\n", 4, writer->stream); +	} +	writer->context = WRITE_CONTEXT_NULL; +} + +SERD_API +bool +serd_writer_set_prefix(SerdWriter      writer, +                       const SerdNode* name, +                       const SerdNode* uri) +{ +	if (writer->syntax != SERD_NTRIPLES) { +		if (writer->context.graph.buf || writer->context.subject.buf) { +			writer->sink(" .\n\n", 4, writer->stream); +			writer->context = WRITE_CONTEXT_NULL; +		} +		writer->sink("@prefix ", 8, writer->stream); +		writer->sink(name->buf, name->n_bytes - 1, writer->stream); +		writer->sink(": <", 3, writer->stream); +		write_text(writer, WRITE_URI, uri->buf, uri->n_bytes - 1, '>'); +		writer->sink("> .\n", 4, writer->stream); +	} +	writer->context = WRITE_CONTEXT_NULL; +	return true; +} + +SERD_API +void +serd_writer_free(SerdWriter writer) +{ 
+	SerdWriter const me = (SerdWriter)writer; +	serd_writer_finish(me); +	serd_stack_free(&writer->anon_stack); +	free(me); +} diff --git a/core.lv2/serd-0.1.0.h b/core.lv2/serd-0.1.0.h new file mode 100644 index 0000000..56511b1 --- /dev/null +++ b/core.lv2/serd-0.1.0.h @@ -0,0 +1,634 @@ +/* +  Copyright 2011 David Robillard <http://drobilla.net> + +  Redistribution and use in source and binary forms, with or without +  modification, are permitted provided that the following conditions are met: + +  1. Redistributions of source code must retain the above copyright notice, +     this list of conditions and the following disclaimer. + +  2. Redistributions in binary form must reproduce the above copyright +     notice, this list of conditions and the following disclaimer in the +     documentation and/or other materials provided with the distribution. + +  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, +  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY +  AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +  AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +  OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +  THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/** +   @file serd.h API for Serd, a lightweight RDF syntax library. 
+*/ + +#ifndef SERD_SERD_H +#define SERD_SERD_H + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> + +#ifdef SERD_SHARED +	#if defined _WIN32 || defined __CYGWIN__ +		#define SERD_LIB_IMPORT __declspec(dllimport) +		#define SERD_LIB_EXPORT __declspec(dllexport) +	#else +		#define SERD_LIB_IMPORT __attribute__ ((visibility("default"))) +		#define SERD_LIB_EXPORT __attribute__ ((visibility("default"))) +	#endif +	#ifdef SERD_INTERNAL +		#define SERD_API SERD_LIB_EXPORT +	#else +		#define SERD_API SERD_LIB_IMPORT +	#endif +#else +	#define SERD_API +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** +   @defgroup serd Serd +   A lightweight RDF syntax library. +   @{ +*/ + +/** +   Environment (namespace prefixes). + +   A SerdEnv represents a set of namespace prefixes, and is used to resolve +   CURIEs to full URIs. +*/ +typedef struct SerdEnvImpl* SerdEnv; + +/** +   RDF reader. + +   A SerdReader parses RDF by reading some syntax and calling user-provided +   sink functions as input is read (much like an XML SAX parser). +*/ +typedef struct SerdReaderImpl* SerdReader; + +/** +   Read state. + +   This represents state (context) necessary for fully resolving URIs during a +   read (i.e. the base URI and namespace prefixes). It is implemented +   separately from SerdReader so the reader can avoid the overhead in cases +   where this information is unnecessary (e.g. streaming reserialisation). +*/ +typedef struct SerdReadStateImpl* SerdReadState; + +/** +   RDF writer. + +   A SerdWriter provides a number of functions to allow writing RDF syntax out +   to some stream. These functions are deliberately compatible with the sink +   functions used by SerdReader, so a reader can be directly connected to a +   writer to re-serialise a document. +*/ +typedef struct SerdWriterImpl* SerdWriter; + +/** +   RDF syntax type. +*/ +typedef enum { +	/** +	   Turtle - Terse RDF Triple Language (UTF-8). 
+	   @see <a href="http://www.w3.org/TeamSubmission/turtle/">Turtle</a> +	*/ +	SERD_TURTLE = 1, + +	/** +	   NTriples - Line-based RDF triples (ASCII). +	   @see <a href="http://www.w3.org/TR/rdf-testcases#ntriples">NTriples</a> +	*/ +	SERD_NTRIPLES = 2 +} SerdSyntax; + +/** +   Type of a syntactic RDF node. + +   This is more precise than the type of an abstract RDF node. An abstract node +   is either a resource, literal, or blank. In syntax there are two ways to +   refer to both a resource (by URI or CURIE) and a blank (by ID or +   anonymously). + +   Serd represents all nodes as an unquoted UTF-8 string "value" associated +   with a @ref SerdType, which is precise enough to preserve the syntactic +   information required for streaming abbreviation. A non-abbreviating sink may +   simply consider @ref SERD_ANON_BEGIN and @ref SERD_ANON equivalent to +   @ref SERD_BLANK_ID. +*/ +typedef enum { +	/** +	   The type of a nonexistent node. + +	   This type is occasionally useful, but is never emitted by the reader. +	*/ +	SERD_NOTHING = 0, + +	/** +	   Literal value. + +	   A literal optionally has either an associated language, or an associated +	   datatype (not both). +	*/ +	SERD_LITERAL = 1, + +	/** +	   URI (absolute or relative). + +	   Value is an unquoted URI string, which is either a relative reference +	   with respect to the current base URI, or an absolute URI. A URI is an ID +	   with universal scope. +	   @see <a href="http://tools.ietf.org/html/rfc3986">RFC3986</a>. +	*/ +	SERD_URI = 2, + +	/** +	   CURIE, a shortened URI. + +	   Value is an unquoted CURIE string relative to the current environment, +	   e.g. "rdf:type". +	   @see <a href="http://www.w3.org/TR/curie">CURIE Syntax 1.0</a> +	*/ +	SERD_CURIE = 3, + +	/** +	   A blank node ID. + +	   Value is a blank node ID, e.g. "id3", which is valid only in this +	   serialisation. 
+	   @see <a href="http://www.w3.org/TeamSubmission/turtle#nodeID">Turtle +	   <tt>nodeID</tt></a> +	*/ +	SERD_BLANK_ID = 4, + +	/** +	   The first reference to an anonymous (inlined) blank node. + +	   Value is identical to a @ref SERD_BLANK_ID value (i.e. this type may be +	   safely considered equivalent to @ref SERD_BLANK_ID). +	*/ +	SERD_ANON_BEGIN = 5, + +	/** +	   An anonymous blank node. + +	   Value is identical to a @ref SERD_BLANK_ID value (i.e. this type may be +	   safely considered equivalent to @ref SERD_BLANK_ID). +	*/ +	SERD_ANON = 6 +} SerdType; + +/** +   @name SerdURI +   @{ +*/ + +/** +   An unterminated string fragment. +*/ +typedef struct { +	const uint8_t* buf;  /**< Start of chunk */ +	size_t         len;  /**< Length of chunk in bytes */ +} SerdChunk; + +/** +   A parsed URI. + +   This struct directly refers to chunks in other strings, it does not own any +   memory itself. Thus, URIs can be parsed and/or resolved against a base URI +   in-place without allocating memory. +*/ +typedef struct { +	SerdChunk scheme;     /**< Scheme */ +	SerdChunk authority;  /**< Authority */ +	SerdChunk path_base;  /**< Path prefix if relative */ +	SerdChunk path;       /**< Path suffix */ +	SerdChunk query;      /**< Query */ +	SerdChunk fragment;   /**< Fragment */ +} SerdURI; + +static const SerdURI SERD_URI_NULL = {{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}}; + +/** +   Return true iff @a utf8 starts with a valid URI scheme. +*/ +SERD_API +bool +serd_uri_string_has_scheme(const uint8_t* utf8); + +/** +   Parse @a utf8, writing result to @a out. +*/ +SERD_API +bool +serd_uri_parse(const uint8_t* utf8, SerdURI* out); + +/** +   Set @a out to @a uri resolved against @a base. +*/ +SERD_API +void +serd_uri_resolve(const SerdURI* uri, const SerdURI* base, SerdURI* out); + +/** +   Sink function for raw string output. +*/ +typedef size_t (*SerdSink)(const void* buf, size_t len, void* stream); + +/** +   Serialise @a uri with a series of calls to @a sink. 
+*/ +SERD_API +size_t +serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream); + +/** +   @} +   @name SerdNode +   @{ +*/ + +/** +   A syntactic RDF node. +*/ +typedef struct { +	SerdType       type; +	size_t         n_bytes;  /**< Size in bytes (including null) */ +	size_t         n_chars;  /**< Length in characters */ +	const uint8_t* buf;      /**< Buffer */ +} SerdNode; + +static const SerdNode SERD_NODE_NULL = { SERD_NOTHING, 0, 0, 0 }; + +/** +   Make a (shallow) node from @a str. + +   This measures, but does not copy, @a str.  No memory is allocated. +*/ +SERD_API +SerdNode +serd_node_from_string(SerdType type, const uint8_t* str); + +/** +   Make a deep copy of @a node. + +   @return a node that the caller must free with @ref serd_node_free. +*/ +SERD_API +SerdNode +serd_node_copy(const SerdNode* node); + +/** +   Simple wrapper for serd_node_new_uri to resolve a URI node. +*/ +SERD_API +SerdNode +serd_node_new_uri_from_node(const SerdNode* uri_node, +                            const SerdURI*  base, +                            SerdURI*        out); + +/** +   Simple wrapper for serd_node_new_uri to resolve a URI string. +*/ +SERD_API +SerdNode +serd_node_new_uri_from_string(const uint8_t* str, +                              const SerdURI* base, +                              SerdURI*       out); + +/** +   Create a new node by serialising @a uri into a new string. + +   @param uri The URI to parse and serialise. + +   @param base Base URI to resolve @a uri against (or NULL for no resolution). + +   @param out Set to the parsing of the new URI (i.e. points only to +   memory owned by the new returned node). +*/ +SERD_API +SerdNode +serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out); + +/** +   Free any data owned by @a node. + +   Note that if @a node is itself dynamically allocated (which is not the case +   for nodes created internally by serd), it will not be freed. 
+*/ +SERD_API +void +serd_node_free(SerdNode* node); + +/** +   @} +   @name Handlers +   @{ +*/ + +/** +   Sink (callback) for base URI changes. + +   Called whenever the base URI of the serialisation changes. +*/ +typedef bool (*SerdBaseSink)(void*           handle, +                             const SerdNode* uri); + +/** +   Sink (callback) for namespace definitions. + +   Called whenever a prefix is defined in the serialisation. +*/ +typedef bool (*SerdPrefixSink)(void*           handle, +                               const SerdNode* name, +                               const SerdNode* uri); + +/** +   Sink (callback) for statements. + +   Called for every RDF statement in the serialisation. +*/ +typedef bool (*SerdStatementSink)(void*           handle, +                                  const SerdNode* graph, +                                  const SerdNode* subject, +                                  const SerdNode* predicate, +                                  const SerdNode* object, +                                  const SerdNode* object_datatype, +                                  const SerdNode* object_lang); + +/** +   Sink (callback) for anonymous node end markers. + +   This is called to indicate that the anonymous node with the given +   @a value will no longer be referred to by any future statements +   (i.e. the anonymous serialisation of the node is finished). +*/ +typedef bool (*SerdEndSink)(void*           handle, +                            const SerdNode* node); + +/** +   @} +   @name SerdEnv +   @{ +*/ + +/** +   Create a new environment. +*/ +SERD_API +SerdEnv +serd_env_new(); + +/** +   Free @a env. +*/ +SERD_API +void +serd_env_free(SerdEnv env); + +/** +   Add namespace @a uri to @a env using prefix @a name. +*/ +SERD_API +void +serd_env_add(SerdEnv         env, +             const SerdNode* name, +             const SerdNode* uri); + +/** +   Qualify @a uri into a CURIE if possible. 
+*/ +SERD_API +bool +serd_env_qualify(const SerdEnv   env, +                 const SerdNode* uri, +                 SerdNode*       prefix, +                 SerdChunk*      suffix); + +/** +   Expand @a curie. +*/ +SERD_API +bool +serd_env_expand(const SerdEnv   env, +                const SerdNode* curie, +                SerdChunk*      uri_prefix, +                SerdChunk*      uri_suffix); + +/** +   Call @a func for each prefix defined in @a env. +*/ +SERD_API +void +serd_env_foreach(const SerdEnv  env, +                 SerdPrefixSink func, +                 void*          handle); + +/** +   @} +   @name SerdReader +   @{ +*/ + +/** +   Create a new RDF reader. +*/ +SERD_API +SerdReader +serd_reader_new(SerdSyntax        syntax, +                void*             handle, +                SerdBaseSink      base_sink, +                SerdPrefixSink    prefix_sink, +                SerdStatementSink statement_sink, +                SerdEndSink       end_sink); + +/** +   Set a prefix to be added to all blank node identifiers. + +   This is useful when multiple files are to be parsed into the same output +   (e.g. a store, or other files).  Since Serd preserves blank node IDs, this +   could cause conflicts where two non-equivalent blank nodes are merged, +   resulting in corrupt data.  By setting a unique blank node prefix for each +   parsed file, this can be avoided, while preserving blank node names. +*/ +SERD_API +void +serd_reader_set_blank_prefix(SerdReader     reader, +                             const uint8_t* prefix); + +/** +   Read @a file. +*/ +SERD_API +bool +serd_reader_read_file(SerdReader     reader, +                      FILE*          file, +                      const uint8_t* name); + +/** +   Read @a utf8. +*/ +SERD_API +bool +serd_reader_read_string(SerdReader me, const uint8_t* utf8); + +/** +   Free @a reader. 
+*/ +SERD_API +void +serd_reader_free(SerdReader reader); + +/** +   Create a new read state with the given initial base URI and environment. + +   A reference to @a env will be kept, and @a env will be modified as the +   state is modified. +*/ +SERD_API +SerdReadState +serd_read_state_new(SerdEnv        env, +                    const uint8_t* base_uri_str); + +/** +   Free @a state. +*/ +SERD_API +void +serd_read_state_free(SerdReadState state); + +/** +   Expand @a node to a full URI. + +   @param node A CURIE or URI node to expand and/or resolve. +*/ +SERD_API +SerdNode +serd_read_state_expand(SerdReadState   state, +                       const SerdNode* node); + +/** +   Get the current base URI. +*/ +SERD_API +SerdNode +serd_read_state_get_base_uri(SerdReadState state, +                             SerdURI*      out); + +/** +   Set the current base URI. +*/ +SERD_API +bool +serd_read_state_set_base_uri(SerdReadState   state, +                             const SerdNode* uri_node); + +/** +   Set a namespace prefix. +*/ +SERD_API +bool +serd_read_state_set_prefix(SerdReadState   state, +                           const SerdNode* name, +                           const SerdNode* uri_node); + +/** +   @} +   @name SerdWriter +   @{ +*/ + +typedef enum { +	SERD_STYLE_ABBREVIATED = 1,      /**< Abbreviate triples when possible. */ +	SERD_STYLE_ASCII       = 1 << 1, /**< Escape all non-ASCII characters. */ +	SERD_STYLE_RESOLVED    = 1 << 2, /**< Resolve relative URIs against base. */ +	SERD_STYLE_CURIED      = 1 << 3  /**< Shorten URIs into CURIEs. */ +} SerdStyle; + +/** +   Create a new RDF writer. +*/ +SERD_API +SerdWriter +serd_writer_new(SerdSyntax     syntax, +                SerdStyle      style, +                SerdEnv        env, +                const SerdURI* base_uri, +                SerdSink       sink, +                void*          stream); + +/** +   Free @a writer. 
+*/ +SERD_API +void +serd_writer_free(SerdWriter writer); + +/** +   Set the current output base URI (and emit directive if applicable). +*/ +SERD_API +void +serd_writer_set_base_uri(SerdWriter     writer, +                         const SerdURI* uri); + +/** +   Set a namespace prefix (and emit directive if applicable). +*/ +SERD_API +bool +serd_writer_set_prefix(SerdWriter      writer, +                       const SerdNode* name, +                       const SerdNode* uri); + +/** +   Write a statement. +*/ +SERD_API +bool +serd_writer_write_statement(SerdWriter      writer, +                            const SerdNode* graph, +                            const SerdNode* subject, +                            const SerdNode* predicate, +                            const SerdNode* object, +                            const SerdNode* object_datatype, +                            const SerdNode* object_lang); + +/** +   Mark the end of an anonymous node's description. +*/ +SERD_API +bool +serd_writer_end_anon(SerdWriter      writer, +                     const SerdNode* node); + +/** +   Finish a write. +*/ +SERD_API +void +serd_writer_finish(SerdWriter writer); + +/** +   @} +   @} +*/ + +#ifdef __cplusplus +}  /* extern "C" */ +#endif + +#endif  /* SERD_SERD_H */ diff --git a/core.lv2/wscript b/core.lv2/wscript index 358a5b0..872dfb9 100644 --- a/core.lv2/wscript +++ b/core.lv2/wscript @@ -15,12 +15,48 @@ top = '.'  
out = 'build'  def options(opt): -	opt.add_option('--bundle-only', action='store_true', default=False, dest='bundle_only', -			help="Only install LV2 bundle (not header or pkg-config file)") +	opt.add_option('--default-lv2-path', type='string', default='', +	               dest='default_lv2_path', +	               help="Default LV2 path to use if $LV2_PATH is unset") +	opt.add_option('--bundle-only', action='store_true', default=False, +	               dest='bundle_only', +	               help="Only install LV2 bundle (not header or pkg-config file)")  	autowaf.set_options(opt)  def configure(conf):  	autowaf.configure(conf) +	lv2core_path_sep = ':' +	lv2core_dir_sep  = '/' +	if sys.platform == 'win32': +		lv2core_path_sep = ';' +		lv2core_dir_sep = '\\' + +	autowaf.define(conf, 'LV2CORE_PATH_SEP', lv2core_path_sep) +	autowaf.define(conf, 'LV2CORE_DIR_SEP',  lv2core_dir_sep) + +	if Options.options.default_lv2_path == '': +		if Options.platform == 'darwin': +			Options.options.default_lv2_path = lv2core_path_sep.join([ +					'~/Library/Audio/Plug-Ins/LV2', +					'~/.lv2', +					'/usr/local/lib/lv2', +					'/usr/lib/lv2', +					'/Library/Audio/Plug-Ins/LV2']) +		elif Options.platform == 'haiku': +			Options.options.default_lv2_path = lv2core_path_sep.join([ +				'~/.lv2', +				'/boot/common/add-ons/lv2']) +		elif Options.platform == 'win32': +			Options.options.default_lv2_path = 'C:\\Program Files\\LV2' +		else: +			Options.options.default_lv2_path = lv2core_path_sep.join([ +					'~/.lv2', +					'/usr/%s/lv2' % conf.env['LIBDIRNAME'], +					'/usr/local/%s/lv2' % conf.env['LIBDIRNAME']]) + +	autowaf.define(conf, 'LV2CORE_DEFAULT_LV2_PATH', Options.options.default_lv2_path) + +	conf.write_config_header('lv2-config.h', remove=False)  def build(bld):  	# Header "library" @@ -40,10 +76,13 @@ def build(bld):  	# Bundle (data)  	bld.install_files('${LV2DIR}/lv2core.lv2', 'lv2.ttl manifest.ttl') -	# lv2config -	bld.install_files('${BINDIR}', 'lv2config', chmod=0755) +	obj = 
bld(features = 'c cprogram') +	obj.source       = 'lv2config.c serd-0.1.0.c' +	obj.target       = 'lv2config' +	obj.install_path = '${BINDIR}' +	obj.cflags       = '-std=c99'  def dist(): -        import Scripting -        Scripting.g_gz = 'gz' +	import Scripting +	Scripting.g_gz = 'gz'  	Scripting.dist() |