/* Copyright 2011 David Robillard Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef SERD_INTERNAL_H #define SERD_INTERNAL_H #include #include #include "serd-0.1.0.h" /** A dynamic stack in memory. */ typedef struct { uint8_t* buf; ///< Stack memory size_t buf_size; ///< Allocated size of buf (>= size) size_t size; ///< Conceptual size of stack in buf } SerdStack; /** An offset to start the stack at. Note 0 is reserved for NULL. 
*/ #define SERD_STACK_BOTTOM sizeof(void*) static inline SerdStack serd_stack_new(size_t size) { SerdStack stack; stack.buf = malloc(size); stack.buf_size = size; stack.size = SERD_STACK_BOTTOM; return stack; } static inline bool serd_stack_is_empty(SerdStack* stack) { return stack->size <= SERD_STACK_BOTTOM; } static inline void serd_stack_free(SerdStack* stack) { free(stack->buf); stack->buf = NULL; stack->buf_size = 0; stack->size = 0; } static inline uint8_t* serd_stack_push(SerdStack* stack, size_t n_bytes) { const size_t new_size = stack->size + n_bytes; if (stack->buf_size < new_size) { stack->buf_size *= 2; stack->buf = realloc(stack->buf, stack->buf_size); } uint8_t* const ret = (stack->buf + stack->size); stack->size = new_size; return ret; } static inline void serd_stack_pop(SerdStack* stack, size_t n_bytes) { assert(stack->size >= n_bytes); stack->size -= n_bytes; } /** Return true if @a c lies within [min...max] (inclusive) */ static inline bool in_range(const uint8_t c, const uint8_t min, const uint8_t max) { return (c >= min && c <= max); } /** RFC2234: ALPHA := %x41-5A / %x61-7A ; A-Z / a-z */ static inline bool is_alpha(const uint8_t c) { return in_range(c, 'A', 'Z') || in_range(c, 'a', 'z'); } /** RFC2234: DIGIT ::= %x30-39 ; 0-9 */ static inline bool is_digit(const uint8_t c) { return in_range(c, '0', '9'); } /** UTF-8 strlen. * @return Lengh of @a utf8 in characters. * @param utf8 A null-terminated UTF-8 string. * @param out_n_bytes (Output) Set to the size of @a utf8 in bytes. 
*/ static inline size_t serd_strlen(const uint8_t* utf8, size_t* out_n_bytes) { size_t n_chars = 0; size_t i = 0; for (; utf8[i]; ++i) { if ((utf8[i] & 0xC0) != 0x80) { // Does not start with `10', start of a new character ++n_chars; } } if (out_n_bytes) { *out_n_bytes = i + 1; } return n_chars; } #endif // SERD_INTERNAL_H /** * @file env.c */ #include #include #include #include typedef struct { SerdNode name; SerdNode uri; } SerdPrefix; struct SerdEnvImpl { SerdPrefix* prefixes; size_t n_prefixes; }; SERD_API SerdEnv serd_env_new() { SerdEnv env = malloc(sizeof(struct SerdEnvImpl)); env->prefixes = NULL; env->n_prefixes = 0; return env; } SERD_API void serd_env_free(SerdEnv env) { for (size_t i = 0; i < env->n_prefixes; ++i) { serd_node_free(&env->prefixes[i].name); serd_node_free(&env->prefixes[i].uri); } free(env->prefixes); free(env); } static inline SerdPrefix* serd_env_find(SerdEnv env, const uint8_t* name, size_t name_len) { for (size_t i = 0; i < env->n_prefixes; ++i) { const SerdNode* const prefix_name = &env->prefixes[i].name; if (prefix_name->n_bytes == name_len + 1) { if (!memcmp(prefix_name->buf, name, name_len)) { return &env->prefixes[i]; } } } return NULL; } SERD_API void serd_env_add(SerdEnv env, const SerdNode* name, const SerdNode* uri) { assert(name && uri); SerdPrefix* const prefix = serd_env_find(env, name->buf, name->n_chars); if (prefix) { serd_node_free(&prefix->uri); prefix->uri = serd_node_copy(uri); } else { env->prefixes = realloc(env->prefixes, (++env->n_prefixes) * sizeof(SerdPrefix)); env->prefixes[env->n_prefixes - 1].name = serd_node_copy(name); env->prefixes[env->n_prefixes - 1].uri = serd_node_copy(uri); } } SERD_API bool serd_env_qualify(const SerdEnv env, const SerdNode* uri, SerdNode* prefix_name, SerdChunk* suffix) { for (size_t i = 0; i < env->n_prefixes; ++i) { const SerdNode* const prefix_uri = &env->prefixes[i].uri; if (uri->n_bytes >= prefix_uri->n_bytes) { if (!strncmp((const char*)uri->buf, (const 
char*)prefix_uri->buf, prefix_uri->n_bytes - 1)) { *prefix_name = env->prefixes[i].name; suffix->buf = uri->buf + prefix_uri->n_bytes - 1; suffix->len = uri->n_bytes - prefix_uri->n_bytes; return true; } } } return false; } SERD_API bool serd_env_expand(const SerdEnv env, const SerdNode* qname, SerdChunk* uri_prefix, SerdChunk* uri_suffix) { const uint8_t* const colon = memchr(qname->buf, ':', qname->n_bytes); if (!colon) { return false; // Illegal qname } const size_t name_len = colon - qname->buf; const SerdPrefix* const prefix = serd_env_find(env, qname->buf, name_len); if (prefix) { uri_prefix->buf = prefix->uri.buf; uri_prefix->len = prefix->uri.n_bytes - 1; uri_suffix->buf = colon + 1; uri_suffix->len = qname->n_bytes - (colon - qname->buf) - 2; return true; } return false; } SERD_API void serd_env_foreach(const SerdEnv env, SerdPrefixSink func, void* handle) { for (size_t i = 0; i < env->n_prefixes; ++i) { func(handle, &env->prefixes[i].name, &env->prefixes[i].uri); } } /** * @file node.c */ #include #include SERD_API SerdNode serd_node_from_string(SerdType type, const uint8_t* buf) { size_t buf_n_bytes; const size_t buf_n_chars = serd_strlen(buf, &buf_n_bytes); SerdNode ret = { type, buf_n_bytes, buf_n_chars, buf }; return ret; } SERD_API SerdNode serd_node_copy(const SerdNode* node) { SerdNode copy = *node; uint8_t* buf = malloc(copy.n_bytes); memcpy(buf, node->buf, copy.n_bytes); copy.buf = buf; return copy; } static size_t serd_uri_string_length(const SerdURI* uri) { size_t len = uri->path_base.len; #define ADD_LEN(field, n_delims) \ if ((field).len) { len += (field).len + (n_delims); } ADD_LEN(uri->path, 1); // + possible leading `/' ADD_LEN(uri->scheme, 1); // + trailing `:' ADD_LEN(uri->authority, 2); // + leading `//' ADD_LEN(uri->query, 1); // + leading `?' 
ADD_LEN(uri->fragment, 1); // + leading `#' // Add 2 for authority // prefix (added even though authority.len = 0) return len + 2; // + 2 for authority // } static size_t string_sink(const void* buf, size_t len, void* stream) { uint8_t** ptr = (uint8_t**)stream; memcpy(*ptr, buf, len); *ptr += len; return len; } SERD_API SerdNode serd_node_new_uri_from_node(const SerdNode* uri_node, const SerdURI* base, SerdURI* out) { return serd_node_new_uri_from_string(uri_node->buf, base, out); } SERD_API SerdNode serd_node_new_uri_from_string(const uint8_t* str, const SerdURI* base, SerdURI* out) { if (str[0] == '\0') { return serd_node_new_uri(base, NULL, out); // Empty URI => Base URI } else { SerdURI uri; if (serd_uri_parse(str, &uri)) { return serd_node_new_uri(&uri, base, out); // Resolve/Serialise } } return SERD_NODE_NULL; } SERD_API SerdNode serd_node_new_uri(const SerdURI* uri, const SerdURI* base, SerdURI* out) { SerdURI abs_uri = *uri; if (base) { serd_uri_resolve(uri, base, &abs_uri); } const size_t len = serd_uri_string_length(&abs_uri); uint8_t* buf = malloc(len + 1); SerdNode node = { SERD_URI, len + 1, len, buf }; // FIXME: UTF-8 uint8_t* ptr = buf; const size_t actual_len = serd_uri_serialise(&abs_uri, string_sink, &ptr); buf[actual_len] = '\0'; node.n_bytes = actual_len + 1; node.n_chars = actual_len; // FIXME: double parse if (!serd_uri_parse(buf, out)) { fprintf(stderr, "error parsing URI\n"); return SERD_NODE_NULL; } return node; } SERD_API void serd_node_free(SerdNode* node) { free((uint8_t*)node->buf); } /** * @file reader.c */ #include #include #include #include #include #include #include #define NS_XSD "http://www.w3.org/2001/XMLSchema#" #define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" #define TRY_THROW(exp) if (!(exp)) goto except; #define TRY_RET(exp) if (!(exp)) return 0; #define STACK_PAGE_SIZE 4096 #define READ_BUF_LEN 4096 typedef struct { const uint8_t* filename; unsigned line; unsigned col; } Cursor; typedef uint32_t uchar; typedef 
size_t Ref; typedef struct { SerdType type; Ref value; Ref datatype; Ref lang; } Node; typedef struct { const Node* graph; const Node* subject; const Node* predicate; } ReadContext; /** Measured UTF-8 string. */ typedef struct { size_t n_bytes; ///< Size in bytes including trailing null byte size_t n_chars; ///< Length in characters uint8_t buf[]; ///< Buffer } SerdString; static const Node INTERNAL_NODE_NULL = { 0, 0, 0, 0 }; struct SerdReaderImpl { void* handle; SerdBaseSink base_sink; SerdPrefixSink prefix_sink; SerdStatementSink statement_sink; SerdEndSink end_sink; Node rdf_type; Node rdf_first; Node rdf_rest; Node rdf_nil; FILE* fd; SerdStack stack; Cursor cur; uint8_t* buf; const uint8_t* blank_prefix; unsigned next_id; int err; uint8_t* read_buf; int32_t read_head; ///< Offset into read_buf bool from_file; ///< True iff reading from @ref fd bool eof; #ifdef SUIL_STACK_CHECK Ref* alloc_stack; ///< Stack of push offsets size_t n_allocs; ///< Number of stack pushes #endif }; struct SerdReadStateImpl { SerdEnv env; SerdNode base_uri_node; SerdURI base_uri; }; typedef enum { SERD_SUCCESS = 0, ///< Completed successfully SERD_FAILURE = 1, ///< Non-fatal failure SERD_ERROR = 2, ///< Fatal error } SerdStatus; static inline int error(SerdReader reader, const char* fmt, ...) 
{ va_list args; va_start(args, fmt); fprintf(stderr, "error: %s:%u:%u: ", reader->cur.filename, reader->cur.line, reader->cur.col); vfprintf(stderr, fmt, args); return 0; } static Node make_node(SerdType type, Ref value, Ref datatype, Ref lang) { const Node ret = { type, value, datatype, lang }; return ret; } static inline bool page(SerdReader reader) { assert(reader->from_file); reader->read_head = 0; const size_t n_read = fread(reader->read_buf, 1, READ_BUF_LEN, reader->fd); if (n_read == 0) { reader->read_buf[0] = '\0'; reader->eof = true; return false; } else if (n_read < READ_BUF_LEN) { reader->read_buf[n_read] = '\0'; } return true; } static inline bool peek_string(SerdReader reader, uint8_t* pre, int n) { uint8_t* ptr = reader->read_buf + reader->read_head; for (int i = 0; i < n; ++i) { if (reader->from_file && (reader->read_head + i >= READ_BUF_LEN)) { if (!page(reader)) { return false; } ptr = reader->read_buf; reader->read_head = -i; memcpy(reader->read_buf + reader->read_head, pre, i); assert(reader->read_buf[reader->read_head] == pre[0]); } if ((pre[i] = *ptr++) == '\0') { return false; } } return true; } static inline uint8_t peek_byte(SerdReader reader) { return reader->read_buf[reader->read_head]; } static inline uint8_t eat_byte(SerdReader reader, const uint8_t byte) { const uint8_t c = peek_byte(reader); ++reader->read_head; switch (c) { case '\n': ++reader->cur.line; reader->cur.col = 0; break; default: ++reader->cur.col; } if (c != byte) { return error(reader, "expected `%c', not `%c'\n", byte, c); } if (reader->from_file && (reader->read_head == READ_BUF_LEN)) { TRY_RET(page(reader)); assert(reader->read_head < READ_BUF_LEN); } if (reader->read_buf[reader->read_head] == '\0') { reader->eof = true; } return c; } static inline void eat_string(SerdReader reader, const char* str, unsigned n) { for (unsigned i = 0; i < n; ++i) { eat_byte(reader, ((const uint8_t*)str)[i]); } } #ifdef SUIL_STACK_CHECK static inline bool stack_is_top_string(SerdReader 
reader, Ref ref) { return ref == reader->alloc_stack[reader->n_allocs - 1]; } #endif static inline intptr_t pad_size(intptr_t size) { return (size + 7) & (~7); } // Make a new string from a non-UTF-8 C string (internal use only) static Ref push_string(SerdReader reader, const char* c_str, size_t n_bytes) { // Align strings to 64-bits (assuming malloc/realloc are aligned to 64-bits) const size_t stack_size = pad_size((intptr_t)reader->stack.size); const size_t pad = stack_size - reader->stack.size; uint8_t* mem = serd_stack_push( &reader->stack, pad + sizeof(SerdString) + n_bytes) + pad; SerdString* const str = (SerdString*)mem; str->n_bytes = n_bytes; str->n_chars = n_bytes - 1; memcpy(str->buf, c_str, n_bytes); #ifdef SUIL_STACK_CHECK reader->alloc_stack = realloc(reader->alloc_stack, sizeof(uint8_t*) * (++reader->n_allocs)); reader->alloc_stack[reader->n_allocs - 1] = (mem - reader->stack.buf); #endif return (uint8_t*)str - reader->stack.buf; } static inline SerdString* deref(SerdReader reader, const Ref ref) { if (ref) { return (SerdString*)(reader->stack.buf + ref); } return NULL; } static inline void push_byte(SerdReader reader, Ref ref, const uint8_t c) { #ifdef SUIL_STACK_CHECK assert(stack_is_top_string(reader, ref)); #endif serd_stack_push(&reader->stack, 1); SerdString* const str = deref(reader, ref); ++str->n_bytes; if ((c & 0xC0) != 0x80) { // Does not start with `10', start of a new character ++str->n_chars; } assert(str->n_bytes > str->n_chars); str->buf[str->n_bytes - 2] = c; str->buf[str->n_bytes - 1] = '\0'; } static void pop_string(SerdReader reader, Ref ref) { if (ref) { if (ref == reader->rdf_nil.value || ref == reader->rdf_first.value || ref == reader->rdf_rest.value) { return; } #ifdef SUIL_STACK_CHECK if (!stack_is_top_string(reader, ref)) { fprintf(stderr, "attempt to pop non-top string %s\n", deref(reader, ref)->buf); fprintf(stderr, "top: %s\n", deref(reader, reader->alloc_stack[reader->n_allocs - 1])->buf); } 
assert(stack_is_top_string(reader, ref)); --reader->n_allocs; #endif serd_stack_pop(&reader->stack, deref(reader, ref)->n_bytes); } } static inline SerdNode public_node_from_ref(SerdReader reader, SerdType type, Ref ref) { if (!ref) { return SERD_NODE_NULL; } const SerdString* str = deref(reader, ref); const SerdNode public = { type, str->n_bytes, str->n_chars, str->buf }; return public; } static inline SerdNode public_node(SerdReader reader, const Node* private) { return public_node_from_ref(reader, private->type, private->value); } static inline bool emit_statement(SerdReader reader, const Node* g, const Node* s, const Node* p, const Node* o) { assert(s->value && p->value && o->value); const SerdNode graph = g ? public_node(reader, g) : SERD_NODE_NULL; const SerdNode subject = public_node(reader, s); const SerdNode predicate = public_node(reader, p); const SerdNode object = public_node(reader, o); const SerdNode object_datatype = public_node_from_ref(reader, SERD_URI, o->datatype); const SerdNode object_lang = public_node_from_ref(reader, SERD_LITERAL, o->lang); return reader->statement_sink(reader->handle, &graph, &subject, &predicate, &object, &object_datatype, &object_lang); } static bool read_collection(SerdReader reader, ReadContext ctx, Node* dest); static bool read_predicateObjectList(SerdReader reader, ReadContext ctx); // [40] hex ::= [#x30-#x39] | [#x41-#x46] static inline uint8_t read_hex(SerdReader reader) { const uint8_t c = peek_byte(reader); if (in_range(c, 0x30, 0x39) || in_range(c, 0x41, 0x46)) { return eat_byte(reader, c); } else { return error(reader, "illegal hexadecimal digit `%c'\n", c); } } static inline bool read_hex_escape(SerdReader reader, unsigned length, Ref dest) { uint8_t buf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; for (unsigned i = 0; i < length; ++i) { buf[i] = read_hex(reader); } uint32_t c; sscanf((const char*)buf, "%X", &c); unsigned size = 0; if (c < 0x00000080) { size = 1; } else if (c < 0x00000800) { size = 2; } else if (c < 
0x00010000) { size = 3; } else if (c < 0x00200000) { size = 4; } else { return false; } // Build output in buf // (Note # of bytes = # of leading 1 bits in first byte) switch (size) { case 4: buf[3] = 0x80 | (uint8_t)(c & 0x3F); c >>= 6; c |= (16 << 12); // set bit 4 case 3: buf[2] = 0x80 | (uint8_t)(c & 0x3F); c >>= 6; c |= (32 << 6); // set bit 5 case 2: buf[1] = 0x80 | (uint8_t)(c & 0x3F); c >>= 6; c |= 0xC0; // set bits 6 and 7 case 1: buf[0] = (uint8_t)c; } for (unsigned i = 0; i < size; ++i) { push_byte(reader, dest, buf[i]); } return true; } static inline bool read_character_escape(SerdReader reader, Ref dest) { switch (peek_byte(reader)) { case '\\': push_byte(reader, dest, eat_byte(reader, '\\')); return true; case 'u': eat_byte(reader, 'u'); return read_hex_escape(reader, 4, dest); case 'U': eat_byte(reader, 'U'); return read_hex_escape(reader, 8, dest); default: return false; } } static inline bool read_echaracter_escape(SerdReader reader, Ref dest) { switch (peek_byte(reader)) { case 't': eat_byte(reader, 't'); push_byte(reader, dest, '\t'); return true; case 'n': eat_byte(reader, 'n'); push_byte(reader, dest, '\n'); return true; case 'r': eat_byte(reader, 'r'); push_byte(reader, dest, '\r'); return true; default: return read_character_escape(reader, dest); } } static inline bool read_scharacter_escape(SerdReader reader, Ref dest) { switch (peek_byte(reader)) { case '"': push_byte(reader, dest, eat_byte(reader, '"')); return true; default: return read_echaracter_escape(reader, dest); } } static inline bool read_ucharacter_escape(SerdReader reader, Ref dest) { switch (peek_byte(reader)) { case '>': push_byte(reader, dest, eat_byte(reader, '>')); return true; default: return read_echaracter_escape(reader, dest); } } // [38] character ::= '\u' hex hex hex hex // | '\U' hex hex hex hex hex hex hex hex // | '\\' // | [#x20-#x5B] | [#x5D-#x10FFFF] static inline SerdStatus read_character(SerdReader reader, Ref dest) { const uint8_t c = peek_byte(reader); 
assert(c != '\\'); // Only called from methods that handle escapes first switch (c) { case '\0': error(reader, "unexpected end of file\n", peek_byte(reader)); return SERD_ERROR; default: if (c < 0x20) { // ASCII control character error(reader, "unexpected control character\n"); return SERD_ERROR; } else if (c <= 0x7E) { // Printable ASCII push_byte(reader, dest, eat_byte(reader, c)); return SERD_SUCCESS; } else { // Wide UTF-8 character unsigned size = 1; if ((c & 0xE0) == 0xC0) { // Starts with `110' size = 2; } else if ((c & 0xF0) == 0xE0) { // Starts with `1110' size = 3; } else if ((c & 0xF8) == 0xF0) { // Starts with `11110' size = 4; } else { error(reader, "invalid character\n"); return SERD_ERROR; } for (unsigned i = 0; i < size; ++i) { push_byte(reader, dest, eat_byte(reader, peek_byte(reader))); } return SERD_SUCCESS; } } } // [39] echaracter ::= character | '\t' | '\n' | '\r' static inline SerdStatus read_echaracter(SerdReader reader, Ref dest) { uint8_t c = peek_byte(reader); switch (c) { case '\\': eat_byte(reader, '\\'); if (read_echaracter_escape(reader, peek_byte(reader))) { return SERD_SUCCESS; } else { error(reader, "illegal escape `\\%c'\n", peek_byte(reader)); return SERD_ERROR; } default: return read_character(reader, dest); } } // [43] lcharacter ::= echaracter | '\"' | #x9 | #xA | #xD static inline SerdStatus read_lcharacter(SerdReader reader, Ref dest) { const uint8_t c = peek_byte(reader); uint8_t pre[3]; switch (c) { case '"': peek_string(reader, pre, 3); if (pre[1] == '\"' && pre[2] == '\"') { eat_byte(reader, '\"'); eat_byte(reader, '\"'); eat_byte(reader, '\"'); return SERD_FAILURE; } else { push_byte(reader, dest, eat_byte(reader, '"')); return SERD_SUCCESS; } case '\\': eat_byte(reader, '\\'); if (read_scharacter_escape(reader, dest)) { return SERD_SUCCESS; } else { error(reader, "illegal escape `\\%c'\n", peek_byte(reader)); return SERD_ERROR; } case 0x9: case 0xA: case 0xD: push_byte(reader, dest, eat_byte(reader, c)); return 
SERD_SUCCESS; default: return read_echaracter(reader, dest); } } // [42] scharacter ::= ( echaracter - #x22 ) | '\"' static inline SerdStatus read_scharacter(SerdReader reader, Ref dest) { uint8_t c = peek_byte(reader); switch (c) { case '\\': eat_byte(reader, '\\'); if (read_scharacter_escape(reader, dest)) { return SERD_SUCCESS; } else { error(reader, "illegal escape `\\%c'\n", peek_byte(reader)); return SERD_ERROR; } case '\"': return SERD_FAILURE; default: return read_character(reader, dest); } } // Spec: [41] ucharacter ::= ( character - #x3E ) | '\>' // Impl: [41] ucharacter ::= ( echaracter - #x3E ) | '\>' static inline SerdStatus read_ucharacter(SerdReader reader, Ref dest) { const uint8_t c = peek_byte(reader); switch (c) { case '\\': eat_byte(reader, '\\'); if (read_ucharacter_escape(reader, dest)) { return SERD_SUCCESS; } else { return error(reader, "illegal escape `\\%c'\n", peek_byte(reader)); } case '>': return SERD_FAILURE; default: return read_character(reader, dest); } } // [10] comment ::= '#' ( [^#xA #xD] )* static void read_comment(SerdReader reader) { eat_byte(reader, '#'); uint8_t c; while (((c = peek_byte(reader)) != 0xA) && (c != 0xD)) { eat_byte(reader, c); } } // [24] ws ::= #x9 | #xA | #xD | #x20 | comment static inline bool read_ws(SerdReader reader) { const uint8_t c = peek_byte(reader); switch (c) { case 0x9: case 0xA: case 0xD: case 0x20: eat_byte(reader, c); return true; case '#': read_comment(reader); return true; default: return false; } } static inline void read_ws_star(SerdReader reader) { while (read_ws(reader)) {} } static inline bool read_ws_plus(SerdReader reader) { TRY_RET(read_ws(reader)); read_ws_star(reader); return true; } // [37] longSerdString ::= #x22 #x22 #x22 lcharacter* #x22 #x22 #x22 static Ref read_longString(SerdReader reader) { eat_string(reader, "\"\"\"", 3); Ref str = push_string(reader, "", 1); SerdStatus st; while (!(st = read_lcharacter(reader, str))) {} if (st != SERD_ERROR) { return str; } 
pop_string(reader, str); return 0; } // [36] string ::= #x22 scharacter* #x22 static Ref read_string(SerdReader reader) { eat_byte(reader, '\"'); Ref str = push_string(reader, "", 1); SerdStatus st; while (!(st = read_scharacter(reader, str))) {} if (st != SERD_ERROR) { eat_byte(reader, '\"'); return str; } pop_string(reader, str); return 0; } // [35] quotedString ::= string | longSerdString static Ref read_quotedString(SerdReader reader) { uint8_t pre[3]; peek_string(reader, pre, 3); assert(pre[0] == '\"'); switch (pre[1]) { case '\"': if (pre[2] == '\"') return read_longString(reader); else return read_string(reader); default: return read_string(reader); } } // [34] relativeURI ::= ucharacter* static inline Ref read_relativeURI(SerdReader reader) { Ref str = push_string(reader, "", 1); SerdStatus st; while (!(st = read_ucharacter(reader, str))) {} if (st != SERD_ERROR) { return str; } pop_string(reader, str); return 0; } // [30] nameStartChar ::= [A-Z] | "_" | [a-z] // | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] // | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] // | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] static inline uchar read_nameStartChar(SerdReader reader, bool required) { const uint8_t c = peek_byte(reader); if (c == '_' || is_alpha(c)) { return eat_byte(reader, c); } else { if (required) { error(reader, "illegal character `%c'\n", c); } return 0; } } // [31] nameChar ::= nameStartChar | '-' | [0-9] // | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] static inline uchar read_nameChar(SerdReader reader) { uchar c = read_nameStartChar(reader, false); if (c) return c; switch ((c = peek_byte(reader))) { case '-': case 0xB7: case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return eat_byte(reader, c); default: // TODO: 0x300-0x036F | 0x203F-0x2040 return 0; } return 0; } // [33] prefixName ::= ( nameStartChar - '_' ) nameChar* 
static Ref read_prefixName(SerdReader reader) { uint8_t c = peek_byte(reader); if (c == '_') { error(reader, "unexpected `_'\n"); return 0; } TRY_RET(c = read_nameStartChar(reader, false)); Ref str = push_string(reader, "", 1); push_byte(reader, str, c); while ((c = read_nameChar(reader)) != 0) { push_byte(reader, str, c); } return str; } // [32] name ::= nameStartChar nameChar* static Ref read_name(SerdReader reader, Ref dest, bool required) { uchar c = read_nameStartChar(reader, required); if (!c) { if (required) { error(reader, "illegal character at start of name\n"); } return 0; } do { push_byte(reader, dest, c); } while ((c = read_nameChar(reader)) != 0); return dest; } // [29] language ::= [a-z]+ ('-' [a-z0-9]+ )* static Ref read_language(SerdReader reader) { const uint8_t start = peek_byte(reader); if (!in_range(start, 'a', 'z')) { error(reader, "unexpected `%c'\n", start); return 0; } Ref str = push_string(reader, "", 1); push_byte(reader, str, eat_byte(reader, start)); uint8_t c; while ((c = peek_byte(reader)) && in_range(c, 'a', 'z')) { push_byte(reader, str, eat_byte(reader, c)); } if (peek_byte(reader) == '-') { push_byte(reader, str, eat_byte(reader, '-')); while ((c = peek_byte(reader)) && ( in_range(c, 'a', 'z') || in_range(c, '0', '9'))) { push_byte(reader, str, eat_byte(reader, c)); } } return str; } // [28] uriref ::= '<' relativeURI '>' static Ref read_uriref(SerdReader reader) { TRY_RET(eat_byte(reader, '<')); Ref const str = read_relativeURI(reader); if (str && eat_byte(reader, '>')) { return str; } pop_string(reader, str); return 0; } // [27] qname ::= prefixName? ':' name? static Ref read_qname(SerdReader reader) { Ref prefix = read_prefixName(reader); if (!prefix) { prefix = push_string(reader, "", 1); } TRY_THROW(eat_byte(reader, ':')); push_byte(reader, prefix, ':'); Ref str = read_name(reader, prefix, false); return str ? 
str : prefix; except: pop_string(reader, prefix); return 0; } static bool read_0_9(SerdReader reader, Ref str, bool at_least_one) { uint8_t c; if (at_least_one) { if (!is_digit((c = peek_byte(reader)))) { return error(reader, "expected digit\n"); } push_byte(reader, str, eat_byte(reader, c)); } while (is_digit((c = peek_byte(reader)))) { push_byte(reader, str, eat_byte(reader, c)); } return true; } // [19] exponent ::= [eE] ('-' | '+')? [0-9]+ // [18] decimal ::= ( '-' | '+' )? ( [0-9]+ '.' [0-9]* // | '.' ([0-9])+ // | ([0-9])+ ) // [17] double ::= ( '-' | '+' )? ( [0-9]+ '.' [0-9]* exponent // | '.' ([0-9])+ exponent // | ([0-9])+ exponent ) // [16] integer ::= ( '-' | '+' ) ? [0-9]+ static bool read_number(SerdReader reader, Node* dest) { #define XSD_DECIMAL NS_XSD "decimal" #define XSD_DOUBLE NS_XSD "double" #define XSD_INTEGER NS_XSD "integer" Ref str = push_string(reader, "", 1); uint8_t c = peek_byte(reader); bool has_decimal = false; Ref datatype = 0; if (c == '-' || c == '+') { push_byte(reader, str, eat_byte(reader, c)); } if ((c = peek_byte(reader)) == '.') { has_decimal = true; // decimal case 2 (e.g. '.0' or `-.0' or `+.0') push_byte(reader, str, eat_byte(reader, c)); TRY_THROW(read_0_9(reader, str, true)); } else { // all other cases ::= ( '-' | '+' ) [0-9]+ ( . )? ( [0-9]+ )? ... 
TRY_THROW(read_0_9(reader, str, true)); if ((c = peek_byte(reader)) == '.') { has_decimal = true; push_byte(reader, str, eat_byte(reader, c)); TRY_THROW(read_0_9(reader, str, false)); } } c = peek_byte(reader); if (c == 'e' || c == 'E') { // double push_byte(reader, str, eat_byte(reader, c)); switch ((c = peek_byte(reader))) { case '+': case '-': push_byte(reader, str, eat_byte(reader, c)); default: break; } read_0_9(reader, str, true); datatype = push_string(reader, XSD_DOUBLE, strlen(XSD_DOUBLE) + 1); } else if (has_decimal) { datatype = push_string(reader, XSD_DECIMAL, strlen(XSD_DECIMAL) + 1); } else { datatype = push_string(reader, XSD_INTEGER, strlen(XSD_INTEGER) + 1); } *dest = make_node(SERD_LITERAL, str, datatype, 0); assert(dest->value); return true; except: pop_string(reader, datatype); pop_string(reader, str); return false; } // [25] resource ::= uriref | qname static bool read_resource(SerdReader reader, Node* dest) { switch (peek_byte(reader)) { case '<': *dest = make_node(SERD_URI, read_uriref(reader), 0, 0); break; default: *dest = make_node(SERD_CURIE, read_qname(reader), 0, 0); } return (dest->value != 0); } // [14] literal ::= quotedString ( '@' language )? | datatypeSerdString // | integer | double | decimal | boolean static bool read_literal(SerdReader reader, Node* dest) { Ref str = 0; Node datatype = INTERNAL_NODE_NULL; const uint8_t c = peek_byte(reader); if (c == '-' || c == '+' || c == '.' 
|| is_digit(c)) { return read_number(reader, dest); } else if (c == '\"') { str = read_quotedString(reader); if (!str) { return false; } Ref lang = 0; switch (peek_byte(reader)) { case '^': eat_byte(reader, '^'); eat_byte(reader, '^'); TRY_THROW(read_resource(reader, &datatype)); break; case '@': eat_byte(reader, '@'); TRY_THROW(lang = read_language(reader)); } *dest = make_node(SERD_LITERAL, str, datatype.value, lang); } else { return error(reader, "Unknown literal type\n"); } return true; except: pop_string(reader, str); return false; } // [12] predicate ::= resource static bool read_predicate(SerdReader reader, Node* dest) { return read_resource(reader, dest); } // [9] verb ::= predicate | 'a' static bool read_verb(SerdReader reader, Node* dest) { uint8_t pre[2]; peek_string(reader, pre, 2); switch (pre[0]) { case 'a': switch (pre[1]) { case 0x9: case 0xA: case 0xD: case 0x20: eat_byte(reader, 'a'); *dest = make_node(SERD_URI, push_string(reader, NS_RDF "type", 48), 0, 0); return true; default: break; // fall through } default: return read_predicate(reader, dest); } } // [26] nodeID ::= '_:' name static Ref read_nodeID(SerdReader reader) { eat_byte(reader, '_'); eat_byte(reader, ':'); Ref str = push_string(reader, "", 1); return read_name(reader, str, true); } static Ref blank_id(SerdReader reader) { const char* prefix = reader->blank_prefix ? 
(const char*)reader->blank_prefix : "genid"; char str[32]; // FIXME: ensure length of reader->blank_prefix is OK const int len = snprintf(str, sizeof(str), "%s%u", prefix, reader->next_id++); return push_string(reader, str, len + 1); } // Spec: [21] blank ::= nodeID | '[]' // | '[' predicateObjectList ']' | collection // Impl: [21] blank ::= nodeID | '[ ws* ]' // | '[' ws* predicateObjectList ws* ']' | collection static bool read_blank(SerdReader reader, ReadContext ctx, Node* dest) { switch (peek_byte(reader)) { case '_': *dest = make_node(SERD_BLANK_ID, read_nodeID(reader), 0, 0); return true; case '[': eat_byte(reader, '['); read_ws_star(reader); if (peek_byte(reader) == ']') { eat_byte(reader, ']'); *dest = make_node(SERD_BLANK_ID, blank_id(reader), 0, 0); if (ctx.subject) { TRY_RET(emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, dest)); } return true; } *dest = make_node(SERD_ANON_BEGIN, blank_id(reader), 0, 0); if (ctx.subject) { TRY_RET(emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, dest)); dest->type = SERD_ANON; } ctx.subject = dest; read_predicateObjectList(reader, ctx); read_ws_star(reader); eat_byte(reader, ']'); if (reader->end_sink) { const SerdNode end = public_node(reader, dest); reader->end_sink(reader->handle, &end); } return true; case '(': if (read_collection(reader, ctx, dest)) { if (ctx.subject) { TRY_RET(emit_statement(reader, ctx.graph, ctx.subject, ctx.predicate, dest)); } return true; } return false; default: return error(reader, "illegal blank node\n"); } } inline static bool is_object_end(const uint8_t c) { switch (c) { case 0x9: case 0xA: case 0xD: case 0x20: case '\0': case '#': case '.': case ';': return true; default: return false; } } // [13] object ::= resource | blank | literal // Recurses, calling statement_sink for every statement encountered. // Leaves stack in original calling state (i.e. pops everything it pushes). 
/**
   Read one object and, when ctx.subject is set, emit the statement
   (ctx.subject, ctx.predicate, object).

   Objects starting with `[' or `(' are not emitted here: `emit' is cleared
   for them and read_blank emits on their behalf.  A bare `true' or `false'
   followed by an object-end byte becomes a SERD_LITERAL typed
   NS_XSD "boolean"; any other unrecognised start is read as a qname.

   Every string pushed for the object is popped again before returning,
   on success and on failure alike.

   @return false at NUL or `)', when the object cannot be read, or when
   emit_statement fails.
*/
static bool
read_object(SerdReader reader, ReadContext ctx)
{
	static const char* const XSD_BOOLEAN     = NS_XSD "boolean";
	static const size_t      XSD_BOOLEAN_LEN = 40;

	uint8_t       pre[6];
	bool          ret  = false;
	bool          emit = (ctx.subject != 0);
	Node          o    = INTERNAL_NODE_NULL;
	const uint8_t c    = peek_byte(reader);
	switch (c) {
	case '\0':
	case ')':
		return false;
	case '[': case '(':
		emit = false;
		// fall through
	case '_':
		TRY_THROW(ret = read_blank(reader, ctx, &o));
		break;
	case '<': case ':':
		TRY_THROW(ret = read_resource(reader, &o));
		break;
	case '\"': case '+': case '-':
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		TRY_THROW(ret = read_literal(reader, &o));
		break;
	case '.':
		TRY_THROW(ret = read_literal(reader, &o));
		break;
	default:
		/* Either a boolean literal, or a qname.
		   Unfortunately there is no way to distinguish these without
		   readahead, since `true' or `false' could be the start of a qname.
		*/
		peek_string(reader, pre, 6);
		if (!memcmp(pre, "true", 4) && is_object_end(pre[4])) {
			eat_string(reader, "true", 4);
			const Ref value    = push_string(reader, "true", 5);
			const Ref datatype = push_string(reader, XSD_BOOLEAN,
			                                 XSD_BOOLEAN_LEN + 1);
			o = make_node(SERD_LITERAL, value, datatype, 0);
		} else if (!memcmp(pre, "false", 5) && is_object_end(pre[5])) {
			eat_string(reader, "false", 5);
			const Ref value    = push_string(reader, "false", 6);
			const Ref datatype = push_string(reader, XSD_BOOLEAN,
			                                 XSD_BOOLEAN_LEN + 1);
			o = make_node(SERD_LITERAL, value, datatype, 0);
		} else if (!is_object_end(c)) {
			o = make_node(SERD_CURIE, read_qname(reader), 0, 0);
		}
		ret = o.value;
	}

	if (ret && emit) {
		assert(o.value);
		ret = emit_statement(reader, ctx.graph,
		                     ctx.subject, ctx.predicate, &o);
	}

except:
	pop_string(reader, o.lang);
	pop_string(reader, o.datatype);
	pop_string(reader, o.value);
	return ret;
}

// Spec: [8] objectList ::= object ( ',' object )*
// Impl: [8] objectList ::= object ( ws* ',' ws* object )*
/** Read a comma-separated list of objects, stopping at the first failure. */
static bool
read_objectList(SerdReader reader, ReadContext ctx)
{
	TRY_RET(read_object(reader, ctx));
	read_ws_star(reader);
	while (peek_byte(reader) == ',') {
		eat_byte(reader, ',');
		read_ws_star(reader);
		TRY_RET(read_object(reader, ctx));
		read_ws_star(reader);
	}
	return true;
}

// Spec: [7] predicateObjectList ::= verb objectList
//                                   (';' verb objectList)* (';')?
// Impl: [7] predicateObjectList ::= verb ws+ objectList
//                                   (ws* ';' ws* verb ws+ objectList)* (';')?
/**
   Read `verb objectList' groups separated by `;'.

   Each verb is pushed, used as ctx.predicate for its object list and popped
   again.  A `;' followed (after whitespace) by `.' or `]' ends the list
   successfully.

   @return false at end of input or on any read failure; the pending
   predicate is popped before returning.
*/
static bool
read_predicateObjectList(SerdReader reader, ReadContext ctx)
{
	if (reader->eof) {
		return false;
	}
	Node predicate = INTERNAL_NODE_NULL;
	TRY_RET(read_verb(reader, &predicate));
	TRY_THROW(read_ws_plus(reader));
	ctx.predicate = &predicate;
	TRY_THROW(read_objectList(reader, ctx));
	pop_string(reader, predicate.value);
	predicate.value = 0;  // Already popped; keeps `except' from popping twice
	read_ws_star(reader);
	while (peek_byte(reader) == ';') {
		eat_byte(reader, ';');
		read_ws_star(reader);
		switch (peek_byte(reader)) {
		case '.': case ']':
			return true;
		default:
			TRY_THROW(read_verb(reader, &predicate));
			ctx.predicate = &predicate;
			TRY_THROW(read_ws_plus(reader));
			TRY_THROW(read_objectList(reader, ctx));
			pop_string(reader, predicate.value);
			predicate.value = 0;
			read_ws_star(reader);
		}
	}
	return true;
except:
	pop_string(reader, predicate.value);
	return false;
}

/**
   Recursive helper for read_collection.

   At `)': consume it, emit (ctx.subject, rdf:rest, rdf:nil) and return
   false.  Otherwise: generate a new list cell, emit
   (ctx.subject, rdf:rest, cell), read one object as the cell's rdf:first and
   recurse for the remainder.

   The closing statement is emitted in ctx.graph like every other statement
   of the list (it used to pass a NULL graph), and the generated cell id is
   popped even when its rdf:rest statement cannot be emitted.

   @return true if an item was read, false at the end of the collection or on
   failure.  The two false cases are not distinguishable by the caller.
*/
static bool
read_collection_rec(SerdReader reader, ReadContext ctx)
{
	read_ws_star(reader);
	if (peek_byte(reader) == ')') {
		eat_byte(reader, ')');
		TRY_RET(emit_statement(reader, ctx.graph, ctx.subject,
		                       &reader->rdf_rest, &reader->rdf_nil));
		return false;
	} else {
		const Node rest = make_node(SERD_BLANK_ID, blank_id(reader), 0, 0);
		if (!emit_statement(reader, ctx.graph, ctx.subject,
		                    &reader->rdf_rest, &rest)) {
			pop_string(reader, rest.value);
			return false;
		}
		ctx.subject   = &rest;
		ctx.predicate = &reader->rdf_first;
		const bool have_item = read_object(reader, ctx);
		if (have_item) {
			read_collection_rec(reader, ctx);
		}
		pop_string(reader, rest.value);
		return have_item;
	}
}

// [22] itemList   ::= object+
// [23] collection ::= '(' itemList?
')' static bool read_collection(SerdReader reader, ReadContext ctx, Node* dest) { TRY_RET(eat_byte(reader, '(')); read_ws_star(reader); if (peek_byte(reader) == ')') { // Empty collection eat_byte(reader, ')'); *dest = reader->rdf_nil; return true; } *dest = make_node(SERD_BLANK_ID, blank_id(reader), 0, 0); ctx.subject = dest; ctx.predicate = &reader->rdf_first; if (!read_object(reader, ctx)) { return error(reader, "unexpected end of collection\n"); } ctx.subject = dest; return read_collection_rec(reader, ctx); } // [11] subject ::= resource | blank static Node read_subject(SerdReader reader, ReadContext ctx) { Node subject = INTERNAL_NODE_NULL; switch (peek_byte(reader)) { case '[': case '(': case '_': read_blank(reader, ctx, &subject); break; default: read_resource(reader, &subject); } return subject; } // Spec: [6] triples ::= subject predicateObjectList // Impl: [6] triples ::= subject ws+ predicateObjectList static bool read_triples(SerdReader reader, ReadContext ctx) { const Node subject = read_subject(reader, ctx); bool ret = false; if (subject.value != 0) { ctx.subject = &subject; TRY_RET(read_ws_plus(reader)); ret = read_predicateObjectList(reader, ctx); pop_string(reader, subject.value); } ctx.subject = ctx.predicate = 0; return ret; } // [5] base ::= '@base' ws+ uriref static bool read_base(SerdReader reader) { // `@' is already eaten in read_directive eat_string(reader, "base", 4); TRY_RET(read_ws_plus(reader)); Ref uri; TRY_RET(uri = read_uriref(reader)); const SerdNode uri_node = public_node_from_ref(reader, SERD_URI, uri); reader->base_sink(reader->handle, &uri_node); pop_string(reader, uri); return true; } // Spec: [4] prefixID ::= '@prefix' ws+ prefixName? ':' uriref // Impl: [4] prefixID ::= '@prefix' ws+ prefixName? 
':' ws* uriref static bool read_prefixID(SerdReader reader) { // `@' is already eaten in read_directive eat_string(reader, "prefix", 6); TRY_RET(read_ws_plus(reader)); bool ret = false; Ref name = read_prefixName(reader); if (!name) { name = push_string(reader, "", 1); } TRY_THROW(eat_byte(reader, ':') == ':'); read_ws_star(reader); Ref uri = 0; TRY_THROW(uri = read_uriref(reader)); const SerdNode name_node = public_node_from_ref(reader, SERD_LITERAL, name); const SerdNode uri_node = public_node_from_ref(reader, SERD_URI, uri); ret = reader->prefix_sink(reader->handle, &name_node, &uri_node); pop_string(reader, uri); except: pop_string(reader, name); return ret; } // [3] directive ::= prefixID | base static bool read_directive(SerdReader reader) { eat_byte(reader, '@'); switch (peek_byte(reader)) { case 'b': return read_base(reader); case 'p': return read_prefixID(reader); default: return error(reader, "illegal directive\n"); } } // Spec: [1] statement ::= directive '.' | triples '.' | ws+ // Impl: [1] statement ::= directive ws* '.' | triples ws* '.' 
| ws+ static bool read_statement(SerdReader reader) { ReadContext ctx = { 0, 0, 0 }; read_ws_star(reader); if (reader->eof) { return true; } switch (peek_byte(reader)) { case '@': TRY_RET(read_directive(reader)); break; default: TRY_RET(read_triples(reader, ctx)); break; } read_ws_star(reader); return eat_byte(reader, '.'); } // [1] turtleDoc ::= statement static bool read_turtleDoc(SerdReader reader) { while (!reader->eof) { TRY_RET(read_statement(reader)); } return true; } SERD_API SerdReader serd_reader_new(SerdSyntax syntax, void* handle, SerdBaseSink base_sink, SerdPrefixSink prefix_sink, SerdStatementSink statement_sink, SerdEndSink end_sink) { const Cursor cur = { NULL, 0, 0 }; SerdReader me = malloc(sizeof(struct SerdReaderImpl)); me->handle = handle; me->base_sink = base_sink; me->prefix_sink = prefix_sink; me->statement_sink = statement_sink; me->end_sink = end_sink; me->fd = 0; me->stack = serd_stack_new(STACK_PAGE_SIZE); me->cur = cur; me->blank_prefix = NULL; me->next_id = 1; me->read_buf = 0; me->read_head = 0; me->eof = false; #ifdef SERD_STACK_CHECK me->alloc_stack = 0; me->n_allocs = 0; #endif #define RDF_FIRST NS_RDF "first" #define RDF_REST NS_RDF "rest" #define RDF_NIL NS_RDF "nil" me->rdf_first = make_node(SERD_URI, push_string(me, RDF_FIRST, 49), 0, 0); me->rdf_rest = make_node(SERD_URI, push_string(me, RDF_REST, 48), 0, 0); me->rdf_nil = make_node(SERD_URI, push_string(me, RDF_NIL, 47), 0, 0); return me; } SERD_API void serd_reader_free(SerdReader reader) { SerdReader const me = (SerdReader)reader; pop_string(me, me->rdf_nil.value); pop_string(me, me->rdf_rest.value); pop_string(me, me->rdf_first.value); #ifdef SERD_STACK_CHECK free(me->alloc_stack); #endif free(me->stack.buf); free(me); } SERD_API void serd_reader_set_blank_prefix(SerdReader reader, const uint8_t* prefix) { reader->blank_prefix = prefix; } SERD_API bool serd_reader_read_file(SerdReader me, FILE* file, const uint8_t* name) { const Cursor cur = { name, 1, 1 }; me->fd = file; 
me->read_buf = (uint8_t*)malloc(READ_BUF_LEN * 2); me->read_head = 0; me->cur = cur; me->from_file = true; me->eof = false; /* Read into the second page of the buffer. Occasionally peek_string will move the read_head to before this point when readahead causes a page fault. */ memset(me->read_buf, '\0', READ_BUF_LEN * 2); me->read_buf += READ_BUF_LEN; const bool ret = !page(me) || read_turtleDoc(me); free(me->read_buf - READ_BUF_LEN); me->fd = 0; me->read_buf = NULL; return ret; } SERD_API bool serd_reader_read_string(SerdReader me, const uint8_t* utf8) { const Cursor cur = { (const uint8_t*)"(string)", 1, 1 }; me->read_buf = (uint8_t*)utf8; me->read_head = 0; me->cur = cur; me->from_file = false; me->eof = false; const bool ret = read_turtleDoc(me); me->read_buf = NULL; return ret; } SERD_API SerdReadState serd_read_state_new(SerdEnv env, const uint8_t* base_uri_str) { SerdReadState state = malloc(sizeof(struct SerdReadStateImpl)); SerdURI base_base_uri = SERD_URI_NULL; state->env = env; state->base_uri_node = serd_node_new_uri_from_string( base_uri_str, &base_base_uri, &state->base_uri); return state; } SERD_API void serd_read_state_free(SerdReadState state) { serd_node_free(&state->base_uri_node); free(state); } SERD_API SerdNode serd_read_state_expand(SerdReadState state, const SerdNode* node) { if (node->type == SERD_CURIE) { SerdChunk prefix; SerdChunk suffix; serd_env_expand(state->env, node, &prefix, &suffix); SerdNode ret = { SERD_URI, prefix.len + suffix.len + 1, prefix.len + suffix.len, // FIXME: UTF-8 NULL }; ret.buf = malloc(ret.n_bytes); snprintf((char*)ret.buf, ret.n_bytes, "%s%s", prefix.buf, suffix.buf); return ret; } else if (node->type == SERD_URI) { SerdURI ignored; return serd_node_new_uri_from_node(node, &state->base_uri, &ignored); } else { return SERD_NODE_NULL; } } SERD_API SerdNode serd_read_state_get_base_uri(SerdReadState state, SerdURI* out) { *out = state->base_uri; return state->base_uri_node; } SERD_API bool 
serd_read_state_set_base_uri(SerdReadState state, const SerdNode* uri_node) { // Resolve base URI and create a new node and URI for it SerdURI base_uri; SerdNode base_uri_node = serd_node_new_uri_from_node( uri_node, &state->base_uri, &base_uri); if (base_uri_node.buf) { // Replace the current base URI serd_node_free(&state->base_uri_node); state->base_uri_node = base_uri_node; state->base_uri = base_uri; return true; } return false; } SERD_API bool serd_read_state_set_prefix(SerdReadState state, const SerdNode* name, const SerdNode* uri_node) { if (serd_uri_string_has_scheme(uri_node->buf)) { // Set prefix to absolute URI serd_env_add(state->env, name, uri_node); return true; } else { // Resolve relative URI and create a new node and URI for it SerdURI abs_uri; SerdNode abs_uri_node = serd_node_new_uri_from_node( uri_node, &state->base_uri, &abs_uri); if (!abs_uri_node.buf) { return false; } // Set prefix to resolved (absolute) URI serd_env_add(state->env, name, &abs_uri_node); serd_node_free(&abs_uri_node); return true; } return false; } /** * @file uri.c */ /** @file uri.c */ #include #include #include // #define URI_DEBUG 1 SERD_API bool serd_uri_string_has_scheme(const uint8_t* utf8) { // RFC3986: scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." 
) if (!is_alpha(utf8[0])) { return false; // Invalid scheme initial character, URI is relative } for (uint8_t c = *++utf8; (c = *utf8) != '\0'; ++utf8) { switch (c) { case ':': return true; // End of scheme case '+': case '-': case '.': break; // Valid scheme character, continue default: if (!is_alpha(c) && !is_digit(c)) { return false; // Invalid scheme character } } } return false; } #ifdef URI_DEBUG static void serd_uri_dump(const SerdURI* uri, FILE* file) { #define PRINT_PART(range, name) \ if (range.buf) { \ fprintf(stderr, " " name " = "); \ fwrite((range).buf, 1, (range).len, stderr); \ fprintf(stderr, "\n"); \ } PRINT_PART(uri->scheme, "scheme"); PRINT_PART(uri->authority, "authority"); PRINT_PART(uri->path_base, "path_base"); PRINT_PART(uri->path, "path"); PRINT_PART(uri->query, "query"); PRINT_PART(uri->fragment, "fragment"); } #endif SERD_API bool serd_uri_parse(const uint8_t* utf8, SerdURI* uri) { *uri = SERD_URI_NULL; assert(uri->path_base.buf == NULL); assert(uri->path_base.len == 0); assert(uri->authority.len == 0); const uint8_t* ptr = utf8; /* See http://tools.ietf.org/html/rfc3986#section-3 URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] */ /* S3.1: scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */ if (is_alpha(*ptr)) { for (uint8_t c = *++ptr; true; c = *++ptr) { switch (c) { case '\0': case '/': case '?': case '#': ptr = utf8; goto path; // Relative URI (starts with path by definition) case ':': uri->scheme.buf = utf8; uri->scheme.len = (ptr++) - utf8; goto maybe_authority; // URI with scheme case '+': case '-': case '.': continue; default: if (is_alpha(c) || is_digit(c)) { continue; } } } } /* S3.2: The authority component is preceded by a double slash ("//") and is terminated by the next slash ("/"), question mark ("?"), or number sign ("#") character, or by the end of the URI. 
*/ maybe_authority: if (*ptr == '/' && *(ptr + 1) == '/') { ptr += 2; uri->authority.buf = ptr; assert(uri->authority.len == 0); for (uint8_t c = *ptr; (c = *ptr) != '\0'; ++ptr) { switch (c) { case '/': goto path; case '?': goto query; case '#': goto fragment; default: ++uri->authority.len; } } } /* RFC3986 S3.3: The path is terminated by the first question mark ("?") or number sign ("#") character, or by the end of the URI. */ path: switch (*ptr) { case '?': goto query; case '#': goto fragment; case '\0': goto end; default: break; } uri->path.buf = ptr; uri->path.len = 0; for (uint8_t c = *ptr; (c = *ptr) != '\0'; ++ptr) { switch (c) { case '?': goto query; case '#': goto fragment; default: ++uri->path.len; } } /* RFC3986 S3.4: The query component is indicated by the first question mark ("?") character and terminated by a number sign ("#") character or by the end of the URI. */ query: if (*ptr == '?') { uri->query.buf = ++ptr; for (uint8_t c = *ptr; (c = *ptr) != '\0'; ++ptr) { switch (c) { case '#': goto fragment; default: ++uri->query.len; } } } /* RFC3986 S3.5: A fragment identifier component is indicated by the presence of a number sign ("#") character and terminated by the end of the URI. 
*/ fragment: if (*ptr == '#') { uri->fragment.buf = ptr; while (*ptr++ != '\0') { ++uri->fragment.len; } } end: #ifdef URI_DEBUG fprintf(stderr, "PARSE URI <%s>\n", utf8); serd_uri_dump(uri, stderr); fprintf(stderr, "\n"); #endif return true; } SERD_API void serd_uri_resolve(const SerdURI* r, const SerdURI* base, SerdURI* t) { // See http://tools.ietf.org/html/rfc3986#section-5.2.2 t->path_base.buf = NULL; t->path_base.len = 0; if (r->scheme.len) { *t = *r; } else { if (r->authority.len) { t->authority = r->authority; t->path = r->path; t->query = r->query; } else { t->path = r->path; if (!r->path.len) { t->path_base = base->path; if (r->query.len) { t->query = r->query; } else { t->query = base->query; } } else { if (r->path.buf[0] != '/') { t->path_base = base->path; } t->query = r->query; } t->authority = base->authority; } t->scheme = base->scheme; t->fragment = r->fragment; } #ifdef URI_DEBUG fprintf(stderr, "RESOLVE URI\nBASE:\n"); serd_uri_dump(base, stderr); fprintf(stderr, "URI:\n"); serd_uri_dump(r, stderr); fprintf(stderr, "RESULT:\n"); serd_uri_dump(t, stderr); fprintf(stderr, "\n"); #endif } SERD_API size_t serd_uri_serialise(const SerdURI* uri, SerdSink sink, void* stream) { // See http://tools.ietf.org/html/rfc3986#section-5.3 size_t write_size = 0; #define WRITE(buf, len) \ write_size += len; \ if (len) { \ sink((const uint8_t*)buf, len, stream); \ } #define WRITE_CHAR(c) WRITE(&(c), 1) #define WRITE_COMPONENT(prefix, field, suffix) \ if ((field).len) { \ for (const uint8_t* c = (const uint8_t*)prefix; *c != '\0'; ++c) { \ WRITE(c, 1); \ } \ WRITE((field).buf, (field).len); \ for (const uint8_t* c = (const uint8_t*)suffix; *c != '\0'; ++c) { \ WRITE(c, 1); \ } \ } WRITE_COMPONENT("", uri->scheme, ":"); if (uri->authority.buf) { WRITE("//", 2); WRITE(uri->authority.buf, uri->authority.len); } if (uri->path_base.len) { if (!uri->path.buf && (uri->fragment.buf || uri->query.buf)) { WRITE_COMPONENT("", uri->path_base, ""); } else { /* Merge paths, 
removing dot components. See http://tools.ietf.org/html/rfc3986#section-5.2.3 */ const uint8_t* begin = uri->path.buf; const uint8_t* end = begin; size_t up = 1; if (begin) { // Count and skip leading dot components end = uri->path.buf + uri->path.len; for (bool done = false; !done && (begin < end);) { switch (begin[0]) { case '.': switch (begin[1]) { case '/': begin += 2; // Chop leading "./" break; case '.': ++up; switch (begin[2]) { case '/': begin += 3; // Chop lading "../" break; default: begin += 2; // Chop leading ".." } break; default: ++begin; // Chop leading "." } break; case '/': if (begin[1] == '/') { ++begin; // Replace leading "//" with "/" break; } // else fall through default: done = true; // Finished chopping dot components } } if (uri->path.buf && uri->path_base.buf) { // Find the up'th last slash const uint8_t* base_last = uri->path_base.buf + uri->path_base.len - 1; do { if (*base_last == '/') { --up; } } while (up > 0 && (--base_last > uri->path_base.buf)); // Write base URI prefix const size_t base_len = base_last - uri->path_base.buf + 1; WRITE(uri->path_base.buf, base_len); } else { // Relative path is just query or fragment, append it to full base URI WRITE_COMPONENT("", uri->path_base, ""); } // Write URI suffix WRITE(begin, end - begin); } } } else { WRITE_COMPONENT("", uri->path, ""); } WRITE_COMPONENT("?", uri->query, ""); if (uri->fragment.buf) { // Note uri->fragment.buf includes the leading `#' WRITE_COMPONENT("", uri->fragment, ""); } return write_size; } /** * @file writer.c */ #include #include #include #include #define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" #define NS_XSD "http://www.w3.org/2001/XMLSchema#" typedef struct { SerdNode graph; SerdNode subject; SerdNode predicate; } WriteContext; static const WriteContext WRITE_CONTEXT_NULL = { { 0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0} }; struct SerdWriterImpl { SerdSyntax syntax; SerdStyle style; SerdEnv env; SerdURI base_uri; SerdStack anon_stack; SerdSink sink; void* 
stream; WriteContext context; unsigned indent; }; typedef enum { WRITE_NORMAL, WRITE_URI, WRITE_STRING } TextContext; static inline WriteContext* anon_stack_top(SerdWriter writer) { assert(!serd_stack_is_empty(&writer->anon_stack)); return (WriteContext*)(writer->anon_stack.buf + writer->anon_stack.size - sizeof(WriteContext)); } static bool write_text(SerdWriter writer, TextContext ctx, const uint8_t* utf8, size_t n_bytes, uint8_t terminator) { char escape[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; for (size_t i = 0; i < n_bytes;) { uint8_t in = utf8[i++]; switch (in) { case '\\': writer->sink("\\\\", 2, writer->stream); continue; case '\n': writer->sink("\\n", 2, writer->stream); continue; case '\r': writer->sink("\\r", 2, writer->stream); continue; case '\t': writer->sink("\\t", 2, writer->stream); continue; case '"': if (terminator == '"') { writer->sink("\\\"", 2, writer->stream); continue; } // else fall-through default: break; } if (in == terminator) { snprintf(escape, 7, "\\u%04X", terminator); writer->sink(escape, 6, writer->stream); continue; } uint32_t c = 0; size_t size = 0; if ((in & 0x80) == 0) { // Starts with `0' size = 1; c = in & 0x7F; if (in_range(in, 0x20, 0x7E)) { // Printable ASCII writer->sink(&in, 1, writer->stream); continue; } } else if ((in & 0xE0) == 0xC0) { // Starts with `110' size = 2; c = in & 0x1F; } else if ((in & 0xF0) == 0xE0) { // Starts with `1110' size = 3; c = in & 0x0F; } else if ((in & 0xF8) == 0xF0) { // Starts with `11110' size = 4; c = in & 0x07; } else { fprintf(stderr, "invalid UTF-8 at offset %zu: %X\n", i, in); return false; } if (ctx == WRITE_STRING && !(writer->style & SERD_STYLE_ASCII)) { // Write UTF-8 character directly to UTF-8 output // TODO: Scan to next escape and write entire range at once writer->sink(utf8 + i - 1, size, writer->stream); i += size - 1; continue; } #define READ_BYTE() do { \ assert(i < n_bytes); \ in = utf8[i++] & 0x3f; \ c <<= 6; \ c |= in; \ } while (0) switch (size) { case 4: READ_BYTE(); 
case 3: READ_BYTE(); case 2: READ_BYTE(); } if (c < 0xFFFF) { snprintf(escape, 7, "\\u%04X", c); writer->sink(escape, 6, writer->stream); } else { snprintf(escape, 11, "\\U%08X", c); writer->sink(escape, 10, writer->stream); } } return true; } static void serd_writer_write_delim(SerdWriter writer, const uint8_t delim) { switch (delim) { case '\n': break; default: writer->sink(" ", 1, writer->stream); case '[': writer->sink(&delim, 1, writer->stream); } writer->sink("\n", 1, writer->stream); for (unsigned i = 0; i < writer->indent; ++i) { writer->sink("\t", 1, writer->stream); } } static bool write_node(SerdWriter writer, const SerdNode* node, const SerdNode* datatype, const SerdNode* lang) { SerdChunk uri_prefix; SerdChunk uri_suffix; switch (node->type) { case SERD_NOTHING: return false; case SERD_ANON_BEGIN: if (writer->syntax != SERD_NTRIPLES) { ++writer->indent; serd_writer_write_delim(writer, '['); WriteContext* ctx = (WriteContext*)serd_stack_push( &writer->anon_stack, sizeof(WriteContext)); *ctx = writer->context; writer->context.subject = *node; writer->context.predicate = SERD_NODE_NULL; break; } case SERD_ANON: if (writer->syntax != SERD_NTRIPLES) { break; } // else fall through case SERD_BLANK_ID: writer->sink("_:", 2, writer->stream); writer->sink(node->buf, node->n_bytes - 1, writer->stream); break; case SERD_CURIE: switch (writer->syntax) { case SERD_NTRIPLES: if (!serd_env_expand(writer->env, node, &uri_prefix, &uri_suffix)) { fprintf(stderr, "error: undefined namespace prefix `%s'\n", node->buf); return false; } writer->sink("<", 1, writer->stream); write_text(writer, WRITE_URI, uri_prefix.buf, uri_prefix.len, '>'); write_text(writer, WRITE_URI, uri_suffix.buf, uri_suffix.len, '>'); writer->sink(">", 1, writer->stream); break; case SERD_TURTLE: writer->sink(node->buf, node->n_bytes - 1, writer->stream); } break; case SERD_LITERAL: if (writer->syntax == SERD_TURTLE && datatype && datatype->buf) { // TODO: compare against NS_XSD prefix once if 
(!strcmp((const char*)datatype->buf, NS_XSD "boolean") || !strcmp((const char*)datatype->buf, NS_XSD "decimal") || !strcmp((const char*)datatype->buf, NS_XSD "integer")) { writer->sink(node->buf, node->n_bytes - 1, writer->stream); break; } } writer->sink("\"", 1, writer->stream); write_text(writer, WRITE_STRING, node->buf, node->n_bytes - 1, '"'); writer->sink("\"", 1, writer->stream); if (lang && lang->buf) { writer->sink("@", 1, writer->stream); writer->sink(lang->buf, lang->n_bytes - 1, writer->stream); } else if (datatype && datatype->buf) { writer->sink("^^", 2, writer->stream); write_node(writer, datatype, NULL, NULL); } break; case SERD_URI: if ((writer->syntax == SERD_TURTLE) && !strcmp((const char*)node->buf, NS_RDF "type")) { writer->sink("a", 1, writer->stream); return true; } else if ((writer->style & SERD_STYLE_CURIED) && serd_uri_string_has_scheme(node->buf)) { SerdNode prefix; SerdChunk suffix; if (serd_env_qualify(writer->env, node, &prefix, &suffix)) { write_text(writer, WRITE_URI, prefix.buf, prefix.n_bytes - 1, '>'); writer->sink(":", 1, writer->stream); write_text(writer, WRITE_URI, suffix.buf, suffix.len, '>'); return true; } } else if ((writer->style & SERD_STYLE_RESOLVED) && !serd_uri_string_has_scheme(node->buf)) { SerdURI uri; if (serd_uri_parse(node->buf, &uri)) { SerdURI abs_uri; serd_uri_resolve(&uri, &writer->base_uri, &abs_uri); writer->sink("<", 1, writer->stream); serd_uri_serialise(&abs_uri, writer->sink, writer->stream); writer->sink(">", 1, writer->stream); return true; } } writer->sink("<", 1, writer->stream); write_text(writer, WRITE_URI, node->buf, node->n_bytes - 1, '>'); writer->sink(">", 1, writer->stream); return true; } return true; } SERD_API bool serd_writer_write_statement(SerdWriter writer, const SerdNode* graph, const SerdNode* subject, const SerdNode* predicate, const SerdNode* object, const SerdNode* object_datatype, const SerdNode* object_lang) { assert(subject && predicate && object); switch (writer->syntax) { 
case SERD_NTRIPLES: write_node(writer, subject, NULL, NULL); writer->sink(" ", 1, writer->stream); write_node(writer, predicate, NULL, NULL); writer->sink(" ", 1, writer->stream); if (!write_node(writer, object, object_datatype, object_lang)) { return false; } writer->sink(" .\n", 3, writer->stream); return true; case SERD_TURTLE: break; } if (subject->buf == writer->context.subject.buf) { if (predicate->buf == writer->context.predicate.buf) { // Abbreviate S P ++writer->indent; serd_writer_write_delim(writer, ','); write_node(writer, object, object_datatype, object_lang); --writer->indent; } else { // Abbreviate S if (writer->context.predicate.buf) { serd_writer_write_delim(writer, ';'); } else { ++writer->indent; serd_writer_write_delim(writer, '\n'); } write_node(writer, predicate, NULL, NULL); writer->context.predicate = *predicate; writer->sink(" ", 1, writer->stream); write_node(writer, object, object_datatype, object_lang); } } else { if (writer->context.subject.buf) { if (writer->indent > 0) { --writer->indent; } if (serd_stack_is_empty(&writer->anon_stack)) { serd_writer_write_delim(writer, '.'); serd_writer_write_delim(writer, '\n'); } } if (subject->type == SERD_ANON_BEGIN) { writer->sink("[ ", 2, writer->stream); ++writer->indent; WriteContext* ctx = (WriteContext*)serd_stack_push( &writer->anon_stack, sizeof(WriteContext)); *ctx = writer->context; } else { write_node(writer, subject, NULL, NULL); ++writer->indent; if (subject->type != SERD_ANON_BEGIN && subject->type != SERD_ANON) { serd_writer_write_delim(writer, '\n'); } } writer->context.subject = *subject; writer->context.predicate = SERD_NODE_NULL; write_node(writer, predicate, NULL, NULL); writer->context.predicate = *predicate; writer->sink(" ", 1, writer->stream); write_node(writer, object, object_datatype, object_lang); } const WriteContext new_context = { graph ? 
*graph : SERD_NODE_NULL, *subject, *predicate }; writer->context = new_context; return true; } SERD_API bool serd_writer_end_anon(SerdWriter writer, const SerdNode* node) { if (writer->syntax == SERD_NTRIPLES) { return true; } if (serd_stack_is_empty(&writer->anon_stack)) { fprintf(stderr, "unexpected end of anonymous node\n"); return false; } assert(writer->indent > 0); --writer->indent; serd_writer_write_delim(writer, '\n'); writer->sink("]", 1, writer->stream); writer->context = *anon_stack_top(writer); serd_stack_pop(&writer->anon_stack, sizeof(WriteContext)); if (!writer->context.subject.buf) { // End of anonymous subject writer->context.subject = *node; } return true; } SERD_API void serd_writer_finish(SerdWriter writer) { if (writer->context.subject.buf) { writer->sink(" .\n", 3, writer->stream); writer->context.subject.buf = NULL; } } SERD_API SerdWriter serd_writer_new(SerdSyntax syntax, SerdStyle style, SerdEnv env, const SerdURI* base_uri, SerdSink sink, void* stream) { const WriteContext context = WRITE_CONTEXT_NULL; SerdWriter writer = malloc(sizeof(struct SerdWriterImpl)); writer->syntax = syntax; writer->style = style; writer->env = env; writer->base_uri = base_uri ? 
*base_uri : SERD_URI_NULL; writer->anon_stack = serd_stack_new(sizeof(WriteContext)); writer->sink = sink; writer->stream = stream; writer->context = context; writer->indent = 0; return writer; } SERD_API void serd_writer_set_base_uri(SerdWriter writer, const SerdURI* uri) { writer->base_uri = *uri; if (writer->syntax != SERD_NTRIPLES) { if (writer->context.graph.buf || writer->context.subject.buf) { writer->sink(" .\n\n", 4, writer->stream); writer->context = WRITE_CONTEXT_NULL; } writer->sink("@base <", 7, writer->stream); serd_uri_serialise(uri, writer->sink, writer->stream); writer->sink("> .\n", 4, writer->stream); } writer->context = WRITE_CONTEXT_NULL; } SERD_API bool serd_writer_set_prefix(SerdWriter writer, const SerdNode* name, const SerdNode* uri) { if (writer->syntax != SERD_NTRIPLES) { if (writer->context.graph.buf || writer->context.subject.buf) { writer->sink(" .\n\n", 4, writer->stream); writer->context = WRITE_CONTEXT_NULL; } writer->sink("@prefix ", 8, writer->stream); writer->sink(name->buf, name->n_bytes - 1, writer->stream); writer->sink(": <", 3, writer->stream); write_text(writer, WRITE_URI, uri->buf, uri->n_bytes - 1, '>'); writer->sink("> .\n", 4, writer->stream); } writer->context = WRITE_CONTEXT_NULL; return true; } SERD_API void serd_writer_free(SerdWriter writer) { SerdWriter const me = (SerdWriter)writer; serd_writer_finish(me); serd_stack_free(&writer->anon_stack); free(me); } a> 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 
/* (Extraction residue removed: a run of bare line numbers, 1959-2701, from the HTML listing's gutter. It carried no source content.) */