|
libu8
|
These functions and macros support I/O with UTF-8 streams. More...
Data Structures | |
| struct | U8_STREAM |
| struct U8_STREAM is an abstract structural type which is extended by U8_INPUT and U8_OUTPUT. More... | |
| struct | U8_OUTPUT |
| struct U8_OUTPUT is a structural type which provides for UTF-8 output. More... | |
| struct | U8_INPUT |
| struct U8_INPUT is the structure used for stream-based UTF-8 input. More... | |
Defines | |
| #define | U8_STREAM_MALLOCD 0x01 |
| This bit describes whether the stream is mallocd or static. | |
| #define | U8_OUTPUT_STREAM 0x02 |
| This bit describes whether the stream is an output or input stream. | |
| #define | U8_STREAM_GROWS 0x04 |
| This bit describes whether the stream can grow to accommodate more input or output. | |
| #define | U8_STREAM_OWNS_BUF 0x08 |
| This bit describes whether the stream is responsible for freeing its buffer when closed. | |
| #define | U8_STREAM_OWNS_XBUF 0x10 |
| This bit describes whether an XFILE stream is responsible for freeing its translation buffer when closed. | |
| #define | U8_STREAM_OWNS_SOCKET 0x20 |
| This bit describes whether an XFILE stream is responsible for closing its socket/file descriptor when closed. | |
| #define | U8_STREAM_CAN_SEEK 0x40 |
| This bit describes whether seeks are possible on an XFILE's underlying socket/file descriptor. | |
| #define | U8_STREAM_CRLFS 0x80 |
| This bit describes whether the XFILE should do CRLF translation. | |
| #define | U8_STREAM_TACITURN 0x100 |
| This bit describes a verbosity level for the stream. | |
| #define | U8_STREAM_UTF8WARN 0x200 |
| This bit describes whether the stream should emit warnings for invalid UTF-8 bytes or sequences. | |
| #define | U8_INIT_OUTPUT(s, sz) U8_INIT_OUTPUT_X((s),sz,NULL,U8_STREAM_GROWS) |
| Initializes a string output stream with a particular initial size. This always allocates a buffer but arranges for the buffer to grow. | |
| #define | U8_INIT_OUTPUT_BUF(s, sz, buf) U8_INIT_OUTPUT_X((s),sz,buf,U8_STREAM_GROWS) |
| Initializes a string output stream with an initial buffer. | |
| #define | U8_INIT_FIXED_OUTPUT(s, sz, buf) U8_INIT_OUTPUT_X(s,sz,buf,0) |
| Initializes a string output stream with a fixed-size buffer. This stream will discard content after the buffer is exhausted. | |
| #define | u8_outstring(s) ((s)->u8_outbuf) |
| Returns the string content of the output stream. | |
| #define | u8_outlen(s) (((s)->u8_outptr)-((s)->u8_outbuf)) |
| Returns the length in bytes of the string content of the output stream. | |
| #define | U8_INIT_INPUT(s, n) U8_INIT_INPUT_X(s,n,NULL,0) |
| Initializes an input stream with a buffer of n bytes. This allocates the buffer and sets its U8_STREAM_OWNS_BUF bit. | |
| #define | u8_getrec(f, eos) (u8_gets_x(NULL,0,f,eos,NULL)) |
| Returns a UTF-8 string from f terminated by eos or the end of the stream. | |
| #define | u8_gets(f) (u8_gets_x(NULL,0,f,"\n",NULL)) |
| Returns a UTF-8 string from f terminated by a newline or the end of the stream. | |
Typedefs | |
| typedef struct U8_STREAM | U8_STREAM |
| struct U8_STREAM is an abstract structural type which is extended by U8_INPUT and U8_OUTPUT. | |
| typedef struct U8_OUTPUT | U8_OUTPUT |
| struct U8_OUTPUT is a structural type which provides for UTF-8 output. | |
| typedef struct U8_INPUT | U8_INPUT |
| struct U8_INPUT is the structure used for stream-based UTF-8 input. | |
Functions | |
| U8_EXPORT U8_OUTPUT * | u8_open_output_string (int initial_size) |
| Allocates and opens an output string with an initial size. | |
| U8_EXPORT U8_INPUT * | u8_open_input_string (u8_string input) |
| Opens an input stream reading characters from the UTF-8 string input. | |
| U8_EXPORT u8_string | u8_gets_x (u8_byte *buf, int len, struct U8_INPUT *f, u8_string eos, int *sizep) |
| Reads a string from f into buf up to the string eos. | |
| U8_EXPORT int | u8_ungetc (struct U8_INPUT *f, int c) |
| Puts the character c back into the input stream f. | |
| U8_EXPORT int | u8_probec (struct U8_INPUT *f) |
| Returns the next character to be read from f. | |
| U8_EXPORT int | u8_get_entity (U8_INPUT *in) |
| Reads and interprets an XML character entity from in. | |
These functions and macros support I/O with UTF-8 streams.
The files here provide a generic buffered I/O layer and immediate operations with in-memory streams writing to UTF-8 byte buffers. Within libu8, this provides support for "xfiles" which provide automatic conversion to/from external character encodings.
| #define u8_getrec | ( | f, | |
| eos | |||
| ) | (u8_gets_x(NULL,0,f,eos,NULL)) |
Returns a UTF-8 string from f terminated by eos or the end of the stream.
The terminating sequence itself is not included in the result.
| f | a pointer to a U8_INPUT stream |
| eos | a string indicating the end of a record |
| #define u8_gets | ( | f | ) | (u8_gets_x(NULL,0,f,"\n",NULL)) |
Returns a UTF-8 string from f terminated by a newline or the end of the stream.
| f | a pointer to a U8_INPUT stream |
| #define U8_INIT_FIXED_OUTPUT | ( | s, | |
| sz, | |||
| buf | |||
| ) | U8_INIT_OUTPUT_X(s,sz,buf,0) |
Initializes a string output stream with a fixed-size buffer. This stream will discard content after the buffer is exhausted.
| s | a pointer to a U8_OUTPUT structure |
| sz | the number of bytes in the buffer |
| buf | a pointer to a byte buffer which must exist |
| #define U8_INIT_INPUT | ( | s, | |
| n | |||
| ) | U8_INIT_INPUT_X(s,n,NULL,0) |
Initializes an input stream with a buffer of n bytes. This allocates the buffer and sets its U8_STREAM_OWNS_BUF bit.
| s | a pointer to a U8_INPUT stream |
| n | the size of the buffer for the stream to use |
| #define U8_INIT_OUTPUT | ( | s, | |
| sz | |||
| ) | U8_INIT_OUTPUT_X((s),sz,NULL,U8_STREAM_GROWS) |
Initializes a string output stream with a particular initial size. This always allocates a buffer but arranges for the buffer to grow.
| s | a pointer to a U8_OUTPUT structure |
| sz | the number of bytes in the buffer |
| #define U8_INIT_OUTPUT_BUF | ( | s, | |
| sz, | |||
| buf | |||
| ) | U8_INIT_OUTPUT_X((s),sz,buf,U8_STREAM_GROWS) |
Initializes a string output stream with an initial buffer.
This will allocate a buffer if the output grows beyond the initial size.
| s | a pointer to a U8_OUTPUT structure |
| sz | the number of bytes in the buffer |
| buf | a pointer to a byte/character array with at least sz elements |
| #define u8_outlen | ( | s | ) | (((s)->u8_outptr)-((s)->u8_outbuf)) |
Returns the length in bytes of the string content of the output stream.
| #define U8_OUTPUT_STREAM 0x02 |
This bit describes whether the stream is an output or input stream.
| #define u8_outstring | ( | s | ) | ((s)->u8_outbuf) |
Returns the string content of the output stream.
| #define U8_STREAM_CAN_SEEK 0x40 |
This bit describes whether seeks are possible on an XFILE's underlying socket/file descriptor.
| #define U8_STREAM_CRLFS 0x80 |
This bit describes whether the XFILE should do CRLF translation.
This is mostly necessary for dealing with DOS/Windows, and causes newlines (0x0A) to turn into the sequence (0x0D 0x0A).
| #define U8_STREAM_GROWS 0x04 |
This bit describes whether the stream can grow to accommodate more input or output.
| #define U8_STREAM_MALLOCD 0x01 |
This bit describes whether the stream is mallocd or static.
Mallocd streams are freed when closed.
| #define U8_STREAM_OWNS_BUF 0x08 |
This bit describes whether the stream is responsible for freeing its buffer when closed.
| #define U8_STREAM_OWNS_SOCKET 0x20 |
This bit describes whether an XFILE stream is responsible for closing its socket/file descriptor when closed.
| #define U8_STREAM_OWNS_XBUF 0x10 |
This bit describes whether an XFILE stream is responsible for freeing its translation buffer when closed.
| #define U8_STREAM_TACITURN 0x100 |
This bit describes a verbosity level for the stream.
This may be consulted by I/O routines to determine detail or decoration.
| #define U8_STREAM_UTF8WARN 0x200 |
This bit describes whether the stream should emit warnings for invalid UTF-8 bytes or sequences.
struct U8_INPUT is the structure used for stream-based UTF-8 input.
This structure is subclassed by other structures which share its initial fields, allowing casting into the more general class which input functions operate over. At any point, the stream has at least one internal buffer of UTF-8 characters, pointed to by u8_inbuf and with a current cursor of u8_inptr and a limit (the end of valid data) of u8_inlim. The size of the buffer is in u8_bufsz and various other bits are stored in u8_streaminfo. If an input operation needs more than the buffered data, the u8_fillfn is called on the stream, if non-NULL. Also provided is a u8_closefn which is used whenever the application indicates that it is done with a stream.
struct U8_OUTPUT is a structural type which provides for UTF-8 output.
This structure is subclassed by other structures which share its initial fields, allowing casting into the more general class which output functions operate over. At any point, the stream has at least one internal buffer of UTF-8 characters, pointed to by u8_outbuf and with a current cursor of u8_outptr and a limit (the end of writable data) of u8_outlim. The size of the buffer is in u8_bufsz (note that this is redundant with u8_outlim) and various other bits are stored in u8_streaminfo. If an output operation overflows the buffer, the u8_flushfn (if non-NULL) is called on the stream. If space is still not available, the output buffer is automatically grown. Also provided is a u8_closefn which indicates that an application is done with a stream.
struct U8_STREAM is an abstract structural type which is extended by U8_INPUT and U8_OUTPUT.
The general layout of a stream structure is an integer buffer size and an integer which stores the streaminfo bits. These are followed by three string pointers into a UTF-8 stream, either for input or output, and by pointers to a close function and a transfer function (xfn).
| U8_EXPORT int u8_get_entity | ( | U8_INPUT * | in | ) |
Reads and interprets an XML character entity from in.
| in | a pointer to a U8_INPUT stream positioned just after the ampersand (&) of an XML character entity |
| U8_EXPORT u8_string u8_gets_x | ( | u8_byte * | buf, |
| int | len, | ||
| struct U8_INPUT * | f, | ||
| u8_string | eos, | ||
| int * | sizep | ||
| ) |
Reads a string from f into buf up to the string eos.
This stores the number of bytes read into sizep and returns a pointer to buf. If there is not enough space in buf (which has len bytes), u8_gets_x returns NULL but deposits the number of bytes needed into sizep. If buf is NULL, this function allocates a new buffer/string with enough space to hold the requested data. The terminating sequence itself is not included in the result.
| buf | a buffer/string of len bytes |
| len | the number of bytes available in buf |
| f | a pointer to a U8_INPUT stream |
| eos | a UTF-8 string indicating the "end of record" |
| sizep | a pointer to an int used to record how many bytes were read (or are needed) |
| U8_EXPORT U8_INPUT* u8_open_input_string | ( | u8_string | input | ) |
Opens an input stream reading characters from the UTF-8 string input.
This is the simplest kind of input stream and is malloc'd.
| input | a null-terminated UTF-8 string |
| U8_EXPORT U8_OUTPUT* u8_open_output_string | ( | int | initial_size | ) |
Allocates and opens an output string with an initial size.
| initial_size | the initial space allocated for the stream |
| U8_EXPORT int u8_probec | ( | struct U8_INPUT * | f | ) |
Returns the next character to be read from f.
This does not advance the buffer point and provides another way to write parsing functions which inspect and then use data.
| f | a pointer to a U8_INPUT stream |
| U8_EXPORT int u8_ungetc | ( | struct U8_INPUT * | f, |
| int | c | ||
| ) |
Puts the character c back into the input stream f.
This can be used by parsing algorithms which get a character, look at it and then put it back before calling another procedure.
| f | a pointer to a U8_INPUT stream |
| c | the Unicode code point last read from the stream |
1.7.4