netatalk  4.4.0
Free and Open Source Apple Filing Protocol (AFP) Server
Loading...
Searching...
No Matches
unicode.h File Reference
#include <errno.h>
#include <stdint.h>
#include <sys/param.h>

Go to the source code of this file.

Data Structures

struct  atalk_iconv_t
struct  charset_functions

Macros

#define ucs2_t   uint16_t
#define SAFE_FREE(x)
#define EILSEQ   84
#define CHARSET_CLIENT   1
#define CHARSET_VOLUME   2
#define CHARSET_PRECOMPOSED   4
#define CHARSET_DECOMPOSED   8
#define CHARSET_MULTIBYTE   16
#define CHARSET_WIDECHAR   32
#define CHARSET_ICONV   64
#define IGNORE_CHAR   '_'
#define CONV_IGNORE   (1<<0)
#define CONV_ESCAPEHEX   (1<<1)
#define CONV_ESCAPEDOTS   (1<<2)
#define CONV_UNESCAPEHEX   (1<<3)
#define CONV_TOUPPER   (1<<4)
#define CONV_TOLOWER   (1<<5)
#define CONV_PRECOMPOSE   (1<<6)
#define CONV_DECOMPOSE   (1<<7)
#define CONV_FORCE   (1<<8)
#define CONV__EILSEQ   (1<<9)
#define CONV_REQMANGLE   (1<<14)
#define CONV_REQESCAPE   (1<<15)
#define NUM_CHARSETS   5

Enumerations

enum  charset_t {
  CH_UCS2 = 0 , CH_UTF8 = 1 , CH_MAC = 2 , CH_UNIX = 3 ,
  CH_UTF8_MAC = 4
}

Functions

atalk_iconv_t atalk_iconv_open (const char *, const char *)
size_t atalk_iconv (atalk_iconv_t, const char **, size_t *, char **, size_t *)
 This is a simple portable iconv() implementation.
int atalk_iconv_close (atalk_iconv_t)
struct charset_functions * find_charset_functions (const char *)
int atalk_register_charset (struct charset_functions *)
ucs2_t toupper_w (ucs2_t)
uint32_t toupper_sp (uint32_t)
ucs2_t tolower_w (ucs2_t)
uint32_t tolower_sp (uint32_t)
int strupper_w (ucs2_t *)
 Convert a string to upper case.
int strlower_w (ucs2_t *)
 Convert a string to lower case.
size_t strlen_w (const ucs2_t *)
 wide strlen()
size_t strnlen_w (const ucs2_t *, size_t)
 wide strnlen()
ucs2_t * strchr_w (const ucs2_t *, ucs2_t)
 wide strchr()
ucs2_t * strcasechr_w (const ucs2_t *s, ucs2_t c)
 wide strcasechr()
int strcmp_w (const ucs2_t *, const ucs2_t *)
 wide strcmp()
int strncmp_w (const ucs2_t *, const ucs2_t *, size_t)
 wide strncmp()
int strcasecmp_w (const ucs2_t *, const ucs2_t *)
 wide strcasecmp()
int strncasecmp_w (const ucs2_t *, const ucs2_t *, size_t)
 wide strncasecmp()
ucs2_t * strstr_w (const ucs2_t *s, const ucs2_t *ins)
 wide strstr()
ucs2_t * strcasestr_w (const ucs2_t *, const ucs2_t *)
 wide strcasestr()
ucs2_t * strndup_w (const ucs2_t *, size_t)
 wide strndup()
ucs2_t * strdup_w (const ucs2_t *)
 wide strdup()
size_t precompose_w (ucs2_t *, size_t, ucs2_t *, size_t *)
 pre|decomposition
size_t decompose_w (ucs2_t *, size_t, ucs2_t *, size_t *)
int set_charset_name (charset_t, const char *)
void free_charset_names (void)
void init_iconv (void)
 Initialize iconv conversion descriptors.
size_t convert_string (charset_t, charset_t, void const *, size_t, void *, size_t)
size_t convert_string_allocate (charset_t, charset_t, void const *, size_t, char **)
size_t charset_strupper (charset_t, const char *, size_t, char *, size_t)
size_t charset_strlower (charset_t, const char *, size_t, char *, size_t)
size_t ucs2_to_charset_allocate (charset_t, char **dest, const ucs2_t *src)
size_t ucs2_to_charset (charset_t, const ucs2_t *src, char *dest, size_t)
 Copy a string from a UCS2 src to a unix char * destination, allocating a buffer.
size_t convert_charset (charset_t, charset_t, charset_t, const char *, size_t, char *, size_t, uint16_t *)
size_t charset_precompose (charset_t, char *, size_t, char *, size_t)
size_t charset_decompose (charset_t, char *, size_t, char *, size_t)
charset_t add_charset (const char *name)

Macro Definition Documentation

◆ CHARSET_CLIENT

#define CHARSET_CLIENT   1

◆ CHARSET_DECOMPOSED

#define CHARSET_DECOMPOSED   8

◆ CHARSET_ICONV

#define CHARSET_ICONV   64

◆ CHARSET_MULTIBYTE

#define CHARSET_MULTIBYTE   16

◆ CHARSET_PRECOMPOSED

#define CHARSET_PRECOMPOSED   4

◆ CHARSET_VOLUME

#define CHARSET_VOLUME   2

◆ CHARSET_WIDECHAR

#define CHARSET_WIDECHAR   32

◆ CONV__EILSEQ

#define CONV__EILSEQ   (1<<9)

ignore EILSEQ, replace with IGNORE_CHAR (try UCS2)

◆ CONV_DECOMPOSE

#define CONV_DECOMPOSE   (1<<7)

decompose

◆ CONV_ESCAPEDOTS

#define CONV_ESCAPEDOTS   (1<<2)

escape leading dots with :2600

◆ CONV_ESCAPEHEX

#define CONV_ESCAPEHEX   (1<<1)

escape unconvertible chars with :[UCS2HEX], also escape '/'. Escape ':' if also CONV_ALLOW_COLON, else ':' raises EILSEQ

◆ CONV_FORCE

#define CONV_FORCE   (1<<8)

force conversion

◆ CONV_IGNORE

#define CONV_IGNORE   (1<<0)

return the first convertible characters.

◆ CONV_PRECOMPOSE

#define CONV_PRECOMPOSE   (1<<6)

precompose

◆ CONV_REQESCAPE

#define CONV_REQESCAPE   (1<<15)

escape unconvertible chars with :[UCS2HEX]

◆ CONV_REQMANGLE

#define CONV_REQMANGLE   (1<<14)

mangling of returned name is required

◆ CONV_TOLOWER

#define CONV_TOLOWER   (1<<5)

convert to lowercase

◆ CONV_TOUPPER

#define CONV_TOUPPER   (1<<4)

convert to UPPERcase

◆ CONV_UNESCAPEHEX

#define CONV_UNESCAPEHEX   (1<<3)

◆ EILSEQ

#define EILSEQ   84

Illegal byte sequence.

◆ IGNORE_CHAR

#define IGNORE_CHAR   '_'

◆ NUM_CHARSETS

#define NUM_CHARSETS   5

◆ SAFE_FREE

#define SAFE_FREE ( x)
Value:
do { if ((x) != NULL) {free(x); x=NULL;} } while(0)

◆ ucs2_t

#define ucs2_t   uint16_t

Enumeration Type Documentation

◆ charset_t

enum charset_t

this defines the charset types used in netatalk

Enumerator
CH_UCS2 
CH_UTF8 
CH_MAC 
CH_UNIX 
CH_UTF8_MAC 

Function Documentation

◆ add_charset()

charset_t add_charset ( const char * name)
extern

◆ atalk_iconv()

size_t atalk_iconv ( atalk_iconv_t cd,
const char ** inbuf,
size_t * inbytesleft,
char ** outbuf,
size_t * outbytesleft )
extern

This is a simple portable iconv() implementation.

It only knows about a very small number of character sets - just enough that netatalk works on systems that don't have iconv.

◆ atalk_iconv_close()

int atalk_iconv_close ( atalk_iconv_t cd)
extern

simple iconv_close() wrapper

◆ atalk_iconv_open()

atalk_iconv_t atalk_iconv_open ( const char * tocode,
const char * fromcode )
extern

simple iconv_open() wrapper

◆ atalk_register_charset()

int atalk_register_charset ( struct charset_functions * funcs)
extern

◆ charset_decompose()

size_t charset_decompose ( charset_t ch,
char * src,
size_t inlen,
char * dst,
size_t outlen )
extern

◆ charset_precompose()

size_t charset_precompose ( charset_t ch,
char * src,
size_t inlen,
char * dst,
size_t outlen )
extern

◆ charset_strlower()

size_t charset_strlower ( charset_t ch,
const char * src,
size_t srclen,
char * dest,
size_t destlen )
extern

◆ charset_strupper()

size_t charset_strupper ( charset_t ch,
const char * src,
size_t srclen,
char * dest,
size_t destlen )
extern

◆ convert_charset()

size_t convert_charset ( charset_t from_set,
charset_t to_set,
charset_t cap_charset,
const char * src,
size_t src_len,
char * dest,
size_t dest_len,
uint16_t * flags )
extern
Bug
the size is a mess we really need a malloc/free logic
Note
dest size must be dest_len +2

◆ convert_string()

size_t convert_string ( charset_t from,
charset_t to,
void const * src,
size_t srclen,
void * dest,
size_t destlen )
extern

◆ convert_string_allocate()

size_t convert_string_allocate ( charset_t from,
charset_t to,
void const * src,
size_t srclen,
char ** dest )
extern

◆ decompose_w()

size_t decompose_w ( ucs2_t * name,
size_t inplen,
ucs2_t * comp,
size_t * outlen )
extern

◆ find_charset_functions()

struct charset_functions * find_charset_functions ( const char * name)
extern

◆ free_charset_names()

void free_charset_names ( void )
extern

◆ init_iconv()

void init_iconv ( void )
extern

Initialize iconv conversion descriptors.

This is called the first time it is needed, and also called again every time the configuration is reloaded, because the charset or codepage might have changed.

◆ precompose_w()

size_t precompose_w ( ucs2_t * name,
size_t inplen,
ucs2_t * comp,
size_t * outlen )
extern

pre|decomposition

We can't use a static buffer here (e.g. static char comp[MAXPATHLEN +1];) because this code needs to be reentrant.

We don't implement Singleton and Canonical Ordering. We ignore CompositionExclusions.txt because they cause the problem of the roundtrip such as Dancing Icon.

exclude U2000-U2FFF, UFE30-UFE4F and U2F800-U2FA1F ranges in precompose.h from composition according to AFP 3.x spec

◆ set_charset_name()

int set_charset_name ( charset_t ch,
const char * name )
extern

◆ strcasechr_w()

ucs2_t * strcasechr_w ( const ucs2_t * s,
ucs2_t c )
extern

wide strcasechr()

Note
separately process BMP and surrogate pair

◆ strcasecmp_w()

int strcasecmp_w ( const ucs2_t * a,
const ucs2_t * b )
extern

wide strcasecmp()

case insensitive string comparison

Note
surrogate pair support

◆ strcasestr_w()

ucs2_t * strcasestr_w ( const ucs2_t * s,
const ucs2_t * ins )
extern

wide strcasestr()

Note
surrogate pair support

◆ strchr_w()

ucs2_t * strchr_w ( const ucs2_t * s,
ucs2_t c )
extern

wide strchr()

Note
hi and lo of surrogate pair are separately processed.

◆ strcmp_w()

int strcmp_w ( const ucs2_t * a,
const ucs2_t * b )
extern

wide strcmp()

Note
no problem of surrogate pair

◆ strdup_w()

ucs2_t * strdup_w ( const ucs2_t * src)
extern

wide strdup()

duplicate string

Note
no problem of surrogate pair

◆ strlen_w()

size_t strlen_w ( const ucs2_t * src)
extern

wide strlen()

Count the number of characters in a UTF-16 string.

Note
one surrogate pair is two characters.

◆ strlower_w()

int strlower_w ( ucs2_t * s)
extern

Convert a string to lower case.

Returns
True if any char is converted
Note
surrogate pair support

◆ strncasecmp_w()

int strncasecmp_w ( const ucs2_t * a,
const ucs2_t * b,
size_t len )
extern

wide strncasecmp()

case insensitive string comparison, length limited

Note
compares up to 'len+1' if 'len' would isolate one half of a surrogate pair

◆ strncmp_w()

int strncmp_w ( const ucs2_t * a,
const ucs2_t * b,
size_t len )
extern

wide strncmp()

Note
no problem of surrogate pair

◆ strndup_w()

ucs2_t * strndup_w ( const ucs2_t * src,
size_t len )
extern

wide strndup()

duplicate string

Note
does not check for isolation of a surrogate pair
if len == 0 then duplicate the whole string

◆ strnlen_w()

size_t strnlen_w ( const ucs2_t * src,
size_t max )
extern

wide strnlen()

Count up to max number of characters in a UTF-16 string.

Note
one surrogate pair is two characters.

◆ strstr_w()

ucs2_t * strstr_w ( const ucs2_t * s,
const ucs2_t * ins )
extern

wide strstr()

Note
no problem of surrogate pair

◆ strupper_w()

int strupper_w ( ucs2_t * s)
extern

Convert a string to upper case.

Returns
True if any char is converted
Note
surrogate pair support

◆ tolower_sp()

uint32_t tolower_sp ( uint32_t val)
extern

◆ tolower_w()

ucs2_t tolower_w ( ucs2_t val)
extern

◆ toupper_sp()

uint32_t toupper_sp ( uint32_t val)
extern

◆ toupper_w()

ucs2_t toupper_w ( ucs2_t val)
extern

◆ ucs2_to_charset()

size_t ucs2_to_charset ( charset_t ch,
const ucs2_t * src,
char * dest,
size_t destlen )
extern

Copy a string from a UCS2 src to a unix char * destination, allocating a buffer.

Parameters
ch: destination character set
src: source UCS2 string
dest: always set at least to NULL
destlen: maximum length of destination buffer
Returns
The number of bytes occupied by the string in the destination

◆ ucs2_to_charset_allocate()

size_t ucs2_to_charset_allocate ( charset_t ch,
char ** dest,
const ucs2_t * src )
extern