#include <sqlite3.h>
#include "Unihan_phonetic-private.h"
Go to the source code of this file.
Data Structures | |
struct | Syllable |
Syllable holds transcription and tone. More... | |
Defines | |
#define | PINYIN_MAX_LENGTH 13 |
Maximum length of pinyin in byte. | |
#define | ZHUYIN_MAX_LENGTH 13 |
Maximum length of zhuyin in byte. | |
#define | TRANSCRIPTION_MAX_LENGTH 13 |
Maximum length of transcription in syllable in byte. | |
#define | ZHUYIN_SYMBOL_COUNT ZHUYIN_SYMBOL_NEUTRAL + 1 |
Total number of supported Zhuyin symbols. | |
Regex pattern and store format for Pinyin importing. | |
These define the regex pattern and out format for Pinyin related field importing.
The input string must be normalized as NFD. | |
#define | PINYIN_REGEX_IMPORT PINYIN_REGEX_IMPORT_PRIVATE |
Regex pattern for tone accents. | |
#define | PINYIN_IMPORT_SUBSTITUTE PINYIN_IMPORT_SUBSTITUTE_PRIVATE |
#define | PINYIN_IMPORT_SUBSTITUTE_TONE_ACCENT PINYIN_IMPORT_SUBSTITUTE_TONE_ACCENT_PRIVATE |
Pinyin tone accent mark pattern substitute (store format). | |
#define | PINYIN_IMPORT_SUBSTITUTE_TONE PINYIN_IMPORT_SUBSTITUTE_TONE_ACCENT "$20" |
Pinyin tone number pattern substitute (store format). | |
#define | PINYIN_IMPORT_SUBSTITUTE_XHC PINYIN_IMPORT_SUBSTITUTE_XHC_PRIVATE |
Pinyin pattern substitute (store format) for Xiandai Hanyu Cidian (XHC1983). | |
#define | PINYIN_IMPORT_SUBSTITUTE_TONE_ACCENT_XHC PINYIN_IMPORT_SUBSTITUTE_TONE_ACCENT_XHC_PRIVATE |
Pinyin tone accent mark pattern substitute (store format) for Xiandai Hanyu Cidian (XHC1983). | |
#define | PINYIN_IMPORT_SUBSTITUTE_TONE_XHC PINYIN_IMPORT_SUBSTITUTE_TONE_ACCENT_XHC "$24" |
Pinyin tone number pattern substitute (store format) for Xiandai Hanyu Cidian (XHC1983). | |
Typedefs | |
typedef gunichar | ZhuyinSymbol |
Zhuyin symbol. | |
typedef char | Zhuyin |
Pronunciation in Zhuyin UTF-8 string. | |
typedef char | Pinyin |
Pronunciation in Pinyin UTF-8 string. | |
Enumerations | |
enum | ZhuyinSymbolId { ZHUYIN_INVALID_SYMBOL = -1, ZHUYIN_SYMBOL_B, ZHUYIN_SYMBOL_P, ZHUYIN_SYMBOL_M, ZHUYIN_SYMBOL_F, ZHUYIN_SYMBOL_D, ZHUYIN_SYMBOL_T, ZHUYIN_SYMBOL_N, ZHUYIN_SYMBOL_L, ZHUYIN_SYMBOL_G, ZHUYIN_SYMBOL_K, ZHUYIN_SYMBOL_H, ZHUYIN_SYMBOL_J, ZHUYIN_SYMBOL_Q, ZHUYIN_SYMBOL_X, ZHUYIN_SYMBOL_ZH, ZHUYIN_SYMBOL_CH, ZHUYIN_SYMBOL_SH, ZHUYIN_SYMBOL_R, ZHUYIN_SYMBOL_Z, ZHUYIN_SYMBOL_C, ZHUYIN_SYMBOL_S, ZHUYIN_SYMBOL_I, ZHUYIN_SYMBOL_U, ZHUYIN_SYMBOL_U_DIAERESIS, ZHUYIN_SYMBOL_A, ZHUYIN_SYMBOL_O, ZHUYIN_SYMBOL_E, ZHUYIN_SYMBOL_E_CIRCUMFLEX, ZHUYIN_SYMBOL_AI, ZHUYIN_SYMBOL_EI, ZHUYIN_SYMBOL_AO, ZHUYIN_SYMBOL_OU, ZHUYIN_SYMBOL_AN, ZHUYIN_SYMBOL_EN, ZHUYIN_SYMBOL_ANG, ZHUYIN_SYMBOL_ENG, ZHUYIN_SYMBOL_ER, ZHUYIN_SYMBOL_1, ZHUYIN_SYMBOL_2, ZHUYIN_SYMBOL_3, ZHUYIN_SYMBOL_4, ZHUYIN_SYMBOL_NEUTRAL } |
Enumeration of Zhuyin symbols. More... | |
Functions | |
Syllable * | syllable_new () |
Create a new syllable instance. | |
Syllable * | syllable_new_pinyin (const Pinyin *pinyin_str) |
Create a new syllable instance from pinyin. | |
Syllable * | syllable_new_zhuyin (const Zhuyin *zhuyin_str) |
Create a new syllable instance from zhuyin. | |
Syllable * | syllable_clone (Syllable *syl) |
Clone a syllable instance. | |
Pinyin * | syllable_to_pinyin (Syllable *syl, PinyinFormatFlags formatFlags) |
Output syllable as pinyin. | |
Zhuyin * | syllable_to_zhuyin (Syllable *syl, ZhuyinFormatFlags formatFlags) |
Output syllable as zhuyin. | |
gboolean | syllable_is_zhuyin (Syllable *syl) |
Whether the transcription of syllable is Zhuyin. | |
gboolean | syllable_is_zhuyin_fast (Syllable *syl) |
Whether the first character of transcription of syllable is Zhuyin. | |
void | syllable_free (Syllable *syl) |
Free a syllable instance. | |
int | syllabel_regex_t_init () |
Initialize all pinyin/zhuyin related regex expressions for subsequent search. | |
Pinyin * | pinyin_new (const char *pinyin_str) |
Create a new Pinyin instance. | |
guint | pinyin_get_tone (const Pinyin *pinyin) |
Return the explicitly specified tone of Pinyin. | |
guint | pinyin_strip_tone (Pinyin *pinyin) |
Strip the tone mark of Pinyin and return the explicitly specified tone Id. | |
guint | pinyin_strip_tone_normalized (Pinyin *pinyin) |
Normalize pinyin into NFD, strip the tone mark of Pinyin, then return the explicitly specified tone Id. | |
void | pinyin_add_tone (Pinyin *pinyin, guint tone, gboolean useTrailNumber) |
Add the tone mark to pinyin. | |
void | pinyin_add_tone_formatFlags (Pinyin *pinyin, guint tone, PinyinFormatFlags formatFlags) |
Add the tone mark to pinyin, according to PinyinFormatFlags. | |
Pinyin * | pinyin_convert_formatFlags (const Pinyin *pinyin, PinyinFormatFlags formatFlags) |
Convert pinyin to new format. | |
Pinyin * | pinyin_convert_accent_format (const Pinyin *pinyin, PinyinAccentFormat toFormat, gboolean useTrailNumber) |
Convert pinyin to new accent format. | |
Zhuyin * | pinyin_to_zhuyin (const Pinyin *pinyin, ZhuyinToneMarkFormat toFormat) |
Pinyin to Zhuyin. | |
Zhuyin * | pinyin_to_zhuyin_formatFlags (const Pinyin *pinyin, ZhuyinFormatFlags formatFlags) |
Pinyin to Zhuyin, according to ZhuyinFormatFlags. | |
Zhuyin * | zhuyin_new (const char *zhuyin_str) |
Create a new Zhuyin instance. | |
guint | zhuyin_get_tone (const Zhuyin *zhuyin) |
Return the explicitly specified tone of Zhuyin. | |
guint | zhuyin_strip_tone (Zhuyin *zhuyin) |
Strip the tone mark of Zhuyin and return the explicitly specified tone Id. | |
void | zhuyin_add_tone (Zhuyin *zhuyin, guint tone, ZhuyinToneMarkFormat toFormat) |
Add the tone mark to zhuyin, according to ZhuyinToneMarkFormat. | |
void | zhuyin_add_tone_formatFlags (Zhuyin *zhuyin, guint tone, ZhuyinFormatFlags formatFlags) |
Add the tone mark to zhuyin, according to ZhuyinFormatFlags. | |
Zhuyin * | zhuyin_convert_toneMark_format (const Zhuyin *zhuyin, ZhuyinToneMarkFormat toFormat) |
Convert zhuyin to new tone mark format. | |
Zhuyin * | zhuyin_convert_formatFlags (const Zhuyin *zhuyin, ZhuyinFormatFlags formatFlags) |
Convert zhuyin to new format. | |
Pinyin * | zhuyin_to_pinyin (const Zhuyin *zhuyin, PinyinAccentFormat toFormat, gboolean useTrailNumber) |
Zhuyin to Pinyin. | |
Pinyin * | zhuyin_to_pinyin_formatFlags (const Zhuyin *zhuyin, PinyinFormatFlags formatFlags) |
Zhuyin to Pinyin, according to PinyinFormatFlags. | |
ZhuyinSymbol | zhuyinSymbol_from_id (ZhuyinSymbolId id) |
Return the Zhuyin symbol by its Id. | |
ZhuyinSymbolId | zhuyinSymbol_get_id (ZhuyinSymbol zSym) |
Return the Id of a Zhuyin symbol. | |
gboolean | zhuyinSymbol_is_initial (ZhuyinSymbol zSym) |
Whether the zhuyin symbol is an initial. | |
gboolean | zhuyinSymbol_is_medial (ZhuyinSymbol zSym) |
Whether the zhuyin symbol is a medial. | |
gboolean | zhuyinSymbol_is_final (ZhuyinSymbol zSym) |
Whether the zhuyin symbol is a final. | |
gboolean | zhuyinSymbol_is_tone (ZhuyinSymbol zSym) |
Whether the zhuyin symbol is either a toneMark or number which indicates the tone. | |
guint | zhuyinSymbol_to_toneMark_id (ZhuyinSymbol zSym) |
Return the tone id of given tone mark. | |
ZhuyinSymbol | zhuyinSymbol_from_toneMark_id (guint toneMark_id) |
Return the tone mark of given tone id. | |
void | pinyin_convert_accent_format_scalar_func (sqlite3_context *context, int argc, sqlite3_value **argv) |
Pinyin convert accent format scalar function for SQL command call. | |
void | pinyin_to_zhuyin_scalar_func (sqlite3_context *context, int argc, sqlite3_value **argv) |
Pinyin to Zhuyin converting scalar function for SQL command call. | |
void | zhuyin_convert_toneMark_format_scalar_func (sqlite3_context *context, int argc, sqlite3_value **argv) |
Zhuyin convert tone mark format scalar function for SQL command call. | |
void | zhuyin_to_pinyin_scalar_func (sqlite3_context *context, int argc, sqlite3_value **argv) |
Zhuyin to Pinyin converting scalar function for SQL command call. | |
Variables | |
const ZhuyinSymbol | ZHUYIN_SYMBOL_LIST [] |
An array of Zhuyin symbols. |
From libUnihan 1.0, pinyin is stored as lowercase, so as to be consistent with other phonetic fields. However, the field UNIHAN_FIELD_kMANDARIN still outputs uppercase to remain compatible with the original Unihan.txt.
Note that this header is included in Unihan.h, so there is no need to explicitly include it if Unihan.h is already included.
enum ZhuyinSymbolId |
This enumeration lists the Zhuyin symbols, including the symbols for tone mark. Corresponding Pinyin phonemes can also be located using these Ids.
pinyin_phoneme_get_id()
void pinyin_add_tone | ( | Pinyin * | pinyin, | |
guint | tone, | |||
gboolean | useTrailNumber | |||
) |
This function adds a tone to pinyin, as a trailing number if useTrailNumber=TRUE or as a tone accent mark if useTrailNumber=FALSE. Otherwise it is similar to pinyin_add_tone_formatFlags().
pinyin | the pinyin instance to be processed. | |
tone | the tone to be added. | |
useTrailNumber | TRUE if a trailing number is preferred, FALSE to use the traditional tone mark. |
void pinyin_add_tone_formatFlags | ( | Pinyin * | pinyin, | |
guint | tone, | |||
PinyinFormatFlags | formatFlags | |||
) |
This function adds a tone mark to pinyin according to PinyinFormatFlags; any existing tone will be removed before the new tone is added. If tone is 0, the existing tone will be removed, but no new tone will be added.
The result will be stored in pinyin, so back it up with strdup() or g_strdup() to keep the original.
pinyin | the pinyin instance to be processed. | |
tone | the tone to be added. | |
formatFlags | Pinyin Format Flags. |
Pinyin* pinyin_convert_accent_format | ( | const Pinyin * | pinyin, | |
PinyinAccentFormat | toFormat, | |||
gboolean | useTrailNumber | |||
) |
This function behaves exactly the same as pinyin_convert_formatFlags(), except that it accepts a PinyinAccentFormat and a boolean value as arguments.
Use free() or g_free() to free the newly allocated instance.
pinyin | the Pinyin to be converted. | |
toFormat | the Pinyin accent mode to be converted to. | |
useTrailNumber | TRUE to present tone as trailing number, FALSE to present tone as combining accent. |
void pinyin_convert_accent_format_scalar_func | ( | sqlite3_context * | context, | |
int | argc, | |||
sqlite3_value ** | argv | |||
) |
This function is meant to be called by sqlite3_create_function() and used in SQL command. Do not use it directly.
context | The sqlite3_context. | |
argc | Number of arguments expected. | |
argv | Arguments for this scalar function. |
Pinyin* pinyin_convert_formatFlags | ( | const Pinyin * | pinyin, | |
PinyinFormatFlags | formatFlags | |||
) |
Return a newly allocated Pinyin instance which contains the pinyin in new format, which is specified in formatFlags.
Note that this function does not try to convert toneless pinyin to 5th tone. Use syl=syllable_new_pinyin
(pinyin); syllable_to_pinyin(syl,formatFlags)
for that.
Use free() or g_free() to free the newly allocated instance.
pinyin | the Pinyin to be converted. | |
formatFlags | Pinyin Format Flags. |
guint pinyin_get_tone | ( | const Pinyin * | pinyin | ) |
This function finds and returns the explicitly specified tone of pinyin. Thus 0 will be returned if pinyin does not have any explicitly specified tone.
This function acts this way in order to accommodate the SQL LIKE query such as ... WHERE kMandarin LIKE 'KE'
.
Sometimes, 5th tone mark is omitted, please convert the value 0 to 5 if this is the case.
pinyin | the pinyin instance to be examined. |
Pinyin* pinyin_new | ( | const char * | pinyin_str | ) |
This function allocates a new Pinyin instance. If pinyin_str
is given, it will be copied to the newly allocated Pinyin instance. Note that the Pinyin instance only holds PINYIN_MAX_LENGTH bytes, including the terminating NUL ('\0') character. Longer pinyin will be truncated.
pinyin_str
to upper case.
pinyin_str | the Pinyin in string, NULL for blank instance. |
guint pinyin_strip_tone | ( | Pinyin * | pinyin | ) |
This function strips the tone mark of pinyin, then returns the stripped tone id.
Note that this function returns 0 if no tone notation is in pinyin, regardless of whether the tone is neutral or pinyin is already toneless.
pinyin | the pinyin instance to be stripped. |
guint pinyin_strip_tone_normalized | ( | Pinyin * | pinyin | ) |
This function first normalizes pinyin into Unicode Normalization Form D (NFD); otherwise it is similar to pinyin_strip_tone().
pinyin | the pinyin instance to be stripped. |
Zhuyin* pinyin_to_zhuyin | ( | const Pinyin * | pinyin, | |
ZhuyinToneMarkFormat | toFormat | |||
) |
pinyin | the Pinyin to be converted. | |
toFormat | the Zhuyin tone mark mode. |
Zhuyin* pinyin_to_zhuyin_formatFlags | ( | const Pinyin * | pinyin, | |
ZhuyinFormatFlags | formatFlags | |||
) |
pinyin | the Pinyin to be converted. | |
formatFlags | Zhuyin format flags. |
void pinyin_to_zhuyin_scalar_func | ( | sqlite3_context * | context, | |
int | argc, | |||
sqlite3_value ** | argv | |||
) |
This function is meant to be called by sqlite3_create_function() and used in SQL command. Do not use it directly.
context | The sqlite3_context. | |
argc | Number of arguments expected. | |
argv | Arguments for this scalar function. |
int syllabel_regex_t_init | ( | ) |
This function compiles regex expressions into forms that are suitable for subsequent regexec() searches.
syl | The syllable to be processed. |
void syllable_free | ( | Syllable * | syl | ) |
syl | The syllable to be freed. |
gboolean syllable_is_zhuyin | ( | Syllable * | syl | ) |
syl | The syllable to be processed. |
gboolean syllable_is_zhuyin_fast | ( | Syllable * | syl | ) |
syl | The syllable to be processed. |
Syllable* syllable_new | ( | ) |
This function creates a new, empty syllable instance. Free it with syllable_free().
This function creates a new syllable instance from the given pinyin string. The pinyin can be in any format from PinyinAccentFormat, and can be toned or toneless.
pinyin_str | pinyin string |
This function creates a new syllable instance from the given zhuyin string. The zhuyin string can be in any format from ZhuyinToneMarkFormat, and can be toned or toneless.
zhuyin_str | zhuyin string |
Pinyin* syllable_to_pinyin | ( | Syllable * | syl, | |
PinyinFormatFlags | formatFlags | |||
) |
This function returns a newly allocated Pinyin
instance as result. Note that if PINYIN_FORMAT_FLAG_STRIP_TRIVIAL_TONE
is not set in formatFlags, this function will "guess" the tone if it is not previously given.
syl | The syllable to be outputted. | |
formatFlags | The format of the outputted pinyin. |
Zhuyin* syllable_to_zhuyin | ( | Syllable * | syl, | |
ZhuyinFormatFlags | formatFlags | |||
) |
This function returns a newly allocated Zhuyin
instance as result. Note that if ZHUYIN_FORMAT_FLAG_STRIP_1ST_TONE
is not set in formatFlags, this function will "guess" the tone if it is not previously given.
syl | The syllable to be outputted. | |
formatFlags | The format of the outputted zhuyin. |
void zhuyin_add_tone | ( | Zhuyin * | zhuyin, | |
guint | tone, | |||
ZhuyinToneMarkFormat | toFormat | |||
) |
This function adds a tone to zhuyin, according to the format specified in toFormat. Otherwise it is similar to zhuyin_add_tone_formatFlags().
zhuyin | the zhuyin instance to be processed. | |
tone | the tone to be added. | |
toFormat | the Zhuyin tone mark mode to be converted to. |
void zhuyin_add_tone_formatFlags | ( | Zhuyin * | zhuyin, | |
guint | tone, | |||
ZhuyinFormatFlags | formatFlags | |||
) |
This function adds a tone mark to zhuyin according to ZhuyinFormatFlags; any existing tone will be removed before the new tone is added.
If tone is 0, the existing tone will be removed, but no new tone will be added.
The result will be stored in zhuyin, so back it up with strdup() or g_strdup() to keep the original.
zhuyin | the zhuyin instance to be processed. | |
tone | the tone to be added. | |
formatFlags | the Zhuyin format flags. |
Zhuyin* zhuyin_convert_formatFlags | ( | const Zhuyin * | zhuyin, | |
ZhuyinFormatFlags | formatFlags | |||
) |
Return a newly allocated Zhuyin instance which contains the zhuyin in new format, which is specified in formatFlags.
Note that this function does not try to convert toneless zhuyin to 5th tone. Use syl=syllable_new_zhuyin
(zhuyin); syllable_to_zhuyin(syl,formatFlags)
for that.
Use free() or g_free() to free the newly allocated instance.
zhuyin | the Zhuyin to be converted. | |
formatFlags | Zhuyin format flags. |
Zhuyin* zhuyin_convert_toneMark_format | ( | const Zhuyin * | zhuyin, | |
ZhuyinToneMarkFormat | toFormat | |||
) |
Return a newly allocated Zhuyin instance which contains the zhuyin in new tone mark format, which is specified in toFormat.
zhuyin | the Zhuyin to be converted. | |
toFormat | the Zhuyin tone mark mode to be converted to. |
void zhuyin_convert_toneMark_format_scalar_func | ( | sqlite3_context * | context, | |
int | argc, | |||
sqlite3_value ** | argv | |||
) |
This function is meant to be called by sqlite3_create_function() and used in SQL command. Do not use it directly.
context | The sqlite3_context. | |
argc | Number of arguments expected. | |
argv | Arguments for this scalar function. |
guint zhuyin_get_tone | ( | const Zhuyin * | zhuyin | ) |
This function finds and returns the explicitly specified tone of zhuyin. Thus 0 will be returned if zhuyin does not have any explicitly specified tone.
This function acts this way in order to accommodate the SQL LIKE query such as ... WHERE zhuyin LIKE 'ㄊㄧㄢ'
.
Sometimes, the 1st tone mark is omitted; please convert the value 0 to 1 if this is the case.
zhuyin | the zhuyin instance to be examined. |
Zhuyin* zhuyin_new | ( | const char * | zhuyin_str | ) |
This function allocates a new Zhuyin instance. Non-NULL zhuyin_str will be copied to the new Zhuyin instance and converted to uppercase. Note that the Zhuyin instance only holds ZHUYIN_MAX_LENGTH bytes, including the terminating NUL ('\0') character. Longer zhuyin will be truncated.
Note: use g_free to free the newly allocated instance.
zhuyin_str | the Zhuyin in string, NULL for blank instance. |
guint zhuyin_strip_tone | ( | Zhuyin * | zhuyin | ) |
This function strips the tone mark of zhuyin; otherwise it is similar to zhuyin_get_tone().
zhuyin | the zhuyin instance to be stripped. |
Pinyin* zhuyin_to_pinyin | ( | const Zhuyin * | zhuyin, | |
PinyinAccentFormat | toFormat, | |||
gboolean | useTrailNumber | |||
) |
zhuyin | the Zhuyin to be converted. | |
toFormat | the Pinyin accent mode. | |
useTrailNumber | TRUE to present tone as trailing number, FALSE to present tone as combining accent. |
Pinyin* zhuyin_to_pinyin_formatFlags | ( | const Zhuyin * | zhuyin, | |
PinyinFormatFlags | formatFlags | |||
) |
zhuyin | the Zhuyin to be converted. | |
formatFlags | Pinyin format flags. |
void zhuyin_to_pinyin_scalar_func | ( | sqlite3_context * | context, | |
int | argc, | |||
sqlite3_value ** | argv | |||
) |
This function is meant to be called by sqlite3_create_function() and used in SQL command. Do not use it directly.
context | The sqlite3_context. | |
argc | Number of arguments expected. | |
argv | Arguments for this scalar function. |
ZhuyinSymbol zhuyinSymbol_from_id | ( | ZhuyinSymbolId | id | ) |
id | Zhuyin symbol Id. |
ZhuyinSymbol zhuyinSymbol_from_toneMark_id | ( | guint | toneMark_id | ) |
toneMark_id | toneMark_id. |
ZhuyinSymbolId zhuyinSymbol_get_id | ( | ZhuyinSymbol | zSym | ) |
zSym | Zhuyin symbol. |
gboolean zhuyinSymbol_is_final | ( | ZhuyinSymbol | zSym | ) |
zSym | Zhuyin symbol. |
gboolean zhuyinSymbol_is_initial | ( | ZhuyinSymbol | zSym | ) |
zSym | Zhuyin symbol. |
gboolean zhuyinSymbol_is_medial | ( | ZhuyinSymbol | zSym | ) |
zSym | Zhuyin symbol. |
gboolean zhuyinSymbol_is_tone | ( | ZhuyinSymbol | zSym | ) |
zSym | Zhuyin symbol. |
guint zhuyinSymbol_to_toneMark_id | ( | ZhuyinSymbol | zSym | ) |
zSym | Zhuyin symbol. |