#include "Unihan_enum.h"
#include "Unihan_phonetic.h"
#include "sqlite_functions.h"
#include "str_functions.h"
Go to the source code of this file.
Data Structures | |
struct | UnihanIRG_SourceData |
IRG source data. More... | |
struct | UnihanIRG_SourceRec |
IRG source rec. More... | |
struct | DatabaseFuncStru |
Data structure of database supporting functions. More... | |
Unihan query options. | |
#define | UNIHAN_QUERY_OPTION_LIKE 1 |
Using SQL LIKE in WHERE expression. | |
#define | UNIHAN_QUERY_OPTION_SCALAR_STRING 1 << 1 |
Show code point as string "U+xxxx". | |
#define | UNIHAN_QUERY_OPTION_SHOW_GIVEN_FIELD 1 << 2 |
Show the given field in results. | |
#define | UNIHAN_QUERY_OPTION_PINYIN_TONE_ACCENT 1 << 3 |
Use accent mark for pinyin tone. | |
#define | UNIHAN_QUERY_OPTION_PINYIN_FORMAT_MASK 7 << 4 |
Mask for pinyin format. | |
#define | UNIHAN_QUERY_OPTION_ZHUYIN_FORCE_DISPLAY 1 << 7 |
Force ZhuYin display. | |
#define | UNIHAN_QUERY_OPTION_ZHUYIN_FORMAT_MASK 7 << 8 |
Mask for zhuyin format. | |
#define | UNIHAN_QUERY_OPTION_DEFAULT (PINYIN_ACCENT_UNIHAN << 4) | (ZHUYIN_TONEMARK_ORIGINAL << 8) |
Default options, PinYin format is Unihan; and ZhuYin format is Original. | |
#define | UNIHAN_QUERY_OPTION_GET_PINYIN_FORMAT(options) (options & UNIHAN_QUERY_OPTION_PINYIN_FORMAT_MASK) >> 4 |
Get PinYin format from UnihanQueryOption. | |
#define | UNIHAN_QUERY_OPTION_SET_PINYIN_FORMAT(options, format) options |= format << 4 |
Set PinYin format to UnihanQueryOption. | |
#define | UNIHAN_QUERY_OPTION_GET_ZHUYIN_FORMAT(options) (options & UNIHAN_QUERY_OPTION_ZHUYIN_FORMAT_MASK) >> 8 |
Get ZhuYin format from UnihanQueryOption. | |
#define | UNIHAN_QUERY_OPTION_SET_ZHUYIN_FORMAT(options, format) options |= format << 8 |
Set ZhuYin format to UnihanQueryOption. | |
typedef guint | UnihanQueryOption |
Unihan query options. | |
Typedefs | |
typedef int(* | UnihanCallback )(void *userOption, int col_num, char **results, char **col_names) |
Prototype of callback function for SQL execution. | |
Functions | |
SQL_Result * | unihan_find_all_matched (UnihanField givenField, const char *givenValue, UnihanField queryField, UnihanQueryOption qOption) |
Find all matched results, given a field and its value. | |
char * | unihan_find_firstMatched (UnihanField givenField, const char *givenValue, UnihanField queryField, UnihanQueryOption qOption) |
Find the first matched result, given a field and its value. | |
int | unihan_count_matched_record (UnihanTable table, StringList *valueList) |
Count number of matched records in a table. | |
int | unihan_insert (UnihanTable table, StringList *valueList) |
Insert a record to table. | |
int | unihan_insert_no_duplicate (UnihanTable table, StringList *valueList) |
Insert a record to table with duplication check. | |
int | unihan_insert_value (gunichar code, UnihanField field, const char *value) |
Insert a Unihan textual formatted record into the corresponding tables. | |
gboolean | unihanChar_has_property (gunichar code, UnihanField field) |
Whether the character is associated with the given field. | |
gboolean | unihanChar_is_in_source (gunichar code, UnihanIRG_SourceId source) |
Whether the character appeared in given source. | |
UnihanIRG_SourceId | unihanChar_is_in_sources (gunichar code, UnihanIRG_SourceId source,...) |
Find the first source which the character appears in. | |
gboolean | unihanChar_is_common_in_locale (gunichar code, UnihanLocale locale) |
Whether the character is common in the specified locale. | |
UnihanRange | unihanChar_in_range (gunichar code) |
Return the range which the character belongs to. | |
gunichar | unihanChar_parse (const char *str) |
Parses the string argument as a UCS4 (gunichar) character. | |
char * | unihanChar_to_scalar_string (gunichar code) |
Returns a string representing a UCS4 character. | |
sqlite3 * | unihanDb_get () |
Returns the db which libUnihan is using. | |
SQL_Result * | unihanDb_get_tableNames () |
Returns the tables in database. | |
int | unihanDb_open (const char *filename, int flags) |
Open a Unihan db. | |
int | unihanDb_open_default () |
Open the system default Unihan Db as read-only. | |
int | unihanDb_close () |
Close Unihan db. | |
int | unihanField_array_index (UnihanField field, const UnihanField *fieldArray) |
Return the index of a UnihanField in array. | |
UnihanIRG_Source | unihanField_get_IRG_source (UnihanField field) |
Return the corresponding IRG source if the field is IRG source field. | |
UnihanTable | unihanField_get_table (UnihanField field) |
Return the table that contains the key. | |
UnihanTable * | unihanField_get_all_tables (UnihanField field) |
Return all the tables that contain the key. | |
UnihanTable | unihanField_get_extra_table (UnihanField field) |
Return the corresponding extra table if the field needs one. | |
gboolean | unihanField_is_IRG_Source (UnihanField field) |
Whether the field is IRG_Source. | |
gboolean | unihanField_is_indexed (UnihanField field) |
Whether the field is indexed. | |
gboolean | unihanField_is_integer (UnihanField field) |
Whether the field is an integer field. | |
gboolean | unihanField_is_case_no_change (UnihanField field) |
Whether the case of value should be kept. | |
gboolean | unihanField_is_lowercase (UnihanField field) |
Whether the value in the field is stored as lowercase. | |
gboolean | unihanField_is_mandarin (UnihanField field) |
Whether the field contains mandarin pronunciation. | |
gboolean | unihanField_is_pseudo (UnihanField field) |
Whether the field is a pseudo field. | |
gboolean | unihanField_is_ucs4 (UnihanField field) |
Whether the field holds UCS4 value. | |
gboolean | unihanField_is_singleton (UnihanField field) |
Whether the field is a singleton field. | |
UnihanField | unihanField_parse (const char *str) |
Parses the string argument as a UnihanField. | |
const char * | unihanField_to_string (UnihanField field) |
Returns a string representing a UnihanField. | |
gboolean | unihanIRG_Source_has_no_mapping (UnihanIRG_SourceId sourceId) |
Whether the SourceId has no mapping. | |
const UnihanIRG_SourceData * | unihanIRG_SourceData_get (UnihanIRG_SourceId sourceId) |
Return the Unihan IRG_Source Data. | |
UnihanIRG_SourceId | unihanIRG_SourceId_parse (const char *sourceShortName) |
Parse the string argument as Unihan IRG Source ID. | |
UnihanIRG_SourceRec * | unihanIRG_SourceRec_parse (UnihanField field, const char *value) |
Parse the string argument as Unihan IRG Source Rec. | |
void | unihanIRG_SourceRec_free (UnihanIRG_SourceRec *rec) |
Free the UnihanIRG_SourceRec. | |
int | unihanSql_count_matches (const char *sqlClause, char **errMsg_ptr) |
Count the number of matches. | |
int | unihanSql_exec (char *sqlClause, UnihanCallback callback, void *callbackOption, char **errMsg_ptr) |
Execute the SQL to Unihan db. | |
SQL_Result * | unihanSql_get_sql_result (const char *sqlClause) |
Obtains a SQL_Result table of SQL command. | |
UnihanLocale | unihanLocale_parse (char *str) |
Parse the string argument as Unihan Locale. | |
const char * | unihanLocale_to_string (UnihanLocale locale) |
Returns a string representing a UnihanLocale. | |
const char * | unihanRange_to_string (UnihanRange uRange) |
Returns a string representing a UnihanRange. | |
UnihanTable | unihanTable_parse (const char *tableName) |
Parse the string argument as UnihanTable. | |
const char * | unihanTable_to_string (UnihanTable table) |
Returns a string representing a UnihanTable. | |
UnihanField * | unihanTable_get_db_fields (UnihanTable table) |
Returns the actual data table fields in an UnihanField array. | |
UnihanField * | unihanTable_get_fields (UnihanTable table) |
Returns all fields of the table in an UnihanField array. | |
UnihanField * | unihanTable_get_primary_key_fields (UnihanTable table) |
Returns all primary key fields of the given table in an UnihanField array. | |
Variables | |
const DatabaseFuncStru | DATABASE_FUNCS [] |
List of database supporting functions. |
#define UNIHAN_QUERY_OPTION_GET_PINYIN_FORMAT | ( | options | ) | (options & UNIHAN_QUERY_OPTION_PINYIN_FORMAT_MASK) >> 4 |
options | A UnihanQueryOption. |
#define UNIHAN_QUERY_OPTION_GET_ZHUYIN_FORMAT | ( | options | ) | (options & UNIHAN_QUERY_OPTION_ZHUYIN_FORMAT_MASK) >> 8 |
options | A UnihanQueryOption. |
#define UNIHAN_QUERY_OPTION_SET_PINYIN_FORMAT | ( | options, | |||
format | ) | options |= format << 4 |
options | A UnihanQueryOption. | |
format | PinYin_Accent_Format. |
#define UNIHAN_QUERY_OPTION_SET_ZHUYIN_FORMAT | ( | options, | |||
format | ) | options |= format << 8 |
options | A UnihanQueryOption. | |
format | ZhuYin_Accent_Format. |
typedef guint UnihanQueryOption |
Unihan query options provide additional control of query processing, such as SQL LIKE queries and output format.
int unihan_count_matched_record | ( | UnihanTable | table, | |
StringList * | valueList | |||
) |
Also useful for avoiding duplicate records. For example, use unihan_count_matched_record(UNIHAN_TABLE_KRSADOBE_JAPAN_1_6, valueArray)
to check whether the kRSAdobe_Japan_1_6Table already has the record (13317,"C",15387,"3",1,1) before insertion, where valueArray[]={ "13317", "C","15387","3","1","1", NULL};
table | the database table to be looked at. | |
valueList | Values in StringList. |
SQL_Result* unihan_find_all_matched | ( | UnihanField | givenField, | |
const char * | givenValue, | |||
UnihanField | queryField, | |||
UnihanQueryOption | qOption | |||
) |
This function is a convenient wrapper of unihanSql_get_sql_result().
Put the known field as givenField
and its value as givenValue
. The values of the field specified in queryField
will be put in the result table.
Use sql_result_free() to free the result table after you have finished using it.
givenField | the given (input) field. | |
givenValue | the given value of the field. | |
queryField | the result field. | |
qOption | the UnihanQueryOption. |
char* unihan_find_firstMatched | ( | UnihanField | givenField, | |
const char * | givenValue, | |||
UnihanField | queryField, | |||
UnihanQueryOption | qOption | |||
) |
This is a simplified version of unihan_find_all_matched(). Instead of returning the whole result table, it only returns the first matched result as a string.
Use g_free() to free the returned result.
givenField | the given (input) field. | |
givenValue | the given value of the field. | |
queryField | the result field. | |
qOption | the UnihanQueryOption. |
int unihan_insert | ( | UnihanTable | table, | |
StringList * | valueList | |||
) |
The values to be inserted should be in string representation, as if they were put in a plain text SQL command.
table | The database table to be looked at. | |
valueList | Values in StringList. |
int unihan_insert_no_duplicate | ( | UnihanTable | table, | |
StringList * | valueList | |||
) |
It checks for duplication before insertion; otherwise it is the same as unihan_insert(). Returns a negative value if a duplicate is found.
table | the database table to be looked at. | |
valueList | Values in StringList. |
int unihan_insert_value | ( | gunichar | code, | |
UnihanField | field, | |||
const char * | value | |||
) |
This function deals with the insertion of Unihan textual formatted records (as shown in Unihan.txt), which have 3 fields: the character code point, the field (tag) name, and the value.
This function will parse the value and insert the parsed result to corresponding tables.
Before using this function, convert first field to UCS4 (gunichar) format (using unihanChar_parse()); second to UnihanField (using unihanField_parse()).
code | character in UCS4 (gunichar) | |
field | the UnihanField | |
value | the value as in Unihan.txt. |
gboolean unihanChar_has_property | ( | gunichar | code, | |
UnihanField | field | |||
) |
code | character in UCS4 (gunichar) | |
field | the UnihanField |
UnihanRange unihanChar_in_range | ( | gunichar | code | ) |
code | character in UCS4 (gunichar) |
gboolean unihanChar_is_common_in_locale | ( | gunichar | code, | |
UnihanLocale | locale | |||
) |
"Common" characters are the ones which appear in well-known sources, such as GB-2312 and JIS X 0208:1990. See Common sources for a locale for the exact sources.
code | character in UCS4 (gunichar) | |
locale | the locale. |
gboolean unihanChar_is_in_source | ( | gunichar | code, | |
UnihanIRG_SourceId | source | |||
) |
code | character in UCS4 (gunichar) | |
source | IRG Source ID |
UnihanIRG_SourceId unihanChar_is_in_sources | ( | gunichar | code, | |
UnihanIRG_SourceId | source, | |||
... | ||||
) |
This function checks the given sources in order and returns the first matched source, or stops when it reaches the end of the source list (marked by UNIHAN_INVALID_SOURCEID).
code | character in UCS4 (gunichar) | |
source | IRG Source ID, use UNIHAN_INVALID_SOURCEID as indefinite value terminator. |
gunichar unihanChar_parse | ( | const char * | str | ) |
str | the string to be parsed. |
char* unihanChar_to_scalar_string | ( | gunichar | code | ) |
code | the UCS4 character. |
int unihanDb_close | ( | ) |
sqlite3* unihanDb_get | ( | ) |
Normally this function is not needed, except to get additional control beyond SQL.
SQL_Result* unihanDb_get_tableNames | ( | ) |
This function will get the names from database.
Use stringList_free() to free it.
int unihanDb_open | ( | const char * | filename, | |
int | flags | |||
) |
The flags parameter provides additional control of database access. It is supported by sqlite3_open_v2(), thus it takes one of the following three values, optionally combined with the SQLITE_OPEN_NOMUTEX flag, just like the flags parameter of sqlite3_open_v2(): SQLITE_OPEN_READONLY, SQLITE_OPEN_READWRITE, or SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE.
Note that the SQLITE_OPEN_NOMUTEX flag is not supported in SQLite 3.3.X and earlier.
filename | name of db file to be opened. | |
flags | Database access flags. |
int unihanDb_open_default | ( | ) |
int unihanField_array_index | ( | UnihanField | field, | |
const UnihanField * | fieldArray | |||
) |
field | Field to be found | |
fieldArray | the array of UnihanFields. |
UnihanTable* unihanField_get_all_tables | ( | UnihanField | field | ) |
This function returns an array of tables, terminated by UNIHAN_INVALID_TABLE. Use unihanField_get_all_tables() to obtain all the tables that the field belongs to.
field | the UnihanField. |
UnihanTable unihanField_get_extra_table | ( | UnihanField | field | ) |
An extra table is a table that provides additional information for a pseudo field. For example, kSemanticVariant is a pseudo field which combines fields in kSemanticVariantTable and kSemanticVariantTableExtra.
unihanField_get_table(UNIHAN_KSEMANTIC_VARIANT) returns kSemanticVariantTable while unihanField_get_extra_table() returns the extra table kSemanticVariantTableExtra.
Pseudo fields usually associate with extra tables, see unihanField_is_pseudo() for details.
field | the UnihanField. |
UnihanIRG_Source unihanField_get_IRG_source | ( | UnihanField | field | ) |
field | the UnihanField. |
UnihanTable unihanField_get_table | ( | UnihanField | field | ) |
This function only returns one table. If the field is in more than one table, then returns UNIHAN_AMBIGUOUS_TABLE. Returns UNIHAN_INVALID_TABLE if invalid field is given.
Use unihanField_get_all_tables() to obtain all the tables that the field belongs to.
field | the UnihanField. |
gboolean unihanField_is_case_no_change | ( | UnihanField | field | ) |
Usually, the Unihan tag values are stored as uppercase, such as UNIHAN_FIELD_KMANDARIN
, UNIHAN_FIELD_PINYIN
.
However, there are exceptions such as field UNIHAN_FIELD_KCANTONESE
which is always stored as lowercase; while field UNIHAN_FIELD_KDEFINITION,
on the other hand, may have both uppercase and lowercase characters.
Integer fields need not change case; however, as UCS-4 values can have the 'U+xxxxx' form, UCS-4 values should be converted to uppercase.
This function returns TRUE
if the case of field value need not be changed, as with UNIHAN_FIELD_KDEFINITION
; FALSE
otherwise.
field | the UnihanField |
gboolean unihanField_is_indexed | ( | UnihanField | field | ) |
Indexed fields are non-pseudo fields which are indexed in the database. Note that UNIHAN_FIELD_KDEFINITION is not indexed either.
field | the UnihanField |
gboolean unihanField_is_integer | ( | UnihanField | field | ) |
field | the UnihanField |
gboolean unihanField_is_IRG_Source | ( | UnihanField | field | ) |
It is a convenient wrapper of unihanField_get_IRG_source().
field | the UnihanField. |
gboolean unihanField_is_lowercase | ( | UnihanField | field | ) |
Usually, the Unihan tag values are stored as uppercase, such as UNIHAN_FIELD_KMANDARIN
, UNIHAN_FIELD_PINYIN
.
However, there are exceptions such as field UNIHAN_FIELD_KCANTONESE
which is always stored as lowercase; while field UNIHAN_FIELD_KDEFINITION, on
the other hand, may have both uppercase and lowercase characters.
field | the UnihanField |
gboolean unihanField_is_mandarin | ( | UnihanField | field | ) |
field | the UnihanField |
gboolean unihanField_is_pseudo | ( | UnihanField | field | ) |
A pseudo field is a field whose value is not derived directly from table but database functions. Field zhuyin, for example, is not in database but derived from function PINYIN_TO_ZHUYIN(). It is deemed to be a short cut for database functions.
Another example is field kSemanticVariant, which is a pseudo field which combines: kSemanticVariantTable.varinatCode, kSemanticVariantTableExtra.fromDict, kSemanticVariantTableExtra.semanticT, kSemanticVariantTableExtra.semanticB and kSemanticVariantTableExtra.semanticZ
field | the UnihanField |
gboolean unihanField_is_singleton | ( | UnihanField | field | ) |
A singleton field is a field whose value cannot be further split, and is functionally dependent on the UNIHAN_FIELD_CODE.
Most tag values in Unihan.txt are delimited by spaces; however, kDefinition, for example, should not be split in this way.
field | the UnihanField |
gboolean unihanField_is_ucs4 | ( | UnihanField | field | ) |
UCS4 fields can be displayed in the form of Unicode scalar string (U+xxxxx)
field | the UnihanField |
UnihanField unihanField_parse | ( | const char * | str | ) |
str | the string to be parsed. |
const char* unihanField_to_string | ( | UnihanField | field | ) |
gboolean unihanIRG_Source_has_no_mapping | ( | UnihanIRG_SourceId | sourceId | ) |
Some IRG sources (such as UNIHAN_SOURCE_GKX
, UNIHAN_SOURCE_G4K
) do not have a mapping index (inner code). This function tells whether a Source ID has no mapping.
sourceId | the IRG source ID. |
const UnihanIRG_SourceData* unihanIRG_SourceData_get | ( | UnihanIRG_SourceId | sourceId | ) |
Note: the returned data is static; it does not need to be freed.
sourceId | Unihan IRG source ID. |
UnihanIRG_SourceId unihanIRG_SourceId_parse | ( | const char * | sourceShortName | ) |
sourceShortName | the string to be parsed. |
void unihanIRG_SourceRec_free | ( | UnihanIRG_SourceRec * | rec | ) |
rec | the unihanIRG_SourceRec |
UnihanIRG_SourceRec* unihanIRG_SourceRec_parse | ( | UnihanField | field, | |
const char * | value | |||
) |
field | the UnihanField | |
value | the string to be parsed. |
UnihanLocale unihanLocale_parse | ( | char * | str | ) |
str | the string to be parsed. |
const char* unihanLocale_to_string | ( | UnihanLocale | locale | ) |
Note: the returned string is static; there is no need to free it.
locale | the UnihanLocale. |
const char* unihanRange_to_string | ( | UnihanRange | uRange | ) |
Note: the returned string is static; there is no need to free it.
uRange | the UnihanRange. |
int unihanSql_count_matches | ( | const char * | sqlClause, | |
char ** | errMsg_ptr | |||
) |
sqlClause | SQL command to be passed to Unihan db. | |
errMsg_ptr | pointer for error message. |
int unihanSql_exec | ( | char * | sqlClause, | |
UnihanCallback | callback, | |||
void * | callbackOption, | |||
char ** | errMsg_ptr | |||
) |
A convenient wrapper of sqlite3_exec().
sqlClause | SQL command to be passed to Unihan db. | |
callback | callback function for each match record, can be NULL. | |
callbackOption | option for callback function, can be NULL. | |
errMsg_ptr | pointer for error message. |
SQL_Result* unihanSql_get_sql_result | ( | const char * | sqlClause | ) |
sqlClause | SQL command to be passed to Unihan db. |
UnihanField* unihanTable_get_db_fields | ( | UnihanTable | table | ) |
Unlike unihanTable_get_fields(), this function retrieves the actual fields from the database. Hence, it is slower than unihanTable_get_fields().
The returned UnihanField array is terminated by UNIHAN_INVALID_FIELD. Note: use g_free to free the UnihanField array.
table | the UnihanTable. |
UnihanField* unihanTable_get_fields | ( | UnihanTable | table | ) |
Unlike unihanTable_get_db_fields(), this function returns the fields that are supposed to be in the given data table. It does not check the database, therefore it is faster than unihanTable_get_db_fields().
The returned UnihanField array is terminated by UNIHAN_INVALID_FIELD. Note: use g_free to free the UnihanField array.
table | the UnihanTable. |
UnihanField* unihanTable_get_primary_key_fields | ( | UnihanTable | table | ) |
The returned UnihanField array is terminated by UNIHAN_INVALID_FIELD. Note: use g_free to free the UnihanField array.
table | the UnihanTable. |
UnihanTable unihanTable_parse | ( | const char * | tableName | ) |
tableName | the string to be parsed. |
const char* unihanTable_to_string | ( | UnihanTable | table | ) |
Note: the returned string is static; there is no need to free it.
table | the UnihanTable. |