#include "Unihan_enum.h"
#include "Unihan_phonetic.h"
#include "sqlite_functions.h"
#include "str_functions.h"
Go to the source code of this file.
Data Structures | |
| struct | UnihanIRG_SourceData |
| IRG source data. More... | |
| struct | UnihanIRG_SourceRec |
| IRG source rec. More... | |
| struct | DatabaseFuncStru |
| Data structure of database supporting functions. More... | |
Unihan query options. | |
| #define | UNIHAN_QUERY_OPTION_LIKE 1 |
| Using SQL LIKE in WHERE expression. | |
| #define | UNIHAN_QUERY_OPTION_SCALAR_STRING 1 << 1 |
| Show code point as string "U+xxxx". | |
| #define | UNIHAN_QUERY_OPTION_SHOW_GIVEN_FIELD 1 << 2 |
| Show the given field in results. | |
| #define | UNIHAN_QUERY_OPTION_PINYIN_TONE_ACCENT 1 << 3 |
| Use accent mark for pinyin tone. | |
| #define | UNIHAN_QUERY_OPTION_PINYIN_FORMAT_MASK 7 << 4 |
| Mask for pinyin format. | |
| #define | UNIHAN_QUERY_OPTION_ZHUYIN_FORCE_DISPLAY 1 << 7 |
| Force ZhuYin display. | |
| #define | UNIHAN_QUERY_OPTION_ZHUYIN_FORMAT_MASK 7 << 8 |
| Mask for zhuyin format. | |
| #define | UNIHAN_QUERY_OPTION_DEFAULT (PINYIN_ACCENT_UNIHAN << 4) | (ZHUYIN_TONEMARK_ORIGINAL << 8) |
| Default options, PinYin format is Unihan; and ZhuYin format is Original. | |
| #define | UNIHAN_QUERY_OPTION_GET_PINYIN_FORMAT(options) (options & UNIHAN_QUERY_OPTION_PINYIN_FORMAT_MASK) >> 4 |
| Get PinYin format from UnihanQueryOption. | |
| #define | UNIHAN_QUERY_OPTION_SET_PINYIN_FORMAT(options, format) options |= format << 4 |
| Set PinYin format to UnihanQueryOption. | |
| #define | UNIHAN_QUERY_OPTION_GET_ZHUYIN_FORMAT(options) (options & UNIHAN_QUERY_OPTION_ZHUYIN_FORMAT_MASK) >> 8 |
| Get ZhuYin format from UnihanQueryOption. | |
| #define | UNIHAN_QUERY_OPTION_SET_ZHUYIN_FORMAT(options, format) options |= format << 8 |
| Set ZhuYin format to UnihanQueryOption. | |
| typedef guint | UnihanQueryOption |
| Unihan query options. | |
Typedefs | |
| typedef int(* | UnihanCallback )(void *userOption, int col_num, char **results, char **col_names) |
| Prototype of callback function for SQL execution. | |
Functions | |
| SQL_Result * | unihan_find_all_matched (UnihanField givenField, const char *givenValue, UnihanField queryField, UnihanQueryOption qOption) |
| Find all matched results, given a field and its value. | |
| char * | unihan_find_firstMatched (UnihanField givenField, const char *givenValue, UnihanField queryField, UnihanQueryOption qOption) |
| Find the first matched result, given a field and its value. | |
| int | unihan_count_matched_record (UnihanTable table, StringList *valueList) |
| Count number of matched records in a table. | |
| int | unihan_insert (UnihanTable table, StringList *valueList) |
| Insert a record to table. | |
| int | unihan_insert_no_duplicate (UnihanTable table, StringList *valueList) |
| Insert a record to table with duplication check. | |
| int | unihan_insert_value (gunichar code, UnihanField field, const char *value) |
| Insert a Unihan textual formatted record into the corresponding tables. | |
| gboolean | unihanChar_has_property (gunichar code, UnihanField field) |
| Whether the character is associated with the given field. | |
| gboolean | unihanChar_is_in_source (gunichar code, UnihanIRG_SourceId source) |
| Whether the character appeared in given source. | |
| UnihanIRG_SourceId | unihanChar_is_in_sources (gunichar code, UnihanIRG_SourceId source,...) |
| Find the first source which the character appears in. | |
| gboolean | unihanChar_is_common_in_locale (gunichar code, UnihanLocale locale) |
| Whether the character is common in the specified locale. | |
| UnihanRange | unihanChar_in_range (gunichar code) |
| Return the range which the character belongs to. | |
| gunichar | unihanChar_parse (const char *str) |
| Parses the string argument as a UCS4 (gunichar) character. | |
| char * | unihanChar_to_scalar_string (gunichar code) |
| Returns a string representing a UCS4 character. | |
| sqlite3 * | unihanDb_get () |
| Returns the db which libUnihan is using. | |
| SQL_Result * | unihanDb_get_tableNames () |
| Returns the tables in database. | |
| int | unihanDb_open (const char *filename, int flags) |
| Open a Unihan db. | |
| int | unihanDb_open_default () |
| Open the system default Unihan Db as read-only. | |
| int | unihanDb_close () |
| Close Unihan db. | |
| int | unihanField_array_index (UnihanField field, const UnihanField *fieldArray) |
| Return the index of a UnihanField in array. | |
| UnihanIRG_Source | unihanField_get_IRG_source (UnihanField field) |
| Return the corresponding IRG source if the field is IRG source field. | |
| UnihanTable | unihanField_get_table (UnihanField field) |
| Return the table that contains the key. | |
| UnihanTable * | unihanField_get_all_tables (UnihanField field) |
| Return all the tables that contain the key. | |
| UnihanTable | unihanField_get_extra_table (UnihanField field) |
| Return the corresponding extra table if the field needs one. | |
| gboolean | unihanField_is_IRG_Source (UnihanField field) |
| Whether the field is IRG_Source. | |
| gboolean | unihanField_is_indexed (UnihanField field) |
| Whether the field is indexed. | |
| gboolean | unihanField_is_integer (UnihanField field) |
| Whether the field is an integer field. | |
| gboolean | unihanField_is_case_no_change (UnihanField field) |
| Whether the case of value should be kept. | |
| gboolean | unihanField_is_lowercase (UnihanField field) |
| Whether the value in the field is stored as lowercase. | |
| gboolean | unihanField_is_mandarin (UnihanField field) |
| Whether the field contains mandarin pronunciation. | |
| gboolean | unihanField_is_pseudo (UnihanField field) |
| Whether the field is a pseudo field. | |
| gboolean | unihanField_is_ucs4 (UnihanField field) |
| Whether the field holds UCS4 value. | |
| gboolean | unihanField_is_singleton (UnihanField field) |
| Whether the field is a singleton field. | |
| UnihanField | unihanField_parse (const char *str) |
| Parses the string argument as a UnihanField. | |
| const char * | unihanField_to_string (UnihanField field) |
| Returns a string representing a UnihanField. | |
| gboolean | unihanIRG_Source_has_no_mapping (UnihanIRG_SourceId sourceId) |
| Whether the SourceId has no mapping. | |
| const UnihanIRG_SourceData * | unihanIRG_SourceData_get (UnihanIRG_SourceId sourceId) |
| Return the Unihan IRG_Source Data. | |
| UnihanIRG_SourceId | unihanIRG_SourceId_parse (const char *sourceShortName) |
| Parse the string argument as Unihan IRG Source ID. | |
| UnihanIRG_SourceRec * | unihanIRG_SourceRec_parse (UnihanField field, const char *value) |
| Parse the string argument as Unihan IRG Source Rec. | |
| void | unihanIRG_SourceRec_free (UnihanIRG_SourceRec *rec) |
| Free the UnihanIRG_SourceRec. | |
| int | unihanSql_count_matches (const char *sqlClause, char **errMsg_ptr) |
| Count the number of matches. | |
| int | unihanSql_exec (char *sqlClause, UnihanCallback callback, void *callbackOption, char **errMsg_ptr) |
| Execute the SQL to Unihan db. | |
| SQL_Result * | unihanSql_get_sql_result (const char *sqlClause) |
| Obtains a SQL_Result table of SQL command. | |
| UnihanLocale | unihanLocale_parse (char *str) |
| Parse the string argument as Unihan Locale. | |
| const char * | unihanLocale_to_string (UnihanLocale locale) |
| Returns a string representing a UnihanLocale. | |
| const char * | unihanRange_to_string (UnihanRange uRange) |
| Returns a string representing a UnihanRange. | |
| UnihanTable | unihanTable_parse (const char *tableName) |
| Parse the string argument as UnihanTable. | |
| const char * | unihanTable_to_string (UnihanTable table) |
| Returns a string representing a UnihanTable. | |
| UnihanField * | unihanTable_get_db_fields (UnihanTable table) |
| Returns the actual data table fields in an UnihanField array. | |
| UnihanField * | unihanTable_get_fields (UnihanTable table) |
| Returns all fields of the table in an UnihanField array. | |
| UnihanField * | unihanTable_get_primary_key_fields (UnihanTable table) |
| Returns all primary key fields of the given table in an UnihanField array. | |
Variables | |
| const DatabaseFuncStru | DATABASE_FUNCS [] |
| List of database supporting functions. | |
| #define UNIHAN_QUERY_OPTION_GET_PINYIN_FORMAT | ( | options | ) | (options & UNIHAN_QUERY_OPTION_PINYIN_FORMAT_MASK) >> 4 |
| options | A UnihanQueryOption. |
| #define UNIHAN_QUERY_OPTION_GET_ZHUYIN_FORMAT | ( | options | ) | (options & UNIHAN_QUERY_OPTION_ZHUYIN_FORMAT_MASK) >> 8 |
| options | A UnihanQueryOption. |
| #define UNIHAN_QUERY_OPTION_SET_PINYIN_FORMAT | ( | options, | |||
| format | ) | options |= format << 4 |
| options | A UnihanQueryOption. | |
| format | PinYin_Accent_Format. |
| #define UNIHAN_QUERY_OPTION_SET_ZHUYIN_FORMAT | ( | options, | |||
| format | ) | options |= format << 8 |
| options | A UnihanQueryOption. | |
| format | ZhuYin_Accent_Format. |
| typedef guint UnihanQueryOption |
Unihan query options provides additional control of query processing, such as SQL like query and output format.
| int unihan_count_matched_record | ( | UnihanTable | table, | |
| StringList * | valueList | |||
| ) |
This is also useful for avoiding duplicate records. For example, use unihan_count_matched_record(UNIHAN_TABLE_KRSADOBE_JAPAN_1_6, valueArray) to check whether the kRSAdobe_Japan_1_6Table already has the record (13317,"C",15387,"3",1,1) before insertion, where valueArray[]={ "13317", "C","15387","3","1","1", NULL};
| table | the database table to be looked at. | |
| valueList | Values in StringList. |
| SQL_Result* unihan_find_all_matched | ( | UnihanField | givenField, | |
| const char * | givenValue, | |||
| UnihanField | queryField, | |||
| UnihanQueryOption | qOption | |||
| ) |
This function is a convenient wrapper of unihanSql_get_sql_result().
Put the known field as givenField and its value as givenValue. The values of the field specified in queryField will be put in the result table.
Use sql_result_free() to free the returned result table after you have finished using it.
| givenField | the given (input) field. | |
| givenValue | the given value of the field. | |
| queryField | the result field. | |
| qOption | the UnihanQueryOption. |
| char* unihan_find_firstMatched | ( | UnihanField | givenField, | |
| const char * | givenValue, | |||
| UnihanField | queryField, | |||
| UnihanQueryOption | qOption | |||
| ) |
This is a simplified version of unihan_find_all_matched(). Instead of returning whole result table, it only returns the first matched result as string.
Use g_free() to free the returned result.
| givenField | the given (input) field. | |
| givenValue | the given value of the field. | |
| queryField | the result field. | |
| qOption | the UnihanQueryOption. |
| int unihan_insert | ( | UnihanTable | table, | |
| StringList * | valueList | |||
| ) |
The values to be inserted should be in string representation, as if they were put in a plain text SQL command.
| table | The database table to be looked at. | |
| valueList | Values in StringList. |
| int unihan_insert_no_duplicate | ( | UnihanTable | table, | |
| StringList * | valueList | |||
| ) |
It checks for duplication before insertion; otherwise it is the same as unihan_insert(). Returns a negative value if a duplicate is found.
| table | the database table to be looked at. | |
| valueList | Values in StringList. |
| int unihan_insert_value | ( | gunichar | code, | |
| UnihanField | field, | |||
| const char * | value | |||
| ) |
This function deals with the insertion of Unihan textual formatted records (as shown in Unihan.txt), which have 3 fields:
This function will parse the value and insert the parsed result to corresponding tables.
Before using this function, convert first field to UCS4 (gunichar) format (using unihanChar_parse()); second to UnihanField (using unihanField_parse()).
| code | character in UCS4 (gunichar) | |
| field | the UnihanField | |
| value | the value as it appears in Unihan.txt. |
| gboolean unihanChar_has_property | ( | gunichar | code, | |
| UnihanField | field | |||
| ) |
| code | character in UCS4 (gunichar) | |
| field | the UnihanField |
| UnihanRange unihanChar_in_range | ( | gunichar | code | ) |
| code | character in UCS4 (gunichar) |
| gboolean unihanChar_is_common_in_locale | ( | gunichar | code, | |
| UnihanLocale | locale | |||
| ) |
"Common" characters are the ones which appear in the well-known source, such as GB-2312 and JIS X 0208:1990. See Common sources for a locale for exact sources.
| code | character in UCS4 (gunichar) | |
| locale | the locale. |
| gboolean unihanChar_is_in_source | ( | gunichar | code, | |
| UnihanIRG_SourceId | source | |||
| ) |
| code | character in UCS4 (gunichar) | |
| source | IRG Source ID |
| UnihanIRG_SourceId unihanChar_is_in_sources | ( | gunichar | code, | |
| UnihanIRG_SourceId | source, | |||
| ... | ||||
| ) |
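This function checks the given sources in order and returns the first matched source, stopping when it reaches the end of the source argument list (terminated by UNIHAN_INVALID_SOURCEID).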
| code | character in UCS4 (gunichar) | |
| source | IRG Source ID, use UNIHAN_INVALID_SOURCEID as indefinite value terminator. |
| gunichar unihanChar_parse | ( | const char * | str | ) |
| str | the string to be parsed. |
| char* unihanChar_to_scalar_string | ( | gunichar | code | ) |
| code | the UCS4 character. |
| int unihanDb_close | ( | ) |
| sqlite3* unihanDb_get | ( | ) |
Normally this function is not needed, except to get additional control beyond SQL.
| SQL_Result* unihanDb_get_tableNames | ( | ) |
This function will get the names from database.
Use stringList_free() to free it.
| int unihanDb_open | ( | const char * | filename, | |
| int | flags | |||
| ) |
The flags parameter provides additional control of database access. It is supported by sqlite3_open_v2(), thus it takes one of the following three values, optionally combined with the SQLITE_OPEN_NOMUTEX flag, just like the flags parameter of sqlite3_open_v2():
Note that the SQLITE_OPEN_NOMUTEX flag is not supported in SQLite 3.3.X and earlier.
| filename | name of the db file to be opened. | |
| flags | Database access flags. |
| int unihanDb_open_default | ( | ) |
| int unihanField_array_index | ( | UnihanField | field, | |
| const UnihanField * | fieldArray | |||
| ) |
| field | Field to be found | |
| fieldArray | the array of UnihanFields. |
| UnihanTable* unihanField_get_all_tables | ( | UnihanField | field | ) |
This function returns an array of tables, terminated by UNIHAN_INVALID_TABLE. Use it to obtain all the tables that the field belongs to.
| field | the UnihanField. |
| UnihanTable unihanField_get_extra_table | ( | UnihanField | field | ) |
An extra table is a table that provides additional information for a pseudo field. For example, kSemanticVariant is a pseudo field which combines fields in kSemanticVariantTable and kSemanticVariantTableExtra.
unihanField_get_table(UNIHAN_KSEMANTIC_VARIANT) returns kSemanticVariantTable while unihanField_get_extra_table() returns the extra table kSemanticVariantTableExtra.
Pseudo fields usually associate with extra tables, see unihanField_is_pseudo() for details.
| field | the UnihanField. |
| UnihanIRG_Source unihanField_get_IRG_source | ( | UnihanField | field | ) |
| field | the UnihanField. |
| UnihanTable unihanField_get_table | ( | UnihanField | field | ) |
This function only returns one table. If the field is in more than one table, then returns UNIHAN_AMBIGUOUS_TABLE. Returns UNIHAN_INVALID_TABLE if invalid field is given.
Use unihanField_get_all_tables() to obtain all the tables that the field belongs to.
| field | the UnihanField. |
| gboolean unihanField_is_case_no_change | ( | UnihanField | field | ) |
Usually, the Unihan tag values are stored as uppercase, such as UNIHAN_FIELD_KMANDARIN, UNIHAN_FIELD_PINYIN.
However, there are exceptions such as field UNIHAN_FIELD_KCANTONESE, which is always stored as lowercase; while field UNIHAN_FIELD_KDEFINITION, on the other hand, may have both uppercase and lowercase characters.
Integer fields need not change case; however, as UCS-4 values can have the 'U+xxxxx' form, UCS-4 values should be converted to uppercase.
This function returns TRUE if the case of field value need not be changed, as with UNIHAN_FIELD_KDEFINITION; FALSE otherwise.
| field | the UnihanField |
| gboolean unihanField_is_indexed | ( | UnihanField | field | ) |
Indexed fields are non-pseudo fields which are indexed in the database. Note that UNIHAN_FIELD_KDEFINITION is not indexed either.
| field | the UnihanField |
| gboolean unihanField_is_integer | ( | UnihanField | field | ) |
| field | the UnihanField |
| gboolean unihanField_is_IRG_Source | ( | UnihanField | field | ) |
It is a convenient wrapper of unihanField_get_IRG_source().
| field | the UnihanField. |
| gboolean unihanField_is_lowercase | ( | UnihanField | field | ) |
Usually, the Unihan tag values are stored as uppercase, such as UNIHAN_FIELD_KMANDARIN, UNIHAN_FIELD_PINYIN.
However, there are exceptions such as field UNIHAN_FIELD_KCANTONESE, which is always stored as lowercase; while field UNIHAN_FIELD_KDEFINITION, on the other hand, may have both uppercase and lowercase characters.
| field | the UnihanField |
| gboolean unihanField_is_mandarin | ( | UnihanField | field | ) |
| field | the UnihanField |
| gboolean unihanField_is_pseudo | ( | UnihanField | field | ) |
A pseudo field is a field whose value is not derived directly from table but database functions. Field zhuyin, for example, is not in database but derived from function PINYIN_TO_ZHUYIN(). It is deemed to be a short cut for database functions.
Another example is field kSemanticVariant, which is a pseudo field that combines: kSemanticVariantTable.variantCode, kSemanticVariantTableExtra.fromDict, kSemanticVariantTableExtra.semanticT, kSemanticVariantTableExtra.semanticB and kSemanticVariantTableExtra.semanticZ.
| field | the UnihanField |
| gboolean unihanField_is_singleton | ( | UnihanField | field | ) |
A singleton field is a field whose value cannot be further split, and is functionally dependent on UNIHAN_FIELD_CODE.
Most tag values in Unihan.txt are delimited by spaces; however, kDefinition, for example, should not be split in this way.
| field | the UnihanField |
| gboolean unihanField_is_ucs4 | ( | UnihanField | field | ) |
UCS4 fields can be displayed in the form of Unicode scalar string (U+xxxxx)
| field | the UnihanField |
| UnihanField unihanField_parse | ( | const char * | str | ) |
| str | the string to be parsed. |
| const char* unihanField_to_string | ( | UnihanField | field | ) |
| gboolean unihanIRG_Source_has_no_mapping | ( | UnihanIRG_SourceId | sourceId | ) |
Some IRG sources (such as UNIHAN_SOURCE_GKX, UNIHAN_SOURCE_G4K) do not have a mapping index (inner code). This function tells whether a Source ID lacks such a mapping.
| sourceId | the IRG source ID. |
| const UnihanIRG_SourceData* unihanIRG_SourceData_get | ( | UnihanIRG_SourceId | sourceId | ) |
Note: the returned data is static and does not need to be freed.
| sourceId | Unihan IRG source ID. |
| UnihanIRG_SourceId unihanIRG_SourceId_parse | ( | const char * | sourceShortName | ) |
| sourceShortName | the string to be parsed. |
| void unihanIRG_SourceRec_free | ( | UnihanIRG_SourceRec * | rec | ) |
| rec | the unihanIRG_SourceRec |
| UnihanIRG_SourceRec* unihanIRG_SourceRec_parse | ( | UnihanField | field, | |
| const char * | value | |||
| ) |
| field | the UnihanField | |
| value | the string to be parsed. |
| UnihanLocale unihanLocale_parse | ( | char * | str | ) |
| str | the string to be parsed. |
| const char* unihanLocale_to_string | ( | UnihanLocale | locale | ) |
Note: the returned string is static and does not need to be freed.
| locale | the UnihanLocale. |
| const char* unihanRange_to_string | ( | UnihanRange | uRange | ) |
Note: the returned string is static and does not need to be freed.
| uRange | the UnihanRange. |
| int unihanSql_count_matches | ( | const char * | sqlClause, | |
| char ** | errMsg_ptr | |||
| ) |
| sqlClause | SQL command to be passed to Unihan db. | |
| errMsg_ptr | pointer for error message. |
| int unihanSql_exec | ( | char * | sqlClause, | |
| UnihanCallback | callback, | |||
| void * | callbackOption, | |||
| char ** | errMsg_ptr | |||
| ) |
A convenient wrapper of sqlite3_exec().
| sqlClause | SQL command to be passed to Unihan db. | |
| callback | callback function for each match record, can be NULL. | |
| callbackOption | option for callback function, can be NULL. | |
| errMsg_ptr | pointer for error message. |
| SQL_Result* unihanSql_get_sql_result | ( | const char * | sqlClause | ) |
| sqlClause | SQL command to be passed to Unihan db. |
| UnihanField* unihanTable_get_db_fields | ( | UnihanTable | table | ) |
Unlike unihanTable_get_fields(), this function retrieves the actual fields from the database. Hence, it is slower than unihanTable_get_fields().
The returned UnihanField array is terminated by UNIHAN_INVALID_FIELD. Note: use g_free to free the UnihanField array.
| table | the UnihanTable. |
| UnihanField* unihanTable_get_fields | ( | UnihanTable | table | ) |
Unlike unihanTable_get_db_fields(), this function returns the fields that are supposed to be in the given data table. It does not check the database, therefore it is faster than unihanTable_get_db_fields().
The returned UnihanField array is terminated by UNIHAN_INVALID_FIELD. Note: use g_free to free the UnihanField array.
| table | the UnihanTable. |
| UnihanField* unihanTable_get_primary_key_fields | ( | UnihanTable | table | ) |
The returned UnihanField array is terminated by UNIHAN_INVALID_FIELD. Note: use g_free to free the UnihanField array.
| table | the UnihanTable. |
| UnihanTable unihanTable_parse | ( | const char * | tableName | ) |
| tableName | the string to be parsed. |
| const char* unihanTable_to_string | ( | UnihanTable | table | ) |
Note: the returned string is static and does not need to be freed.
| table | the UnihanTable. |
1.5.6