#include "Unihan_enum.h"
#include "Unihan_phonetic.h"
#include "sqlite_functions.h"
#include "str_functions.h"
Go to the source code of this file.
Data Structures | |
struct | UnihanIRG_SourceData |
IRG source data. More... | |
struct | UnihanFieldTablePair |
Structure for field-table pair. More... | |
struct | UnihanIRG_SourceRec |
IRG source rec. More... | |
struct | DatabaseFuncStru |
Data structure of database supporting functions. More... | |
Defines | |
#define | FIELD_CACHE_DB "field.cache" |
The default field cache file. | |
Functions | |
SQL_Result * | unihan_find_all_matched (UnihanField givenField, const char *givenValue, UnihanField queryField, UnihanQueryOption qOption) |
Find all matched results, given a field and its value. | |
char * | unihan_find_first_matched (UnihanField givenField, const char *givenValue, UnihanField queryField, UnihanQueryOption qOption) |
Find the first matched result, given a field and its value. | |
int | unihan_count_matched_record (UnihanTable table, StringList *valueList) |
Count number of matched records in a table. | |
int | unihan_insert (UnihanTable table, StringList *valueList) |
Insert a record to table. | |
int | unihan_insert_no_duplicate (UnihanTable table, StringList *valueList) |
Insert a record to table with duplication check. | |
int | unihan_insert_value (gunichar code, UnihanField field, const char *value) |
Insert a Unihan textual formatted record to corresponding tables. | |
gboolean | unihanChar_has_field (gunichar code, UnihanField field) |
Whether the character is associated with the given field. | |
gboolean | unihanChar_is_in_source (gunichar code, UnihanIRG_SourceId source) |
Whether the character appeared in given source. | |
UnihanIRG_SourceId | unihanChar_is_in_sources (gunichar code, UnihanIRG_SourceId source,...) |
Find the first source which the character appears in. | |
gboolean | unihanChar_is_common_in_locale (gunichar code, UnihanLocale locale) |
Whether the character is common in the specified locale. | |
UnihanRange | unihanChar_in_range (gunichar code) |
Return the range which the character belongs to. | |
gunichar | unihanChar_parse (const char *str) |
Parses the string argument as a UCS4 (gunichar) character. | |
char * | unihanChar_to_scalar_string (gunichar code) |
Returns a scalar string of a UCS4 character. | |
sqlite3 * | unihanDb_get () |
Returns the db which libUnihan is using. | |
UnihanTable * | unihanDb_get_all_tables () |
Returns the IDs of all tables in database. | |
SQL_Result * | unihanDb_get_all_tableNames () |
Returns the names of table in database. | |
int | unihanDb_open (const char *filename, int flags) |
Open a Unihan db. | |
int | unihanDb_open_default () |
Open the system default Unihan Db as read-only. | |
int | unihanDb_close () |
Close Unihan db. | |
int | unihanField_array_index (UnihanField field, const UnihanField *fieldArray) |
Return the index of a UnihanField in array. | |
UnihanIRG_Source | unihanField_get_IRG_source (UnihanField field) |
Return the corresponding IRG source if the field is IRG source field. | |
UnihanTable | unihanField_get_table (UnihanField field) |
Return the table that contains the key. | |
UnihanTable * | unihanField_get_all_tables (UnihanField field) |
Return all the tables that contain the key. | |
UnihanTable | unihanField_get_extra_table (UnihanField field) |
Return the corresponding extra table if the field needs one. | |
UnihanTable * | unihanField_get_required_tables (UnihanField field) |
Return the built-in tables that are required by the built-in field. | |
gboolean | unihanField_is_IRG_Source (UnihanField field) |
Whether the field is IRG_Source. | |
gboolean | unihanField_is_indexed (UnihanField field) |
Whether the field is indexed. | |
gboolean | unihanField_is_integer (UnihanField field) |
Whether the field is an integer field. | |
gboolean | unihanField_is_uppercase (UnihanField field) |
Whether the value in the field is stored as uppercase. | |
gboolean | unihanField_is_lowercase (UnihanField field) |
Whether the value in the field is stored as lowercase. | |
gboolean | unihanField_is_mandarin (UnihanField field) |
Whether the field contains mandarin pronunciation. | |
gboolean | unihanField_is_pseudo (UnihanField field) |
Whether the field is a pseudo field. | |
gboolean | unihanField_is_ucs4 (UnihanField field) |
Whether the field holds UCS4 value. | |
gboolean | unihanField_is_singleton (UnihanField field) |
Whether the field is a singleton field. | |
UnihanField | unihanField_parse (const char *str) |
Parses the string argument as a UnihanField. | |
const char * | unihanField_to_string (UnihanField field) |
Returns a string representing a UnihanField. | |
gboolean | unihanIRG_Source_has_no_mapping (UnihanIRG_SourceId sourceId) |
Whether the SourceId has mapping. | |
const UnihanIRG_SourceData * | unihanIRG_SourceData_get (UnihanIRG_SourceId sourceId) |
Return the Unihan IRG_Source Data. | |
UnihanIRG_SourceId | unihanIRG_SourceId_parse (const char *sourceShortName) |
Parse the string argument as Unihan IRG Source ID. | |
UnihanIRG_SourceRec * | unihanIRG_SourceRec_parse (UnihanField field, const char *value) |
Parse the string argument as Unihan IRG Source Rec. | |
void | unihanIRG_SourceRec_free (UnihanIRG_SourceRec *rec) |
Free the UnihanIRG_SourceRec. | |
int | unihanSql_count_matches (const char *sqlClause, char **errMsg_ptr) |
Count the number of matches. | |
int | unihanSql_exec (char *sqlClause, sqlite_exec_callback callback, void *callbackOption, char **errMsg_ptr) |
Execute the SQL to Unihan db. | |
SQL_Result * | unihanSql_get_sql_result (const char *sqlClause) |
Obtains a SQL_Result table of SQL command. | |
UnihanLocale | unihanLocale_parse (char *str) |
Parse the string argument as Unihan Locale. | |
const char * | unihanLocale_to_string (UnihanLocale locale) |
Returns a string representing a UnihanLocale. | |
const char * | unihanRange_to_string (UnihanRange uRange) |
Returns a string representing a UnihanRange. | |
UnihanTable | unihanTable_parse (const char *tableName) |
Parse the string argument as UnihanTable. | |
const char * | unihanTable_to_string (UnihanTable table) |
Returns a string representing a UnihanTable. | |
UnihanField * | unihanTable_get_db_fields (UnihanTable table) |
Returns the actual data table fields in an UnihanField array. | |
UnihanField * | unihanTable_get_fields (UnihanTable table) |
Returns all fields of the table in an UnihanField array. | |
UnihanField * | unihanTable_get_primary_key_fields (UnihanTable table) |
Returns all primary key fields of the given table in an UnihanField array. | |
Variables | |
const DatabaseFuncStru | DATABASE_FUNCS [] |
List of database supporting functions. |
#define UNIHAN_QUERY_OPTION_GET_PINYIN_FORMAT | ( | options | ) | (options & UNIHAN_QUERY_OPTION_PINYIN_FORMAT_MASK) >> 4 |
options | A UnihanQueryOption. |
#define UNIHAN_QUERY_OPTION_GET_ZHUYIN_FORMAT | ( | options | ) | (options & UNIHAN_QUERY_OPTION_ZHUYIN_FORMAT_MASK) >> 8 |
options | A UnihanQueryOption. |
#define UNIHAN_QUERY_OPTION_SET_PINYIN_FORMAT | ( | options, | |||
format | ) | options |= format << 4 |
options | A UnihanQueryOption. | |
format | PinyinAccentFormat. |
#define UNIHAN_QUERY_OPTION_SET_ZHUYIN_FORMAT | ( | options, | |||
format | ) | options |= format << 8 |
options | A UnihanQueryOption. | |
format | Zhuyin_Accent_Format. |
typedef guint UnihanQueryOption |
Unihan query options provide additional control of query processing, such as SQL LIKE queries and output format.
int unihan_count_matched_record | ( | UnihanTable | table, | |
StringList * | valueList | |||
) |
Also useful for avoiding duplicate records. For example, use unihan_count_matched_record(UNIHAN_TABLE_KRSADOBE_JAPAN_1_6, valueArray) to check whether the kRSAdobe_Japan_1_6Table has the record (13317,"C",15387,"3",1,1) before insertion, where the valueArray[]={ "13317", "C","15387","3","1","1", NULL};
table | the database table to be looked at. | |
valueList | Values in StringList. |
SQL_Result* unihan_find_all_matched | ( | UnihanField | givenField, | |
const char * | givenValue, | |||
UnihanField | queryField, | |||
UnihanQueryOption | qOption | |||
) |
This function is a convenient wrapper of unihanSql_get_sql_result().
Put the known field as givenField and its value as givenValue. The values of the field specified in queryField will be put in the result table.
Use sql_result_free() to free the result table after you finish using it.
givenField | the given (input) field. | |
givenValue | the given value of the field. | |
queryField | the result field. | |
qOption | the UnihanQueryOption. |
char* unihan_find_first_matched | ( | UnihanField | givenField, | |
const char * | givenValue, | |||
UnihanField | queryField, | |||
UnihanQueryOption | qOption | |||
) |
This is a simplified version of unihan_find_all_matched(). Instead of returning whole result table, it only returns the first matched result as string.
Use g_free() to free the returned result.
givenField | the given (input) field. | |
givenValue | the given value of the field. | |
queryField | the result field. | |
qOption | the UnihanQueryOption. |
int unihan_insert | ( | UnihanTable | table, | |
StringList * | valueList | |||
) |
The value to be inserted should be in string representation, as if put in a plain text SQL command.
table | The database table to be looked at. | |
valueList | Values in StringList. |
int unihan_insert_no_duplicate | ( | UnihanTable | table, | |
StringList * | valueList | |||
) |
It checks for duplication before insertion; otherwise it is the same as unihan_insert(). Returns a negative value if a duplicate is found.
table | the database table to be looked at. | |
valueList | Values in StringList. |
int unihan_insert_value | ( | gunichar | code, | |
UnihanField | field, | |||
const char * | value | |||
) |
This function deals with the insertion of Unihan textual formatted records (as shown in Unihan.txt), which have 3 fields:
This function will parse the value and insert the parsed result to corresponding tables.
Before using this function, convert first field to UCS4 (gunichar) format (using unihanChar_parse()); second to UnihanField (using unihanField_parse()).
code | character in UCS4 (gunichar) | |
field | the UnihanField | |
value | the value as in Unihan.txt. |
gboolean unihanChar_has_field | ( | gunichar | code, | |
UnihanField | field | |||
) |
code | character in UCS4 (gunichar) | |
field | the UnihanField |
UnihanRange unihanChar_in_range | ( | gunichar | code | ) |
code | character in UCS4 (gunichar) |
gboolean unihanChar_is_common_in_locale | ( | gunichar | code, | |
UnihanLocale | locale | |||
) |
"Common" characters are the ones which appear in the well-known source, such as GB-2312 and JIS X 0208:1990. See Common sources for a locale for exact sources.
code | character in UCS4 (gunichar) | |
locale | the locale. |
gboolean unihanChar_is_in_source | ( | gunichar | code, | |
UnihanIRG_SourceId | source | |||
) |
code | character in UCS4 (gunichar) | |
source | IRG Source ID |
UnihanIRG_SourceId unihanChar_is_in_sources | ( | gunichar | code, | |
UnihanIRG_SourceId | source, | |||
... | ||||
) |
This function returns the first matched source; otherwise it keeps checking until it reaches the end of the source arguments, which is marked by UNIHAN_INVALID_SOURCEID.
code | character in UCS4 (gunichar) | |
source | IRG Source ID, use UNIHAN_INVALID_SOURCEID as indefinite value terminator. |
gunichar unihanChar_parse | ( | const char * | str | ) |
str | the string to be parsed. |
char* unihanChar_to_scalar_string | ( | gunichar | code | ) |
This function converts the UCS4 integer to scalar value format (U+XXXXXX) and returns a newly allocated string that holds it.
Use free() or g_free() to free the result.
code | the UCS4 character. |
int unihanDb_close | ( | ) |
sqlite3* unihanDb_get | ( | ) |
Normally this function is not needed, except to get additional control beyond SQL.
SQL_Result* unihanDb_get_all_tableNames | ( | ) |
This function returns names of all tables from DB files which are recorded in the cache DB.
The result is returned as a newly allocated SQL_Result instance.
Use sql_result_free() to free the result.
UnihanTable* unihanDb_get_all_tables | ( | ) |
This function returns the IDs of all tables from DB files which are recorded in the cache DB.
The result is returned as a newly allocated UnihanTable array.
Use free() or g_free() to free the result.
int unihanDb_open | ( | const char * | filename, | |
int | flags | |||
) |
The flags parameter provides additional control of database access. It is supported by sqlite3_open_v2(), thus it takes one of the following three values, optionally combined with the SQLITE_OPEN_NOMUTEX flag, just like the flags parameter of sqlite3_open_v2():
Note that the SQLITE_OPEN_NOMUTEX flag is not supported in SQLite 3.3.X and earlier.
filename | name of db file to be open. | |
flags | Database access flags. |
int unihanDb_open_default | ( | ) |
int unihanField_array_index | ( | UnihanField | field, | |
const UnihanField * | fieldArray | |||
) |
field | Field to be found | |
fieldArray | the array of UnihanFields. |
UnihanTable* unihanField_get_all_tables | ( | UnihanField | field | ) |
This function returns an array of tables, terminated by UNIHAN_INVALID_TABLE. Use unihanField_get_all_tables() to obtain all the tables that the field belongs to.
field | the UnihanField. |
UnihanTable unihanField_get_extra_table | ( | UnihanField | field | ) |
An extra table is a table that provides additional information for a pseudo field. For example, kSemanticVariant is a pseudo field which combines fields in kSemanticVariantTable and kSemanticVariantTableExtra.
unihanField_get_table(UNIHAN_KSEMANTIC_VARIANT) returns kSemanticVariantTable while unihanField_get_extra_table() returns the extra table kSemanticVariantTableExtra.
Pseudo fields usually associate with extra tables, see unihanField_is_pseudo() for details.
field | the UnihanField. |
UnihanIRG_Source unihanField_get_IRG_source | ( | UnihanField | field | ) |
field | the UnihanField. |
UnihanTable* unihanField_get_required_tables | ( | UnihanField | field | ) |
The function returns the built-in tables that are required by the specified built-in field. This function is designed for handling only the built-in fields. It returns an UnihanTable array with UNIHAN_INVALID_TABLE at index 0 if a 3rd party field is given.
Free the returned array with free() or g_free().
field | the UnihanField. |
Returns: an array of the tables that field requires, terminated by UNIHAN_INVALID_TABLE.
UnihanTable unihanField_get_table | ( | UnihanField | field | ) |
This function only returns one table. If the field is in more than one table, it returns UNIHAN_AMBIGUOUS_TABLE. It returns UNIHAN_INVALID_TABLE if an invalid field is given.
Use unihanField_get_all_tables() to obtain all the tables that the field belongs to.
field | the UnihanField. |
gboolean unihanField_is_indexed | ( | UnihanField | field | ) |
Indexed fields are non-pseudo fields which are indexed in database. Note that UNIHAN_FIELD_KDEFINITION is not indexed either.
field | the UnihanField |
gboolean unihanField_is_integer | ( | UnihanField | field | ) |
field | the UnihanField |
gboolean unihanField_is_IRG_Source | ( | UnihanField | field | ) |
It is a convenient wrapper of unihanField_get_IRG_source().
field | the UnihanField. |
gboolean unihanField_is_lowercase | ( | UnihanField | field | ) |
Usually, the Unihan tag values are stored as uppercase, such as UNIHAN_FIELD_KMANDARIN, UNIHAN_FIELD_PINYIN.
However, there are exceptions such as field UNIHAN_FIELD_KCANTONESE, which is always stored as lowercase; while field UNIHAN_FIELD_KDEFINITION, on the other hand, may have uppercase and lowercase characters.
field | the UnihanField |
gboolean unihanField_is_mandarin | ( | UnihanField | field | ) |
field | the UnihanField |
gboolean unihanField_is_pseudo | ( | UnihanField | field | ) |
A pseudo field is a field whose value is not derived directly from table but database functions. Field zhuyin, for example, is not in database but derived from function PINYIN_TO_ZHUYIN(). It is deemed to be a short cut for database functions.
Another example is field kSemanticVariant, which is a pseudo field that combines: kSemanticVariantTable.variantCode, kSemanticVariantTableExtra.fromDict, kSemanticVariantTableExtra.semanticT, kSemanticVariantTableExtra.semanticB and kSemanticVariantTableExtra.semanticZ.
field | the UnihanField |
gboolean unihanField_is_singleton | ( | UnihanField | field | ) |
A singleton field is a field whose value cannot be further split, and is functionally dependent on the UNIHAN_FIELD_CODE.
Most tag values in Unihan.txt are delimited by spaces; however kDefinition, for example, should not be split in this way.
field | the UnihanField |
gboolean unihanField_is_ucs4 | ( | UnihanField | field | ) |
UCS4 fields can be displayed in the form of Unicode scalar string (U+xxxxx)
field | the UnihanField |
gboolean unihanField_is_uppercase | ( | UnihanField | field | ) |
Some Unihan tag values are always stored as uppercase, such as UNIHAN_FIELD_KMANDARIN, UNIHAN_FIELD_PINYIN.
field | the UnihanField |
UnihanField unihanField_parse | ( | const char * | str | ) |
str | the string to be parsed. |
const char* unihanField_to_string | ( | UnihanField | field | ) |
gboolean unihanIRG_Source_has_no_mapping | ( | UnihanIRG_SourceId | sourceId | ) |
Some IRG sources (such as UNIHAN_SOURCE_GKX, UNIHAN_SOURCE_G4K) do not have a mapping index (inner code). This function tells whether a Source ID has mapping.
sourceId | the IRG source ID. |
const UnihanIRG_SourceData* unihanIRG_SourceData_get | ( | UnihanIRG_SourceId | sourceId | ) |
Note: the returned data is static and does not need to be freed.
sourceId | Unihan IRG source ID. |
UnihanIRG_SourceId unihanIRG_SourceId_parse | ( | const char * | sourceShortName | ) |
sourceShortName | the string to be parsed. |
void unihanIRG_SourceRec_free | ( | UnihanIRG_SourceRec * | rec | ) |
rec | the unihanIRG_SourceRec |
UnihanIRG_SourceRec* unihanIRG_SourceRec_parse | ( | UnihanField | field, | |
const char * | value | |||
) |
field | the UnihanField | |
value | the string to be parsed. |
UnihanLocale unihanLocale_parse | ( | char * | str | ) |
str | the string to be parsed. |
const char* unihanLocale_to_string | ( | UnihanLocale | locale | ) |
Note: the returned string is static; there is no need to free it.
locale | the UnihanLocale. |
const char* unihanRange_to_string | ( | UnihanRange | uRange | ) |
Note: the returned string is static; there is no need to free it.
uRange | the UnihanRange. |
int unihanSql_count_matches | ( | const char * | sqlClause, | |
char ** | errMsg_ptr | |||
) |
sqlClause | SQL command to be passed to Unihan db. | |
errMsg_ptr | pointer for error message. |
int unihanSql_exec | ( | char * | sqlClause, | |
sqlite_exec_callback | callback, | |||
void * | callbackOption, | |||
char ** | errMsg_ptr | |||
) |
A convenient wrapper of sqlite3_exec().
sqlClause | SQL command to be passed to Unihan db. | |
callback | callback function for each match record, can be NULL. | |
callbackOption | option for callback function, can be NULL. | |
errMsg_ptr | pointer for error message. |
SQL_Result* unihanSql_get_sql_result | ( | const char * | sqlClause | ) |
sqlClause | SQL command to be passed to Unihan db. |
UnihanField* unihanTable_get_db_fields | ( | UnihanTable | table | ) |
Unlike unihanTable_get_fields(), this function retrieves the actual fields from the database. Hence, it is slower than unihanTable_get_fields().
The returned UnihanField array is terminated by UNIHAN_INVALID_FIELD. Note: use g_free to free the UnihanField array.
table | the UnihanTable. |
UnihanField* unihanTable_get_fields | ( | UnihanTable | table | ) |
Unlike unihanTable_get_db_fields(), this function returns the fields that are supposedly in the given data table. It will not check the database, therefore it is faster than unihanTable_get_db_fields().
The returned UnihanField array is terminated by UNIHAN_INVALID_FIELD. Note: use g_free to free the UnihanField array.
table | the UnihanTable. |
UnihanField* unihanTable_get_primary_key_fields | ( | UnihanTable | table | ) |
The returned UnihanField array is terminated by UNIHAN_INVALID_FIELD. Note: use g_free to free the UnihanField array.
table | the UnihanTable. |
UnihanTable unihanTable_parse | ( | const char * | tableName | ) |
tableName | the string to be parsed. |
const char* unihanTable_to_string | ( | UnihanTable | table | ) |
Note: the returned string is static; there is no need to free it.
table | the UnihanTable. |