{"id":7353,"date":"2022-12-20T19:37:47","date_gmt":"2022-12-20T22:37:47","guid":{"rendered":"http:\/\/lode.uno\/linux-man\/index.php\/2022\/12\/20\/tcl_getencoding-man3-10\/"},"modified":"2022-12-20T19:37:47","modified_gmt":"2022-12-20T22:37:47","slug":"tcl_getencoding-man3-10","status":"publish","type":"post","link":"https:\/\/lode.uno\/linux-man\/2022\/12\/20\/tcl_getencoding-man3-10\/","title":{"rendered":"Tcl_GetEncoding (man3)"},"content":{"rendered":"<h1 align=\"center\">Tcl_GetEncoding<\/h1>\n<p> <a href=\"#NAME\">NAME<\/a><br \/> <a href=\"#SYNOPSIS\">SYNOPSIS<\/a><br \/> <a href=\"#ARGUMENTS\">ARGUMENTS<\/a><br \/> <a href=\"#INTRODUCTION\">INTRODUCTION<\/a><br \/> <a href=\"#DESCRIPTION\">DESCRIPTION<\/a><br \/> <a href=\"#ENCODING FILES\">ENCODING FILES<\/a><br \/> <a href=\"#KEYWORDS\">KEYWORDS<\/a> <\/p>\n<hr>\n<p>______________________________________________________________________________<\/p>\n<h2>NAME <a name=\"NAME\"><\/a> <\/h2>\n<p style=\"margin-left:11%; margin-top: 1em\">Tcl_GetEncoding, Tcl_FreeEncoding, Tcl_GetEncodingFromObj, Tcl_ExternalToUtfDString, Tcl_ExternalToUtf, Tcl_UtfToExternalDString, Tcl_UtfToExternal, Tcl_WinTCharToUtf, Tcl_WinUtfToTChar, Tcl_GetEncodingName, Tcl_SetSystemEncoding, Tcl_GetEncodingNameFromEnvironment, Tcl_GetEncodingNames, Tcl_CreateEncoding, Tcl_GetEncodingSearchPath, Tcl_SetEncodingSearchPath, Tcl_GetDefaultEncodingDir, Tcl_SetDefaultEncodingDir \u2212 procedures for creating and using encodings<\/p>\n<h2>SYNOPSIS <a name=\"SYNOPSIS\"><\/a> <\/h2>\n<p style=\"margin-left:11%; margin-top: 1em\"><b>#include &lt;tcl.h&gt;<\/b><\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">Tcl_Encoding <b><br \/> Tcl_GetEncoding<\/b>(<i>interp, name<\/i>)<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">void <b><br \/> Tcl_FreeEncoding<\/b>(<i>encoding<\/i>)<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">int <b><br \/> Tcl_GetEncodingFromObj<\/b>(<i>interp, objPtr, encodingPtr<\/i>)<\/p>\n<p style=\"margin-left:11%; 
margin-top: 1em\">char * <b><br \/> Tcl_ExternalToUtfDString<\/b>(<i>encoding, src, srcLen, dstPtr<\/i>)<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">char * <b><br \/> Tcl_UtfToExternalDString<\/b>(<i>encoding, src, srcLen, dstPtr<\/i>)<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">int <b><br \/> Tcl_ExternalToUtf<\/b>(<i>interp, encoding, src, srcLen, flags, statePtr, <br \/> dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr<\/i>)<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">int <b><br \/> Tcl_UtfToExternal<\/b>(<i>interp, encoding, src, srcLen, flags, statePtr, <br \/> dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr<\/i>)<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">char * <b><br \/> Tcl_WinTCharToUtf<\/b>(<i>tsrc, srcLen, dstPtr<\/i>)<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">TCHAR * <b><br \/> Tcl_WinUtfToTChar<\/b>(<i>src, srcLen, dstPtr<\/i>)<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">const char * <b><br \/> Tcl_GetEncodingName<\/b>(<i>encoding<\/i>)<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">int <b><br \/> Tcl_SetSystemEncoding<\/b>(<i>interp, name<\/i>)<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">const char * <b><br \/> Tcl_GetEncodingNameFromEnvironment<\/b>(<i>bufPtr<\/i>)<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">void <b><br \/> Tcl_GetEncodingNames<\/b>(<i>interp<\/i>)<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">Tcl_Encoding <b><br \/> Tcl_CreateEncoding<\/b>(<i>typePtr<\/i>)<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">Tcl_Obj * <b><br \/> Tcl_GetEncodingSearchPath<\/b>()<\/p>\n<p 
style=\"margin-left:11%; margin-top: 1em\">int <b><br \/> Tcl_SetEncodingSearchPath<\/b>(<i>searchPath<\/i>)<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">const char * <b><br \/> Tcl_GetDefaultEncodingDir<\/b>(<i>void<\/i>)<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">void <b><br \/> Tcl_SetDefaultEncodingDir<\/b>(<i>path<\/i>)<\/p>\n<h2>ARGUMENTS <a name=\"ARGUMENTS\"><\/a> <\/h2>\n<table width=\"100%\" border=\"0\" rules=\"none\" frame=\"void\" cellspacing=\"0\" cellpadding=\"0\">\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p style=\"margin-top: 1em\">Tcl_Interp <i>*interp<\/i> (in)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p style=\"margin-top: 1em\">Interpreter to use for error reporting, or NULL if no error reporting is desired.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>const char <i>*name<\/i> (in)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>Name of encoding to load.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>Tcl_Encoding <i>encoding<\/i> (in)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>The encoding to query, free, or use for converting text. 
If <i>encoding<\/i> is NULL, the current system encoding is used.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>Tcl_Obj <i>*objPtr<\/i> (in)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>Name of encoding to get token for.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>Tcl_Encoding <i>*encodingPtr<\/i> (out)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>Points to storage where encoding token is to be written.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>const char <i>*src<\/i> (in)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>For the <b>Tcl_ExternalToUtf<\/b> functions, an array of bytes in the specified encoding that are to be converted to UTF-8. For the <b>Tcl_UtfToExternal<\/b> and <b>Tcl_WinUtfToTChar<\/b> functions, an array of UTF-8 characters to be converted to the specified encoding.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>const TCHAR <i>*tsrc<\/i> (in)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>An array of Windows TCHAR characters to convert to UTF-8.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>int <i>srcLen<\/i> (in)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>Length of <i>src<\/i> or <i>tsrc<\/i> in bytes. 
If the length is negative, the encoding-specific length of the string is used.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>Tcl_DString <i>*dstPtr<\/i> (out)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>Pointer to an uninitialized or free <b>Tcl_DString<\/b> in which the converted result will be stored.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>int <i>flags<\/i> (in)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>Various flag bits OR-ed together. <b>TCL_ENCODING_START<\/b> signifies that the source buffer is the first block in a (potentially multi-block) input stream, telling the conversion routine to reset to an initial state and perform any initialization that needs to occur before the first byte is converted. <b>TCL_ENCODING_END<\/b> signifies that the source buffer is the last block in a (potentially multi-block) input stream, telling the conversion routine to perform any finalization that needs to occur after the last byte is converted and then to reset to an initial state. <b>TCL_ENCODING_STOPONERROR<\/b> signifies that the conversion routine should return immediately upon reading a source character that does not exist in the target encoding; otherwise a default fallback character will automatically be substituted.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>Tcl_EncodingState <i>*statePtr<\/i> (in\/out)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>Used when converting a (generally long or indefinite length) byte stream in a piece-by-piece fashion. 
The conversion routine stores its current state in <i>*statePtr<\/i> after <i>src<\/i> (the buffer containing the current piece) has been converted; that state information must be passed back when converting the next piece of the stream so the conversion routine knows what state it was in when it left off at the end of the last piece. May be NULL, in which case the value specified for <i>flags<\/i> is ignored and the source buffer is assumed to contain the complete string to convert.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>char <i>*dst<\/i> (out)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>Buffer in which the converted result will be stored. No more than <i>dstLen<\/i> bytes will be stored in <i>dst<\/i>.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>int <i>dstLen<\/i> (in)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>The maximum length of the output buffer <i>dst<\/i> in bytes.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>int <i>*srcReadPtr<\/i> (out)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>Filled with the number of bytes from <i>src<\/i> that were actually converted. This may be less than the original source length if there was a problem converting some source characters. May be NULL.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>int <i>*dstWrotePtr<\/i> (out)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>Filled with the number of bytes that were actually stored in the output buffer as a result of the conversion. 
May be NULL.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>int <i>*dstCharsPtr<\/i> (out)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>Filled with the number of characters that correspond to the number of bytes stored in the output buffer. May be NULL.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>Tcl_DString <i>*bufPtr<\/i> (out)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>Storage for the prescribed system encoding name.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>const Tcl_EncodingType <i>*typePtr<\/i> (in)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>Structure that defines a new type of encoding.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>Tcl_Obj <i>*searchPath<\/i> (in)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>List of filesystem directories in which to search for encoding data files.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"55%\">\n<p>const char <i>*path<\/i> (in)<\/p>\n<\/td>\n<td width=\"22%\"><\/td>\n<td width=\"12%\">\n<p>A path to the location of the encoding file.<\/p>\n<\/td>\n<\/tr>\n<\/table>\n<p style=\"margin-left:88%;\">______________________________________________________________________________<\/p>\n<h2>INTRODUCTION <a name=\"INTRODUCTION\"><\/a> <\/h2>\n<p style=\"margin-left:11%; margin-top: 1em\">These routines convert between Tcl\u2019s internal character representation, UTF-8, and character representations used by various operating systems or file systems, such as Unicode, ASCII, or Shift-JIS. 
When operating on strings, such as obtaining the names of files or displaying characters using international fonts, the strings must be translated into one or possibly multiple formats that the various system calls can use. For instance, on a Japanese Unix workstation, a user might obtain a filename represented in the EUC-JP file encoding and then translate the characters to the jisx0208 font encoding in order to display the filename in a Tk widget. The purpose of the encoding package is to help bridge the translation gap. UTF-8 provides an intermediate staging ground for all the various encodings. In the example above, text would be translated into UTF-8 from whatever file encoding the operating system is using. Then it would be translated from UTF-8 into whatever font encoding the display routines require.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">Some basic encodings are compiled into Tcl. Others can be defined by the user or dynamically loaded from encoding files in a platform-independent manner.<\/p>\n<h2>DESCRIPTION <a name=\"DESCRIPTION\"><\/a> <\/h2>\n<p style=\"margin-left:11%; margin-top: 1em\"><b>Tcl_GetEncoding<\/b> finds an encoding given its <i>name<\/i>. The name may refer to a built-in Tcl encoding, a user-defined encoding registered by calling <b>Tcl_CreateEncoding<\/b>, or a dynamically-loadable encoding file. The return value is a token that represents the encoding and can be used in subsequent calls to procedures such as <b>Tcl_GetEncodingName<\/b>, <b>Tcl_FreeEncoding<\/b>, and <b>Tcl_UtfToExternal<\/b>. If the name did not refer to any known or loadable encoding, NULL is returned and an error message is returned in <i>interp<\/i>.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">The encoding package maintains a database of all encodings currently in use. The first time <i>name<\/i> is seen, <b>Tcl_GetEncoding<\/b> returns an encoding with a reference count of 1. 
If the same <i>name<\/i> is requested further times, then the reference count for that encoding is incremented without the overhead of allocating a new encoding and all its associated data structures.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">When an <i>encoding<\/i> is no longer needed, <b>Tcl_FreeEncoding<\/b> should be called to release it. When an <i>encoding<\/i> is no longer in use anywhere (i.e., it has been freed as many times as it has been gotten) <b>Tcl_FreeEncoding<\/b> will release all storage the encoding was using and delete it from the database.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\"><b>Tcl_GetEncodingFromObj<\/b> treats the string representation of <i>objPtr<\/i> as an encoding name, and finds an encoding with that name, just as <b>Tcl_GetEncoding<\/b> does. When an encoding is found, it is cached within the <b>objPtr<\/b> value for future reference, the <b>Tcl_Encoding<\/b> token is written to the storage pointed to by <i>encodingPtr<\/i>, and the value <b>TCL_OK<\/b> is returned. If no such encoding is found, the value <b>TCL_ERROR<\/b> is returned, and no writing to <b>*<\/b><i>encodingPtr<\/i> takes place. Just as with <b>Tcl_GetEncoding<\/b>, the caller should call <b>Tcl_FreeEncoding<\/b> on the resulting encoding token when that token will no longer be used.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\"><b>Tcl_ExternalToUtfDString<\/b> converts a source buffer <i>src<\/i> from the specified <i>encoding<\/i> into UTF-8. The converted bytes are stored in <i>dstPtr<\/i>, which is then null-terminated. The caller should eventually call <b>Tcl_DStringFree<\/b> to free any information stored in <i>dstPtr<\/i>. When converting, if any of the characters in the source buffer cannot be represented in the target encoding, a default fallback character will be used. 
The return value is a pointer to the value stored in the DString.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\"><b>Tcl_ExternalToUtf<\/b> converts a source buffer <i>src<\/i> from the specified <i>encoding<\/i> into UTF-8. Up to <i>srcLen<\/i> bytes are converted from the source buffer and up to <i>dstLen<\/i> converted bytes are stored in <i>dst<\/i>. In all cases, <i>*srcReadPtr<\/i> is filled with the number of bytes that were successfully converted from <i>src<\/i> and <i>*dstWrotePtr<\/i> is filled with the corresponding number of bytes that were stored in <i>dst<\/i>. The return value is one of the following:<\/p>\n<table width=\"100%\" border=\"0\" rules=\"none\" frame=\"void\" cellspacing=\"0\" cellpadding=\"0\">\n<tr valign=\"top\" align=\"left\">\n<td width=\"22%\"><\/td>\n<td width=\"32%\">\n<p style=\"margin-top: 1em\"><b>TCL_OK<\/b><\/p>\n<\/td>\n<td width=\"12%\"><\/td>\n<td width=\"34%\">\n<p style=\"margin-top: 1em\">All bytes of <i>src<\/i> were converted.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"22%\"><\/td>\n<td width=\"32%\">\n<p><b>TCL_CONVERT_NOSPACE<\/b><\/p>\n<\/td>\n<td width=\"12%\"><\/td>\n<td width=\"34%\">\n<p>The destination buffer was not large enough for all of the converted data; as many characters as could fit were converted though.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"22%\"><\/td>\n<td width=\"32%\">\n<p><b>TCL_CONVERT_MULTIBYTE<\/b><\/p>\n<\/td>\n<td width=\"12%\"><\/td>\n<td width=\"34%\">\n<p>The last few bytes in the source buffer were the beginning of a multibyte sequence, but more bytes were needed to complete this sequence. 
A subsequent call to the conversion routine should pass a buffer containing the unconverted bytes that remained in <i>src<\/i> plus some further bytes from the source stream to properly convert the formerly split-up multibyte sequence.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"22%\"><\/td>\n<td width=\"32%\">\n<p><b>TCL_CONVERT_SYNTAX<\/b><\/p>\n<\/td>\n<td width=\"12%\"><\/td>\n<td width=\"34%\">\n<p>The source buffer contained an invalid character sequence. This may occur if the input stream has been damaged or if the input encoding method was misidentified.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"22%\"><\/td>\n<td width=\"32%\">\n<p><b>TCL_CONVERT_UNKNOWN<\/b><\/p>\n<\/td>\n<td width=\"12%\"><\/td>\n<td width=\"34%\">\n<p>The source buffer contained a character that could not be represented in the target encoding and <b>TCL_ENCODING_STOPONERROR<\/b> was specified.<\/p>\n<\/td>\n<\/tr>\n<\/table>\n<p style=\"margin-left:11%; margin-top: 1em\"><b>Tcl_UtfToExternalDString<\/b> converts a source buffer <i>src<\/i> from UTF-8 into the specified <i>encoding<\/i>. The converted bytes are stored in <i>dstPtr<\/i>, which is then terminated with the appropriate encoding-specific null. The caller should eventually call <b>Tcl_DStringFree<\/b> to free any information stored in <i>dstPtr<\/i>. When converting, if any of the characters in the source buffer cannot be represented in the target encoding, a default fallback character will be used. The return value is a pointer to the value stored in the DString.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\"><b>Tcl_UtfToExternal<\/b> converts a source buffer <i>src<\/i> from UTF-8 into the specified <i>encoding<\/i>. Up to <i>srcLen<\/i> bytes are converted from the source buffer and up to <i>dstLen<\/i> converted bytes are stored in <i>dst<\/i>. 
In all cases, <i>*srcReadPtr<\/i> is filled with the number of bytes that were successfully converted from <i>src<\/i> and <i>*dstWrotePtr<\/i> is filled with the corresponding number of bytes that were stored in <i>dst<\/i>. The return values are the same as the return values for <b>Tcl_ExternalToUtf<\/b>.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\"><b>Tcl_WinUtfToTChar<\/b> and <b>Tcl_WinTCharToUtf<\/b> are Windows-only convenience functions for converting between UTF-8 and Windows strings based on the TCHAR type which is by convention a Unicode character on Windows NT.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\"><b>Tcl_GetEncodingName<\/b> is roughly the inverse of <b>Tcl_GetEncoding<\/b>. Given an <i>encoding<\/i>, the return value is the <i>name<\/i> argument that was used to create the encoding. The string returned by <b>Tcl_GetEncodingName<\/b> is only guaranteed to persist until the <i>encoding<\/i> is deleted. The caller must not modify this string.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\"><b>Tcl_SetSystemEncoding<\/b> sets the default encoding that should be used whenever the user passes a NULL value for the <i>encoding<\/i> argument to any of the other encoding functions. If <i>name<\/i> is NULL, the system encoding is reset to the default system encoding, <b>binary<\/b>. If the name did not refer to any known or loadable encoding, <b>TCL_ERROR<\/b> is returned and an error message is left in <i>interp<\/i>. Otherwise, this procedure increments the reference count of the new system encoding, decrements the reference count of the old system encoding, and returns <b>TCL_OK<\/b>.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\"><b>Tcl_GetEncodingNameFromEnvironment<\/b> provides a means for the Tcl library to report the encoding name it believes to be the correct one to use as the system encoding, based on system calls and examination of the environment suitable for the platform. 
It accepts <i>bufPtr<\/i>, a pointer to an uninitialized or freed <b>Tcl_DString<\/b> and writes the encoding name to it. The <b>Tcl_DStringValue<\/b> is returned.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\"><b>Tcl_GetEncodingNames<\/b> sets the <i>interp<\/i> result to a list consisting of the names of all the encodings that are currently defined or can be dynamically loaded, searching the encoding path specified by <b>Tcl_SetDefaultEncodingDir<\/b>. This procedure does not ensure that the dynamically-loadable encoding files contain valid data, but merely that they exist.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\"><b>Tcl_CreateEncoding<\/b> defines a new encoding and registers the C procedures that are called back to convert between the encoding and UTF-8. Encodings created by <b>Tcl_CreateEncoding<\/b> are thereafter visible in the database used by <b>Tcl_GetEncoding<\/b>. Just as with the <b>Tcl_GetEncoding<\/b> procedure, the return value is a token that represents the encoding and can be used in subsequent calls to other encoding functions. <b>Tcl_CreateEncoding<\/b> returns an encoding with a reference count of 1. If an encoding with the specified <i>name<\/i> already exists, then its entry in the database is replaced with the new encoding; the token for the old encoding will remain valid and continue to behave as before, but users of the new token will now call the new encoding procedures.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">The <i>typePtr<\/i> argument to <b>Tcl_CreateEncoding<\/b> contains information about the name of the encoding and the procedures that will be called to convert between this encoding and UTF-8. 
It is defined as follows:<\/p>\n<p style=\"margin-left:22%; margin-top: 1em\">typedef struct Tcl_EncodingType { <br \/> const char *<i>encodingName<\/i>; <br \/> Tcl_EncodingConvertProc *<i>toUtfProc<\/i>; <br \/> Tcl_EncodingConvertProc *<i>fromUtfProc<\/i>; <br \/> Tcl_EncodingFreeProc *<i>freeProc<\/i>; <br \/> ClientData <i>clientData<\/i>; <br \/> int <i>nullSize<\/i>; <br \/> } <b>Tcl_EncodingType<\/b>;<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">The <i>encodingName<\/i> provides a string name for the encoding, by which it can be referred in other procedures such as <b>Tcl_GetEncoding<\/b>. The <i>toUtfProc<\/i> refers to a callback procedure to invoke to convert text from this encoding into UTF-8. The <i>fromUtfProc<\/i> refers to a callback procedure to invoke to convert text from UTF-8 into this encoding. The <i>freeProc<\/i> refers to a callback procedure to invoke when this encoding is deleted. The <i>freeProc<\/i> field may be NULL. The <i>clientData<\/i> contains an arbitrary one-word value passed to <i>toUtfProc<\/i>, <i>fromUtfProc<\/i>, and <i>freeProc<\/i> whenever they are called. Typically, this is a pointer to a data structure containing encoding-specific information that can be used by the callback procedures. For instance, two very similar encodings such as <b>ascii<\/b> and <b>macRoman<\/b> may use the same callback procedure, but use different values of <i>clientData<\/i> to control its behavior. The <i>nullSize<\/i> specifies the number of zero bytes that signify end-of-string in this encoding. It must be <b>1<\/b> (for single-byte or multi-byte encodings like ASCII or Shift-JIS) or <b>2<\/b> (for double-byte encodings like Unicode). 
Constant-sized encodings with 3 or more bytes per character (such as CNS11643) are not accepted.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">The callback procedures <i>toUtfProc<\/i> and <i>fromUtfProc<\/i> should match the type <b>Tcl_EncodingConvertProc<\/b>:<\/p>\n<p style=\"margin-left:22%; margin-top: 1em\">typedef int <b>Tcl_EncodingConvertProc<\/b>( <br \/> ClientData <i>clientData<\/i>, <br \/> const char *<i>src<\/i>, <br \/> int <i>srcLen<\/i>, <br \/> int <i>flags<\/i>, <br \/> Tcl_EncodingState *<i>statePtr<\/i>, <br \/> char *<i>dst<\/i>, <br \/> int <i>dstLen<\/i>, <br \/> int *<i>srcReadPtr<\/i>, <br \/> int *<i>dstWrotePtr<\/i>, <br \/> int *<i>dstCharsPtr<\/i>);<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">The <i>toUtfProc<\/i> and <i>fromUtfProc<\/i> procedures are called by the <b>Tcl_ExternalToUtf<\/b> or <b>Tcl_UtfToExternal<\/b> family of functions to perform the actual conversion. The <i>clientData<\/i> parameter to these procedures is the same as the <i>clientData<\/i> field specified to <b>Tcl_CreateEncoding<\/b> when the encoding was created. The remaining arguments to the callback procedures are the same as the arguments, documented at the top, to <b>Tcl_ExternalToUtf<\/b> or <b>Tcl_UtfToExternal<\/b>, with the following exceptions. If the <i>srcLen<\/i> argument to one of those high-level functions is negative, the value passed to the callback procedure will be the appropriate encoding-specific string length of <i>src<\/i>. 
If any of the <i>srcReadPtr<\/i>, <i>dstWrotePtr<\/i>, or <i>dstCharsPtr<\/i> arguments to one of the high-level functions is NULL, the corresponding value passed to the callback procedure will be a non-NULL location.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">The callback procedure <i>freeProc<\/i>, if non-NULL, should match the type <b>Tcl_EncodingFreeProc<\/b>:<\/p>\n<p style=\"margin-left:22%; margin-top: 1em\">typedef void <b>Tcl_EncodingFreeProc<\/b>( <br \/> ClientData <i>clientData<\/i>);<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">This <i>freeProc<\/i> function is called when the encoding is deleted. The <i>clientData<\/i> parameter is the same as the <i>clientData<\/i> field specified to <b>Tcl_CreateEncoding<\/b> when the encoding was created.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\"><b>Tcl_GetEncodingSearchPath<\/b> and <b>Tcl_SetEncodingSearchPath<\/b> are called to access and set the list of filesystem directories searched for encoding data files.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">The value returned by <b>Tcl_GetEncodingSearchPath<\/b> is the value stored by the last successful call to <b>Tcl_SetEncodingSearchPath<\/b>. If no calls to <b>Tcl_SetEncodingSearchPath<\/b> have occurred, Tcl will compute an initial value based on the environment. There is one encoding search path for the entire process, shared by all threads in the process.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\"><b>Tcl_SetEncodingSearchPath<\/b> stores <i>searchPath<\/i> and returns <b>TCL_OK<\/b>, unless <i>searchPath<\/i> is not a valid Tcl list, which causes <b>TCL_ERROR<\/b> to be returned. The elements of <i>searchPath<\/i> are not verified as existing readable filesystem directories. 
When searching for encoding data files takes place, any non-existent or non-readable filesystem directories on the <i>searchPath<\/i> are silently ignored.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\"><b>Tcl_GetDefaultEncodingDir<\/b> and <b>Tcl_SetDefaultEncodingDir<\/b> are obsolete interfaces best replaced with calls to <b>Tcl_GetEncodingSearchPath<\/b> and <b>Tcl_SetEncodingSearchPath<\/b>. They are called to access and set the first element of the <i>searchPath<\/i> list. Since Tcl searches <i>searchPath<\/i> for encoding data files in list order, these routines establish the \u201cdefault\u201d directory in which to find encoding data files.<\/p>\n<h2>ENCODING FILES <a name=\"ENCODING FILES\"><\/a> <\/h2>\n<p style=\"margin-left:11%; margin-top: 1em\">Space would prohibit precompiling into Tcl every possible encoding algorithm, so many encodings are stored on disk as dynamically-loadable encoding files. This behavior also allows the user to create additional encoding files that can be loaded using the same mechanism. These encoding files contain information about the tables and\/or escape sequences used to map between an external encoding and Unicode. The external encoding may consist of single-byte, multi-byte, or double-byte characters.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">Each dynamically-loadable encoding is represented as a text file. The initial line of the file, beginning with a \u201c#\u201d symbol, is a comment that provides a human-readable description of the file. The next line identifies the type of encoding file. 
It can be one of the following letters:<\/p>\n<table width=\"100%\" border=\"0\" rules=\"none\" frame=\"void\" cellspacing=\"0\" cellpadding=\"0\">\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"7%\">\n<p style=\"margin-top: 1em\">[1] <b>S<\/b><\/p>\n<\/td>\n<td width=\"4%\"><\/td>\n<td width=\"78%\">\n<p style=\"margin-top: 1em\">A single-byte encoding, where one character is always one byte long in the encoding. An example is <b>iso8859-1<\/b>, used by many European languages.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"7%\">\n<p>[2] <b>D<\/b><\/p>\n<\/td>\n<td width=\"4%\"><\/td>\n<td width=\"78%\">\n<p>A double-byte encoding, where one character is always two bytes long in the encoding. An example is <b>big5<\/b>, used for Chinese text.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"7%\">\n<p>[3] <b>M<\/b><\/p>\n<\/td>\n<td width=\"4%\"><\/td>\n<td width=\"78%\">\n<p>A multi-byte encoding, where one character may be either one or two bytes long. Certain bytes are lead bytes, indicating that another byte must follow and that together the two bytes represent one character. Other bytes are not lead bytes and represent themselves. An example is <b>shiftjis<\/b>, used by many Japanese computers.<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"11%\"><\/td>\n<td width=\"7%\">\n<p>[4] <b>E<\/b><\/p>\n<\/td>\n<td width=\"4%\"><\/td>\n<td width=\"78%\">\n<p>An escape-sequence encoding, specifying that certain sequences of bytes do not represent characters, but commands that describe how following bytes should be interpreted.<\/p>\n<\/td>\n<\/tr>\n<\/table>\n<p style=\"margin-left:11%; margin-top: 1em\">The rest of the lines in the file depend on the type.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">Cases [1], [2], and [3] are collectively referred to as table-based encoding files. 
The lines in a table-based encoding file are in the same format as this example taken from the <b>shiftjis<\/b> encoding (this is not the complete file):<\/p>\n<p style=\"margin-left:22%; margin-top: 1em\"># Encoding file: shiftjis, multi-byte <br \/> M <br \/> 003F 0 40 <br \/> 00 <br \/> 0000000100020003000400050006000700080009000A000B000C000D000E000F <br \/> 0010001100120013001400150016001700180019001A001B001C001D001E001F <br \/> 0020002100220023002400250026002700280029002A002B002C002D002E002F <br \/> 0030003100320033003400350036003700380039003A003B003C003D003E003F <br \/> 0040004100420043004400450046004700480049004A004B004C004D004E004F <br \/> 0050005100520053005400550056005700580059005A005B005C005D005E005F <br \/> 0060006100620063006400650066006700680069006A006B006C006D006E006F <br \/> 0070007100720073007400750076007700780079007A007B007C007D203E007F <br \/> 0080000000000000000000000000000000000000000000000000000000000000 <br \/> 0000000000000000000000000000000000000000000000000000000000000000 <br \/> 0000FF61FF62FF63FF64FF65FF66FF67FF68FF69FF6AFF6BFF6CFF6DFF6EFF6F <br \/> FF70FF71FF72FF73FF74FF75FF76FF77FF78FF79FF7AFF7BFF7CFF7DFF7EFF7F <br \/> FF80FF81FF82FF83FF84FF85FF86FF87FF88FF89FF8AFF8BFF8CFF8DFF8EFF8F <br \/> FF90FF91FF92FF93FF94FF95FF96FF97FF98FF99FF9AFF9BFF9CFF9DFF9EFF9F <br \/> 0000000000000000000000000000000000000000000000000000000000000000 <br \/> 0000000000000000000000000000000000000000000000000000000000000000 <br \/> 81 <br \/> 0000000000000000000000000000000000000000000000000000000000000000 <br \/> 0000000000000000000000000000000000000000000000000000000000000000 <br \/> 0000000000000000000000000000000000000000000000000000000000000000 <br \/> 0000000000000000000000000000000000000000000000000000000000000000 <br \/> 300030013002FF0CFF0E30FBFF1AFF1BFF1FFF01309B309C00B4FF4000A8FF3E <br \/> FFE3FF3F30FD30FE309D309E30034EDD30053006300730FC20152010FF0F005C <br \/> 301C2016FF5C2026202520182019201C201DFF08FF0930143015FF3BFF3DFF5B <br \/> 
FF5D30083009300A300B300C300D300E300F30103011FF0B221200B100D70000 <br \/> 00F7FF1D2260FF1CFF1E22662267221E22342642264000B0203220332103FFE5 <br \/> FF0400A200A3FF05FF03FF06FF0AFF2000A72606260525CB25CF25CE25C725C6 <br \/> 25A125A025B325B225BD25BC203B301221922190219121933013000000000000 <br \/> 000000000000000000000000000000002208220B2286228722822283222A2229 <br \/> 000000000000000000000000000000002227222800AC21D221D4220022030000 <br \/> 0000000000000000000000000000000000000000222022A52312220222072261 <br \/> 2252226A226B221A223D221D2235222B222C0000000000000000000000000000 <br \/> 212B2030266F266D266A2020202100B6000000000000000025EF000000000000<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">The third line of the file is three numbers. The first number is the fallback character (in base 16) to use when converting from UTF-8 to this encoding. The second number is a <b>1<\/b> if this file represents the encoding for a symbol font, or <b>0<\/b> otherwise. The last number (in base 10) is how many pages of data follow.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">Subsequent lines in the example above are pages that describe how to map from the encoding into 2-byte Unicode. The first line in a page identifies the page number. Following it are 256 double-byte numbers, arranged as 16 rows of 16 numbers. Given a character in the encoding, the high byte of that character is used to select which page, and the low byte of that character is used as an index to select one of the double-byte numbers in that page \u2212 the value obtained being the corresponding Unicode character. By examination of the example above, one can see that the characters 0x7E and 0x8163 in <b>shiftjis<\/b> map to 203E and 2026 in Unicode, respectively.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">Following the first page will be all the other pages, each in the same format as the first: one number identifying the page followed by 256 double-byte Unicode characters. 
If a character in the encoding maps to the Unicode character 0000, it means that the character does not actually exist. If all characters on a page would map to 0000, that page can be omitted.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">Case [4] is the escape-sequence encoding file. The lines in this type of file are in the same format as this example taken from the <b>iso2022-jp<\/b> encoding:<\/p>\n<p style=\"margin-left:22%; margin-top: 1em\"># Encoding file: iso2022-jp, escape-driven <br \/> E<\/p>\n<table width=\"100%\" border=\"0\" rules=\"none\" frame=\"void\" cellspacing=\"0\" cellpadding=\"0\">\n<tr valign=\"top\" align=\"left\">\n<td width=\"22%\"><\/td>\n<td width=\"1%\">\n<p>init<\/p>\n<\/td>\n<td width=\"77%\">\n<p>{}<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"22%\"><\/td>\n<td width=\"1%\">\n<p>final<\/p>\n<\/td>\n<td width=\"77%\">\n<p>{}<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"22%\"><\/td>\n<td width=\"1%\">\n<p>iso8859-1<\/p>\n<\/td>\n<td width=\"77%\">\n<p>\\x1b(B<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"22%\"><\/td>\n<td width=\"1%\">\n<p>jis0201<\/p>\n<\/td>\n<td width=\"77%\">\n<p>\\x1b(J<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"22%\"><\/td>\n<td width=\"1%\">\n<p>jis0208<\/p>\n<\/td>\n<td width=\"77%\">\n<p>\\x1b$@<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"22%\"><\/td>\n<td width=\"1%\">\n<p>jis0208<\/p>\n<\/td>\n<td width=\"77%\">\n<p>\\x1b$B<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"22%\"><\/td>\n<td width=\"1%\">\n<p>jis0212<\/p>\n<\/td>\n<td width=\"77%\">\n<p>\\x1b$(D<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"22%\"><\/td>\n<td width=\"1%\">\n<p>gb2312<\/p>\n<\/td>\n<td width=\"77%\">\n<p>\\x1b$A<\/p>\n<\/td>\n<\/tr>\n<tr valign=\"top\" align=\"left\">\n<td width=\"22%\"><\/td>\n<td width=\"1%\">\n<p>ksc5601<\/p>\n<\/td>\n<td 
width=\"77%\">\n<p>\\x1b$(C<\/p>\n<\/td>\n<\/tr>\n<\/table>\n<p style=\"margin-left:11%; margin-top: 1em\">In the file, the first column represents an option and the second column is the associated value. <b>init<\/b> is a string to emit or expect before the first character is converted, while <b>final<\/b> is a string to emit or expect after the last character. All other options are names of table-based encodings; the associated value is the escape-sequence that marks that encoding. Tcl syntax is used for the values; in the above example, for instance, \u201c<b>{}<\/b>\u201d represents the empty string and \u201c<b>\\x1b<\/b>\u201d represents character 27.<\/p>\n<p style=\"margin-left:11%; margin-top: 1em\">When <b>Tcl_GetEncoding<\/b> encounters an encoding <i>name<\/i> that has not been loaded, it attempts to load an encoding file called <i>name<\/i><b>.enc<\/b> from the <b>encoding<\/b> subdirectory of each directory that Tcl searches for its script library. If the encoding file exists, but is malformed, an error message will be left in <i>interp<\/i>.<\/p>\n<h2>KEYWORDS <a name=\"KEYWORDS\"><\/a> <\/h2>\n<p style=\"margin-left:11%; margin-top: 1em\">utf, encoding, convert<\/p>\n<hr>\n","protected":false},"excerpt":{"rendered":"<p>  Tcl_GetEncoding, Tcl_FreeEncoding, Tcl_GetEncodingFromObj, Tcl_ExternalToUtfDString, Tcl_ExternalToUtf, Tcl_UtfToExternalDString, Tcl_UtfToExternal, Tcl_WinTCharToUtf, Tcl_WinUtfToTChar, Tcl_GetEncodingName, Tcl_SetSystemEncoding, Tcl_GetEncodingNameFromEnvironment, Tcl_GetEncodingNames, Tcl_CreateEncoding, Tcl_GetEncodingSearchPath, Tcl_SetEncodingSearchPath, Tcl_GetDefaultEncodingDir, Tcl_SetDefaultEncodingDir \u2212 procedures for creating and using encodings 
<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[2536],"tags":[2538,3007,3667],"class_list":["post-7353","post","type-post","status-publish","format-standard","hentry","category-3-llamadas-de-bibliotecas","tag-2538","tag-man3","tag-tcl_wintchartoutf"],"gutentor_comment":0,"_links":{"self":[{"href":"https:\/\/lode.uno\/linux-man\/wp-json\/wp\/v2\/posts\/7353","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/lode.uno\/linux-man\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/lode.uno\/linux-man\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/lode.uno\/linux-man\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/lode.uno\/linux-man\/wp-json\/wp\/v2\/comments?post=7353"}],"version-history":[{"count":0,"href":"https:\/\/lode.uno\/linux-man\/wp-json\/wp\/v2\/posts\/7353\/revisions"}],"wp:attachment":[{"href":"https:\/\/lode.uno\/linux-man\/wp-json\/wp\/v2\/media?parent=7353"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/lode.uno\/linux-man\/wp-json\/wp\/v2\/categories?post=7353"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/lode.uno\/linux-man\/wp-json\/wp\/v2\/tags?post=7353"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}