
也談IF命令的比較順序

眾所周知,IF命令的排序規則既不是按照GBK編碼的順序,也不是按照Unicode編碼的順序,而是有著自己的規則,這個規則是什麼呢?在CMD內部,IF命令是調用lstrcmpW函數來比較字符串大小的(《批處理技術內幕:IF命令》),IF命令的比較規則即lstrcmpW函數的比較規則。lstrcmp(Locale String Compare)函數的排序是與系統的語言與區域設置有關的(參考《Windows 代碼頁與字符順序》)。但是具體怎麼排序MSDN卻沒有說明(至少我沒有找到),為了弄清楚默認情況下IF的比較順序,我寫了一個簡單的C程序:
CP936.TXT可以到Unicode官方網站下載到(http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT)。程序運行後會生成CP936_SORT.txt,裡面是排序後的CP936到Unicode的映射表,第一列是GBK碼,第二列是對應的Unicode代碼點(Code Point),第三列是字符的Unicode名稱。不想自己編譯的話可以下載我編譯好的EXE。
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
/* Size in bytes of the line buffer used when reading CP936.txt. */
#define BUFFER_SIZE 1024

/*
 * One row of the CP936 -> Unicode mapping table.
 *
 * Both pointer members are allocated by main() for each row and released
 * by main() before the program exits.
 */
typedef struct _table {
    int cp936;         /* value parsed (base 16) from the first column */
    wchar_t *unicode;  /* two-element string: the code point plus a NUL */
    char *name;        /* copy of the third column, as read from the file */
} table;

/* qsort() callback: orders two table rows by lstrcmpW() of their characters. */
int compare(const void *a, const void *b);
- int main()
- {
- table *a;
- int n = 0, i = 0;
- wchar_t *p;
- char buf[BUFFER_SIZE], *p1, *p2;
- FILE *fp1, *fp2;
- fp1 = fopen(CP936.txt, rb);
- if (fp1 == NULL) {
- fprintf(stderr, Can't open CP936.txtn);
- return 1;
- }
- while (!feof(fp1) && fgets(buf, BUFFER_SIZE, fp1)) {
- if (strlen(buf) == 0 || buf[0] == '#') continue;
- if (p1 = strchr(buf, 't')) *p1++ = '';
- if (p2 = strchr(p1, 't')) *p2++ = '';
- while (isspace(*p1)) p1++;
- if (!*p1) continue;
- n++;
- }
- a = (table *) malloc(n * sizeof(table));
- fp1 = freopen(CP936.txt, rb, fp1);
- if (fp1 == NULL) {
- fprintf(stderr, Can't reopen CP936.txtn);
- return 1;
- }
- while (!feof(fp1) && fgets(buf, BUFFER_SIZE, fp1)) {
- if (strlen(buf) == 0 || buf[0] == '#') continue;
- if (p1 = strchr(buf, 't')) *p1++ = '';
- if (p2 = strchr(p1, 't')) *p2++ = '';
- while (isspace(*p1)) p1++;
- if (!*p1) continue;
- p = (wchar_t *) malloc(2 * sizeof(wchar_t));
- p[0] = (wchar_t) strtol(p1, NULL, 16);
- p[1] = 0x0000;
- a[i].cp936 = strtol(buf, NULL, 16);
- a[i].unicode = p;
- a[i].name = strdup(p2);
- i++;
- }
- qsort(a, n, sizeof(table), compare);
- fp2 = fopen(CP936_SORT.txt, wb);
- if (fp2 == NULL) {
- fprintf(stderr, Can't open CP936_SORT.txtn);
- return 1;
- }
- for (i = 0; i < n; i++) {
- fprintf(fp2, 0x%02Xt0x%04Xt%s, a[i].cp936, a[i].unicode[0], a[i].name);
- }
- for (i = 0; i < n; i++) {
- free(a[i].unicode);
- free(a[i].name);
- }
- free(a);
- fclose(fp1);
- fclose(fp2);
- return 0;
- }
- int compare(const void *a, const void *b)
- {
- wchar_t *s1 = ((table *)a)->unicode;
- wchar_t *s2 = ((table *)b)->unicode;
- return lstrcmpW(s1, s2);
- }

