/*
 * Requires a C99ish compiler. gcc 3 works. gcc 2.95 works. Earlier
 * versions might.
 *
 * The arrays below use designated initializers to make it very explicit
 * which elements are being set to what. The standard says that any element
 * without an initializer in these arrays starts out as it would if it were
 * static - in other words, zeroed out. That's usually what we want.
 *
 * However, since most people compiling Penn probably aren't going to be
 * using a C99 compiler for some time to come, this program will translate
 * from the DI form to the fully-initialized form that all C and C++ compilers
 * understand.
 *
 * Example Usage:
 * % cd pennmush
 * % gcc -o gentables utils/gentables.c
 * % ./gentables > src/tables.c
 * % make
 */

#include <stdio.h>
#include <limits.h>
#include <stdlib.h>

/* Offsets (+1) for q-register lookup. Entries left at 0 mean "no register";
 * main() emits this table with a delta of -1, so they come out as -1. */
char q_offsets[UCHAR_MAX + 1] = {
  ['0'] = 1, ['1'] = 2, ['2'] = 3, ['3'] = 4, ['4'] = 5,
  ['5'] = 6, ['6'] = 7, ['7'] = 8, ['8'] = 9, ['9'] = 10,
  ['A'] = 11, ['a'] = 11, ['B'] = 12, ['b'] = 12, ['C'] = 13, ['c'] = 13,
  ['D'] = 14, ['d'] = 14, ['E'] = 15, ['e'] = 15, ['F'] = 16, ['f'] = 16,
  ['G'] = 17, ['g'] = 17, ['H'] = 18, ['h'] = 18, ['I'] = 19, ['i'] = 19,
  ['J'] = 20, ['j'] = 20, ['K'] = 21, ['k'] = 21, ['L'] = 22, ['l'] = 22,
  ['M'] = 23, ['m'] = 23, ['N'] = 24, ['n'] = 24, ['O'] = 25, ['o'] = 25,
  ['P'] = 26, ['p'] = 26, ['Q'] = 27, ['q'] = 27, ['R'] = 28, ['r'] = 28,
  ['S'] = 29, ['s'] = 29, ['T'] = 30, ['t'] = 30, ['U'] = 31, ['u'] = 31,
  ['V'] = 32, ['v'] = 32, ['W'] = 33, ['w'] = 33, ['X'] = 34, ['x'] = 34,
  ['Y'] = 35, ['y'] = 35, ['Z'] = 36, ['z'] = 36
};

/* What characters the parser looks for. */
char parse_interesting[UCHAR_MAX + 1] = {
  ['\0'] = 1, ['%'] = 1, ['{'] = 1, ['['] = 1, ['('] = 1,
  ['\\'] = 1, [' '] = 1, ['}'] = 1, [']'] = 1, [')'] = 1,
  [','] = 1, [';'] = 1, ['='] = 1, ['$'] = 1, [0x1B] = 1
};

/* What characters are allowed in attribute names. */
char attribute_names[UCHAR_MAX + 1] = {
  ['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1, ['4'] = 1,
  ['5'] = 1, ['6'] = 1, ['7'] = 1, ['8'] = 1, ['9'] = 1,
  ['A'] = 1, ['B'] = 1, ['C'] = 1, ['D'] = 1, ['E'] = 1, ['F'] = 1,
  ['G'] = 1, ['H'] = 1, ['I'] = 1, ['J'] = 1, ['K'] = 1, ['L'] = 1,
  ['M'] = 1, ['N'] = 1, ['O'] = 1, ['P'] = 1, ['Q'] = 1, ['R'] = 1,
  ['S'] = 1, ['T'] = 1, ['U'] = 1, ['V'] = 1, ['W'] = 1, ['X'] = 1,
  ['Y'] = 1, ['Z'] = 1,
  ['_'] = 1, ['#'] = 1, ['@'] = 1, ['$'] = 1, ['!'] = 1, ['~'] = 1,
  ['|'] = 1, [';'] = 1, ['`'] = 1, ['"'] = 1, ['\''] = 1, ['&'] = 1,
  ['*'] = 1, ['-'] = 1, ['+'] = 1, ['='] = 1, ['?'] = 1, ['/'] = 1,
  ['.'] = 1, ['>'] = 1, ['<'] = 1, [','] = 1
};

/* C89 format codes for strftime() */
char valid_timefmt_codes[UCHAR_MAX + 1] = {
  ['a'] = 1, ['A'] = 1, ['b'] = 1, ['B'] = 1, ['c'] = 1, ['d'] = 1,
  ['H'] = 1, ['I'] = 1, ['j'] = 1, ['m'] = 1, ['M'] = 1, ['p'] = 1,
  ['S'] = 1, ['U'] = 1, ['w'] = 1, ['W'] = 1, ['x'] = 1, ['X'] = 1,
  ['y'] = 1, ['Y'] = 1, ['Z'] = 1, ['$'] = 1
};

/* Special characters for escape() and secure() */
char escaped_chars[UCHAR_MAX + 1] = {
  ['('] = 1, [')'] = 1, ['['] = 1, [']'] = 1, ['{'] = 1, ['}'] = 1,
  ['$'] = 1, ['^'] = 1, ['%'] = 1, [','] = 1, [';'] = 1, ['\\'] = 1
};

/** Accented characters
 *
 * The table is for ISO 8859-1 character set.
 * It should be easy to modify it for other ISO 8859-X sets, or completely
 * different families.
 */
typedef struct {
  const char *base;   /**< Base character */
  const char *entity; /**< HTML entity */
} accent_info;

/* Both strings of each row are written verbatim between double quotes in the
 * generated source (see print_entity_table()), so anything that needs
 * escaping in a C string literal must already be escaped here.
 * Rows that are not listed stay {NULL, NULL}. */
accent_info entity_table[UCHAR_MAX + 1] = {
  /* Assorted characters */
  ['<'] = {"<", "&lt;"},
  ['>'] = {">", "&gt;"},
  ['&'] = {"&", "&amp;"},
  ['"'] = {"\\\"", "&quot;"},
  ['\n'] = {"\\n", "<br>\\n"},
  /* << and >> quotes */
  [171] = {"<<", "&laquo;"},
  [187] = {">>", "&raquo;"},
  /* Upside-down punctuation */
  [161] = {"!", "&iexcl;"},
  [191] = {"?", "&iquest;"},
  /* szlig */
  [223] = {"s", "&szlig;"},
  /* thorn */
  [222] = {"P", "&THORN;"},
  [254] = {"p", "&thorn;"},
  /* eth */
  [208] = {"D", "&ETH;"},
  [240] = {"o", "&eth;"},
  /* Special symbols */
  [169] = {"(c)", "&copy;"},
  [174] = {"(r)", "&reg;"},
  [188] = {"1/4", "&frac14;"},
  [189] = {"1/2", "&frac12;"},
  [190] = {"3/4", "&frac34;"},
  /* AE ligatures */
  [198] = {"AE", "&AElig;"},
  [230] = {"ae", "&aelig;"},
  /* Accented a's */
  [192] = {"A", "&Agrave;"},
  [193] = {"A", "&Aacute;"},
  [194] = {"A", "&Acirc;"},
  [195] = {"A", "&Atilde;"},
  [196] = {"A", "&Auml;"},
  [197] = {"A", "&Aring;"},
  [224] = {"a", "&agrave;"},
  [225] = {"a", "&aacute;"},
  [226] = {"a", "&acirc;"},
  [227] = {"a", "&atilde;"},
  [228] = {"a", "&auml;"},
  [229] = {"a", "&aring;"},
  /* Accented c's */
  [199] = {"C", "&Ccedil;"},
  [231] = {"c", "&ccedil;"},
  /* Accented e's */
  [200] = {"E", "&Egrave;"},
  [201] = {"E", "&Eacute;"},
  [202] = {"E", "&Ecirc;"},
  [203] = {"E", "&Euml;"},
  [232] = {"e", "&egrave;"},
  [233] = {"e", "&eacute;"},
  [234] = {"e", "&ecirc;"},
  [235] = {"e", "&euml;"},
  /* Accented i's */
  [204] = {"I", "&Igrave;"},
  [205] = {"I", "&Iacute;"},
  [206] = {"I", "&Icirc;"},
  [207] = {"I", "&Iuml;"},
  [236] = {"i", "&igrave;"},
  [237] = {"i", "&iacute;"},
  [238] = {"i", "&icirc;"},
  [239] = {"i", "&iuml;"},
  /* Accented n's */
  [209] = {"N", "&Ntilde;"},
  [241] = {"n", "&ntilde;"},
  /* Accented o's */
  [210] = {"O", "&Ograve;"},
  [211] = {"O", "&Oacute;"},
  [212] = {"O", "&Ocirc;"},
  [213] = {"O", "&Otilde;"},
  [214] = {"O", "&Ouml;"},
  [242] = {"o", "&ograve;"},
  [243] = {"o", "&oacute;"},
  [244] = {"o", "&ocirc;"},
  [245] = {"o", "&otilde;"},
  [246] = {"o", "&ouml;"},
  /* Accented u's */
  [217] = {"U", "&Ugrave;"},
  [218] = {"U", "&Uacute;"},
  [219] = {"U", "&Ucirc;"},
  [220] = {"U", "&Uuml;"},
  [249] = {"u", "&ugrave;"},
  [250] = {"u", "&uacute;"},
  [251] = {"u", "&ucirc;"},
  [252] = {"u", "&uuml;"},
  /* Accented y's */
  [221] = {"Y", "&Yacute;"},
  [253] = {"y", "&yacute;"},
  [255] = {"y", "&yuml;"},
};

/** Print a char table as a fully-initialized C array definition.
 *
 * For tables of char's treated as small numeric values. Writes
 * "<type> <name>[UCHAR_MAX + 1] = { ... };" to stdout, 16 values per row.
 *
 * \param type element type to use in the emitted declaration.
 * \param name identifier to use in the emitted declaration.
 * \param table source table; must hold UCHAR_MAX + 1 elements.
 * \param delta value added to every element before it is printed.
 */
void
print_table_bool(const char *type, const char *name, const char table[],
                 int delta)
{
  int n;

  printf("%s %s[%d] = {\n", type, name, UCHAR_MAX + 1);
  for (n = 0; n <= UCHAR_MAX; n++) {
    printf("%3d", table[n] + delta);
    /* No separator after the final element. */
    if (n < UCHAR_MAX)
      putchar(',');
    /* Break the row after every 16th value. */
    if ((n + 1) % 16 == 0)
      putchar('\n');
  }
  fputs("};\n\n", stdout);
}

/** Print the accent_info typedef followed by a fully-initialized table.
 *
 * Rows whose entity pointer is NULL are emitted as {NULL, NULL}; all other
 * rows are emitted as a pair of double-quoted strings, copied verbatim.
 *
 * \param name identifier to use in the emitted declaration.
 * \param table source table; must hold UCHAR_MAX + 1 elements.
 */
void
print_entity_table(const char *name, const accent_info table[])
{
  int n;

  puts("typedef struct {");
  puts("const char *base;");
  puts("const char *entity;");
  puts("} accent_info;");
  printf("accent_info %s[%d] = {\n", name, UCHAR_MAX + 1);
  for (n = 0; n <= UCHAR_MAX; n++) {
    if (table[n].entity)
      printf("{\"%s\", \"%s\"}", table[n].base, table[n].entity);
    else
      fputs("{NULL, NULL}", stdout);
    /* No separator after the final row. */
    if (n < UCHAR_MAX)
      putchar(',');
    putchar('\n');
  }
  fputs("};\n\n", stdout);
}

/** Write the generated tables file to stdout. */
int
main(int argc, char *argv[])
{
  /* argv[0] may be a null pointer when argc is 0; never hand that to %s. */
  const char *prog = (argc > 0 && argv[0]) ? argv[0] : "gentables";

  printf("/* This file was generated by running %s compiled from\n"
         " * %s. Edit that file, not this one, when making changes. */\n"
         "#include <stdlib.h>\n\n", prog, __FILE__);

  /* q_offsets stores index + 1, so shift back down by one on output. */
  print_table_bool("signed char", "qreg_indexes", q_offsets, -1);
  print_table_bool("char", "active_table", parse_interesting, 0);
  print_table_bool("char", "atr_name_table", attribute_names, 0);
  print_table_bool("char", "valid_timefmt_codes", valid_timefmt_codes, 0);
  print_table_bool("char", "escaped_chars", escaped_chars, 0);
  print_entity_table("accent_table", entity_table);

  return EXIT_SUCCESS;
}