/*
* Requires a C99ish compiler. gcc 3 works. gcc 2.95 works. Earlier
* versions might.
*
* The arrays below use designated initializers to make it very explicit
* which elements are being set to what. The standard says that any element
* without an initializer starts out as it would if it were static - in
* other words, zeroed out. That's usually what we want.
*
* However, since most people compiling Penn probably aren't going to be
* using a C99 compiler for some time to come, this program will translate
* from the DI form to the fully-initialized form that all C and C++ compilers
* understand.
*
* Example Usage:
* % cd pennmush
* % gcc -o gentables utils/gentables.c
* % ./gentables > src/tables.c
* % make
*/
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>
/*
 * Q-register lookup, indexed by character code.
 *
 * Each recognised character holds its register offset plus one, so that
 * the implicit zero of every unlisted character can stand for "no such
 * register". Digits occupy 1-10; letters occupy 11-36 and are
 * case-insensitive.
 */
char q_offsets[UCHAR_MAX + 1] = {
  /* Digits */
  ['0'] = 1,
  ['1'] = 2,
  ['2'] = 3,
  ['3'] = 4,
  ['4'] = 5,
  ['5'] = 6,
  ['6'] = 7,
  ['7'] = 8,
  ['8'] = 9,
  ['9'] = 10,

  /* Upper-case letters */
  ['A'] = 11, ['B'] = 12, ['C'] = 13, ['D'] = 14, ['E'] = 15,
  ['F'] = 16, ['G'] = 17, ['H'] = 18, ['I'] = 19, ['J'] = 20,
  ['K'] = 21, ['L'] = 22, ['M'] = 23, ['N'] = 24, ['O'] = 25,
  ['P'] = 26, ['Q'] = 27, ['R'] = 28, ['S'] = 29, ['T'] = 30,
  ['U'] = 31, ['V'] = 32, ['W'] = 33, ['X'] = 34, ['Y'] = 35,
  ['Z'] = 36,

  /* Lower-case letters share the values of their upper-case forms */
  ['a'] = 11, ['b'] = 12, ['c'] = 13, ['d'] = 14, ['e'] = 15,
  ['f'] = 16, ['g'] = 17, ['h'] = 18, ['i'] = 19, ['j'] = 20,
  ['k'] = 21, ['l'] = 22, ['m'] = 23, ['n'] = 24, ['o'] = 25,
  ['p'] = 26, ['q'] = 27, ['r'] = 28, ['s'] = 29, ['t'] = 30,
  ['u'] = 31, ['v'] = 32, ['w'] = 33, ['x'] = 34, ['y'] = 35,
  ['z'] = 36
};
/*
 * Characters the parser looks for, indexed by character code.
 * A 1 marks a character of interest; everything unlisted stays 0.
 */
char parse_interesting[UCHAR_MAX + 1] = {
  /* End of string and 0x1B (ASCII ESC) */
  ['\0'] = 1,
  [0x1B] = 1,

  /* Bracket pairs */
  ['{'] = 1, ['}'] = 1,
  ['['] = 1, [']'] = 1,
  ['('] = 1, [')'] = 1,

  /* Remaining punctuation */
  ['%'] = 1, ['\\'] = 1, [' '] = 1, [','] = 1,
  [';'] = 1, ['='] = 1, ['$'] = 1
};
/*
 * Characters allowed in attribute names, indexed by character code.
 * A 1 marks an allowed character. Only upper-case letters are listed;
 * lower-case letters are left at 0.
 */
char attribute_names[UCHAR_MAX + 1] = {
  /* Digits */
  ['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1, ['4'] = 1, ['5'] = 1,
  ['6'] = 1, ['7'] = 1, ['8'] = 1, ['9'] = 1,

  /* Upper-case letters */
  ['A'] = 1, ['B'] = 1, ['C'] = 1, ['D'] = 1, ['E'] = 1, ['F'] = 1,
  ['G'] = 1, ['H'] = 1, ['I'] = 1, ['J'] = 1, ['K'] = 1, ['L'] = 1,
  ['M'] = 1, ['N'] = 1, ['O'] = 1, ['P'] = 1, ['Q'] = 1, ['R'] = 1,
  ['S'] = 1, ['T'] = 1, ['U'] = 1, ['V'] = 1, ['W'] = 1, ['X'] = 1,
  ['Y'] = 1, ['Z'] = 1,

  /* Punctuation and symbols */
  ['_'] = 1, ['#'] = 1, ['@'] = 1, ['$'] = 1, ['!'] = 1, ['~'] = 1,
  ['|'] = 1, [';'] = 1, ['`'] = 1, ['"'] = 1, ['\''] = 1, ['&'] = 1,
  ['*'] = 1, ['-'] = 1, ['+'] = 1, ['='] = 1, ['?'] = 1, ['/'] = 1,
  ['.'] = 1, ['>'] = 1, ['<'] = 1, [','] = 1
};
/*
 * Format codes accepted for strftime(), indexed by character code:
 * the C89 conversion letters, plus '$'.
 * NOTE(review): '$' is not itself a strftime() conversion; presumably it is
 * the escape character used by whatever consumes this table - confirm.
 */
char valid_timefmt_codes[UCHAR_MAX + 1] = {
  /* Upper-case codes */
  ['A'] = 1, ['B'] = 1, ['H'] = 1, ['I'] = 1, ['M'] = 1, ['S'] = 1,
  ['U'] = 1, ['W'] = 1, ['X'] = 1, ['Y'] = 1, ['Z'] = 1,

  /* Lower-case codes */
  ['a'] = 1, ['b'] = 1, ['c'] = 1, ['d'] = 1, ['j'] = 1, ['m'] = 1,
  ['p'] = 1, ['w'] = 1, ['x'] = 1, ['y'] = 1,

  /* Non-letter */
  ['$'] = 1
};
/*
 * Special characters for escape() and secure(), indexed by character code.
 * A 1 marks a special character; everything unlisted stays 0.
 */
char escaped_chars[UCHAR_MAX + 1] = {
  /* Bracket pairs */
  ['('] = 1, [')'] = 1,
  ['['] = 1, [']'] = 1,
  ['{'] = 1, ['}'] = 1,

  /* Other special characters */
  ['$'] = 1, ['^'] = 1, ['%'] = 1,
  [','] = 1, [';'] = 1, ['\\'] = 1
};
/** Accented characters
*
* The table is for ISO 8859-1 character set.
* It should be easy to modify it for other ISO 8859-X sets, or completely
* different families.
*/
typedef struct {
const char *base; /**< Base character */
const char *entity; /**< HTML entity */
} accent_info;
accent_info entity_table[UCHAR_MAX + 1] = {
// Assorted characters
['<'] = {"<", "<"},
['>'] = {">", ">"},
['&'] = {"&", "&"},
['"'] = {"\\\"", """},
['\n'] = {"\\n", "<br>\\n"},
// << and >> quotes
[171] = {"<<", "«"},
[187] = {">>", "»"},
// Upside-down punctuation
[161] = {"!", "¡"},
[191] = {"?", "¿"},
// szlig
[223] = {"s", "ß"},
// thorn
[222] = {"P", "Þ"},
[254] = {"p", "þ:"},
// eth
[208] = {"D", "Ð"},
[240] = {"o", "ð"},
// Special symbols
[169] = {"(c)", "©"},
[174] = {"(r)", "®"},
[188] = {"1/4", "¼"},
[189] = {"1/2", "½"},
[190] = {"3/4", "¾"},
// AE ligatures
[198] = {"AE", "Æ"},
[230] = {"ae", "æ"},
// Accented a's
[192] = {"A", "À"},
[193] = {"A", "Á"},
[194] = {"A", "Â"},
[195] = {"A", "Ã"},
[196] = {"A", "Ä"},
[197] = {"A", "Å"},
[224] = {"a", "à"},
[225] = {"a", "á"},
[226] = {"a", "â"},
[227] = {"a", "ã"},
[228] = {"a", "ä"},
[229] = {"a", "å"},
// Accented c's
[199] = {"C", "Ç"},
[231] = {"c", "ç"},
// Accented e's
[200] = {"E", "È"},
[201] = {"E", "É"},
[202] = {"E", "Ê"},
[203] = {"E", "Ë"},
[232] = {"e", "è"},
[233] = {"e", "é"},
[234] = {"e", "ê"},
[235] = {"e", "ë"},
// Accented i's
[204] = {"I", "Ì"},
[205] = {"I", "Í"},
[206] = {"I", "Î"},
[207] = {"I", "Ï"},
[236] = {"i", "ì"},
[237] = {"i", "í"},
[238] = {"i", "î"},
[239] = {"i", "ï"},
// Accented n's
[209] = {"N", "Ñ"},
[241] = {"n", "ñ"},
// Accented o's
[210] = {"O", "Ò"},
[211] = {"O", "Ó"},
[212] = {"O", "Ô"},
[213] = {"O", "Õ"},
[214] = {"O", "Ö"},
[242] = {"o", "ò"},
[243] = {"o", "ó"},
[244] = {"o", "ô"},
[245] = {"o", "õ"},
[246] = {"o", "ö"},
// Accented u's
[217] = {"U", "Ù"},
[218] = {"U", "Ú"},
[219] = {"U", "Û"},
[220] = {"U", "Ü"},
[249] = {"u", "ù"},
[250] = {"u", "ú"},
[251] = {"u", "û"},
[252] = {"u", "ü"},
// Accented y's
[221] = {"Y", "Ý"},
[253] = {"y", "ý"},
[255] = {"y", "ÿ"},
};
/*
 * Print a table of chars, treated as small numeric values, as a
 * fully-initialized C array definition on standard output.
 *
 * type  - element type to write in the generated definition
 * name  - identifier to give the generated array
 * table - the UCHAR_MAX + 1 values to print
 * delta - amount added to every value as it is printed
 *
 * Each value is printed with "%3d", sixteen to a line, separated by commas.
 */
void print_table_bool(const char *type, const char *name,
                      char table[], int delta) {
  int idx;

  printf("%s %s[%d] = {\n", type, name, UCHAR_MAX + 1);

  for (idx = 0; idx <= UCHAR_MAX; idx++) {
    printf("%3d", table[idx] + delta);

    /* Every element except the final one is followed by a separator. */
    if (idx != UCHAR_MAX) {
      putchar(',');
    }

    /* Break the line after each group of sixteen values. */
    if ((idx + 1) % 16 == 0) {
      putchar('\n');
    }
  }

  fputs("};\n\n", stdout);
}
/*
 * Print the accent/entity table as a fully-initialized C array definition
 * on standard output, preceded by the accent_info typedef it relies on.
 *
 * name  - identifier to give the generated array
 * table - the UCHAR_MAX + 1 entries to print
 *
 * An entry whose entity pointer is null is written as {NULL, NULL}. For
 * every other entry both strings are copied verbatim between double quotes,
 * so the table's text must already be escaped the way it should appear
 * inside a C string literal.
 */
void print_entity_table(const char *name,
                        const accent_info table[]) {
  int n;

  puts("typedef struct {");
  puts("const char *base;");
  puts("const char *entity;");
  puts("} accent_info;");
  printf("accent_info %s[%d] = {\n", name, UCHAR_MAX + 1);
  for (n = 0; n < UCHAR_MAX + 1; n++) {
    if (table[n].entity)
      printf("{\"%s\", \"%s\"}", table[n].base, table[n].entity);
    else
      /* Fixed text: nothing to format, so no printf() and no stray
       * argument without a matching conversion. */
      fputs("{NULL, NULL}", stdout);
    if (n < UCHAR_MAX)
      putchar(',');
    putchar('\n');
  }
  fputs("};\n\n", stdout);
}
/*
 * Write the generated tables file to standard output: a header comment
 * naming this program and its source file, followed by each lookup table
 * in fully-initialized form.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the output could not be written.
 */
int main(int argc, char *argv[]) {
  /* argc is allowed to be 0, in which case argv[0] is a null pointer and
   * must not be passed to %s. */
  const char *progname =
    (argc > 0 && argv[0] != NULL) ? argv[0] : "gentables";

  printf("/* This file was generated by running %s compiled from\n"
         " * %s. Edit that file, not this one, when making changes. */\n"
         "#include <stdlib.h>\n\n",
         progname, __FILE__);
  print_table_bool("signed char", "qreg_indexes", q_offsets, -1);
  print_table_bool("char", "active_table", parse_interesting, 0);
  print_table_bool("char", "atr_name_table", attribute_names, 0);
  print_table_bool("char", "valid_timefmt_codes", valid_timefmt_codes, 0);
  print_table_bool("char", "escaped_chars", escaped_chars, 0);
  print_entity_table("accent_table", entity_table);

  /* The output is meant to be redirected into a source file, so a failed
   * or short write must not be reported as success. */
  if (fflush(stdout) != 0 || ferror(stdout)) {
    fputs("gentables: error writing output\n", stderr);
    return EXIT_FAILURE;
  }
  return EXIT_SUCCESS;
}