/**
 * A parser for RFC 822 compliant messages, with emphasis on
 * the headers in them, also known as MIME headers.
 * <p>
 * RFC 822 describes the format of all Internet messages. This
 * includes things like email, usenet news and HTTP protocol
 * messages.
 * @author Turrican
 * @started 22 May 1998
 */
#include <mime.h>

/* Token types used by rewrite_header() when splitting a header block. */
#define HEADER_NAME 1
#define HEADER_VAL 2
#define MUDMAIL 3

/* Raw lexical token types produced by the reg_assoc() call in tokenize(). */
#define TOK_QP 4
#define TOK_QUOTE 5
#define TOK_OP 6
#define TOK_CP 7
#define TOK_LT 8
#define TOK_GT 9
#define TOK_SEP 10
#define TOK_AMP 11
#define SPECIAL 12
#define LWS 13
#define ATOM 14

/* Contextual states assigned by tokenize() on top of the raw types. */
#define COMMENT 15
#define EMAIL 16
#define QUOTE 17

/*
 * This function parses a string into RFC 822 tokens.
 *
 * The result is ({ strings, raw_types, context_types }).  The first two
 * elements come straight from reg_assoc(): matched text sits at the odd
 * indices, unmatched text at the even ones.  The third element runs
 * parallel to them; at each odd index it holds either the enclosing
 * context (QUOTE, COMMENT or EMAIL) or, outside any context, the raw
 * token type.  Linear whitespace outside a quoted string is always 0.
 *
 * An empty array is returned, after printing a diagnostic, when
 * parentheses or angle brackets do not balance.
 */
private mixed *tokenize(string header_field) {
  int i, quoted, paren_count, angle_count;
  int *tokens, state;
  mixed *bits;

  bits = reg_assoc(header_field,
                   ({ "\\\\.", "\"", "\\(", "\\)", "<", ">", ",", "@",
                      "[]()<>@,;:\\\\\".[]", "(\n?[ \t])+",
                      "[!#-'*+/-9=?A-Z^-~-]+" }),
                   ({ TOK_QP, TOK_QUOTE, TOK_OP, TOK_CP, TOK_LT, TOK_GT,
                      TOK_SEP, TOK_AMP, SPECIAL, LWS, ATOM }));
  tokens = ({ 0 });
  for (i = 1; i < sizeof(bits[0]); i += 2) {
    switch (bits[1][i]) {
    case LWS:
      /* Whitespace only keeps its context inside a quoted string. */
      tokens += ({ (quoted ? state : 0), 0 });
      break;
    case TOK_QUOTE:
      quoted = !quoted;
      if (!state && quoted) {
        state = QUOTE;
      }
      /* Both the opening and the closing quote carry the QUOTE state. */
      tokens += ({ state, 0 });
      if (state == QUOTE && !quoted) {
        state = 0;
      }
      break;
    case TOK_OP:
      if (!quoted) {
        paren_count++;
        state = COMMENT;
      }
      tokens += ({ state, 0 });
      break;
    case TOK_CP:
      if (!quoted) {
        paren_count--;
        if (paren_count < 0) {
          printf("Unbalanced ')'\n");
          return ({ });
        }
      }
      tokens += ({ state, 0 });
      /*
       * Only an unquoted ')' can end a comment.  A ')' inside a quoted
       * string must leave the QUOTE state alone, otherwise the rest of
       * the quoted text is tokenised as bare atoms.  When the outermost
       * comment ends inside <...> we return to the EMAIL context.
       */
      if (!quoted && !paren_count) {
        state = (angle_count ? EMAIL : 0);
      }
      break;
    case TOK_LT:
      /* The bracket itself is not part of the address. */
      tokens += ({ state, 0 });
      if (!(quoted || paren_count)) {
        angle_count++;
        state = EMAIL;
      }
      break;
    case TOK_GT:
      if (!(quoted || paren_count)) {
        angle_count--;
        if (angle_count != 0) {
          printf("Unbalanced '%c'\n", (angle_count < 0 ? '>' : '<'));
          return ({ });
        }
        state = 0;
      }
      tokens += ({ state, 0 });
      break;
    default:
      /* Outside any context a token keeps its raw type. */
      tokens += ({ (state ? state : bits[1][i]), 0 });
      break;
    }
  }
  /* A negative count already returned above, so only '(' can be left. */
  if (paren_count != 0) {
    printf("Unbalanced '%c'\n", '(');
    return ({ });
  }
  bits += ({ tokens });
  return bits;
} /* tokenize() */

/**
 * This method extracts valid email addresses from the given
 * header field or string.  Addresses are separated by commas.  When an
 * address contains a part in angle brackets, only that part is used
 * as the machine usable address.
 *
 * @param arg the string to parse
 * @return an array consisting of an array of machine usable email addresses
 * (no whitespace and comments) and an array of the full addresses; both
 * arrays are empty if arg is 0 or could not be tokenized
 * @see rewrite_field()
 */
mixed *get_email_addrs(string arg) {
  string *addrs, *full_addrs;
  int i, idx, state;
  mixed *bits;

  if (!arg) {
    return ({ ({ }), ({ }) });
  }
  bits = tokenize(arg);
  if (!sizeof(bits)) {
    return ({ ({ }), ({ }) });
  }
  addrs = ({ "" });
  full_addrs = ({ "" });
  idx = 0;
  for (i = 1; i < sizeof(bits[2]); i += 2) {
    switch (bits[2][i]) {
    case TOK_SEP:
      idx++;
      addrs += ({ "" });
      full_addrs += ({ "" });
      /*
       * Start the next address afresh, so that its first <...> token
       * replaces any display name collected before it instead of being
       * appended to it.
       */
      state = 0;
      break;
    case EMAIL:
      if (state != EMAIL) {
        /* First token inside <...>: throw away the display name. */
        addrs[idx] = bits[0][i];
        state = EMAIL;
      } else {
        addrs[idx] += bits[0][i];
      }
      full_addrs[idx] += bits[0][i];
      break;
    case COMMENT:
    case LWS:
    case 0:
      /* Comments, whitespace and angle brackets: full form only. */
      full_addrs[idx] += bits[0][i];
      break;
    default:
      addrs[idx] += bits[0][i];
      full_addrs[idx] += bits[0][i];
      break;
    }
  }
  return ({ addrs, full_addrs });
} /* get_email_addrs() */

/*
 * Appends the Discworld mail domain to the last token of one address,
 * unless the address is empty or already contains an '@'.
 * strs is modified in place.
 */
private void qualify_local_addr(string *strs, int *types, int *where) {
  int last;

  if (!sizeof(types) || member_array(TOK_AMP, types) != -1) {
    return;
  }
  last = where[<1];
  if (strs[last] == "discworld") {
    strs[last] += "@discworld.imaginary.com";
  } else {
    strs[last] += ".discworld@discworld.imaginary.com";
  }
} /* qualify_local_addr() */

/**
 * This method rewrites local email addresses (as found in mudmail)
 * to be usable outside Discworld, for instance from an email client.
 * Addresses that already contain an '@' are left alone.
 *
 * @param header_field the string to rewrite
 * @return the new string, or "" if header_field is 0 or could not be
 * tokenized
 * @see get_email_addrs()
 */
string rewrite_field(string header_field) {
  int i, idx, state;
  mixed *bits, *addrs, *indices;

  if (!header_field) {
    return "";
  }
  bits = tokenize(header_field);
  if (!sizeof(bits)) {
    return "";
  }
  /*
   * For each comma separated address: the raw token types it is made of
   * and the positions of those tokens in bits[0].
   */
  addrs = ({ ({ }) });
  indices = ({ ({ }) });
  idx = 0;
  for (i = 1; i < sizeof(bits[2]); i += 2) {
    switch (bits[2][i]) {
    case TOK_SEP:
      qualify_local_addr(bits[0], addrs[idx], indices[idx]);
      idx++;
      addrs += ({ ({ }) });
      indices += ({ ({ }) });
      /* The next address starts without an angle-bracket part. */
      state = 0;
      break;
    case EMAIL:
      if (state != EMAIL) {
        /* First token inside <...>: forget the display name tokens. */
        addrs[idx] = ({ bits[1][i] });
        indices[idx] = ({ i });
        state = EMAIL;
      } else {
        addrs[idx] += ({ bits[1][i] });
        indices[idx] += ({ i });
      }
      break;
    case COMMENT:
    case LWS:
    case 0:
      break;
    default:
      addrs[idx] += ({ bits[1][i] });
      indices[idx] += ({ i });
      break;
    }
  }
  /* The last address has no trailing separator. */
  qualify_local_addr(bits[0], addrs[<1], indices[<1]);
  return implode(bits[0], "");
} /* rewrite_field() */

/**
 * This method parses an RFC 822 compliant message and extracts all
 * the headers into a class mime_header. This class contains a mapping
 * with the header field names as keys, so you can easily select
 * the headers you need.
 * <p>
 * The first line of the message is skipped.  The keys of the mapping
 * are the lower case field names; the field names as they were written
 * are kept, in order of first appearance, in the header_k array.
 * Continuation lines are appended to the previous field separated by a
 * newline, and fields occurring more than once are joined with ", ".
 *
 * @param message the message to be parsed
 * @return a class mime_header with the headers from the message, or 0
 * if message is 0 or has no blank line ending the header section
 */
class mime_header parse_headers(string message) {
  string *bits, bit, cont, headers;
  int idx, len;
  class mime_header hdr;

  if (!message || (idx = strsrch(message, "\n\n")) == -1) {
    return hdr;
  }
  headers = message[0..idx];
  hdr = new(class mime_header, header_m : ([]), header_k : ({}));
  bits = explode(headers, "\n") - ({ "", 0 });
  foreach (bit in bits[1..]) {
    /* A line starting with whitespace continues the previous field. */
    if (cont && ((bit[0] == ' ') || (bit[0] == '\t'))) {
      hdr->header_m[cont] += "\n" + bit[1..];
      continue;
    }
    len = strlen(bit);
    if ((idx = strsrch(bit, ':')) == -1) {
      continue;
    }
    cont = bit[0..idx-1];
    if (undefinedp(hdr->header_m[lower_case(cont)])) {
      hdr->header_k += ({ cont });
    }
    cont = lower_case(cont);
    /* Skip the colon and any whitespace in front of the value. */
    ++idx;
    while ((idx < len) && isspace(bit[idx])) {
      ++idx;
    }
    if (undefinedp(hdr->header_m[cont])) {
      hdr->header_m[cont] = bit[idx..];
    } else {
      hdr->header_m[cont] += sprintf(", %s", bit[idx..]);
    }
  }
  return hdr;
} /* parse_headers() */

/**
 * This method rewrites all the fields from an RFC 822 compliant message
 * to make the message suited for Internet transport. It uses the
 * rewrite_field() method to achieve this. The affected header fields
 * are:
 * <UL>
 * <LI> To:
 * <LI> From:
 * <LI> Cc:
 * <LI> Bcc:
 * <LI> Reply-To:
 * </UL>
 * The first line of the message is removed from the result.  If the
 * message contains no newline it is returned as it is, and if no blank
 * line ends the header section the rest of the message is returned
 * without rewriting.
 *
 * @param message the message to rewrite
 * @return the possibly modified message, suited for Internet transport,
 * or "" if message is 0
 * @see rewrite_field()
 */
string rewrite_header(string message) {
  mixed *ra;
  int i;
  string header, field;

  if (!message) {
    return "";
  }
  if ((i = strsrch(message, "\n")) == -1) {
    return message;
  }
  message = message[i+1..];
  if ((i = strsrch(message, "\n\n")) == -1) {
    return message;
  }
  /* The header keeps its final newline; the body starts at the blank line. */
  header = message[0..i];
  message = message[i+1..];
  ra = reg_assoc(header,
                 ({ "^[!-9;-~]+:", "((\n?[ \t])+[^\n]*(\n|$))+" }),
                 ({ HEADER_NAME, HEADER_VAL }));
  for (i = 1; i + 2 < sizeof(ra[0]); i += 2) {
    if (ra[1][i] == HEADER_NAME && ra[1][i+2] == HEADER_VAL) {
      switch (lower_case(ra[0][i])) {
      case "to:":
      case "from:":
      case "cc:":
      case "bcc:":
      case "reply-to:":
        field = rewrite_field(ra[0][i+2]);
        /* "" means the field could not be parsed: keep the original. */
        if (field != "") {
          ra[0][i+2] = field;
        }
        break;
      default:
        break;
      }
    }
  }
  return implode(ra[0], "") + message;
} /* rewrite_header() */

/** @ignore yes */
void dest_me() {
  destruct(this_object());
} /* dest_me() */

/** @ignore yes */
int cleanup(int inherited) {
  if (!inherited) {
    dest_me();
  }
  return 1;
} /* cleanup() */