package com.jmxp;
import java.util.ArrayList;
import com.jmxp.MXPState.mxpMode;
import com.jmxp.structures.chunk;
/**
 * Character-level state machine that splits incoming text into plain text,
 * ANSI escape sequences, tags, comments and quoted tag parameters.
 *
 * <p>Two entry points share the same machine state:
 * <ul>
 *   <li>{@link #parse(String)} is the streaming parser. Its state (current
 *       parser state and the partially collected token) survives between
 *       calls, and everything it recognises is forwarded to the
 *       {@code MXPState}, {@code ElementManager} and {@code ResultHandler}
 *       supplied to the constructor.</li>
 *   <li>{@link #simpleParse(String)} splits one string into text / tag /
 *       error chunks which are then retrieved with {@link #hasNext()} and
 *       {@link #getNext()}. It resets the shared state when it starts.</li>
 * </ul>
 */
public class MXPParser
{
  /** States of the character-level state machine used by both parsers. */
  public enum parserState {
    pText,
    pAnsiSeq,
    pTag,
    pComment,
    pQuotedParam
  }

  /** The escape character that introduces an ANSI sequence. */
  private static final char ESC = '\u001B';

  /** Largest line-tag number that is reported as valid. */
  private static final int MAX_LINE_TAG = 99;

  /** Current state of the state machine. */
  private parserState pstate;

  /** Characters of the token that is currently being collected. */
  private final StringBuilder buf = new StringBuilder();

  /** Chunks produced by simpleParse() and not yet fetched with getNext(). */
  private final ArrayList<chunk> chunks = new ArrayList<chunk>();

  /** Quote character that opened the quoted parameter being collected. */
  private char quoteChar;

  /**
   * Set when the most recent character was a carriage return. It is only
   * written, never read: treating a lone carriage return as a new-line is
   * disabled (see parse()).
   */
  private boolean wasBackslashR;

  private final MXPState state;
  private final ElementManager elements;
  private final ResultHandler results;

  /**
   * Creates a parser that starts in text state.
   *
   * @param st  receives text, new-line and line-tag notifications and is
   *            asked for the current mode
   * @param elm receives tag, new-line and line-tag notifications
   * @param res used to create and collect error results
   */
  public MXPParser (MXPState st, ElementManager elm, ResultHandler res )
  {
    state = st;
    elements = elm;
    results = res;
    pstate = parserState.pText;
    wasBackslashR = false;
  }

  /**
   * Splits {@code text} into chunks and appends them to the internal queue.
   *
   * <p>Text outside of {@code <...>} becomes a text chunk, the content
   * between {@code <} and {@code >} becomes a tag chunk (quotes inside a tag
   * protect a {@code >}), and a tag that is still open at the end of the
   * input becomes an error chunk. When the input ends in text state a text
   * chunk is queued even if it is empty.
   *
   * <p>This method overwrites the parser state and the token buffer that
   * {@link #parse(String)} also uses.
   *
   * @param text the string to split; {@code null} or empty input is ignored
   */
  public void simpleParse(String text)
  {
    if (text == null || text.isEmpty())
      return;

    chunk ch = new chunk();
    pstate = parserState.pText;
    buf.setLength(0);

    final int length = text.length();
    for (int i = 0; i < length; i++)
    {
      final char c = text.charAt(i);
      switch (pstate)
      {
        case pText:
          if (c != '<')
          {
            buf.append(c);
            break;
          }
          //start of a tag ends the text collected so far (if there is any)
          if (buf.length() > 0)
          {
            ch.chk = chunk.chunkType.chunkText;
            ch = enqueue(ch);
          }
          pstate = parserState.pTag;
          break;

        case pTag:
          if (c == '>')
          {
            ch.chk = chunk.chunkType.chunkTag;
            ch = enqueue(ch);
            pstate = parserState.pText;
          }
          else
          {
            if (isQuote(c))
            {
              pstate = parserState.pQuotedParam;
              quoteChar = c;
            }
            buf.append(c);
          }
          break;

        case pQuotedParam:
          //a matching quote closes the parameter; this accepts some malformed
          //quoting, which is left for the element manager to reject
          if (c == quoteChar)
            pstate = parserState.pTag;
          buf.append(c);
          break;

        default:
          break;
      }
    }

    //whatever is left when the input ends
    if (pstate == parserState.pText)
    {
      ch.chk = chunk.chunkType.chunkText;
      enqueue(ch);
    }
    else if ((pstate == parserState.pTag) || (pstate == parserState.pQuotedParam))
    {
      ch.chk = chunk.chunkType.chunkError;
      ch.text = "Tag definition contains unfinished tag <" + buf;
      chunks.add(ch);
    }
    buf.setLength(0);
  }

  /**
   * Stores the collected token in {@code filled}, queues it and empties the
   * token buffer.
   *
   * @return a fresh chunk for the next token
   */
  private chunk enqueue(chunk filled)
  {
    filled.text = buf.toString();
    chunks.add(filled);
    buf.setLength(0);
    return new chunk();
  }

  /** @return {@code true} if at least one chunk is waiting in the queue */
  public boolean hasNext()
  {
    return !chunks.isEmpty();
  }

  /**
   * Removes and returns the oldest queued chunk.
   *
   * @return the oldest chunk, or a new chunk of type {@code chunkNone} when
   *         the queue is empty
   */
  public chunk getNext()
  {
    if (chunks.isEmpty())
    {
      chunk nochunk = new chunk();
      nochunk.chk = chunk.chunkType.chunkNone;
      return nochunk;
    }
    return chunks.remove(0);
  }

  /**
   * Feeds {@code text} through the streaming state machine.
   *
   * <p>The parser state and any partially collected token are kept between
   * calls, so a tag or ANSI sequence may be split across several calls.
   * Text that is still pending at the end of the call is reported
   * immediately; the original comment gives display speed and prompt
   * handling as the reason.
   *
   * <p>A carriage return is never reported as a new-line by itself, only a
   * line feed is. The original author notes that servers sending "\n\r" are
   * more common than servers sending a lone "\r", so "\r"-only line ends are
   * deliberately not supported.
   *
   * @param text the received text; {@code null} or empty input is ignored
   * @throws Exception declared by the original signature; this class does
   *         not throw it itself, so it can only originate from the
   *         collaborators (not visible here)
   */
  public void parse(String text) throws Exception
  {
    if (text == null || text.isEmpty())
      return;

    final int length = text.length();
    for (int i = 0; i < length; i++)
    {
      final char c = text.charAt(i);
      wasBackslashR = false;
      //the mode is fetched for every character so that it is always current;
      //parsing in locked mode is limited
      final mxpMode mode = state.getMXPMode();
      switch (pstate)
      {
        case pText:
          parseTextChar(c, mode);
          break;
        case pAnsiSeq:
          parseAnsiChar(c);
          break;
        case pTag:
          parseTagChar(c);
          break;
        case pComment:
          parseCommentChar(c);
          break;
        case pQuotedParam:
          parseQuotedChar(c);
          break;
        default:
          break;
      }
    }

    if (pstate == parserState.pText)
      flushText();
  }

  /** Handles one character while collecting plain text. */
  private void parseTextChar(char c, mxpMode mode) throws Exception
  {
    //tags are not recognised in locked mode
    final boolean tagStart = (c == '<') && (mode != mxpMode.lockedMode);
    if (!tagStart && !interrupts(c))
    {
      buf.append(c);
      return;
    }

    //end of text - got new-line / ANSI sequence / start of tag
    flushText();
    if (c == ESC)
      pstate = parserState.pAnsiSeq;
    else if (tagStart)
      pstate = parserState.pTag;
    else if (c == '\n')
    {
      elements.gotNewLine();
      state.gotNewLine();
    }
    else
      wasBackslashR = true;
  }

  /** Handles one character while collecting an ANSI sequence. */
  private void parseAnsiChar(char c) throws Exception
  {
    if (interrupts(c))
    {
      abortConstruct(c, "Received unfinished ANSI sequence!");
    }
    else if (isAsciiLetter(c))
    {
      //a letter ends the sequence; 'z' after at least '[' is a line tag
      if ((c == 'z') && (buf.length() > 0))
      {
        reportLineTags(buf.toString());
        buf.setLength(0);
        pstate = parserState.pText;
      }
      else
        abandonAnsiSequence(c);
    }
    else if (c == '[')
    {
      //valid, but only as the first character of the sequence
      if (buf.length() == 0)
        buf.append(c);
      else
        abandonAnsiSequence(c);
    }
    else if ((c == ';') || ((c >= '0') && (c <= '9')))
    {
      //valid, but only after the leading '['
      if (buf.length() > 0)
        buf.append(c);
      else
        abandonAnsiSequence(c);
    }
    else
      abandonAnsiSequence(c);
  }

  /** Handles one character while collecting a tag. */
  private void parseTagChar(char c) throws Exception
  {
    if (c == '>')
    {
      elements.gotTag(buf.toString());
      buf.setLength(0);
      pstate = parserState.pText;
    }
    else if (isQuote(c))
    {
      pstate = parserState.pQuotedParam;
      quoteChar = c;
      buf.append(c);
    }
    else if (interrupts(c))
    {
      abortConstruct(c, "Received unfinished tag <" + buf);
    }
    else
    {
      //a tag that starts with "!--" followed by anything else is a comment
      if ((buf.length() == 3) && "!--".contentEquals(buf))
        pstate = parserState.pComment;
      buf.append(c);
    }
  }

  /** Handles one character while collecting a comment. */
  private void parseCommentChar(char c) throws Exception
  {
    if (c == '>')
    {
      final int len = buf.length();
      final boolean closes = (len >= 2)
          && (buf.charAt(len - 2) == '-') && (buf.charAt(len - 1) == '-');
      if (closes)
      {
        //comment ends; its content is discarded
        buf.setLength(0);
        pstate = parserState.pText;
      }
      else
        buf.append(c);
    }
    else if (interrupts(c))
      abortConstruct(c, "Received an unfinished comment!");
    else
      buf.append(c);
  }

  /** Handles one character while collecting a quoted tag parameter. */
  private void parseQuotedChar(char c) throws Exception
  {
    if (c == quoteChar)
    {
      //a matching quote closes the parameter; this accepts some malformed
      //quoting, which is left for the element manager to reject
      pstate = parserState.pTag;
      buf.append(c);
    }
    else if (interrupts(c))
      abortConstruct(c, "Received unfinished tag <" + buf);
    else
      buf.append(c);
  }

  /** Reports the pending text, if there is any, and empties the buffer. */
  private void flushText() throws Exception
  {
    if (buf.length() > 0)
    {
      state.gotText(buf.toString(), true);
      buf.setLength(0);
    }
  }

  /**
   * Gives up on the ANSI sequence, tag, comment or quoted parameter that is
   * being collected because {@code c} (ESC, line feed or carriage return)
   * arrived before it was finished: records {@code message} as an error,
   * drops the collected characters and continues according to {@code c}.
   */
  private void abortConstruct(char c, String message) throws Exception
  {
    results.addToList (results.createError (message));
    buf.setLength(0);
    if (c == ESC)
      pstate = parserState.pAnsiSeq;
    else if (c == '\n')
    {
      elements.gotNewLine();
      state.gotNewLine();
      pstate = parserState.pText;
    }
    else if (c == '\r')
    {
      pstate = parserState.pText;
      wasBackslashR = true;
    }
  }

  /**
   * Turns what looked like an ANSI sequence back into plain text: ESC and
   * {@code c} are not in the buffer yet, so they are added around the
   * collected characters and the parser returns to text state.
   */
  private void abandonAnsiSequence(char c)
  {
    buf.insert(0, ESC).append(c);
    pstate = parserState.pText;
  }

  /**
   * Reports every number of a line-tag sequence.
   *
   * @param seq the collected sequence: '[' (skipped) followed by digits and
   *            ';' separators. Several numbers are not expected, but each
   *            one is reported separately; an empty number counts as 0.
   */
  private void reportLineTags(String seq) throws Exception
  {
    final int len = seq.length();
    int num = 0;
    for (int j = 1; j < len; j++)
    {
      final char cc = seq.charAt(j);
      if (cc == ';')
      {
        reportLineTag(num);
        num = 0;
      }
      else if (isValidLineTag(num))
      {
        //once the value has left the valid range it stays invalid, so further
        //digits are ignored; this keeps a long digit run from overflowing and
        //wrapping back into the valid range
        num = num * 10 + (cc - '0');
      }
    }
    //the last line tag (usually the only one)
    reportLineTag(num);
  }

  /** Forwards a valid line tag, or records an error for an invalid one. */
  private void reportLineTag(int num) throws Exception
  {
    if (isValidLineTag(num))
    {
      state.gotLineTag (num);
      elements.gotLineTag (num);
    }
    else
      results.addToList (results.createError ("Received invalid line tag!"));
  }

  private static boolean isValidLineTag(int num)
  {
    return (num >= 0) && (num <= MAX_LINE_TAG);
  }

  /** @return whether {@code c} cuts short whatever is being collected */
  private static boolean interrupts(char c)
  {
    return (c == ESC) || (c == '\n') || (c == '\r');
  }

  private static boolean isQuote(char c)
  {
    return (c == '"') || (c == '\'');
  }

  private static boolean isAsciiLetter(char c)
  {
    return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'));
  }
}