// Modified JavaTokenMaker.java from RText - http://rtext.sourceforge.net/

package sdoc.lexers;

import sdoc.lexers.tokens.Token;
import java.util.ArrayList;
import java.util.List;
import javax.swing.text.Segment;
import java.io.CharArrayReader;
import java.io.IOException;
import sdoc.lexers.tokens.TokenFactory;

%%
/*
 * Scanner generation options.
 *   public / class / implements : generate "public class JavaLexer implements Lexer".
 *   unicode                     : scan Unicode input.
 *   pack                        : emit the transition table in packed form.
 *   buffer 128                  : initial size of the scan buffer.
 *   type List                   : the generated scanning method (yylex) returns a List.
 */
%public
%class JavaLexer
%implements Lexer
%unicode
%pack
%buffer 128
%type List

%{


	/**
	 * Creates a lexer without binding it to any input. The text to scan
	 * is handed over on each call to getTokens().
	 */
	public JavaLexer()
	{
		super();
	}

	/** Tokens collected so far for the text being scanned; emptied by getTokens(). */
	private final List tokens = new ArrayList();

	/**
	 * Appends the null token supplied by TokenFactory to the token list.
	 * The lexer rules call this right before returning at the end of a line
	 * or of the input.
	 */
	private void addNullToken()
	{
		final Object nullToken = TokenFactory.createNullToken();
		tokens.add(nullToken);
	}
		
	/**
	 * Scans the given text and reports the type of the last token produced.
	 *
	 * @param text             the text to scan
	 * @param initialTokenType token type in effect where the text starts;
	 *                         forwarded unchanged to getTokens()
	 * @return the type of the final token, or Token.NULL when scanning
	 *         produced no tokens at all (getTokens() returns an empty list
	 *         when reading the text fails)
	 */
	public int getLastTokenTypeOnLine(Segment text , int initialTokenType)
	{
		// Work on the list getTokens() hands back rather than on the field:
		// on a read failure the field may be empty or hold a partial result.
		List scanned = getTokens(text , initialTokenType , 0);
		if (scanned == null || scanned.isEmpty())
		{
			return Token.NULL;
		}
		return ((Token)scanned.get(scanned.size() - 1)).type;
	}
	
	/**
	 * Wraps the currently matched text (yytext()) in a token of the given
	 * type and appends it to the token list. If the list consists of
	 * nothing but a single null token, that token is discarded first.
	 *
	 * @param type the token type to assign to the matched text
	 */
	private void addToken(int type) 
	{
		Token matched = TokenFactory.createToken(type , yytext());

		boolean holdsOnlyNullToken = false;
		if (tokens.size() == 1)
		{
			Token first = (Token)tokens.get(0);
			holdsOnlyNullToken = (first.type == Token.NULL);
		}

		if (holdsOnlyNullToken)
		{
			// Exactly one element is present, so clearing drops just that one.
			tokens.clear();
		}

		tokens.add(matched);
	}
	
	
	/**
	 * Tokenizes the given text.
	 *
	 * @param text             the text to scan; the characters
	 *                         text.array[text.offset .. text.offset + text.count)
	 *                         are read
	 * @param initialTokenType token type in effect where the text starts.
	 *                         Token.COMMENT_MULTILINE and
	 *                         Token.COMMENT_DOCUMENTATION make the scanner
	 *                         start inside the corresponding comment state;
	 *                         any other value starts in the normal state.
	 * @param startOffset      not used by this implementation
	 * @return the list returned by the scanner, or a new empty list if
	 *         reading the text raised an IOException
	 */
	public List getTokens(Segment text, int initialTokenType, int startOffset) 
	{
		tokens.clear();

		// Map the incoming token type onto a lexical state. The default must
		// be the scanner's own YYINITIAL constant: token-type constants and
		// lexical-state constants are unrelated numbering schemes.
		int state;
		switch (initialTokenType) {
			case Token.COMMENT_MULTILINE:
				state = MLC;
				break;
			case Token.COMMENT_DOCUMENTATION:
				state = DOCCOMMENT;
				break;
			default:
				state = YYINITIAL;
		}

		try 
		{
			// yyreset() first, then yybegin(): the requested start state is
			// applied after the scanner has been pointed at the new input.
			yyreset(new CharArrayReader(text.array , text.offset , text.count));
			yybegin(state);
			return yylex();
		} 
		catch (IOException ioe) 
		{
			ioe.printStackTrace();
			return new ArrayList();
		}
	}
	
%}


/* Basic character classes: ASCII letters and decimal, hexadecimal and octal digits. */
Letter							= [A-Za-z]
NonzeroDigit						= [1-9]
Digit							= ("0"|{NonzeroDigit})
HexDigit							= ({Digit}|[A-Fa-f])
OctalDigit						= ([0-7])
/* Characters allowed unescaped inside character and string literals. */
AnyCharacterButApostropheOrBackSlash	= ([^\\'])
AnyCharacterButDoubleQuoteOrBackSlash	= ([^\\\"\n])
/* The part of a Unicode escape that follows the backslash: "u" plus four hex digits. */
EscapedSourceCharacter				= ("u"{HexDigit}{HexDigit}{HexDigit}{HexDigit})
/* A backslash followed by a single-character escape, an octal escape or a Unicode escape. */
Escape							= ("\\"(([btnfr\"'\\])|([0123]{OctalDigit}?{OctalDigit}?)|({OctalDigit}{OctalDigit}?)|{EscapedSourceCharacter}))
/* Any character that is not whitespace, a separator, an operator character or a quote; "#" and backslash are included explicitly. */
NonSeparator						= ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#"|"\\")
/* Identifier characters: letters, "_" and "$" may start an identifier; digits and Unicode escapes may follow. */
IdentifierStart					= ({Letter}|"_"|"$")
IdentifierPart						= ({IdentifierStart}|{Digit}|("\\"{EscapedSourceCharacter}))

/* Line ends are a bare newline; whitespace is space, tab or form feed. */
LineTerminator				= (\n)
WhiteSpace				= ([ \t\f])

/* A well-formed character literal: one plain character or one escape between apostrophes. */
CharLiteral				= ([\']({AnyCharacterButApostropheOrBackSlash}|{Escape})[\'])
/* An apostrophe followed by any run of characters up to, but not including, the next apostrophe or newline. */
UnclosedCharLiteral			= ([\'][^\'\n]*)
/* An UnclosedCharLiteral followed by a closing apostrophe. */
ErrorCharLiteral			= ({UnclosedCharLiteral}[\'])
/* A well-formed string literal: plain characters and escapes between double quotes. */
StringLiteral				= ([\"]({AnyCharacterButDoubleQuoteOrBackSlash}|{Escape})*[\"])
/* A double quote followed by text with no closing quote. NOTE(review): unlike UnclosedCharLiteral, this pattern does not exclude newlines - confirm that input never spans lines. */
UnclosedStringLiteral		= ([\"]([\\].|[^\\\"])*[^\"]?)
/* An UnclosedStringLiteral followed by a closing double quote. */
ErrorStringLiteral			= ({UnclosedStringLiteral}[\"])

/* Comment delimiters: multi-line begin and end, documentation-comment begin, and line-comment begin. */
MLCBegin					= "/*"
MLCEnd					= "*/"
DocCommentBegin			= "/**"
LineCommentBegin			= "//"

/* Decimal integer: a nonzero digit followed by digits, or a single "0". */
IntegerHelper1				= (({NonzeroDigit}{Digit}*)|"0")
/* "0" followed either by x/X and hex digits, or by zero or more octal digits. */
IntegerHelper2				= ("0"(([xX]{HexDigit}+)|({OctalDigit}*)))
/* Both integer forms accept an optional l/L suffix. Despite its name, HexLiteral also covers the octal form of IntegerHelper2. */
IntegerLiteral				= ({IntegerHelper1}[lL]?)
HexLiteral				= ({IntegerHelper2}[lL]?)
/* Optional float/double suffix. */
FloatHelper1				= ([fFdD]?)
/* Exponent part: e/E, optional sign, digits, optional suffix. */
FloatHelper2				= ([eE][+-]?{Digit}+{FloatHelper1})
/* Digits and "." followed by an optional fraction, exponent and/or suffix. */
FloatLiteral1				= ({Digit}+"."({FloatHelper1}|{FloatHelper2}|{Digit}+({FloatHelper1}|{FloatHelper2})))
/* "." followed by digits, then an optional exponent and/or suffix. */
FloatLiteral2				= ("."{Digit}+({FloatHelper1}|{FloatHelper2}))
/* Digits followed directly by an exponent. */
FloatLiteral3				= ({Digit}+{FloatHelper2})
/* Any of the three forms above, or digits followed by a mandatory f/F/d/D suffix. */
FloatLiteral				= ({FloatLiteral1}|{FloatLiteral2}|{FloatLiteral3}|({Digit}+[fFdD]))
/* A valid number immediately followed by one or more NonSeparator characters. */
ErrorNumberFormat			= (({IntegerLiteral}|{HexLiteral}|{FloatLiteral}){NonSeparator}+)
BooleanLiteral				= ("true"|"false")

/* Brackets of all three kinds; the rules emit them as SEPARATOR tokens. */
Separator					= ([\(\)\{\}\[\]])
/* Semicolon, comma and dot; the rules emit them as IDENTIFIER tokens. */
Separator2				= ([\;,.])

/*
 * Operators. The shift-left and logical-or operators are listed explicitly
 * so that each is matched as one token rather than as two single-character
 * operators.
 */
NonAssignmentOperator		= ("+"|"-"|"<="|"^"|"++"|"<"|"*"|">="|"%"|"--"|">"|"/"|"!="|"?"|">>"|"!"|"&"|"=="|":"|"<<"|"~"|"|"|"&&"|"||"|">>>")
AssignmentOperator			= ("="|"-="|"*="|"/="|"|="|"&="|"^="|"+="|"%="|"<<="|">>="|">>>=")
Operator					= ({NonAssignmentOperator}|{AssignmentOperator})

/* Tag names recognised directly after "@" inside documentation comments. */
DocumentationKeyword		= ("author"|"deprecated"|"exception"|"link"|"param"|"return"|"see"|"serial"|"serialData"|"serialField"|"since"|"throws"|"version")

/* A regular identifier, and the fallback pattern: any run of NonSeparator characters. */
Identifier				= ({IdentifierStart}{IdentifierPart}*)
ErrorIdentifier			= ({NonSeparator}+)

/* "@" optionally followed by an identifier. */
Annotation				= ("@"{Identifier}?)
/*
URLCharacter				= ([A-Za-z_0-9:/\.\?=&\-])
URLCharacters				= ({URLCharacter}+)
URL						= (("http://"|"www."){URLCharacters})
*/

/* Lexical states used while scanning inside a multi-line comment (MLC) or a documentation comment (DOCCOMMENT). */
%state MLC
%state DOCCOMMENT

%%


/* Reserved words. Every alternative shares the action on the last line of the chain. */
<YYINITIAL> {
	"abstract"		|
	"assert"		|
	"break"			|
	"case"			|
	"catch"			|
	"class"			|
	"const"			|
	"continue"		|
	"default"		|
	"do"			|
	"else"			|
	"enum"			|
	"extends"		|
	"final"			|
	"finally"		|
	"for"			|
	"goto"			|
	"if"			|
	"implements"	|
	"import"		|
	"instanceof"	|
	"interface"		|
	"native"		|
	"new"			|
	"null"			|
	"package"		|
	"private"		|
	"protected"		|
	"public"		|
	"return"		|
	"static"		|
	"strictfp"		|
	"super"			|
	"switch"		|
	"synchronized"	|
	"this"			|
	"throw"			|
	"throws"		|
	"transient"		|
	"try"			|
	"void"			|
	"volatile"		|
	"while"			{ addToken(Token.RESERVED_WORD); }
}

<YYINITIAL> {
	/* Primitive type names. */
	"boolean"		|
	"byte"			|
	"char"			|
	"double"		|
	"float"			|
	"int"			|
	"long"			|
	"short"			{ addToken(Token.DATA_TYPE); }

	/* Boolean literals. */
	{BooleanLiteral}	{ addToken(Token.LITERAL_BOOLEAN); }
}

/* Standard classes from java.lang, emitted as FUNCTION tokens. */
<YYINITIAL> {
	"Boolean"					|
	"Byte"						|
	"Character"					|
	"Class"						|
	"ClassLoader"				|
	"Compiler"					|
	"Double"					|
	"Float"						|
	"InheritableThreadLocal"	|
	"Integer"					|
	"Long"						|
	"Math"						|
	"Number"					|
	"Object"					|
	"Package"					|
	"Process"					|
	"Runtime"					|
	"RuntimePermission"			|
	"SecurityManager"			|
	"Short"						|
	"StackTraceElement"			|
	"StrictMath"				|
	"String"					|
	"StringBuffer"				|
	"System"					|
	"Thread"					|
	"ThreadGroup"				|
	"ThreadLocal"				|
	"Throwable"					|
	"Void"						{ addToken(Token.FUNCTION); }
}
/* Exception classes from java.lang, emitted as FUNCTION tokens. */
<YYINITIAL> {
	"ArithmeticException"				|
	"ArrayIndexOutOfBoundsException"	|
	"ArrayStoreException"				|
	"ClassCastException"				|
	"ClassNotFoundException"			|
	"CloneNotSupportedException"		|
	"Exception"							|
	"IllegalAccessException"			|
	"IllegalArgumentException"			|
	"IllegalMonitorStateException"		|
	"IllegalStateException"				|
	"IllegalThreadStateException"		|
	"IndexOutOfBoundsException"			|
	"InstantiationException"			|
	"InterruptedException"				|
	"NegativeArraySizeException"		|
	"NoSuchFieldException"				|
	"NoSuchMethodException"				|
	"NullPointerException"				|
	"NumberFormatException"				|
	"RuntimeException"					|
	"SecurityException"					|
	"StringIndexOutOfBoundsException"	|
	"UnsupportedOperationException"		{ addToken(Token.FUNCTION); }
}
/*
 * Error classes from java.lang, emitted as FUNCTION tokens.
 * Each name must appear only once: a second rule with the same text can
 * never be matched.
 */
<YYINITIAL> "AbstractMethodError"				{ addToken(Token.FUNCTION); }
<YYINITIAL> "AssertionError"					{ addToken(Token.FUNCTION); }
<YYINITIAL> "ClassCircularityError"			{ addToken(Token.FUNCTION); }
<YYINITIAL> "ClassFormatError"				{ addToken(Token.FUNCTION); }
<YYINITIAL> "Error"							{ addToken(Token.FUNCTION); }
<YYINITIAL> "ExceptionInInitializerError"		{ addToken(Token.FUNCTION); }
<YYINITIAL> "IllegalAccessError"				{ addToken(Token.FUNCTION); }
<YYINITIAL> "IncompatibleClassChangeError"		{ addToken(Token.FUNCTION); }
<YYINITIAL> "InstantiationError"				{ addToken(Token.FUNCTION); }
<YYINITIAL> "InternalError"					{ addToken(Token.FUNCTION); }
<YYINITIAL> "LinkageError"					{ addToken(Token.FUNCTION); }
<YYINITIAL> "NoClassDefFoundError"				{ addToken(Token.FUNCTION); }
<YYINITIAL> "NoSuchFieldError"				{ addToken(Token.FUNCTION); }
<YYINITIAL> "NoSuchMethodError"				{ addToken(Token.FUNCTION); }
<YYINITIAL> "OutOfMemoryError"				{ addToken(Token.FUNCTION); }
<YYINITIAL> "StackOverflowError"				{ addToken(Token.FUNCTION); }
<YYINITIAL> "ThreadDeath"					{ addToken(Token.FUNCTION); }
<YYINITIAL> "UnknownError"					{ addToken(Token.FUNCTION); }
<YYINITIAL> "UnsatisfiedLinkError"				{ addToken(Token.FUNCTION); }
<YYINITIAL> "UnsupportedClassVersionError"		{ addToken(Token.FUNCTION); }
<YYINITIAL> "VerifyError"					{ addToken(Token.FUNCTION); }
<YYINITIAL> "VirtualMachineError"				{ addToken(Token.FUNCTION); }
/* Interfaces from java.lang, emitted as FUNCTION tokens. */
<YYINITIAL> {
	"CharSequence"	|
	"Cloneable"		|
	"Comparable"	|
	"Runnable"		{ addToken(Token.FUNCTION); }
}


<YYINITIAL> {

	/* End of line: close the token list with a null token and hand it back. */
	{LineTerminator}				{ addNullToken(); return tokens; }

	/*
	 * Upper-case names (e.g. MAX_VALUE). The first character may not be a
	 * digit: a pattern that also accepted leading digits would match plain
	 * numbers such as 123, 10L or 0X1F at the same length as the number
	 * rules further down and, being listed first, would always win.
	 */
	[A-Z_][A-Z0-9_]*				{ addToken(Token.LITERAL_CHAR); }

	{Identifier}					{ addToken(Token.IDENTIFIER); }

/*
 * How should we handle generics?
"<"[^\[\]\{\}\(\)\+\-\*\/\%\&\|\!\~]+">" {addToken(Token.PREPROCESSOR); }
*/
	
	{WhiteSpace}+					{ addToken(Token.WHITESPACE); }

	/* String/Character literals. An unclosed literal ends the scan of this text. */
	{CharLiteral}					{ addToken(Token.LITERAL_CHAR); }
	{UnclosedCharLiteral}			{ addToken(Token.ERROR_CHAR); addNullToken(); return tokens; }
	{ErrorCharLiteral}				{ addToken(Token.ERROR_CHAR); }
	{StringLiteral}				{ addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
	{UnclosedStringLiteral}			{ addToken(Token.ERROR_STRING_DOUBLE); addNullToken(); return tokens; }
	{ErrorStringLiteral}			{ addToken(Token.ERROR_STRING_DOUBLE); }

	/* Comment literals. The longest match decides between the three comment openers. */
	"/**/"						{ /* Silly we have to do this. */ addToken(Token.COMMENT_MULTILINE); }
	{MLCBegin}					{ addToken(Token.COMMENT_MULTILINE); yybegin(MLC); }
	{DocCommentBegin}				{ addToken(Token.COMMENT_DOCUMENTATION); yybegin(DOCCOMMENT); }
	{LineCommentBegin}.*			{ addToken(Token.COMMENT_EOL); addNullToken(); return tokens; }

	/* Annotations. */
	{Annotation}					{ addToken(Token.VARIABLE); /* FIXME:  Add token type to Token? */ }

	/* Separators. */
	{Separator}					{ addToken(Token.SEPARATOR); }
	{Separator2}					{ addToken(Token.IDENTIFIER); }

	/* Operators. */
	{Operator}					{ addToken(Token.OPERATOR); }


	/* Numbers */
	{IntegerLiteral}				{ addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
	{HexLiteral}					{ addToken(Token.LITERAL_NUMBER_HEXADECIMAL); }
	{FloatLiteral}					{ addToken(Token.LITERAL_NUMBER_FLOAT); }
	{ErrorNumberFormat}				{ addToken(Token.ERROR_NUMBER_FORMAT); }

	{ErrorIdentifier}				{ addToken(Token.ERROR_IDENTIFIER); }

	/* Ended with a line not in a string or comment. */
	<<EOF>>						{ addNullToken(); return tokens; }

	/* Catch any other (unhandled) characters and flag them as bad. */
	.							{ addToken(Token.ERROR_IDENTIFIER); }

}



<MLC> {

	/* Comment terminator: emit it and resume normal scanning. */
	{MLCEnd}					{ addToken(Token.COMMENT_MULTILINE); yybegin(YYINITIAL); }

	/* Comment text. A lone asterisk is matched on its own so that the terminator can win as the longer match. */
	[^\n\*]+					|
	\*						{ addToken(Token.COMMENT_MULTILINE); }

	/* End of line, or end of input, while still inside the comment: hand back what was collected. */
	\n						{ addToken(Token.COMMENT_MULTILINE); return tokens; }
	<<EOF>>					{ addToken(Token.COMMENT_MULTILINE); return tokens; }

}


<DOCCOMMENT> {

	/* Comment terminator: emit it and resume normal scanning. */
	{MLCEnd}					{ addToken(Token.COMMENT_DOCUMENTATION); yybegin(YYINITIAL); }

	/*
	 * Everything else inside the comment - plain text, "@" tags, markup
	 * tags, and stray "@", "<" or asterisk characters - is emitted with
	 * the same token type.
	 */
	[^\@\n\<\*]+				|
	"@" {DocumentationKeyword}	|
	"@"						|
	"<"[/]?({Letter}[^\>]*)?">"	|
	\<						|
	\*						{ addToken(Token.COMMENT_DOCUMENTATION); }

	/* End of line, or end of input, while still inside the comment: hand back what was collected. */
	\n						{ addToken(Token.COMMENT_DOCUMENTATION); return tokens; }
	<<EOF>>					{ yybegin(YYINITIAL); addToken(Token.COMMENT_DOCUMENTATION); return tokens; }

}

















