/* Generated by re2c 0.12.1 */
#line 1 "_mwscan.re"
// -*- mode: c++ -*-
// Copyright (c) 2007-2008 PediaPress GmbH
// See README.txt for additional licensing information.

#include <Python.h>

#include <iostream>
#include <assert.h>
#include <vector>
using namespace std;

// Record a token of type `x` through Scanner::found() and return `x` from the
// enclosing function.  The do { } while (0) wrapper makes `RET(x);` a single
// statement, so it stays correct inside un-braced if/else bodies (a bare
// `{...}` followed by `;` would detach a following `else`).
#define RET(x) do { found(x); return (x); } while (0)

// Token kinds produced by Scanner::scan().  py_scan() hands these numeric
// values to Python unchanged, so enumerators must keep their order.
enum mwtok {
	t_end,           // NUL sentinel reached; scan() returns it without recording a token
	t_text,          // plain text (adjacent text tokens are merged by Scanner::found)
	t_entity,        // character entity such as &name; &#123; &#x1f;
	t_special,       // single markup character such as [ ] : |
	t_magicword,     // __TOC__, __NOTOC__, __START__, ...
	t_comment,       // <!-- ... -->
	t_2box_open,     // [[
	t_2box_close,    // ]]
	t_http_url,      // bare http(s)://, ftp:// or mailto: link
	t_break,         // two or more consecutive newlines
	t_begin_table,   // {|
	t_end_table,     // |}
	t_html_tag,      // <tag ...> or </tag ...>
	t_style,         // run of 2, 3 or 5 apostrophes
	t_pre,           // leading space at beginning of line
	t_section,       // run of '=' (plus trailing blanks) at beginning of line
	t_section_end,   // run of '=' (plus trailing blanks) at end of a section line
	t_item,          // list markers (# and *, optionally preceded by ':') at beginning of line
	t_colon,         // run of ':' at beginning of line
	t_semicolon,     // run of ';' at beginning of line
	t_hrule,         // four or more '-' at beginning of line
	t_newline,       // single newline
	t_column,        // table cell separator (only while inside a table)
	t_row,           // table row separator |- (only while inside a table)
	t_tablecaption,  // table caption marker |+ (only while inside a table)
	t_urllink        // '[' immediately followed by a URL
};

// One scanned token, expressed as offsets into the scanned buffer.
struct Token {
	int type;   // token kind (an mwtok value)
	int start;  // offset of the first character, counted from the buffer start
	int len;    // number of characters covered by the token
};

class Scanner
{
public:

	Scanner(Py_UNICODE *_start, Py_UNICODE *_end) {
		source = start = _start;
		end = _end;
		cursor = start;
		line_startswith_section = -1;
		tablemode=0;
	}

	int found(mwtok val) {
		if (val==t_text && tokens.size()) {
			Token &previous_token (tokens[tokens.size()-1]);
			if (previous_token.type==val) {
				previous_token.len += cursor-start;
				return tokens.size()-1;
			}
		}
		Token t;
		t.type = val;
		t.start = (start-source);
		t.len = cursor-start;			
		tokens.push_back(t);
		return tokens.size()-1;
	}

	bool bol() const {
		return (start==source) || (start[-1]=='\n');
	}

	bool eol() const {
		return *cursor=='\n' || *cursor==0;
	}

	void newline() {
		if (line_startswith_section>=0) {
			tokens[line_startswith_section].type = t_text;
		}
		line_startswith_section = -1;
	}

	inline int scan();

	Py_UNICODE *source;

	Py_UNICODE *start;
	Py_UNICODE *cursor;
	Py_UNICODE *end;
	vector<Token> tokens;

	int line_startswith_section;
	int tablemode;
};


// Scan a single token starting at `cursor`.
//
// The body consists of two re2c-generated state machines:
//   1. a "beginning of line" machine, entered only when bol() is true, that
//      recognises line-start constructs (leading space, table markers,
//      section headings, list markers, ':' / ';' runs, horizontal rules);
//      anything else jumps to `not_bol`;
//   2. the general machine after `not_bol`, which restarts from the saved
//      cursor and recognises inline markup, URLs, entities, tags, newlines
//      and plain text.
//
// Every accepted token is recorded through found() (mostly via RET) and its
// mwtok value is returned.  The only exception is the NUL sentinel, for which
// t_end (0) is returned without recording anything.
//
// NOTE(review): YYLIMIT is defined but the generated code never references it
// (YYFILL is disabled), so the machines rely solely on the NUL sentinel to
// stop at the end of the buffer.
int Scanner::scan()
{
	start=cursor;
	
	Py_UNICODE *marker=cursor;

	// Restart position for the general machine when the bol machine gives up.
	Py_UNICODE *save_cursor = cursor;


#define YYCTYPE         Py_UNICODE
#define YYCURSOR        cursor
#define YYMARKER	marker
#define YYLIMIT   (end)
// #define YYFILL(n) return 0;

#line 124 "_mwscan.re"


/*
  the re2c manpage says:
  "The user must arrange for a sentinel token to appear at the end of input"
  \000 is our sentinel token.
*/

#line 157 "_mwscan.re"

	// Line-start constructs are only tried at the beginning of a line.
	if (!bol()) {
		goto not_bol;
	}

#line 140 "_mwscan.cc"
{
	YYCTYPE yych;
	unsigned int yyaccept = 0;

	yych = *YYCURSOR;
	if(yych <= ',') {
		if(yych <= '"') {
			if(yych <= 0x001F) goto yy17;
			if(yych <= ' ') goto yy2;
			if(yych <= '!') goto yy6;
			goto yy17;
		} else {
			if(yych <= '#') goto yy11;
			if(yych == '*') goto yy11;
			goto yy17;
		}
	} else {
		if(yych <= ';') {
			if(yych <= '-') goto yy15;
			if(yych <= '9') goto yy17;
			if(yych <= ':') goto yy9;
			goto yy13;
		} else {
			if(yych <= '=') {
				if(yych <= '<') goto yy17;
				goto yy7;
			} else {
				if(yych == '|') goto yy4;
				goto yy17;
			}
		}
	}
yy2:
	yyaccept = 0;
	yych = *(YYMARKER = ++YYCURSOR);
	if(yych <= 0x001F) goto yy3;
	if(yych <= '!') goto yy44;
	if(yych == '|') goto yy44;
yy3:
#line 198 "_mwscan.re"
	{RET(t_pre);}
#line 182 "_mwscan.cc"
yy4:
	++YYCURSOR;
	if((yych = *YYCURSOR) <= ',') {
		if(yych == '+') goto yy34;
	} else {
		if(yych <= '-') goto yy37;
		if(yych == '}') goto yy40;
	}
yy5:
#line 176 "_mwscan.re"
	{
		if (tablemode)
			RET(t_column);

		// Outside a table: a match that began with a space yields only that
		// space as t_pre; otherwise the match is plain text.
		if (*start==' ') {
			cursor = start+1;
			RET(t_pre);
		}
		RET(t_text);
	}
#line 203 "_mwscan.cc"
yy6:
	yych = *++YYCURSOR;
	goto yy5;
yy7:
	++YYCURSOR;
	if((yych = *YYCURSOR) == '=') goto yy32;
	goto yy31;
yy8:
#line 199 "_mwscan.re"
	{
			// Remember which token opened the section so newline() can demote
			// it to text if the line ends without a section end.
			line_startswith_section = found(t_section);
			return t_section;
		}
#line 217 "_mwscan.cc"
yy9:
	++YYCURSOR;
	if((yych = *YYCURSOR) <= ')') {
		if(yych == '#') goto yy26;
	} else {
		if(yych <= '*') goto yy26;
		if(yych == ':') goto yy28;
	}
yy10:
#line 204 "_mwscan.re"
	{RET(t_colon);}
#line 229 "_mwscan.cc"
yy11:
	++YYCURSOR;
	yych = *YYCURSOR;
	goto yy27;
yy12:
#line 203 "_mwscan.re"
	{RET(t_item);}
#line 237 "_mwscan.cc"
yy13:
	++YYCURSOR;
	yych = *YYCURSOR;
	goto yy25;
yy14:
#line 205 "_mwscan.re"
	{RET(t_semicolon);}
#line 245 "_mwscan.cc"
yy15:
	yyaccept = 1;
	yych = *(YYMARKER = ++YYCURSOR);
	if(yych == '-') goto yy18;
yy16:
#line 208 "_mwscan.re"
	{goto not_bol;}
#line 253 "_mwscan.cc"
yy17:
	yych = *++YYCURSOR;
	goto yy16;
yy18:
	yych = *++YYCURSOR;
	if(yych == '-') goto yy20;
yy19:
	YYCURSOR = YYMARKER;
	if(yyaccept <= 0) {
		goto yy3;
	} else {
		goto yy16;
	}
yy20:
	yych = *++YYCURSOR;
	if(yych != '-') goto yy19;
yy21:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych == '-') goto yy21;
#line 206 "_mwscan.re"
	{RET(t_hrule);}
#line 276 "_mwscan.cc"
yy24:
	++YYCURSOR;
	yych = *YYCURSOR;
yy25:
	if(yych == ';') goto yy24;
	goto yy14;
yy26:
	++YYCURSOR;
	yych = *YYCURSOR;
yy27:
	if(yych == '#') goto yy26;
	if(yych == '*') goto yy26;
	goto yy12;
yy28:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych <= ')') {
		if(yych == '#') goto yy26;
		goto yy10;
	} else {
		if(yych <= '*') goto yy26;
		if(yych == ':') goto yy28;
		goto yy10;
	}
yy30:
	++YYCURSOR;
	yych = *YYCURSOR;
yy31:
	if(yych == 0x0009) goto yy30;
	if(yych == ' ') goto yy30;
	goto yy8;
yy32:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych <= 0x001F) {
		if(yych == 0x0009) goto yy30;
		goto yy8;
	} else {
		if(yych <= ' ') goto yy30;
		if(yych == '=') goto yy32;
		goto yy8;
	}
yy34:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych == '+') goto yy34;
#line 188 "_mwscan.re"
	{
		if (tablemode) 
			RET(t_tablecaption);
		if (*start==' ') {
			cursor = start+1;
			RET(t_pre);
		}
		RET(t_text);
	}
#line 333 "_mwscan.cc"
yy37:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych == '-') goto yy37;
#line 165 "_mwscan.re"
	{
		if (tablemode) 
			RET(t_row);
		if (*start==' ') {
			cursor = start+1;
			RET(t_pre);
		}
		RET(t_text);
	}
#line 348 "_mwscan.cc"
yy40:
	++YYCURSOR;
#line 162 "_mwscan.re"
	{--tablemode; RET(t_end_table);}
#line 353 "_mwscan.cc"
yy42:
	yych = *++YYCURSOR;
	if(yych <= ',') {
		if(yych == '+') goto yy34;
		goto yy5;
	} else {
		if(yych <= '-') goto yy37;
		if(yych == '}') goto yy40;
		goto yy5;
	}
yy43:
	++YYCURSOR;
	yych = *YYCURSOR;
yy44:
	if(yych <= '!') {
		if(yych <= 0x001F) goto yy19;
		if(yych <= ' ') goto yy43;
	} else {
		if(yych == '|') goto yy42;
		goto yy19;
	}
	++YYCURSOR;
	yych = *YYCURSOR;
	goto yy5;
}
#line 209 "_mwscan.re"



// General machine: rewind to where this call started and scan again.
not_bol:
	cursor = save_cursor;
	marker = cursor;


#line 388 "_mwscan.cc"
{
	YYCTYPE yych;
	unsigned int yyaccept = 0;
	yych = *YYCURSOR;
	if(yych <= 'Z') {
		if(yych <= '\'') {
			if(yych <= ' ') {
				if(yych <= 0x0000) goto yy69;
				if(yych == 0x000A) goto yy59;
				goto yy71;
			} else {
				if(yych <= '!') goto yy62;
				if(yych <= '%') goto yy71;
				if(yych <= '&') goto yy68;
				goto yy66;
			}
		} else {
			if(yych <= ';') {
				if(yych <= '/') goto yy71;
				if(yych <= '9') goto yy55;
				if(yych <= ':') goto yy64;
				goto yy71;
			} else {
				if(yych <= '<') goto yy67;
				if(yych <= '=') goto yy57;
				if(yych <= '@') goto yy71;
				goto yy55;
			}
		}
	} else {
		if(yych <= 'f') {
			if(yych <= '^') {
				if(yych <= '[') goto yy48;
				if(yych == ']') goto yy56;
				goto yy71;
			} else {
				if(yych <= '_') goto yy54;
				if(yych <= '`') goto yy71;
				if(yych <= 'e') goto yy55;
				goto yy52;
			}
		} else {
			if(yych <= 'm') {
				if(yych == 'h') goto yy53;
				if(yych <= 'l') goto yy55;
				goto yy50;
			} else {
				if(yych <= 'z') goto yy55;
				if(yych <= '{') goto yy65;
				if(yych <= '|') goto yy61;
				goto yy71;
			}
		}
	}
yy48:
	yyaccept = 0;
	yych = *(YYMARKER = ++YYCURSOR);
	switch(yych) {
	case '[':	goto yy250;
	case 'f':	goto yy253;
	case 'h':	goto yy252;
	case 'm':	goto yy254;
	default:	goto yy49;
	}
yy49:
#line 255 "_mwscan.re"
	{RET(t_special);}
#line 456 "_mwscan.cc"
yy50:
	++YYCURSOR;
	if((yych = *YYCURSOR) == 'a') goto yy238;
	goto yy122;
yy51:
#line 224 "_mwscan.re"
	{RET(t_text);}
#line 464 "_mwscan.cc"
yy52:
	yych = *++YYCURSOR;
	if(yych == 't') goto yy230;
	goto yy122;
yy53:
	yych = *++YYCURSOR;
	if(yych == 't') goto yy220;
	goto yy122;
yy54:
	yych = *++YYCURSOR;
	if(yych == '_') goto yy123;
	goto yy122;
yy55:
	yych = *++YYCURSOR;
	goto yy122;
yy56:
	yych = *++YYCURSOR;
	if(yych == ']') goto yy119;
	goto yy49;
yy57:
	++YYCURSOR;
	if((yych = *YYCURSOR) == '=') goto yy117;
	goto yy116;
yy58:
#line 227 "_mwscan.re"
	{
			// A run of '=' closes a section only when it sits at the end of a
			// line that was opened by a section token; otherwise it is text.
			if (eol()) {
			        if (line_startswith_section>=0) {
				     line_startswith_section=-1;
				     RET(t_section_end);
                                } else {
				     RET(t_text);
                                }
			} else {
				RET(t_text);
			}
		    }
#line 502 "_mwscan.cc"
yy59:
	++YYCURSOR;
	if((yych = *YYCURSOR) == 0x000A) goto yy112;
#line 240 "_mwscan.re"
	{newline(); RET(t_newline);}
#line 508 "_mwscan.cc"
yy61:
	yych = *++YYCURSOR;
	if(yych <= '+') {
		if(yych == '!') goto yy106;
		if(yych <= '*') goto yy49;
		goto yy110;
	} else {
		if(yych <= '{') goto yy49;
		if(yych <= '|') goto yy106;
		if(yych <= '}') goto yy108;
		goto yy49;
	}
yy62:
	++YYCURSOR;
	if((yych = *YYCURSOR) == '!') goto yy106;
yy63:
#line 267 "_mwscan.re"
	{RET(t_text);}
#line 527 "_mwscan.cc"
yy64:
	yych = *++YYCURSOR;
	goto yy49;
yy65:
	yych = *++YYCURSOR;
	if(yych == '|') goto yy104;
	goto yy63;
yy66:
	yych = *++YYCURSOR;
	if(yych == '\'') goto yy99;
	goto yy63;
yy67:
	yyaccept = 1;
	yych = *(YYMARKER = ++YYCURSOR);
	if(yych <= '/') {
		if(yych == '!') goto yy83;
		if(yych <= '.') goto yy63;
		goto yy84;
	} else {
		if(yych <= 'Z') {
			if(yych <= '@') goto yy63;
			goto yy85;
		} else {
			if(yych <= '`') goto yy63;
			if(yych <= 'z') goto yy85;
			goto yy63;
		}
	}
yy68:
	yyaccept = 1;
	yych = *(YYMARKER = ++YYCURSOR);
	if(yych <= '9') {
		if(yych == '#') goto yy72;
		if(yych <= '/') goto yy63;
		goto yy74;
	} else {
		if(yych <= 'Z') {
			if(yych <= '@') goto yy63;
			goto yy74;
		} else {
			if(yych <= '`') goto yy63;
			if(yych <= 'z') goto yy74;
			goto yy63;
		}
	}
yy69:
	++YYCURSOR;
#line 266 "_mwscan.re"
	{newline(); return t_end;}
#line 577 "_mwscan.cc"
yy71:
	yych = *++YYCURSOR;
	goto yy63;
yy72:
	yych = *++YYCURSOR;
	if(yych <= 'W') {
		if(yych <= '/') goto yy73;
		if(yych <= '9') goto yy79;
	} else {
		if(yych <= 'X') goto yy78;
		if(yych == 'x') goto yy78;
	}
yy73:
	// Longer match failed: back up to the last marker and run the action
	// selected by yyaccept.
	YYCURSOR = YYMARKER;
	if(yyaccept <= 1) {
		if(yyaccept <= 0) {
			goto yy49;
		} else {
			goto yy63;
		}
	} else {
		if(yyaccept <= 2) {
			goto yy100;
		} else {
			goto yy51;
		}
	}
yy74:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych <= ';') {
		if(yych <= '/') goto yy73;
		if(yych <= '9') goto yy74;
		if(yych <= ':') goto yy73;
	} else {
		if(yych <= 'Z') {
			if(yych <= '@') goto yy73;
			goto yy74;
		} else {
			if(yych <= '`') goto yy73;
			if(yych <= 'z') goto yy74;
			goto yy73;
		}
	}
yy76:
	++YYCURSOR;
#line 264 "_mwscan.re"
	{RET(t_entity);}
#line 626 "_mwscan.cc"
yy78:
	yych = *++YYCURSOR;
	if(yych == ';') goto yy73;
	goto yy82;
yy79:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych <= '/') goto yy73;
	if(yych <= '9') goto yy79;
	if(yych == ';') goto yy76;
	goto yy73;
yy81:
	++YYCURSOR;
	yych = *YYCURSOR;
yy82:
	if(yych <= ';') {
		if(yych <= '/') goto yy73;
		if(yych <= '9') goto yy81;
		if(yych <= ':') goto yy73;
		goto yy76;
	} else {
		if(yych <= 'F') {
			if(yych <= '@') goto yy73;
			goto yy81;
		} else {
			if(yych <= '`') goto yy73;
			if(yych <= 'f') goto yy81;
			goto yy73;
		}
	}
yy83:
	yych = *++YYCURSOR;
	if(yych == '-') goto yy91;
	goto yy73;
yy84:
	yych = *++YYCURSOR;
	if(yych <= '@') goto yy73;
	if(yych <= 'Z') goto yy85;
	if(yych <= '`') goto yy73;
	if(yych >= '{') goto yy73;
yy85:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych <= '>') {
		if(yych <= ';') {
			if(yych <= 0x0000) goto yy73;
		} else {
			if(yych <= '<') goto yy73;
			if(yych >= '>') goto yy89;
		}
	} else {
		if(yych <= 'Z') {
			if(yych >= 'A') goto yy85;
		} else {
			if(yych <= '`') goto yy87;
			if(yych <= 'z') goto yy85;
		}
	}
yy87:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych <= '<') {
		if(yych <= 0x0000) goto yy73;
		if(yych <= ';') goto yy87;
		goto yy73;
	} else {
		if(yych != '>') goto yy87;
	}
yy89:
	++YYCURSOR;
#line 260 "_mwscan.re"
	{RET(t_html_tag);}
#line 699 "_mwscan.cc"
yy91:
	yych = *++YYCURSOR;
	if(yych != '-') goto yy73;
yy92:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych <= ';') {
		if(yych <= 0x0000) goto yy73;
		if(yych != '-') goto yy92;
	} else {
		if(yych == '=') goto yy92;
		if(yych <= '>') goto yy73;
		goto yy92;
	}
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych <= ';') {
		if(yych <= 0x0000) goto yy73;
		if(yych != '-') goto yy92;
	} else {
		if(yych == '=') goto yy92;
		if(yych <= '>') goto yy73;
		goto yy92;
	}
yy95:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych <= ';') {
		if(yych <= 0x0000) goto yy73;
		if(yych == '-') goto yy95;
		goto yy92;
	} else {
		if(yych <= '<') goto yy73;
		if(yych != '>') goto yy92;
	}
	++YYCURSOR;
#line 263 "_mwscan.re"
	{RET(t_comment);}
#line 738 "_mwscan.cc"
yy99:
	++YYCURSOR;
	if((yych = *YYCURSOR) == '\'') goto yy101;
yy100:
#line 258 "_mwscan.re"
	{RET(t_style);}
#line 745 "_mwscan.cc"
yy101:
	yyaccept = 2;
	yych = *(YYMARKER = ++YYCURSOR);
	if(yych != '\'') goto yy100;
	yych = *++YYCURSOR;
	if(yych != '\'') goto yy73;
	yych = *++YYCURSOR;
	goto yy100;
yy104:
	++YYCURSOR;
#line 256 "_mwscan.re"
	{++tablemode; RET(t_begin_table);}
#line 758 "_mwscan.cc"
yy106:
	++YYCURSOR;
#line 242 "_mwscan.re"
	{
		if (tablemode) 
			RET(t_column);
		// Outside a table only the first character is consumed, as t_special.
		cursor = start+1;
		RET(t_special);
	}
#line 768 "_mwscan.cc"
yy108:
	++YYCURSOR;
#line 257 "_mwscan.re"
	{--tablemode; RET(t_end_table);}
#line 773 "_mwscan.cc"
yy110:
	++YYCURSOR;
#line 249 "_mwscan.re"
	{
		if (tablemode) 
			RET(t_tablecaption);
		cursor = start+1;
		RET(t_special);
	}
#line 783 "_mwscan.cc"
yy112:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych == 0x000A) goto yy112;
#line 239 "_mwscan.re"
	{newline(); RET(t_break);}
#line 790 "_mwscan.cc"
yy115:
	++YYCURSOR;
	yych = *YYCURSOR;
yy116:
	if(yych == 0x0009) goto yy115;
	if(yych == ' ') goto yy115;
	goto yy58;
yy117:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych <= 0x001F) {
		if(yych == 0x0009) goto yy115;
		goto yy58;
	} else {
		if(yych <= ' ') goto yy115;
		if(yych == '=') goto yy117;
		goto yy58;
	}
yy119:
	++YYCURSOR;
#line 226 "_mwscan.re"
	{RET(t_2box_close);}
#line 813 "_mwscan.cc"
yy121:
	++YYCURSOR;
	yych = *YYCURSOR;
yy122:
	if(yych <= 'Z') {
		if(yych <= '/') goto yy51;
		if(yych <= '9') goto yy121;
		if(yych <= '@') goto yy51;
		goto yy121;
	} else {
		if(yych <= '_') {
			if(yych <= '^') goto yy51;
			goto yy121;
		} else {
			if(yych <= '`') goto yy51;
			if(yych <= 'z') goto yy121;
			goto yy51;
		}
	}
yy123:
	yych = *++YYCURSOR;
	switch(yych) {
	case 'E':	goto yy127;
	case 'F':	goto yy126;
	case 'N':	goto yy125;
	case 'S':	goto yy128;
	case 'T':	goto yy124;
	default:	goto yy122;
	}
yy124:
	yych = *++YYCURSOR;
	if(yych == 'O') goto yy217;
	goto yy122;
yy125:
	yych = *++YYCURSOR;
	if(yych == 'E') goto yy147;
	if(yych == 'O') goto yy148;
	goto yy122;
yy126:
	yych = *++YYCURSOR;
	if(yych == 'O') goto yy139;
	goto yy122;
yy127:
	yych = *++YYCURSOR;
	if(yych == 'N') goto yy136;
	goto yy122;
yy128:
	yych = *++YYCURSOR;
	if(yych != 'T') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'A') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'R') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'T') goto yy122;
	yych = *++YYCURSOR;
	if(yych != '_') goto yy122;
	yych = *++YYCURSOR;
	if(yych != '_') goto yy122;
yy134:
	++YYCURSOR;
	if((yych = *YYCURSOR) <= 'Z') {
		if(yych <= '/') goto yy135;
		if(yych <= '9') goto yy121;
		if(yych >= 'A') goto yy121;
	} else {
		if(yych <= '_') {
			if(yych >= '_') goto yy121;
		} else {
			if(yych <= '`') goto yy135;
			if(yych <= 'z') goto yy121;
		}
	}
yy135:
#line 223 "_mwscan.re"
	{RET(t_magicword);}
#line 890 "_mwscan.cc"
yy136:
	yych = *++YYCURSOR;
	if(yych != 'D') goto yy122;
	yych = *++YYCURSOR;
	if(yych != '_') goto yy122;
	yych = *++YYCURSOR;
	if(yych == '_') goto yy134;
	goto yy122;
yy139:
	yych = *++YYCURSOR;
	if(yych != 'R') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'C') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'E') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'T') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'O') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'C') goto yy122;
	yych = *++YYCURSOR;
	if(yych != '_') goto yy122;
	yych = *++YYCURSOR;
	if(yych == '_') goto yy134;
	goto yy122;
yy147:
	yych = *++YYCURSOR;
	if(yych == 'W') goto yy204;
	goto yy122;
yy148:
	yych = *++YYCURSOR;
	switch(yych) {
	case 'C':	goto yy151;
	case 'E':	goto yy150;
	case 'G':	goto yy152;
	case 'T':	goto yy149;
	default:	goto yy122;
	}
yy149:
	yych = *++YYCURSOR;
	if(yych <= 'H') {
		if(yych == 'C') goto yy187;
		goto yy122;
	} else {
		if(yych <= 'I') goto yy188;
		if(yych == 'O') goto yy189;
		goto yy122;
	}
yy150:
	yych = *++YYCURSOR;
	if(yych == 'D') goto yy176;
	goto yy122;
yy151:
	yych = *++YYCURSOR;
	if(yych == 'C') goto yy160;
	if(yych == 'O') goto yy161;
	goto yy122;
yy152:
	yych = *++YYCURSOR;
	if(yych != 'A') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'L') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'L') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'E') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'R') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'Y') goto yy122;
	yych = *++YYCURSOR;
	if(yych != '_') goto yy122;
	yych = *++YYCURSOR;
	if(yych == '_') goto yy134;
	goto yy122;
yy160:
	yych = *++YYCURSOR;
	if(yych == '_') goto yy175;
	goto yy122;
yy161:
	yych = *++YYCURSOR;
	if(yych != 'N') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'T') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'E') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'N') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'T') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'C') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'O') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'N') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'V') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'E') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'R') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'T') goto yy122;
	yych = *++YYCURSOR;
	if(yych != '_') goto yy122;
	yych = *++YYCURSOR;
	if(yych == '_') goto yy134;
	goto yy122;
yy175:
	yych = *++YYCURSOR;
	if(yych == '_') goto yy134;
	goto yy122;
yy176:
	yych = *++YYCURSOR;
	if(yych != 'I') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'T') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'S') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'E') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'C') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'T') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'I') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'O') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'N') goto yy122;
	yych = *++YYCURSOR;
	if(yych != '_') goto yy122;
	yych = *++YYCURSOR;
	if(yych == '_') goto yy134;
	goto yy122;
yy187:
	yych = *++YYCURSOR;
	if(yych == '_') goto yy203;
	goto yy122;
yy188:
	yych = *++YYCURSOR;
	if(yych == 'T') goto yy192;
	goto yy122;
yy189:
	yych = *++YYCURSOR;
	if(yych != 'C') goto yy122;
	yych = *++YYCURSOR;
	if(yych != '_') goto yy122;
	yych = *++YYCURSOR;
	if(yych == '_') goto yy134;
	goto yy122;
yy192:
	yych = *++YYCURSOR;
	if(yych != 'L') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'E') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'C') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'O') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'N') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'V') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'E') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'R') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'T') goto yy122;
	yych = *++YYCURSOR;
	if(yych != '_') goto yy122;
	yych = *++YYCURSOR;
	if(yych == '_') goto yy134;
	goto yy122;
yy203:
	yych = *++YYCURSOR;
	if(yych == '_') goto yy134;
	goto yy122;
yy204:
	yych = *++YYCURSOR;
	if(yych != 'S') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'E') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'C') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'T') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'I') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'O') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'N') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'L') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'I') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'N') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'K') goto yy122;
	yych = *++YYCURSOR;
	if(yych != '_') goto yy122;
	yych = *++YYCURSOR;
	if(yych == '_') goto yy134;
	goto yy122;
yy217:
	yych = *++YYCURSOR;
	if(yych != 'C') goto yy122;
	yych = *++YYCURSOR;
	if(yych != '_') goto yy122;
	yych = *++YYCURSOR;
	if(yych == '_') goto yy134;
	goto yy122;
yy220:
	yych = *++YYCURSOR;
	if(yych != 't') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'p') goto yy122;
	yyaccept = 3;
	yych = *(YYMARKER = ++YYCURSOR);
	if(yych == ':') goto yy224;
	if(yych != 's') goto yy122;
	yyaccept = 3;
	yych = *(YYMARKER = ++YYCURSOR);
	if(yych != ':') goto yy122;
yy224:
	yych = *++YYCURSOR;
	if(yych != '/') goto yy73;
	yych = *++YYCURSOR;
	if(yych != '/') goto yy73;
	yych = *++YYCURSOR;
	if(yych <= '_') {
		if(yych <= ':') {
			if(yych <= '&') {
				if(yych == '#') goto yy227;
				if(yych <= '$') goto yy73;
			} else {
				if(yych <= '\'') goto yy73;
				if(yych == '*') goto yy73;
			}
		} else {
			if(yych <= '?') {
				if(yych == '=') goto yy227;
				if(yych <= '>') goto yy73;
			} else {
				if(yych <= '@') goto yy73;
				if(yych <= 'Z') goto yy227;
				if(yych <= '^') goto yy73;
			}
		}
	} else {
		if(yych <= 0x00D6) {
			if(yych <= '~') {
				if(yych <= '`') goto yy73;
				if(yych <= 'z') goto yy227;
				if(yych <= '}') goto yy73;
			} else {
				if(yych == 0x00C4) goto yy227;
				if(yych <= 0x00D5) goto yy73;
			}
		} else {
			if(yych <= 0x00E4) {
				if(yych == 0x00DC) goto yy227;
				if(yych <= 0x00E3) goto yy73;
			} else {
				if(yych <= 0x00F6) {
					if(yych <= 0x00F5) goto yy73;
				} else {
					if(yych != 0x00FC) goto yy73;
				}
			}
		}
	}
yy227:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych <= '_') {
		if(yych <= ':') {
			if(yych <= '&') {
				if(yych == '#') goto yy227;
				if(yych >= '%') goto yy227;
			} else {
				if(yych <= '\'') goto yy229;
				if(yych != '*') goto yy227;
			}
		} else {
			if(yych <= '?') {
				if(yych == '=') goto yy227;
				if(yych >= '?') goto yy227;
			} else {
				if(yych <= '@') goto yy229;
				if(yych <= 'Z') goto yy227;
				if(yych >= '_') goto yy227;
			}
		}
	} else {
		if(yych <= 0x00D6) {
			if(yych <= '~') {
				if(yych <= '`') goto yy229;
				if(yych <= 'z') goto yy227;
				if(yych >= '~') goto yy227;
			} else {
				if(yych == 0x00C4) goto yy227;
				if(yych >= 0x00D6) goto yy227;
			}
		} else {
			if(yych <= 0x00E4) {
				if(yych == 0x00DC) goto yy227;
				if(yych >= 0x00E4) goto yy227;
			} else {
				if(yych <= 0x00F6) {
					if(yych >= 0x00F6) goto yy227;
				} else {
					if(yych == 0x00FC) goto yy227;
				}
			}
		}
	}
yy229:
#line 222 "_mwscan.re"
	{RET(t_http_url);}
#line 1217 "_mwscan.cc"
yy230:
	yych = *++YYCURSOR;
	if(yych != 'p') goto yy122;
	yyaccept = 3;
	yych = *(YYMARKER = ++YYCURSOR);
	if(yych != ':') goto yy122;
	yych = *++YYCURSOR;
	if(yych != '/') goto yy73;
	yych = *++YYCURSOR;
	if(yych != '/') goto yy73;
	yych = *++YYCURSOR;
	if(yych <= '=') {
		if(yych <= '&') {
			if(yych <= '"') goto yy73;
			if(yych == '%') goto yy73;
		} else {
			if(yych <= '\'') goto yy73;
			if(yych <= ':') goto yy235;
			if(yych <= '<') goto yy73;
		}
	} else {
		if(yych <= '_') {
			if(yych <= '>') goto yy73;
			if(yych <= 'Z') goto yy235;
			if(yych <= '^') goto yy73;
		} else {
			if(yych <= '{') {
				if(yych <= '`') goto yy73;
			} else {
				if(yych <= '|') goto yy73;
				if(yych >= 0x007F) goto yy73;
			}
		}
	}
yy235:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych <= '=') {
		if(yych <= '&') {
			if(yych <= '"') goto yy237;
			if(yych != '%') goto yy235;
		} else {
			if(yych <= '\'') goto yy237;
			if(yych <= ':') goto yy235;
			if(yych >= '=') goto yy235;
		}
	} else {
		if(yych <= '_') {
			if(yych <= '>') goto yy237;
			if(yych <= 'Z') goto yy235;
			if(yych >= '_') goto yy235;
		} else {
			if(yych <= '{') {
				if(yych >= 'a') goto yy235;
			} else {
				if(yych <= '|') goto yy237;
				if(yych <= '~') goto yy235;
			}
		}
	}
yy237:
#line 220 "_mwscan.re"
	{RET(t_http_url);}
#line 1281 "_mwscan.cc"
yy238:
	yych = *++YYCURSOR;
	if(yych != 'i') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'l') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 't') goto yy122;
	yych = *++YYCURSOR;
	if(yych != 'o') goto yy122;
	yyaccept = 3;
	yych = *(YYMARKER = ++YYCURSOR);
	if(yych != ':') goto yy122;
	yych = *++YYCURSOR;
	if(yych == '@') goto yy73;
	goto yy245;
yy244:
	++YYCURSOR;
	yych = *YYCURSOR;
yy245:
	if(yych <= '9') {
		if(yych <= '\'') {
			if(yych == '!') goto yy244;
			if(yych <= '"') goto yy73;
			goto yy244;
		} else {
			if(yych <= ')') goto yy73;
			if(yych == ',') goto yy73;
			goto yy244;
		}
	} else {
		if(yych <= '?') {
			if(yych == '=') goto yy244;
			if(yych <= '>') goto yy73;
			goto yy244;
		} else {
			if(yych <= 'Z') {
				if(yych >= 'A') goto yy244;
			} else {
				if(yych <= ']') goto yy73;
				if(yych <= '~') goto yy244;
				goto yy73;
			}
		}
	}
	yych = *++YYCURSOR;
	if(yych <= '@') {
		if(yych <= '.') {
			if(yych <= ',') goto yy73;
		} else {
			if(yych <= '/') goto yy73;
			if(yych >= ':') goto yy73;
		}
	} else {
		if(yych <= '_') {
			if(yych <= 'Z') goto yy247;
			if(yych <= '^') goto yy73;
		} else {
			if(yych <= '`') goto yy73;
			if(yych >= '{') goto yy73;
		}
	}
yy247:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych <= '@') {
		if(yych <= '.') {
			if(yych >= '-') goto yy247;
		} else {
			if(yych <= '/') goto yy249;
			if(yych <= '9') goto yy247;
		}
	} else {
		if(yych <= '_') {
			if(yych <= 'Z') goto yy247;
			if(yych >= '_') goto yy247;
		} else {
			if(yych <= '`') goto yy249;
			if(yych <= 'z') goto yy247;
		}
	}
yy249:
#line 218 "_mwscan.re"
	{RET(t_http_url);}
#line 1365 "_mwscan.cc"
yy250:
	++YYCURSOR;
#line 225 "_mwscan.re"
	{RET(t_2box_open);}
#line 1370 "_mwscan.cc"
yy252:
	yych = *++YYCURSOR;
	if(yych == 't') goto yy275;
	goto yy73;
yy253:
	yych = *++YYCURSOR;
	if(yych == 't') goto yy267;
	goto yy73;
yy254:
	yych = *++YYCURSOR;
	if(yych != 'a') goto yy73;
	yych = *++YYCURSOR;
	if(yych != 'i') goto yy73;
	yych = *++YYCURSOR;
	if(yych != 'l') goto yy73;
	yych = *++YYCURSOR;
	if(yych != 't') goto yy73;
	yych = *++YYCURSOR;
	if(yych != 'o') goto yy73;
	yych = *++YYCURSOR;
	if(yych != ':') goto yy73;
	yych = *++YYCURSOR;
	if(yych == '@') goto yy73;
	goto yy262;
yy261:
	++YYCURSOR;
	yych = *YYCURSOR;
yy262:
	if(yych <= '9') {
		if(yych <= '\'') {
			if(yych == '!') goto yy261;
			if(yych <= '"') goto yy73;
			goto yy261;
		} else {
			if(yych <= ')') goto yy73;
			if(yych == ',') goto yy73;
			goto yy261;
		}
	} else {
		if(yych <= '?') {
			if(yych == '=') goto yy261;
			if(yych <= '>') goto yy73;
			goto yy261;
		} else {
			if(yych <= 'Z') {
				if(yych >= 'A') goto yy261;
			} else {
				if(yych <= ']') goto yy73;
				if(yych <= '~') goto yy261;
				goto yy73;
			}
		}
	}
	yych = *++YYCURSOR;
	if(yych <= '@') {
		if(yych <= '.') {
			if(yych <= ',') goto yy73;
		} else {
			if(yych <= '/') goto yy73;
			if(yych >= ':') goto yy73;
		}
	} else {
		if(yych <= '_') {
			if(yych <= 'Z') goto yy264;
			if(yych <= '^') goto yy73;
		} else {
			if(yych <= '`') goto yy73;
			if(yych >= '{') goto yy73;
		}
	}
yy264:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych <= '@') {
		if(yych <= '.') {
			if(yych >= '-') goto yy264;
		} else {
			if(yych <= '/') goto yy266;
			if(yych <= '9') goto yy264;
		}
	} else {
		if(yych <= '_') {
			if(yych <= 'Z') goto yy264;
			if(yych >= '_') goto yy264;
		} else {
			if(yych <= '`') goto yy266;
			if(yych <= 'z') goto yy264;
		}
	}
yy266:
#line 217 "_mwscan.re"
	{RET(t_urllink);}
#line 1463 "_mwscan.cc"
yy267:
	yych = *++YYCURSOR;
	if(yych != 'p') goto yy73;
	yych = *++YYCURSOR;
	if(yych != ':') goto yy73;
	yych = *++YYCURSOR;
	if(yych != '/') goto yy73;
	yych = *++YYCURSOR;
	if(yych != '/') goto yy73;
	yych = *++YYCURSOR;
	if(yych <= '=') {
		if(yych <= '&') {
			if(yych <= '"') goto yy73;
			if(yych == '%') goto yy73;
		} else {
			if(yych <= '\'') goto yy73;
			if(yych <= ':') goto yy272;
			if(yych <= '<') goto yy73;
		}
	} else {
		if(yych <= '_') {
			if(yych <= '>') goto yy73;
			if(yych <= 'Z') goto yy272;
			if(yych <= '^') goto yy73;
		} else {
			if(yych <= '{') {
				if(yych <= '`') goto yy73;
			} else {
				if(yych <= '|') goto yy73;
				if(yych >= 0x007F) goto yy73;
			}
		}
	}
yy272:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych <= '=') {
		if(yych <= '&') {
			if(yych <= '"') goto yy274;
			if(yych != '%') goto yy272;
		} else {
			if(yych <= '\'') goto yy274;
			if(yych <= ':') goto yy272;
			if(yych >= '=') goto yy272;
		}
	} else {
		if(yych <= '_') {
			if(yych <= '>') goto yy274;
			if(yych <= 'Z') goto yy272;
			if(yych >= '_') goto yy272;
		} else {
			if(yych <= '{') {
				if(yych >= 'a') goto yy272;
			} else {
				if(yych <= '|') goto yy274;
				if(yych <= '~') goto yy272;
			}
		}
	}
yy274:
#line 219 "_mwscan.re"
	{RET(t_urllink);}
#line 1526 "_mwscan.cc"
yy275:
	yych = *++YYCURSOR;
	if(yych != 't') goto yy73;
	yych = *++YYCURSOR;
	if(yych != 'p') goto yy73;
	yych = *++YYCURSOR;
	if(yych == ':') goto yy279;
	if(yych != 's') goto yy73;
	yych = *++YYCURSOR;
	if(yych != ':') goto yy73;
yy279:
	yych = *++YYCURSOR;
	if(yych != '/') goto yy73;
	yych = *++YYCURSOR;
	if(yych != '/') goto yy73;
	yych = *++YYCURSOR;
	if(yych <= '_') {
		if(yych <= ':') {
			if(yych <= '&') {
				if(yych == '#') goto yy282;
				if(yych <= '$') goto yy73;
			} else {
				if(yych <= '\'') goto yy73;
				if(yych == '*') goto yy73;
			}
		} else {
			if(yych <= '?') {
				if(yych == '=') goto yy282;
				if(yych <= '>') goto yy73;
			} else {
				if(yych <= '@') goto yy73;
				if(yych <= 'Z') goto yy282;
				if(yych <= '^') goto yy73;
			}
		}
	} else {
		if(yych <= 0x00D6) {
			if(yych <= '~') {
				if(yych <= '`') goto yy73;
				if(yych <= 'z') goto yy282;
				if(yych <= '}') goto yy73;
			} else {
				if(yych == 0x00C4) goto yy282;
				if(yych <= 0x00D5) goto yy73;
			}
		} else {
			if(yych <= 0x00E4) {
				if(yych == 0x00DC) goto yy282;
				if(yych <= 0x00E3) goto yy73;
			} else {
				if(yych <= 0x00F6) {
					if(yych <= 0x00F5) goto yy73;
				} else {
					if(yych != 0x00FC) goto yy73;
				}
			}
		}
	}
yy282:
	++YYCURSOR;
	yych = *YYCURSOR;
	if(yych <= '_') {
		if(yych <= ':') {
			if(yych <= '&') {
				if(yych == '#') goto yy282;
				if(yych >= '%') goto yy282;
			} else {
				if(yych <= '\'') goto yy284;
				if(yych != '*') goto yy282;
			}
		} else {
			if(yych <= '?') {
				if(yych == '=') goto yy282;
				if(yych >= '?') goto yy282;
			} else {
				if(yych <= '@') goto yy284;
				if(yych <= 'Z') goto yy282;
				if(yych >= '_') goto yy282;
			}
		}
	} else {
		if(yych <= 0x00D6) {
			if(yych <= '~') {
				if(yych <= '`') goto yy284;
				if(yych <= 'z') goto yy282;
				if(yych >= '~') goto yy282;
			} else {
				if(yych == 0x00C4) goto yy282;
				if(yych >= 0x00D6) goto yy282;
			}
		} else {
			if(yych <= 0x00E4) {
				if(yych == 0x00DC) goto yy282;
				if(yych >= 0x00E4) goto yy282;
			} else {
				if(yych <= 0x00F6) {
					if(yych >= 0x00F6) goto yy282;
				} else {
					if(yych == 0x00FC) goto yy282;
				}
			}
		}
	}
yy284:
#line 221 "_mwscan.re"
	{RET(t_urllink);}
#line 1633 "_mwscan.cc"
}
#line 268 "_mwscan.re"

}


// Python entry point: scan(text) -> list of (type, start, len) tuples.
//
// `text` is converted with PyUnicode_FromObject(); a TypeError is raised when
// that fails.  The buffer is then tokenized with the GIL released (the
// scanner touches no Python objects) and the collected tokens are returned as
// a new list of 3-int tuples.  Scanning stops at the first NUL character,
// because Scanner::scan() returns t_end (0) there.
//
// NOTE(review): the scanner relies on the unicode buffer being NUL-terminated;
// confirm this holds for every supported Python version.
//
// Returns a new reference, or NULL with a Python exception set.
PyObject *py_scan(PyObject *self, PyObject *args) 
{
	PyObject *arg1;
	if (!PyArg_ParseTuple(args, "O:mwscan.scan", &arg1)) {
		return 0;
	}
	PyUnicodeObject *unistr = (PyUnicodeObject*)PyUnicode_FromObject(arg1);
	if (unistr == NULL) {
		PyErr_SetString(PyExc_TypeError,
				"parameter cannot be converted to unicode in mwscan.scan");
		return 0;
	}

	Py_UNICODE *start = unistr->str;
	Py_UNICODE *end = start+unistr->length;

	Scanner scanner (start, end);

	// A C++ exception (the token vector failing to grow) must not unwind
	// through the interpreter's C frames, least of all with the GIL released,
	// so catch it here and report it once the GIL is held again.
	bool scan_failed = false;
	Py_BEGIN_ALLOW_THREADS
	try {
		while (scanner.scan()) {
		}
	} catch (...) {
		scan_failed = true;
	}
	Py_END_ALLOW_THREADS

	// Tokens hold offsets only, so the string is no longer needed.
	Py_DECREF(unistr);

	if (scan_failed) {
		return PyErr_NoMemory();
	}

	// return PyList_New(0); // uncomment to see timings for scanning

	int size = scanner.tokens.size();
	PyObject *result = PyList_New(size);
	if (!result) {
		return 0;
	}

	for (int i=0; i<size; i++) {
		const Token &t = scanner.tokens[i];
		PyObject *item = Py_BuildValue("iii", t.type, t.start, t.len);
		if (!item) {
			// Never hand out a list with NULL slots; releasing the partly
			// filled list is safe and the exception is already set.
			Py_DECREF(result);
			return 0;
		}
		PyList_SET_ITEM(result, i, item);  // steals the reference to item
	}

	return result;
}



// Method table of the _mwscan module: a single function, scan(text).
static PyMethodDef module_functions[] = {
	{"scan", (PyCFunction)py_scan, METH_VARARGS, "scan(text)"},
	{NULL, NULL, 0, NULL}  // sentinel entry terminating the table
};



// Module initialisation function for "_mwscan".  Declared with C linkage so
// the symbol name is not mangled.
extern "C" DL_EXPORT(void) init_mwscan();

DL_EXPORT(void) init_mwscan()
{
	// The module object returned by Py_InitModule is not needed here.
	(void) Py_InitModule("_mwscan", module_functions);
}
