<?php
/*
*  Gmail RSS feed generator 0.2 - June 27th, 2004
*    Google PHP new mail notification
*        Source based off of Ilia Alshanetsky's source at: 
* http://ilia.ws/archives/15_Gmail_as_an_online_backup_system.html
*		 Added gmail_view_inbox, gmail_rss functions to review and parse the 
* text and generate a RSS feed of the number of unread messages
* Todo: 
* Parse the JS listing and get the titles of unread conversations. 
* (sort of done. buggy tho)
* Parse and display contact information (I don't think this is very important)
* Generate valid links in the RSS (tricky, but not impossible)
* Get Gmail username/pwd information off a cookie or some such mechanism (I hate
* the raw pwd in the script)
* 
* Disclaimer and legalese: Please note that the following are applicable
* This piece of software was written in the author's spare time as a proof of 
* concept. No warranty, either expressed or implied is made for
* performance, merchantability or fitness for a particular purpose. The author 
* specifically does not take any responsibility for any or all consequences 
* arising from the use (improper or otherwise) including but not confined to 
* server meltdown, loss or suspension of Gmail account, 
* compromised email passwords and/or World War 3. Use this script at your own 
* risk.
*
*
* If you can't read and understand all the code contained within, DO NOT RUN 
* THIS SCRIPT.
* Please also note that automated scraping is against the Gmail TOS. If you run
* this script regularly and are noticed, you will likely be banned.
* Set your feedreader to check no more often than once every 5 minutes or so
* (that's a recommendation only).
* IMPORTANT: Please note that protecting this script on your web server is 
* essential. 
* (else anyone can point a browser to the URL and find out your mail 
* information).
*
*
* Feel free to modify the script as you wish (but give credit to Ilia, please) 
* or submit suggestions / bugs to me via email.
* If you do make any changes, letting me know is appreciated, but not required. 
*
* Gmail is a beta and the code is still liable to change. Don't expect anything
* to keep on working indefinitely. It probably won't.
* Thimal Jayasooriya (my email is <first name> @gmail.com)
*/

/* These four variables are the only ones you should change in this script */

$username = 'xxxxxxxxxxxxxxx';
// Gmail user name. The "@gmail.com" suffix is optional. Example: "lucifer"
$pwd = 'xxxxxxxxxxxxx';
// Password for Gmail. Yes, it is stored in plaintext, so protect this file.
$display_name = "Joe Random GMail user";
// Display name for the RSS feed. Just a cosmetic detail.
$user_agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4b) Gecko/20040612 Mozilla Firebird/0.9";
// Browser User-Agent string sent with every request, in case someone is checking.
// As configured, the script says it is Firebird 0.9 running on Linux.

/* end four configuration variables */

/* System variables. Leave 'em alone */
$conversation_store = array();
// Filled by match_messages(): one array("sender" => ..., "title" => ...) per
// unread conversation.
$stats = array();
// Filled by parse_page(): "inbox_count", "unread" and "use_percent".
/* End System variables  */

function get_cookies($header)
// Collects the name=value pair of every "Set-Cookie:" line found in $header
// and joins them into one string usable as a CURLOPT_COOKIE value.
//
// $header - raw HTTP response text to scan.
// Returns the pairs joined by ';' without a trailing separator, or '' when
// no usable cookie was found.
// From Ilia's code.
{
    preg_match_all('!Set-Cookie: ([^;\s]+)($|;)!', $header, $match);

    $pairs = array();
    foreach ($match[1] as $val) {
        // Skip cookies with an empty name (the pair starts with '=').
        // Square brackets are required here: the curly-brace offset form
        // $val{0} is no longer accepted by PHP 8.
        if ($val[0] == '=') {
            continue;
        }
        $pairs[] = $val;
    }
    // implode() always yields a string, unlike substr('', 0, -1), which
    // returned false on older PHP versions.
    return implode(';', $pairs);
}

function gmail_login($login, $pass)
// Logs in to Gmail and returns the cookie string to send with later requests.
//
// Step 1 POSTs the credentials to ServiceLoginBoxAuth and collects the cookies
// from the response headers, plus the "GV" cookie whose value the returned
// page assigns in JavaScript (var cookieVal).
// Step 2 presents those cookies to CheckCookie. When that response sets
// cookies of its own, they are returned (with the GV cookie appended);
// otherwise the cookies from step 1 are returned.
//
// $login - Gmail user name
// $pass  - Gmail password
// Returns a cookie string of the form "name=value;name=value; GV=value".
// Based on Ilia's original; the user agent comes from the global setting.
{
    global $user_agent;
    $postdata = "service=mail&Email=".urlencode($login)."&Passwd=".urlencode($pass)."&null=Sign%20in";

    $c = curl_init();
    curl_setopt($c, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($c, CURLOPT_URL, "https://www.google.com/accounts/ServiceLoginBoxAuth");
    curl_setopt($c, CURLOPT_SSL_VERIFYHOST,  2);
    curl_setopt($c, CURLOPT_USERAGENT, $user_agent);
    curl_setopt($c, CURLOPT_POST, 1);
    curl_setopt($c, CURLOPT_HEADER, 1);
    curl_setopt($c, CURLOPT_POSTFIELDS, $postdata);
    // The password travels over this connection, so the server certificate
    // has to be verified; with verification off anyone in the network path
    // could impersonate the login server.
    curl_setopt($c, CURLOPT_SSL_VERIFYPEER, TRUE);
    $result = curl_exec($c);
    // Ask cURL where the header block ends instead of searching for "\n\n":
    // HTTP headers are terminated by CRLF pairs, which that search misses.
    $header_size = curl_getinfo($c, CURLINFO_HEADER_SIZE);
    curl_close($c);
    if (!is_string($result)) {
        // Transfer failed; carry on with an empty response so that the
        // function still returns a string.
        $result = '';
        $header_size = 0;
    }

    $cookies = get_cookies(substr($result, 0, $header_size));
    /* js cookie */
    $gv_value = '';
    if (preg_match('!var cookieVal= "([^"]+)";!', $result, $match)) {
        $gv_value = $match[1];
    }
    $gv = '; GV=' . $gv_value;
    $cookies .= $gv;

    $c = curl_init();
    curl_setopt($c, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($c, CURLOPT_URL, "https://www.google.com/accounts/CheckCookie?service=mail&chtml=LoginDoneHtml");
    curl_setopt($c, CURLOPT_SSL_VERIFYHOST,  2);
    curl_setopt($c, CURLOPT_USERAGENT, $user_agent);
    curl_setopt($c, CURLOPT_HEADER, 1);
    curl_setopt($c, CURLOPT_COOKIE, $cookies);
    curl_setopt($c, CURLOPT_SSL_VERIFYPEER, TRUE);
    $ret = curl_exec($c);
    $ret_header_size = curl_getinfo($c, CURLINFO_HEADER_SIZE);
    curl_close($c);
    if (!is_string($ret)) {
        $ret = '';
        $ret_header_size = 0;
    }

    $data = get_cookies(substr($ret, 0, $ret_header_size));
    if (!$data) {
        // The second response set no cookies: keep the ones from the login.
        return $cookies;
    } else {
        return $data . $gv;
    }
}

function gmail_view_inbox($login, $pass)
// Authenticates through gmail_login(), then requests the inbox listing URL
// with the resulting cookies and hands back whatever curl_exec() produced.
// This is the last function that talks to the Gmail server; all later
// processing works on the returned text within the script.
//
// $login - Gmail user name
// $pass  - Gmail password
{
    global $user_agent;

    $session_cookie = gmail_login($login, $pass);

    $handle = curl_init();
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($handle, CURLOPT_URL, "http://gmail.google.com/gmail?search=inbox&view=tl&start=0&init=1");
    curl_setopt($handle, CURLOPT_COOKIE, $session_cookie);
    curl_setopt($handle, CURLOPT_USERAGENT, $user_agent);

    $inbox_page = curl_exec($handle);
    curl_close($handle);

    return $inbox_page;
}

function parse_page($returned_page) {
// Fills the global $stats array from the raw inbox page and passes every
// mail-listing fragment of the page to match_messages().
//
// $returned_page - page text as returned by gmail_view_inbox(); anything
//                  that is not a string (a failed fetch) is treated as an
//                  empty page.
//
// $stats keys written: "inbox_count", "unread" and "use_percent". A key is
// set to null when its marker is not found in the page. The original code
// read $matches[1] unconditionally, which raised an undefined-offset warning
// in that case.
//
// The regexps are rough and ready and lean on (.*); this can probably be
// made much faster.
    global $stats;

    $page = is_string($returned_page) ? $returned_page : '';

    // Note: the surrounding "[" and "]" are the regex delimiters here, not
    // literal brackets to be matched.
    $patterns = array(
        "inbox_count" => "[\"ts\",.*\"Inbox\",.*,(\d+)]",
        "unread"      => "[\"ds\",(\d+),.*]",
        "use_percent" => "[\"qu\",.*\"(\d+)%\".*]",
    );
    foreach ($patterns as $key => $regexp) {
        $matches = array();
        if (preg_match($regexp, $page, $matches) && isset($matches[1])) {
            $stats[$key] = $matches[1];
        } else {
            $stats[$key] = null;
        }
    }

    // find the conversations that have unread messages.
    $mail_contents = match_fragments($page);
    foreach ($mail_contents as $mail_frag) {
        // index 0 is the complete text of the matched fragment
        match_messages($mail_frag[0]);
    }
}

function gmail_rss($name) {
// Sends the text/xml content-type header and prints the feed built from the
// global $stats and $conversation_store arrays.
//
// $name - display name shown in the channel title and description.
//
// Every value interpolated into the document goes through htmlspecialchars()
// first: the display name, senders and conversation titles are free text,
// and a bare "&" or "<" in any of them would make the XML malformed.
// The charset passed is the single-byte one the XML declaration announces.
// Statistics that were never filled in are printed as empty strings.
// RSS, RDF, Atom.. I just don't know. My head hurts.
    global $stats;
    global $conversation_store;

    $safe_name = htmlspecialchars((string) $name, ENT_QUOTES, 'ISO-8859-1');
    $fields = array();
    foreach (array('unread', 'inbox_count', 'use_percent') as $key) {
        $fields[$key] = isset($stats[$key])
            ? htmlspecialchars((string) $stats[$key], ENT_QUOTES, 'ISO-8859-1')
            : '';
    }

    header("Content-type: text/xml ", true);
    // send the header and force replace of "text/html" if PHP is being naughty
    print "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n";
    print <<<EOF
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" 
xmlns="http://my.netscape.com/rdf/simple/0.9/">
  <channel>
  <copyright>Gmail and the Login user</copyright>
  <description>Gmail stats for $safe_name</description>
  <link>http://gmail.google.com/</link>
  <title>GMail stats for $safe_name</title>
<image>
  <link>http://gmail.google.com</link>
  <title>Gmail Stats</title>
  <url>http://gmail.google.com/gmail/help/images/logo.gif</url>
  <height>31</height>
  <width>88</width>
</image>

<item>
  <title>Unread Messages</title>
  <description>Total unread messages: $fields[unread]</description>
</item>

<item>
<title>Total Messages</title>
<description>Total messages in inbox: $fields[inbox_count]</description>
</item>
<item>
<title>Mailbox utilization</title>
<description>Mailbox utilization percentage: $fields[use_percent]% (from 1000mb)</description>
</item>
EOF;

    // One <item> per stored conversation; nothing is printed while the
    // store is still empty (or is not an array at all).
    if (is_array($conversation_store)) {
        foreach ($conversation_store as $cv) {
            $sender = isset($cv['sender'])
                ? htmlspecialchars((string) $cv['sender'], ENT_QUOTES, 'ISO-8859-1')
                : '';
            $title = isset($cv['title'])
                ? htmlspecialchars((string) $cv['title'], ENT_QUOTES, 'ISO-8859-1')
                : '';
            print "\n<item> \n <title> Mail from $sender </title>\n";
            print "<description>Unread mail from $sender in conversation $title</description>\n";
            print "</item>\n\n";
        }
    }
    print <<<EOF
	</channel>
</rdf:RDF>
EOF;
}

function match_fragments($file_slurp) {
// Locates every mail-listing block of the form D(["t", ... ]); in the page
// text. The result is in PREG_SET_ORDER: one entry per block, where index 0
// holds the whole matched block and index 1 the text captured after the
// opening D(["t", marker.
	$block_pattern = '/\nD\(\[\"t\",(.*)\n\]\n\);\n/UXsx';
	preg_match_all($block_pattern, $file_slurp, $blocks, PREG_SET_ORDER);
	return $blocks;
}

function scrub_string($unclean_string) {
// Cleans a display string in two steps: the markup tags the web UI needs
// (spans and the like) are stripped first, then backslash escaping is
// removed from what is left.
	$without_tags = strip_tags($unclean_string);
	$unescaped = stripslashes($without_tags);
	return $unescaped;
}

function contains_email_address($test_string) {
// Crude email detection: looks for the marker "_user_" followed by word
// characters and an "@". Returns the preg_match() result unchanged
// (1 when the marker is present, 0 when it is not).
	$marker_pattern = '/_user_\w+@.*/';
	$found = preg_match($marker_pattern, $test_string);
	return $found;
}

function get_printable_display($string) {
// Strings without an email-address marker are handed back untouched;
// strings that carry one are scrubbed so only the readable name remains.
	if (contains_email_address($string) != 1) {
		return $string;
	}
	return scrub_string($string);
}

function match_messages($mail_fragment) {
// Takes a fragment identified by match_fragments, separates it into
// individual conversations and appends every unread one to the global
// $conversation_store as array("sender" => ..., "title" => ...).
//
// $mail_fragment - text of one D(["t", ... ]); block.
//
// Fields are obtained by splitting each conversation on ','. This is buggy
// (a comma inside the message summary is tokenized as well) but fast.
// Fields missing from a short fragment are treated as empty strings instead
// of being read as undefined offsets.
	global $conversation_store;
	preg_match_all('/D\(\[\"t\",(.*)\n\]\n\);/UXsx', $mail_fragment, $mail_info, PREG_SET_ORDER);
	foreach ($mail_info as $v) {
		// each piece of the split constitutes one complete conversation
		$field_fragments = preg_split('/\n,\[/sx', $v[1]);
		if (!is_array($field_fragments)) {
			// the split failed; nothing to tokenize for this block
			continue;
		}
		foreach ($field_fragments as $fragment) {
			$message_fields = explode(',', $fragment);
			// In the message array, 1 = unread mail, 0 = all mail in
			// conversation (or single mail) read. So speaketh Gmail.
			// The comparison stays loose because the field is a string.
			$unread_flag = isset($message_fields[1]) ? $message_fields[1] : null;
			if ($unread_flag == 1) {
				$sender_a = isset($message_fields[4]) ? $message_fields[4] : '';
				$sender_b = isset($message_fields[5]) ? $message_fields[5] : '';
				$title_a = isset($message_fields[6]) ? $message_fields[6] : '';
				$title_b = isset($message_fields[7]) ? $message_fields[7] : '';

				$conversation_var = array();
				$conversation_var["sender"] = get_printable_display($sender_a) . " or " . get_printable_display($sender_b);
				$conversation_var["title"] = scrub_string($title_a) . " or " . scrub_string($title_b);
				$conversation_store[] = $conversation_var;
			}
		}
	}
}

// This is the stuff that actually GETS executed.
// gmail_view_inbox calls login, then gets the inbox contents and
// returns them to parse_page.
// parse_page uses a few regexes to populate the global $stats array and
// the global $conversation_store.
// gmail_rss reads both globals to generate a (hopefully) valid RSS feed;
// the display name is the only parameter it declares.

parse_page(gmail_view_inbox($username, $pwd));
gmail_rss($display_name);
?> 
