View Single Post
Old 04-18-2009, 01:57 AM   #97
ilovejedd
hopeless n00b
ilovejedd ought to be getting tired of karma fortunes by now.ilovejedd ought to be getting tired of karma fortunes by now.ilovejedd ought to be getting tired of karma fortunes by now.ilovejedd ought to be getting tired of karma fortunes by now.ilovejedd ought to be getting tired of karma fortunes by now.ilovejedd ought to be getting tired of karma fortunes by now.ilovejedd ought to be getting tired of karma fortunes by now.ilovejedd ought to be getting tired of karma fortunes by now.ilovejedd ought to be getting tired of karma fortunes by now.ilovejedd ought to be getting tired of karma fortunes by now.ilovejedd ought to be getting tired of karma fortunes by now.
 
ilovejedd's Avatar
 
Posts: 5,136
Karma: 19597086
Join Date: Jan 2009
Location: in the middle of nowhere
Device: PW4, PW3, Libra H2O, iPad 10.5, iPad 11, iPad 12.9
Grr... FanFiction.Net enjoys making my life miserable. They changed the page layout again. Hopefully, it'll stay the same for some time.

Here's the updated source. Caveat: I didn't have the inclination to look for one of the cleaned-up versions, so this one is probably full of stray code that I use for testing. Oh well, it works for me, though. For anyone using this with Calibre recipes, I've also updated the "print version" generator on utterlyinsane.org.
PHP Code:
<?php

/**
 * Describes this story source.
 *
 * @return string One-line description, prefixed with a tab character.
 */
function ffnet_source_info() {
    $description = "\tFetches stories from fanfiction.net";

    return $description;
}

/**
 * Downloads a story from fanfiction.net.
 *
 * The first chapter page is always fetched and handed to ffnet_get_meta().
 * Unless $meta is truthy, every remaining chapter listed in the metadata is
 * fetched too, and each page is reduced to the markup found between the
 * "<!-- start story -->" and "<!-- end story -->" markers.
 *
 * @param mixed $storyid Story id, interpolated into the "/s/<id>/<chapter>" URL.
 * @param bool  $meta    When truthy, fetch metadata only: just page 1 is
 *                       downloaded and it is left as the raw response.
 *
 * @return array Keys: 'source' (host name), 'pages' (chapter number => page
 *               content) and 'meta' (result of ffnet_get_meta()). A page is
 *               null when the story markers are not found in it.
 */
function ffnet_get_story($storyid, $meta = false) {
    $host = 'www.fanfiction.net';
    $ffurl = 'http://' . $host;

    $story = array(
        'source' => $host,
        'pages'  => array(),
    );

    //get initial info
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_USERAGENT, 'SIRG/0.1 (Stanza iPhone RSS Generator)');
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
    curl_setopt($ch, CURLOPT_URL, "$ffurl/s/$storyid/1");
    // curl_exec() returns false on failure; the value is stored as-is.
    $story['pages'][1] = curl_exec($ch);

    //extract metadata
    $story['meta'] = ffnet_get_meta($story['pages'][1], $meta);

    if ($meta) {
        // Metadata-only request: no further downloads, no page cleaning.
        return $story;
    }

    //fetch remaining pages
    if (isset($story['meta']['chapters'])) {
        foreach ($story['meta']['chapters'] as $key => $chaptitle) {
            if (!isset($story['pages'][$key])) {
                curl_setopt($ch, CURLOPT_URL, "$ffurl/s/$storyid/$key");
                $story['pages'][$key] = curl_exec($ch);
            }
        }
    }
    else {
        // No chapter list found: treat the story as a single chapter
        // named after the story title.
        $story['meta']['chapters'] = array(1 => $story['meta']['title']);
    }

    //clean pages
    // Iterate by key rather than by reference so no dangling reference is
    // left pointing at the last element of the returned array.
    foreach ($story['pages'] as $num => $html) {
        $body = array();
        $found = preg_match("/<!-- start story -->(.+)<!-- end story -->/Usi", (string) $html, $body);
        $story['pages'][$num] = $found ? $body[1] : null;
    }

    return $story;
}

/**
 * Returns capture group $index of a preg_match() result, or null when the
 * group is absent (for example because the pattern did not match at all).
 *
 * @param array $matches Match array filled in by preg_match().
 * @param int   $index   Capture group number.
 *
 * @return string|null
 */
function _ffnet_match_group($matches, $index) {
    return isset($matches[$index]) ? $matches[$index] : null;
}

/**
 * Extracts story metadata from the HTML of a fanfiction.net chapter page.
 *
 * Every field is scraped with a regular expression; a field whose pattern
 * does not match is returned as null instead of raising an undefined-offset
 * notice.
 *
 * NOTE(review): the patterns match the "»" separator as raw bytes, so this
 * file's encoding presumably has to agree with the fetched page's encoding.
 *
 * @param string|false $page       Raw page HTML (false/null is treated as "").
 * @param bool         $nochapters When truthy, 'chapters' is set to the number
 *                                 of the last chapter listed in the navigation
 *                                 drop-down; otherwise it is an array mapping
 *                                 chapter number => chapter title.
 *
 * @return array Keys: title, author, category, crosscat, rating, language,
 *               genre, date_pub, date_update, complete, ficstatus, id,
 *               category2, genre2, char, summary and, only when the chapter
 *               navigation drop-down is present, chapters.
 */
function ffnet_get_meta($page, $nochapters) {
    $page = (string) $page;
    $meta = array();

    //category and title
    $heading = array();
    $tag = null; // stays null for non-crossover stories
    if (preg_match('/.*<a href.+>.+Crossover<\/a> »/Ui', $page)) {
        preg_match('/.*<a href.+>(.+) Crossover<\/a> » <b>(.+)<\/b>/Ui', $page, $heading);
        // "A and B" becomes the category with commas stripped, and the
        // crossover tag list "A,B".
        $category = str_ireplace(',', '', (string) _ffnet_match_group($heading, 1));
        $tag = str_ireplace(' and ', ',', $category);
    }
    else {
        preg_match('/.+ » <a href=.+>(.+)<\/a> » <b>(.+)<\/b>/Ui', $page, $heading);
        $category = _ffnet_match_group($heading, 1);
    }

    //author
    $author = array();
    preg_match("/<a href='\/u\/[0-9]+\/.+'>(.+)<\/a>/Ui", $page, $author);

    $meta['title'] = _ffnet_match_group($heading, 2);
    $meta['author'] = _ffnet_match_group($author, 1);
    $meta['category'] = $category;
    $meta['crosscat'] = $tag;

    //rating, lang, category
    // The /U modifier makes every quantifier lazy, so a trailing ([0-9]+)
    // would stop after the first digit of the id. The (?![0-9]) lookahead
    // forces the group to extend over the whole number.
    $info = array();
    preg_match("/Rated: <a href='http:\/\/www.fictionratings.com\/guide.php' target='rating'> (.+)<\/a> - (.+) - (.+) - Reviews: <a href='.+'>[0-9]+<\/a>( - Updated: ([0-9-]+))? - Published: ([0-9-]+) (- (Complete)+ )?- id:([0-9]+)(?![0-9])/Ui", $page, $info);
    $isComplete = (_ffnet_match_group($info, 8) === 'Complete');

    $meta['rating'] = _ffnet_match_group($info, 1);
    $meta['language'] = _ffnet_match_group($info, 2);
    $meta['genre'] = _ffnet_match_group($info, 3);
    $meta['date_pub'] = _ffnet_match_group($info, 6);
    $meta['date_update'] = _ffnet_match_group($info, 5);
    $meta['complete'] = $isComplete;
    $meta['ficstatus'] = $isComplete ? 'Complete' : 'In-Progress';
    $meta['id'] = _ffnet_match_group($info, 9);

    //description
    $desc = array();
    preg_match('/<meta name="description" content="(.+), (.+)(, pairing: (.+))?,  (.+)">/Ui', $page, $desc);
    $meta['category2'] = _ffnet_match_group($desc, 1);
    $meta['genre2'] = _ffnet_match_group($desc, 2);
    $meta['char'] = _ffnet_match_group($desc, 4);
    // iconv() returns false when the conversion fails; that value is kept.
    $meta['summary'] = iconv('UTF-8', 'ISO-8859-15//TRANSLIT//IGNORE', (string) _ffnet_match_group($desc, 5));

    //chapters
    $nav = array();
    if (preg_match("/<SELECT title='chapter navigation' Name=chapter onChange=\".+\">(<option.+)<\/select>/Ui", $page, $nav)) {
        $options = array();
        preg_match_all("/<option  value=[0-9]+ (selected)?>([0-9]+). (.+)(?=<option|$)/Ui", $nav[1], $options);

        if ($nochapters) {
            // Metadata-only mode keeps just the last chapter number listed.
            $numbers = $options[2];
            $chapters = $numbers ? end($numbers) : null;
        }
        else {
            $titles = array();
            foreach ($options[2] as $key => $chapnum) {
                $titles[$chapnum] = $options[3][$key];
            }
            // null (not an empty array) when no option matched, so callers
            // testing isset() see "no chapter list".
            $chapters = $titles ? $titles : null;
        }

        $meta['chapters'] = $chapters;
    }

    return $meta;
}

?>
ilovejedd is offline   Reply With Quote