<?php
# php_arXiv_paging_example.php
#
# This sample script illustrates paging of arXiv api
# results.  In order to play nice with the api, we
# recommend that you wait 3 seconds between api calls.
#
# Please see the documentation at
# http://export.arxiv.org/api_help/docs/user-manual.html
# for more information, or email the arXiv api
# mailing list at arxiv-api@googlegroups.com.
#
# SimplePie can be downloaded from http://simplepie.org/
#
# Author: Julius B. Lucks, Bill Flanagan
#
# This is free software.  Feel free to do what you want
# with it, but please play nice with the arXiv API!
include_once('./SimplePie/simplepie.inc');

# Line terminator appended to every printed line: an HTML line break
# followed by a newline, so the output reads well both in a browser
# and in the raw page source.
define ('EOL', "<br />\n");

# atom namespace definition
$atom_ns = "http://www.w3.org/2005/Atom";

# Base api query url
$base_url = 'http://export.arxiv.org/api/query?';

# Search parameters
$search_query = 'all:biophysics'; # search for "biophysics" in all fields
$start = 0;                       # start at the first result
$total_results = 20;              # want 20 total results
$results_per_iteration = 5;       # 5 results at a time
$wait_time = 3;                   # number of seconds to wait between calls

# The query is echoed into HTML, so escape it first: a query that
# contains quotes, '&' or '<' would otherwise corrupt the markup.
print("<b>Searching arXiv for ".htmlspecialchars($search_query, ENT_QUOTES, 'UTF-8')."</b>".EOL.EOL);

# Loop through each page of results, printing out the
# article ids, titles, publication dates, and authors

# Index one past the last result we want. Computing it from $start
# keeps the total at $total_results even when paging does not begin
# at the first result.
$end = $start + $total_results;

# Fetch and print one page per iteration. The bound is exclusive:
# with "<=" an extra page would be requested once $i reached the
# limit, returning more results than were asked for.
for ($i = $start; $i < $end; $i += $results_per_iteration) {
    # On the last page, only ask for the results that are still
    # missing so the total never exceeds $total_results.
    $page_size = min($results_per_iteration, $end - $i);
    $next = $i + $page_size;
    print("<b>Results ".$i." - ".$next."</b>".EOL);

    # Construct the query with the search parameters.
    # http_build_query() url-encodes each value, so search queries
    # containing spaces, quotes or '&' still yield a valid url.
    # The separator is passed explicitly so the result does not
    # depend on the arg_separator.output ini setting.
    $query = http_build_query(array(
        'search_query' => $search_query,
        'start'        => $i,
        'max_results'  => $page_size,
    ), '', '&');

    # perform a GET request using the $base_url and $query
    # parse the response using SimplePie.
    # SimplePie will automatically sort the entries by date
    # unless we explicitly turn this off
    $feed = new SimplePie($base_url.$query);
    $feed->enable_order_by_date(false);
    $feed->init();
    $feed->handle_content_type();

    foreach ($feed->get_items() as $entry) {

        # split the id line to get just the arxiv id.
        # explode() replaces split(), which was removed in PHP 7.
        # If the id has no '/abs/' part, fall back to the whole id
        # rather than reading an undefined array offset.
        $id_parts = explode('/abs/', $entry->get_id(), 2);
        $arxiv_id = isset($id_parts[1]) ? $id_parts[1] : $id_parts[0];
        print("arxiv-id: ".$arxiv_id.EOL);
        print("Title: ".$entry->get_title().EOL);

        # the published date is read straight from the atom element;
        # print an empty value if the entry does not carry one
        $published = $entry->get_item_tags($atom_ns,'published');
        $published_date = isset($published[0]['data']) ? $published[0]['data'] : '';
        print("Published: ".$published_date.EOL);

        # gather a list of authors; the is_array() check guards
        # against entries for which no author list is returned
        $authors = array();
        $entry_authors = $entry->get_authors();
        if (is_array($entry_authors)) {
            foreach ($entry_authors as $author) {
                $authors[] = $author->get_name();
            }
        }
        print("Authors: ".implode(', ', $authors).EOL.EOL);
    }

    # Remember to play nice and sleep a bit before you call
    # the api again! After the last page there is no further
    # call, so there is nothing to wait for.
    if ($next < $end) {
        print("<b>Sleeping for ".$wait_time." seconds</b>".EOL.EOL);
        sleep($wait_time);
    }
}
?>
