# ruby_arXiv_paging_example.rb
#
# This sample script illustrates paging of arXiv api 
# results.  In order to play nice with the api, we 
# recommend that you wait 3 seconds between api calls.
#
# Please see the documentation at 
# http://export.arxiv.org/api_help/docs/user-manual.html
# for more information, or email the arXiv api 
# mailing list at arxiv-api@googlegroups.com.
#
# net/http and uri are part of the default ruby distribution
# ruby gems can be gotten from http://www.rubygems.org/
# feedtools can be gotten via
#  gem install feedtools
#
# Author: Julius B. Lucks
#
# This is free software.  Feel free to do what you want
# with it, but please play nice with the arXiv API!

# Library names passed to require are looked up as file names, so the
# case matters on case-sensitive file systems: the library is
# 'rubygems', and 'RubyGems' raises a LoadError there.
require 'rubygems'
require 'feed_tools'
require 'net/http'
require 'uri'

# Feedtools will automatically sort the entries by date.
# Set :entry_sorting_property to nil to instead leave the
# default entry order.
FeedTools.configurations[:entry_sorting_property] = nil

# Endpoint of the arXiv api; the query string is appended to it
base_url = 'http://export.arxiv.org/api/query?'

# Search parameters
search_query          = 'all:biophysics' # look for "biophysics" in all fields
start                 = 0                # index of the first result to fetch
total_results         = 20               # number of results wanted overall
results_per_iteration = 5                # number of results per api call
wait_time             = 3                # seconds to pause between api calls

puts "Searching arXiv for #{search_query}"

# Page through the search results, one api call per page.
#
# Uses the settings defined above: base_url, search_query, start,
# total_results, results_per_iteration and wait_time.

# A page size of zero (or less) would never advance through the
# results, so refuse it instead of calling the api forever.
unless results_per_iteration > 0
  raise ArgumentError, 'results_per_iteration must be positive'
end

# Index of the last result wanted.  It is relative to start, so that
# a non-zero start still yields total_results entries.
last_index = start + total_results - 1

start.step(last_index, results_per_iteration) do |offset|
  # The final page is shorter when total_results is not an exact
  # multiple of results_per_iteration.
  page_size = [results_per_iteration, last_index - offset + 1].min

  # Both bounds are inclusive, so consecutive pages do not overlap
  puts "Results #{offset} - #{offset + page_size - 1}"

  # Construct the query with the search parameters.  search_query is
  # inserted verbatim, so it must already be url-safe (for example
  # 'all:electron+AND+all:proton').
  query = "search_query=#{search_query}&start=#{offset}&max_results=#{page_size}"

  # perform a GET request and parse the feed all in one go
  feed = FeedTools::Feed.open(base_url + query)

  # Run through each entry, and print out information
  feed.items.each do |item|
    puts 'arxiv-id: %s' % item.id.split('/abs/')[-1]
    puts 'Title: %s' % item.title

    # FeedTools v0.2.26 only allows one author
    puts 'First Author: %s' % item.author.name
  end

  # Remember to play nice and sleep a bit before you call the api
  # again!  After the last page there is no further call to wait for.
  more_pages = offset + page_size <= last_index
  if more_pages
    puts 'Sleeping for %i seconds' % wait_time
    sleep(wait_time)
  end
end