Paginate Observable results without recursion - RxJava

I've got a pretty standard API pagination problem which you can handle with some simple recursion. Here's a fabricated example:

/**
 * Starts a paginated scan from the beginning.
 *
 * <p>Delegates to {@code scanPage} with an absent start key and an empty
 * list of already-collected results.
 *
 * @return whatever {@code scanPage} produces for that initial state
 */
public Observable<List<Result>> scan() {
    final Optional<KEY> noStartKey = Optional.empty();
    final List<Result> nothingCollected = ImmutableList.of();
    return scanPage(noStartKey, nothingCollected);
}

/**
 * Fetches the page starting at {@code startKey} and either finishes or
 * recurses into the next page.
 *
 * <p>The current page's results are always appended to {@code results}
 * before deciding whether to stop, so the final page (the one without a
 * last key) is included in the emitted list, matching the imperative
 * variant of this scan.
 *
 * @param startKey key to start this page from; absent for the first page
 * @param results  results accumulated from the pages fetched so far
 * @return an observable emitting the accumulated results once a page
 *         without a last key has been reached
 */
private Observable<List<Result>> scanPage(Optional<KEY> startKey, List<Result> results) {
    return this.scanner.scan(startKey, LIMIT)
            .flatMap(page -> {
                // Accumulate first: the terminal page carries results too.
                final List<Result> accumulated = ImmutableList.<Result>builder()
                        .addAll(results)
                        .addAll(page.getResults())
                        .build();
                if (!page.getLastKey().isPresent()) {
                    // No further key: this was the last page.
                    return Observable.just(accumulated);
                }
                return scanPage(page.getLastKey(), accumulated);
            });
}

But this can obviously create a massive callstack. How can I do this imperatively but maintain the Observable stream?

Here's an imperative blocking example:

/**
 * Blocking variant of the scan: fetches page after page and collects every
 * page's results into one immutable list.
 *
 * <p>The first request is made with an absent key; each following request
 * uses the last key reported by the previous page. The loop ends after the
 * first page that reports no last key.
 *
 * @return the results of all fetched pages, in fetch order
 */
public List<Result> scan() {
    final ImmutableList.Builder<Result> collected = ImmutableList.builder();
    Optional<String> cursor = Optional.empty();

    while (true) {
        final Page current = this.scanner.scan(cursor);
        cursor = current.getLastKey();
        collected.addAll(current.getResults());
        if (!cursor.isPresent()) {
            // The page just processed was the last one.
            return collected.build();
        }
    }
}

Solution 1:

JohnWowUs' answer is great and helped me understand how to avoid the recursion effectively, but there were some points I was still confused about, so I'm posting my tweaked version.

Summary:

  • The individual pages are returned as a Single.
  • Use a Flowable to stream each of the items contained in the pages. This means callers to our function do not need to know about the individual pages and can just collect the contained items.
  • Use a BehaviorProcessor to start with the first page, and fetch each subsequent page once we have checked with the current page if the next page is available.
  • The key is that the call to processor.onNext(int) starts the next iteration.

This code depends on rxjava and reactive-streams.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.function.Function;

import io.reactivex.Flowable;
import io.reactivex.Single;
import io.reactivex.processors.BehaviorProcessor;

/**
 * Demonstrates non-recursive pagination: a processor issues page indices,
 * each index is turned into a fetched page, and each fetched page feeds the
 * next index back into the processor (or completes it).
 */
public class Pagination {

    // Sample paged data served by examplePageFetcher.
    private static final List<Page<String>> pages = new ArrayList<>(3);

    static {
        pages.add(new Page<>(Arrays.asList("one", "two"), Optional.of(1)));
        pages.add(new Page<>(Arrays.asList("three", "four"), Optional.of(2)));
        pages.add(new Page<>(Arrays.asList("five"), Optional.empty()));
    }

    /**
     * Fetches all pages and returns the items contained in those pages,
     * using the provided page fetcher function.
     *
     * <p>The pipeline is built inside {@code Flowable.defer} so that every
     * subscription gets its own processor. Without that, all subscribers
     * would share one processor, and a subscriber arriving after it had
     * completed would receive no items.
     *
     * @param fetchPage maps a page index to a {@code Single} producing that page
     * @param <T>       type of the items held by the pages
     * @return the items of every page, starting with page index 0
     */
    public static <T> Flowable<T> fetchItems(Function<Integer, Single<Page<T>>> fetchPage) {
        return Flowable.defer(() -> fetchItemsOnce(fetchPage));
    }

    // Builds one single-use pagination pipeline with its own index processor.
    private static <T> Flowable<T> fetchItemsOnce(Function<Integer, Single<Page<T>>> fetchPage) {
        // Processor issues page indices, starting with index 0
        BehaviorProcessor<Integer> processor = BehaviorProcessor.createDefault(0);
        // When an index number is issued, fetch the corresponding page
        return processor.concatMap(index -> fetchPage.apply(index).toFlowable())
                        // when returning the page, update the processor to get the next page (or stop)
                        .doOnNext(page -> {
                            if (page.hasNext()) {
                                processor.onNext(page.getNextPageIndex());
                            } else {
                                processor.onComplete();
                            }
                        })
                        // Flatten each page into its individual items
                        .concatMapIterable(Page::getElements);
    }

    /** Prints every item of the sample pages, one per line. */
    public static void main(String[] args) {
        fetchItems(Pagination::examplePageFetcher).subscribe(System.out::println);
    }

    // A function to fetch a page of our paged data
    private static Single<Page<String>> examplePageFetcher(int index) {
        return Single.just(pages.get(index));
    }

    /**
     * One page of elements plus the optional index of the page that follows it.
     *
     * @param <T> element type
     */
    static class Page<T> {
        private final List<T> elements;
        private final Optional<Integer> nextPageIndex;

        /**
         * @param elements      the elements held by this page
         * @param nextPageIndex index of the following page, or empty if this is the last page
         */
        public Page(List<T> elements, Optional<Integer> nextPageIndex) {
            this.elements = elements;
            this.nextPageIndex = nextPageIndex;
        }

        /** @return the elements held by this page */
        public List<T> getElements() {
            return elements;
        }

        /**
         * @return the index of the following page
         * @throws java.util.NoSuchElementException if there is no following page;
         *         check {@link #hasNext()} first
         */
        public int getNextPageIndex() {
            return nextPageIndex.get();
        }

        /** @return {@code true} if a following page index is present */
        public boolean hasNext() {
            return nextPageIndex.isPresent();
        }
    }
}

Output:

one
two
three
four
five

Solution 2:

It's not the most elegant of solutions, but you can use subjects and side effects. See the toy example below:

import rx.Observable;
import rx.Subscriber;
import java.util.ArrayList;
import java.util.List;
import java.util.HashMap;
import rx.subjects.*;

/**
 * Toy example of subject-driven pagination. Each page is a list whose first
 * element is the key of the next page (the empty string marks the end) and
 * whose remaining elements are the page's data.
 */
public class Pagination {
    static HashMap<String,ArrayList<String>> pages = new HashMap<String,ArrayList<String>>();

    public static void main(String[] args) throws InterruptedException {
        registerPage("default", "2", "Maths", "Chemistry");
        registerPage("2", "3", "Physics", "Biology");
        registerPage("3", "4", "Art");
        registerPage("4", "", "Geography");

        Observable<List<String>> paged = Observable.defer(() ->
        {
            System.out.println("Building Observable");
            // A replaying subject is used because the first key is pushed
            // before anything has subscribed to the pipeline built from it.
            ReplaySubject<String> nextKeys = ReplaySubject.<String>create(1);
            Observable<List<String>> pageStream = nextKeys.asObservable().concatMap(key ->
            {
                if (key.isEmpty()) {
                    // End marker: emit nothing and complete the key subject.
                    return Observable.<List<String>>empty().doOnCompleted(() -> nextKeys.onCompleted());
                }
                // Emit the page, then feed its first element back as the next key.
                return Observable.just(pages.get(key)).doOnNext(page -> nextKeys.onNext(page.get(0)));
            });
            nextKeys.onNext("default");
            return pageStream;
        });
        // Use this if you want to ensure work isn't done again
        Observable<List<String>> cached = paged.cache();
        cached.subscribe(page -> System.out.println("Sub 1 : " + page));
        cached.subscribe(page -> System.out.println("Sub 2 : " + page));
        // NOTE(review): nothing visible here schedules work on another thread;
        // the pause presumably just leaves room for asynchronous variants.
        Thread.sleep(2000L);
    }

    // Stores a page under the given key, containing the entries in the given order.
    private static void registerPage(String key, String... entries) {
        ArrayList<String> page = new ArrayList<String>();
        for (String entry : entries) {
            page.add(entry);
        }
        pages.put(key, page);
    }
}

Edited with improvements.

Solution 3:

Another approach is to use a token stream: fetch the data for the initial token, push the next token to the stream once fresh remote data has been obtained, and resubscribe until the token is empty:

 /**
  * Builds a stream of windows driven by a stream of tokens.
  *
  * <p>The token subject is seeded with the start token. Each (re)subscription
  * takes the first token the subject delivers, fetches the window for it via
  * {@code remoteData}, and pushes that window's token back into the subject.
  * After each completion the stream is repeated for as long as the tokens
  * observed by the repeat handler report {@code hasMore()}.
  *
  * @return the stream of fetched windows
  */
 public Observable<Window> paging() {
     Subject<Token, Token> tokens = BehaviorSubject.<Token>create().toSerialized();
     tokens.onNext(Token.startToken());

     return Observable.defer(() -> tokens.first().flatMap(this::remoteData))
             // Publish the token carried by the freshly fetched window.
             .doOnNext(fetched -> tokens.onNext(fetched.getToken()))
             // Resubscribe on completion until a token without more data shows up.
             .repeatWhen(completions -> completions.flatMap(__ -> tokens).takeWhile(Token::hasMore));
 }

The result is

Window{next token=Token{key='1'}, data='data for token: Token{key=''}'}
Window{next token=Token{key='2'}, data='data for token: Token{key='1'}'}
Window{next token=Token{key='3'}, data='data for token: Token{key='2'}'}
Window{next token=Token{key='4'}, data='data for token: Token{key='3'}'}
Window{next token=Token{key='5'}, data='data for token: Token{key='4'}'}
Window{next token=Token{key='6'}, data='data for token: Token{key='5'}'}
Window{next token=Token{key='7'}, data='data for token: Token{key='6'}'}
Window{next token=Token{key='8'}, data='data for token: Token{key='7'}'}
Window{next token=Token{key='9'}, data='data for token: Token{key='8'}'}
Window{next token=Token{key='10'}, data='data for token: Token{key='9'}'}

Copy-pastable sample:

/**
 * Self-contained sample of token-driven paging: every fetched window carries
 * the token for the next fetch, and fetching is repeated until a token
 * without more data is seen.
 */
public class RxPaging {

    /**
     * Builds a stream of windows driven by a stream of tokens.
     *
     * <p>The token subject is seeded with the start token. Each
     * (re)subscription takes the first token the subject delivers, fetches
     * the window for it, and pushes that window's token back into the subject.
     *
     * @return the stream of fetched windows
     */
    public Observable<Window> paging() {
        Subject<Token, Token> tokens = BehaviorSubject.<Token>create().toSerialized();
        tokens.onNext(Token.startToken());

        return Observable.defer(() -> tokens.first().flatMap(this::remoteData))
                // Publish the token carried by the freshly fetched window.
                .doOnNext(fetched -> tokens.onNext(fetched.getToken()))
                // Resubscribe on completion until a token without more data shows up.
                .repeatWhen(completions -> completions.flatMap(__ -> tokens).takeWhile(Token::hasMore));
    }

    /**
     * Simulates a remote call: emits one window for {@code token} after a
     * 100 ms delay. The window carries the next page's token while the current
     * page number is below 10, and the end token otherwise.
     */
    private Observable<Window> remoteData(Token token) {
        /*limit number of pages*/
        final Token following;
        if (page(token) < 10) {
            following = nextPageToken(token);
        } else {
            following = Token.endToken();
        }

        final Window window = new Window(following, "data for token: " + token);
        return Observable.just(window).delay(100, TimeUnit.MILLISECONDS);
    }

    /** Page number encoded in the token key; an empty key counts as page 0. */
    private int page(Token token) {
        final String key = token.getKey();
        if (key.isEmpty()) {
            return 0;
        }
        return Integer.parseInt(key);
    }

    /** Token following {@code token}; an empty key is followed by key "1". */
    private Token nextPageToken(Token token) {
        final String current = token.getKey();
        if (current.isEmpty()) {
            return new Token("1");
        }
        return nextToken(current);
    }

    /** Token whose key is the numeric {@code tokenKey} incremented by one. */
    private Token nextToken(String tokenKey) {
        final int incremented = Integer.parseInt(tokenKey) + 1;
        return new Token(Integer.toString(incremented));
    }

    /** Paging token wrapping a string key; an empty key means "no more data". */
    public static class Token {
        private final String key;

        private Token(String key) {
            this.key = key;
        }

        /** The start token has an empty key. */
        public static Token startToken() {
            return new Token("");
        }

        /** The end token is the same as the start token: an empty key. */
        public static Token endToken() {
            return startToken();
        }

        public String getKey() {
            return key;
        }

        /** @return {@code true} unless the key is empty */
        public boolean hasMore() {
            return !key.isEmpty();
        }

        @Override
        public String toString() {
            return "Token{key='" + key + "'}";
        }
    }


    /** A chunk of data together with the token for the next fetch. */
    public static class Window {
        private final Token token;
        private final String data;

        public Window(Token token, String data) {
            this.token = token;
            this.data = data;
        }

        public Token getToken() {
            return token;
        }

        public String getData() {
            return data;
        }

        @Override
        public String toString() {
            return "Window{next token=" + token + ", data='" + data + "'}";
        }
    }

    /** Runs the paging stream to completion, printing each window. */
    @Test
    public void testPaging() throws Exception {
        paging().toBlocking().subscribe(System.out::println);
    }
}