Stream Distinct

The distinct() method defined in the Stream interface is used to find unique elements. It is an intermediate operation: it takes an input stream, filters out the duplicates, and returns a new Stream of distinct elements.

package org.wesome.java8;

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;

/**
 * Demonstrates two ways of removing duplicates from a list of integers:
 * copying the list into a {@link HashSet}, and calling {@code distinct()}
 * on a stream of the list.
 */
class Apple {
    public static void main(String args[]) {
        List<Integer> integers = Arrays.asList(1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6);

        // Pre-Java-8 approach: a set drops duplicates on construction.
        System.out.println("*---------------------------Traditional method---------------------------*");
        HashSet<Integer> uniqueIntegers = new HashSet<>(integers);
        System.out.println(uniqueIntegers);

        // Stream approach: distinct() filters duplicates, each survivor is printed on its own line.
        System.out.println("*---------------------------Distinct---------------------------*");
        integers.stream()
                .distinct()
                .forEach(value -> System.out.println(value));
    }
}

The distinct() operation is stateful: it has to keep track of the elements it has already seen while processing the current one. Because of this it can perform very poorly with parallel streams, and it is advisable to use it with sequential streams only.

package org.wesome.java8;

import java.time.LocalTime;
import java.util.List;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

/**
 * Times {@code distinct()} on a sequential stream and on a parallel stream
 * over the same list of 50 random integers and prints both durations in
 * milliseconds.
 *
 * <p>Elapsed time is measured with {@link System#nanoTime()}, which is meant
 * for interval measurement. Subtracting {@code LocalTime.getNano()} values is
 * not: that accessor returns only the nano-of-second field, so a measurement
 * that crosses a second boundary yields a negative or otherwise wrong result.
 *
 * <p>NOTE(review): this is a single un-warmed run on a tiny input, so the
 * printed numbers are illustrative only, not a benchmark.
 */
class Apple {
    public static void main(String[] args) {
        List<Integer> integers = new Random().ints().limit(50).boxed().collect(Collectors.toList());
        System.out.println("integers = " + integers);

        System.out.println("*---------------------------Sequential Distinct---------------------------*");
        long start = System.nanoTime();
        // The collected list is discarded on purpose; only the elapsed time matters here.
        integers.stream().distinct().collect(Collectors.toList());
        long duration = System.nanoTime() - start;
        System.out.println("sequential Stream took = " + TimeUnit.NANOSECONDS.toMillis(duration) + " milliseconds");

        System.out.println("*---------------------------Parallel Distinct---------------------------*");
        start = System.nanoTime();
        integers.parallelStream().distinct().collect(Collectors.toList());
        duration = System.nanoTime() - start;
        System.out.println("parallel Stream took = " + TimeUnit.NANOSECONDS.toMillis(duration) + " milliseconds");
    }
}

Distinct on User Defined Object

The distinct() method relies on the elements' equals() and hashCode() methods to find the unique elements.

package org.wesome.java8;

import lombok.AllArgsConstructor;
import lombok.Data;

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;

/**
 * A fruit identified by {@code fruitId}.
 *
 * <p>Equality and hashing are based on {@code fruitId} alone, so two fruits
 * with the same id but different names are treated as duplicates.
 * {@code equals} also prints a trace line on every call so the demo can show
 * when it is invoked.
 */
@Data
@AllArgsConstructor
class Fruit {
    int fruitId;
    String fruitName;

    /** Hash derived from {@code fruitId} only, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return Objects.hash(fruitId);
    }

    /**
     * Prints a trace line, then compares by {@code fruitId}.
     *
     * @param o the object to compare with
     * @return {@code true} if {@code o} is this instance, or is of the same
     *         runtime class and has the same {@code fruitId}
     */
    @Override
    public boolean equals(Object o) {
        System.out.println("equals method is called on = " + o);
        if (this == o) {
            return true;
        }
        if (o != null && getClass() == o.getClass()) {
            Fruit other = (Fruit) o;
            return other.fruitId == fruitId;
        }
        return false;
    }
}

/**
 * Demonstrates duplicate removal on a user-defined type: four fruits sharing
 * two ids are de-duplicated once via a {@link HashSet} and once via
 * {@code distinct()} on a stream.
 */
class Apple {
    public static void main(String args[]) {
        List<Fruit> fruits = Arrays.asList(
                new Fruit(1, "Macintosh"),
                new Fruit(1, "Fuji"),
                new Fruit(2, "Gala"),
                new Fruit(2, "Jonagold"));

        // Set-based approach: duplicates are dropped while the set is built.
        System.out.println("*---------------------------Traditional method---------------------------*");
        HashSet<Fruit> uniqueFruits = new HashSet<>(fruits);
        System.out.println(uniqueFruits);

        // Stream-based approach: each distinct fruit is printed on its own line.
        System.out.println("*---------------------------Distinct---------------------------*");
        fruits.stream()
                .distinct()
                .forEach(fruit -> System.out.println(fruit));
    }
}

When there are duplicate elements, the distinct method preserves the first one encountered (for ordered streams). In the example below, Macintosh and Fuji both have the same fruitId, which makes them duplicates; since Macintosh appears first, it is the one preserved in the result.

package org.wesome.java8;

import lombok.AllArgsConstructor;
import lombok.Data;

import java.util.Arrays;
import java.util.List;
import java.util.Objects;

/**
 * A fruit identified by {@code fruitId}.
 *
 * <p>Two fruits are considered equal when their {@code fruitId} values match,
 * regardless of {@code fruitName}. {@code equals} prints a trace line each
 * time it is called so the demo output shows the comparisons being made.
 */
@Data
@AllArgsConstructor
class Fruit {
    int fruitId;
    String fruitName;

    /**
     * Prints a trace line, then compares by {@code fruitId}.
     *
     * @param o the object to compare with
     * @return {@code true} if {@code o} is this instance, or is of the same
     *         runtime class and has the same {@code fruitId}
     */
    @Override
    public boolean equals(Object o) {
        System.out.println("equals method is called on = " + o);
        if (this == o) {
            return true;
        }
        boolean sameClass = o != null && getClass() == o.getClass();
        if (!sameClass) {
            return false;
        }
        return ((Fruit) o).fruitId == fruitId;
    }

    /** Hash derived from {@code fruitId} only, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return Objects.hash(fruitId);
    }
}

/**
 * Runs {@code distinct()} on a parallel stream of two fruits that share the
 * same {@code fruitId} and prints whichever element is retained.
 */
class Apple {
    public static void main(String args[]) {
        List<Fruit> fruits = Arrays.asList(
                new Fruit(1, "Macintosh"),
                new Fruit(1, "Fuji"));

        System.out.println("*---------------------------Distinct---------------------------*");
        fruits.parallelStream()
                .distinct()
                .forEach(fruit -> System.out.println(fruit));
    }
}

follow us on