Source code for torch_ppr.api

# -*- coding: utf-8 -*-

"""The public API."""

import logging
from typing import Optional

import torch

from .utils import (
    DeviceHint,
    batched_personalized_page_rank,
    power_iteration,
    prepare_page_rank_adjacency,
    prepare_x0,
    resolve_device,
    validate_adjacency,
    validate_x,
)

__all__ = [
    "page_rank",
    "personalized_page_rank",
]

logger = logging.getLogger(__name__)


[docs]def page_rank(
    *,
    adj: Optional[torch.Tensor] = None,
    edge_index: Optional[torch.LongTensor] = None,
    num_nodes: Optional[int] = None,
    add_identity: bool = False,
    max_iter: int = 1_000,
    alpha: float = 0.05,
    epsilon: float = 1.0e-04,
    x0: Optional[torch.Tensor] = None,
    use_tqdm: bool = False,
    device: DeviceHint = None,
) -> torch.Tensor:
    """
    Compute page rank by power iteration.

    :param adj:
        the adjacency matrix, cf. :func:`torch_ppr.utils.prepare_page_rank_adjacency`. Preferred over ``edge_index``.
    :param edge_index: shape: ``(2, m)``
        the edge index of the graph, i.e, the edge list. cf. :func:`torch_ppr.utils.prepare_page_rank_adjacency`
    :param num_nodes:
        the number of nodes used to determine the shape of the adjacency matrix.
        If ``None``, and ``adj`` is not already provided, it is inferred from ``edge_index``.
    :param add_identity:
        whether to add an identity matrix to ``A`` to ensure that each node has a degree of at least one.

    :param max_iter: ``max_iter > 0``
        the maximum number of iterations
    :param alpha: ``0 < alpha < 1``
        the smoothing value / teleport probability
    :param epsilon: ``epsilon > 0``
        a (small) constant to check for convergence
    :param x0: shape: ``(n,)``
        the initial value for ``x``. If ``None``, set to a constant $1/n$ vector,
        cf. :func:`torch_ppr.utils.prepare_x0`. Otherwise, the tensor is checked for being valid using
        :func:`torch_ppr.utils.validate_x`.
    :param use_tqdm:
        whether to use a tqdm progress bar
    :param device:
        the device to use, or a hint thereof


    :return: shape: ``(n,)`` or ``(batch_size, n)``
        the page-rank vector, i.e., a score between 0 and 1 for each node.
    """
    # normalize inputs
    adj = prepare_page_rank_adjacency(
        adj=adj, edge_index=edge_index, num_nodes=num_nodes, add_identity=add_identity
    )
    validate_adjacency(adj=adj)

    x0 = prepare_x0(x0=x0, n=adj.shape[0])

    # input normalization
    validate_x(x=x0, n=adj.shape[0])

    # power iteration
    x = power_iteration(
        adj=adj,
        x0=x0,
        alpha=alpha,
        max_iter=max_iter,
        use_tqdm=use_tqdm,
        epsilon=epsilon,
        device=device,
    )
    if x.ndim < 2:
        return x
    return x.t()


[docs]def personalized_page_rank(
    *,
    adj: Optional[torch.Tensor] = None,
    edge_index: Optional[torch.LongTensor] = None,
    add_identity: bool = False,
    num_nodes: Optional[int] = None,
    indices: Optional[torch.Tensor] = None,
    device: DeviceHint = None,
    batch_size: Optional[int] = None,
    **kwargs,
) -> torch.Tensor:
    """
    Personalized Page-Rank (PPR) computation.

    .. note::
        this method supports automatic memory optimization / batch size selection using :mod:`torch_max_mem`.

    :param adj: shape: ``(n, n)``
        the adjacency matrix, cf. :func:`torch_ppr.utils.prepare_page_rank_adjacency`
    :param edge_index: shape: ``(2, m)``
        the edge index, cf. :func:`torch_ppr.utils.prepare_page_rank_adjacency`
    :param num_nodes:
        the number of nodes used to determine the shape of the adjacency matrix.
        If ``None``, and ``adj`` is not already provided, it is inferred from ``edge_index``.
    :param add_identity:
        whether to add an identity matrix to ``A`` to ensure that each node has a degree of at least one.

    :param indices: shape: ``(k,)``
        the node indices for which to calculate the PPR. Defaults to all nodes.
    :param device:
        the device to use
    :param batch_size: ``batch_size > 0``
        the batch size. Defaults to the number of indices. It will be reduced if necessary.
    :param kwargs:
        additional keyword-based parameters passed to :func:`torch_ppr.utils.batched_personalized_page_rank`

    :return: shape: ``(k, n)``
        the PPR vectors for each node index

    The following shows an example where a custom adjacency matrix is provided. For illustrative purposes, we randomly
    generate one:

    >>> import torch
    >>> adj = (torch.rand(300, 300)*10).round().to_sparse()

    Next, we need to ensure that the matrix is row-normalized, i.e., the individual rows sum to 1. Here, we re-use a
    utility method provided by the library:

    >>> from torch_ppr.utils import sparse_normalize
    >>> adj_normalized = sparse_normalize(adj, dim=0)

    Finally, we can use this matrix to calculate the personalized page rank for some nodes

    >>> from torch_ppr import personalized_page_rank
    >>> indices = torch.as_tensor([1, 2], dtype=torch.long)
    >>> ppr = personalized_page_rank(adj=adj_normalized, indices=indices)
    """
    # resolve device first
    device = resolve_device(device=device)
    # prepare adjacency and indices only once
    adj = prepare_page_rank_adjacency(
        adj=adj, edge_index=edge_index, num_nodes=num_nodes, add_identity=add_identity
    ).to(device=device)
    validate_adjacency(adj=adj)

    if indices is None:
        indices = torch.arange(adj.shape[0], device=device)
    else:
        indices = torch.as_tensor(indices, dtype=torch.long, device=device)
    # normalize inputs
    batch_size = batch_size or len(indices)
    return batched_personalized_page_rank(
        adj=adj, indices=indices, device=device, batch_size=batch_size, **kwargs
    ).t()