Source code for tensorwaves.data

"""The `.data` module takes care of data generation."""

import logging
from typing import Mapping, Optional, Tuple

import numpy as np
from tqdm.auto import tqdm

from tensorwaves.data.phasespace import (
    TFPhaseSpaceGenerator,
    TFUniformRealNumberGenerator,
)
from tensorwaves.interface import (
    DataSample,
    DataTransformer,
    Function,
    PhaseSpaceGenerator,
    UniformRealNumberGenerator,
)

from . import phasespace, transform

# Names exported by ``from tensorwaves.data import *``: the two facade
# functions defined in this module plus the ``phasespace`` and ``transform``
# submodules that are imported above.
__all__ = [
    "generate_data",
    "generate_phsp",
    "phasespace",
    "transform",
]


def generate_data(  # pylint: disable=too-many-arguments
    size: int,
    initial_state_mass: float,
    final_state_masses: Mapping[int, float],
    data_transformer: DataTransformer,
    intensity: Function,
    phsp_generator: Optional[PhaseSpaceGenerator] = None,
    random_generator: Optional[UniformRealNumberGenerator] = None,
    bunch_size: int = 50000,
) -> DataSample:
    """Facade function for creating data samples based on an intensity.

    The sample is built up bunch by bunch with a hit-and-miss selection (see
    `_generate_data_bunch`). Whenever a bunch reports an intensity maximum
    that exceeds the largest one seen so far while events have already been
    collected, the collected events are discarded and generation restarts.

    Args:
        size: Sample size to generate.
        initial_state_mass: See :meth:`.PhaseSpaceGenerator.setup`.
        final_state_masses: See :meth:`.PhaseSpaceGenerator.setup`.
        data_transformer: An instance of `.DataTransformer` that is used to
            transform a generated `.DataSample` to a `.DataSample` that can be
            understood by the `.Function`.
        intensity: The intensity `.Function` that will be sampled.
        phsp_generator: Instance of a phase space generator. Defaults to
            `.TFPhaseSpaceGenerator`. Its
            :meth:`~.PhaseSpaceGenerator.setup` method is called with
            the given masses before generation starts.
        random_generator: A uniform real random number generator. Defaults to
            `.TFUniformRealNumberGenerator` with **indeterministic** behavior.
        bunch_size: Adjusts size of a bunch. The requested sample size is
            generated from many smaller samples, aka bunches.

    Returns:
        The first ``size`` accepted events.
    """
    # pylint: disable=import-outside-toplevel
    from ampform.data import EventCollection

    # Same defaulting scheme as in generate_phsp. Previously a caller-supplied
    # generator was never bound to the local that is used below, which raised
    # an UnboundLocalError.
    if phsp_generator is None:
        phsp_generator = TFPhaseSpaceGenerator()
    phsp_generator.setup(initial_state_mass, final_state_masses)
    if random_generator is None:
        random_generator = TFUniformRealNumberGenerator()

    progress_bar = tqdm(
        total=size,
        desc="Generating intensity-based sample",
        # Only show the bar if the root logger is at WARNING or more verbose.
        disable=logging.getLogger().level > logging.WARNING,
    )
    momentum_pool = EventCollection({})
    # NOTE(review): current_max is only used to detect when a restart is
    # needed; _generate_data_bunch draws its random numbers against the
    # maximum of each individual bunch.
    current_max = 0.0
    while momentum_pool.n_events < size:
        bunch, maxvalue = _generate_data_bunch(
            bunch_size,
            phsp_generator,
            random_generator,
            intensity,
            data_transformer,
        )
        if maxvalue > current_max:
            exceeded_max = current_max
            # Keep a 5% safety margin on top of the newly found maximum.
            current_max = 1.05 * maxvalue
            if momentum_pool.n_events > 0:
                # Report the maximum that was exceeded, not the updated one.
                logging.info(
                    "processed bunch maximum of %s is over current"
                    " maximum %s. Restarting generation!",
                    maxvalue,
                    exceeded_max,
                )
                momentum_pool = EventCollection({})
                progress_bar.update(n=-progress_bar.n)  # reset progress bar
                continue
        if momentum_pool.n_events > 0:
            momentum_pool.append(bunch)  # type: ignore[arg-type]
        else:
            momentum_pool = EventCollection(bunch)  # type: ignore[arg-type]
        # Advance the bar by the number of newly accepted events.
        progress_bar.update(n=momentum_pool.n_events - progress_bar.n)
    _finalize_progress_bar(progress_bar)
    # The last bunch usually overshoots the requested size.
    return momentum_pool.select_events(slice(0, size))


def _generate_data_bunch(
    bunch_size: int,
    phsp_generator: PhaseSpaceGenerator,
    random_generator: UniformRealNumberGenerator,
    intensity: Function,
    kinematics: DataTransformer,
) -> Tuple[DataSample, float]:
    """Generate one bunch of events with a hit-and-miss selection.

    Args:
        bunch_size: Number of phase space events to generate before selection.
        phsp_generator: Generator that provides the phase space sample and its
            weights.
        random_generator: Random number generator that is passed on to the
            phase space generator and that provides the hit-and-miss numbers.
        intensity: `.Function` that is evaluated on the transformed sample.
        kinematics: `.DataTransformer` that converts the generated events
            into the input of ``intensity``.

    Returns:
        A tuple of the accepted events and the largest intensity value that
        was found in this bunch.
    """
    # pylint: disable=import-outside-toplevel
    from ampform.data import EventCollection

    generated, phsp_weights = phsp_generator.generate(
        bunch_size, random_generator
    )
    events = EventCollection(generated)  # type: ignore[arg-type]
    intensities = intensity(kinematics.transform(events))
    maxvalue: float = np.max(intensities)

    # An event is kept if its weighted intensity exceeds a random number that
    # is drawn with the bunch maximum as upper limit.
    thresholds = random_generator(bunch_size, max_value=maxvalue)
    is_accepted = phsp_weights * intensities > thresholds
    return events.select_events(is_accepted), maxvalue


def generate_phsp(
    size: int,
    initial_state_mass: float,
    final_state_masses: Mapping[int, float],
    phsp_generator: Optional[PhaseSpaceGenerator] = None,
    random_generator: Optional[UniformRealNumberGenerator] = None,
    bunch_size: int = 50000,
) -> DataSample:
    """Facade function for creating (unweighted) phase space samples.

    Bunches of weighted phase space events are generated and reduced with a
    hit-and-miss selection on their weights until enough events are collected.

    Args:
        size: Sample size to generate.
        initial_state_mass: See :meth:`.PhaseSpaceGenerator.setup`.
        final_state_masses: See :meth:`.PhaseSpaceGenerator.setup`.
        phsp_generator: Instance of a phase space generator. Defaults to
            `.TFPhaseSpaceGenerator`.
        random_generator: A uniform real random number generator. Defaults to
            `.TFUniformRealNumberGenerator` with **indeterministic** behavior.
        bunch_size: Adjusts size of a bunch. The requested sample size is
            generated from many smaller samples, aka bunches.

    Returns:
        The first ``size`` accepted phase space events.
    """
    # pylint: disable=import-outside-toplevel
    from ampform.data import EventCollection

    if phsp_generator is None:
        phsp_generator = TFPhaseSpaceGenerator()
    phsp_generator.setup(initial_state_mass, final_state_masses)
    if random_generator is None:
        random_generator = TFUniformRealNumberGenerator()

    # Only show the bar if the root logger is at WARNING or more verbose.
    hide_progress = logging.getLogger().level > logging.WARNING
    progress_bar = tqdm(
        total=size,
        desc="Generating phase space sample",
        disable=hide_progress,
    )

    momentum_pool = EventCollection({})
    while momentum_pool.n_events < size:
        phsp_sample, weights = phsp_generator.generate(
            bunch_size, random_generator
        )
        hit_and_miss_randoms = random_generator(bunch_size)
        # Keep the events whose weight exceeds the corresponding random number.
        is_accepted = weights > hit_and_miss_randoms
        candidates = EventCollection(phsp_sample)  # type: ignore[arg-type]
        accepted = candidates.select_events(is_accepted)
        if momentum_pool.n_events == 0:
            momentum_pool = accepted
        else:
            momentum_pool.append(accepted)
        progress_bar.update(n=accepted.n_events)
    _finalize_progress_bar(progress_bar)
    # The last bunch usually overshoots the requested size.
    return momentum_pool.select_events(slice(0, size))


def _finalize_progress_bar(progress_bar: tqdm) -> None:
    """Move the progress bar to its total and close it.

    Args:
        progress_bar: The bar to finish. Its counter is shifted by the
            difference between ``total`` and ``n``.
    """
    # pylint crashes if total is set directly
    progress_bar.update(n=progress_bar.total - progress_bar.n)
    progress_bar.close()