"""The `.data` module takes care of data generation."""
import logging
from typing import Mapping, Optional, Tuple
import numpy as np
from tqdm.auto import tqdm
from tensorwaves.data.phasespace import (
TFPhaseSpaceGenerator,
TFUniformRealNumberGenerator,
)
from tensorwaves.interface import (
DataSample,
DataTransformer,
Function,
PhaseSpaceGenerator,
UniformRealNumberGenerator,
)
from . import phasespace, transform
__all__ = [
"generate_data",
"generate_phsp",
"phasespace",
"transform",
]
[docs]def generate_data( # pylint: disable=too-many-arguments
size: int,
initial_state_mass: float,
final_state_masses: Mapping[int, float],
data_transformer: DataTransformer,
intensity: Function,
phsp_generator: Optional[PhaseSpaceGenerator] = None,
random_generator: Optional[UniformRealNumberGenerator] = None,
bunch_size: int = 50000,
) -> DataSample:
"""Facade function for creating data samples based on an intensities.
Args:
size: Sample size to generate.
initial_state_mass: See :meth:`.PhaseSpaceGenerator.setup`.
final_state_masses: See :meth:`.PhaseSpaceGenerator.setup`.
data_transformer: An instance of `.DataTransformer` that is used to
transform a generated `.DataSample` to a `.DataSample` that can be
understood by the `.Function`.
intensity: The intensity `.Function` that will be sampled.
phsp_generator: Class of a phase space generator.
random_generator: A uniform real random number generator. Defaults to
`.TFUniformRealNumberGenerator` with **indeterministic** behavior.
bunch_size: Adjusts size of a bunch. The requested sample size is
generated from many smaller samples, aka bunches.
"""
# pylint: disable=import-outside-toplevel
from ampform.data import EventCollection
if phsp_generator is None:
phsp_gen_instance = TFPhaseSpaceGenerator()
phsp_gen_instance.setup(initial_state_mass, final_state_masses)
if random_generator is None:
random_generator = TFUniformRealNumberGenerator()
progress_bar = tqdm(
total=size,
desc="Generating intensity-based sample",
disable=logging.getLogger().level > logging.WARNING,
)
momentum_pool = EventCollection({})
current_max = 0.0
while momentum_pool.n_events < size:
bunch, maxvalue = _generate_data_bunch(
bunch_size,
phsp_gen_instance,
random_generator,
intensity,
data_transformer,
)
if maxvalue > current_max:
current_max = 1.05 * maxvalue
if momentum_pool.n_events > 0:
logging.info(
"processed bunch maximum of %s is over current"
" maximum %s. Restarting generation!",
maxvalue,
current_max,
)
momentum_pool = EventCollection({})
progress_bar.update(n=-progress_bar.n) # reset progress bar
continue
if np.size(momentum_pool, 0) > 0: # type: ignore[arg-type]
momentum_pool.append(bunch) # type: ignore[arg-type]
else:
momentum_pool = EventCollection(bunch) # type: ignore[arg-type]
progress_bar.update(n=momentum_pool.n_events - progress_bar.n)
_finalize_progress_bar(progress_bar)
return momentum_pool.select_events(slice(0, size))
def _generate_data_bunch(
bunch_size: int,
phsp_generator: PhaseSpaceGenerator,
random_generator: UniformRealNumberGenerator,
intensity: Function,
kinematics: DataTransformer,
) -> Tuple[DataSample, float]:
# pylint: disable=import-outside-toplevel
from ampform.data import EventCollection
phsp_sample, weights = phsp_generator.generate(
bunch_size, random_generator
)
momentum_pool = EventCollection(phsp_sample) # type: ignore[arg-type]
dataset = kinematics.transform(momentum_pool)
intensities = intensity(dataset)
maxvalue: float = np.max(intensities)
uniform_randoms = random_generator(bunch_size, max_value=maxvalue)
hit_and_miss_sample = momentum_pool.select_events(
weights * intensities > uniform_randoms
)
return hit_and_miss_sample, maxvalue
[docs]def generate_phsp(
size: int,
initial_state_mass: float,
final_state_masses: Mapping[int, float],
phsp_generator: Optional[PhaseSpaceGenerator] = None,
random_generator: Optional[UniformRealNumberGenerator] = None,
bunch_size: int = 50000,
) -> DataSample:
"""Facade function for creating (unweighted) phase space samples.
Args:
size: Sample size to generate.
initial_state_mass: See :meth:`.PhaseSpaceGenerator.setup`.
final_state_masses: See :meth:`.PhaseSpaceGenerator.setup`.
phsp_generator: Class of a phase space generator. Defaults to
`.TFPhaseSpaceGenerator`.
random_generator: A uniform real random number generator. Defaults to
`.TFUniformRealNumberGenerator` with **indeterministic** behavior.
bunch_size: Adjusts size of a bunch. The requested sample size is
generated from many smaller samples, aka bunches.
"""
# pylint: disable=import-outside-toplevel
from ampform.data import EventCollection
if phsp_generator is None:
phsp_generator = TFPhaseSpaceGenerator()
phsp_generator.setup(initial_state_mass, final_state_masses)
if random_generator is None:
random_generator = TFUniformRealNumberGenerator()
progress_bar = tqdm(
total=size,
desc="Generating phase space sample",
disable=logging.getLogger().level > logging.WARNING,
)
momentum_pool = EventCollection({})
while momentum_pool.n_events < size:
phsp_sample, weights = phsp_generator.generate(
bunch_size, random_generator
)
hit_and_miss_randoms = random_generator(bunch_size)
bunch = EventCollection(phsp_sample).select_events( # type: ignore[arg-type]
weights > hit_and_miss_randoms
)
if momentum_pool.n_events > 0:
momentum_pool.append(bunch)
else:
momentum_pool = bunch
progress_bar.update(n=bunch.n_events)
_finalize_progress_bar(progress_bar)
return momentum_pool.select_events(slice(0, size))
def _finalize_progress_bar(progress_bar: tqdm) -> None:
remainder = progress_bar.total - progress_bar.n
progress_bar.update(n=remainder) # pylint crashes if total is set directly
progress_bar.close()