Quickstart
Built-in dataset
from anomalog.presets import bgl
dataset = bgl.build()  # call build() on the imported `bgl` preset; the result is bound to `dataset`
Custom dataset
from pathlib import Path
from anomalog import DatasetSpec
from anomalog.labels import CSVReader
from anomalog.parsers import HDFSV1Parser
from anomalog.sources import LocalZipSource
dataset = (
    # One fluent chain: create the named spec, then attach a source,
    # a parser and a label reader before calling build().
    DatasetSpec("my-hdfs")
    .from_source(
        LocalZipSource(
            Path("HDFS_v1.zip"),
            raw_logs_relpath=Path("HDFS.log"),
        ),
    )
    .parse_with(HDFSV1Parser())
    .label_with(
        CSVReader(
            relative_path=Path("preprocessed/anomaly_label.csv"),
            entity_column="BlockId",
            label_column="Label",
        ),
    )
    .build()  # last call in the chain; its return value is bound to `dataset`
)
Group sequences
sequences = (
    dataset
    .group_by_entity()         # first call, made on the existing `dataset`
    .with_train_fraction(0.8)  # second call, passing 0.8 as the train fraction
)