% Journal article entry for the MixtureVitae paper (Transactions on Machine Learning Research).
% The coined name in the title is brace-protected so that sentence-casing
% bibliography styles do not lowercase its interior capital letter.
% Authors are written in the "Last, First" form, separated by " and ".
@article{nguyen2026mixturevitae,
  author  = {Nguyen, Huu and
             May, Victor and
             Raj, Harsh and
             Nezhurina, Marianna and
             Wang, Yishan and
             Luo, Yanqi and
             Chien, Vu Minh and
             Nakamura, Taishi and
             Tsui, Ken and
             Nguyen, Van Khue and
             Salinas, David and
             Krasnod{\k{e}}bska, Aleksandra and
             Schuhmann, Christoph and
             Richter, Mats Leon and
             Vu, Xuan-Son and
             Jitsev, Jenia},
  title   = {{MixtureVitae}: Open Web-Scale Pretraining Dataset With High Quality Instruction and Reasoning Data Built from Permissive-First Text Sources},
  journal = {Transactions on Machine Learning Research},
  year    = {2026},
  issn    = {2835-8856},
  url     = {https://openreview.net/forum?id=SyCcUNUUMf},
  note    = {Featured Certification, J2C Certification},
}