This repository contains the early study and code for the paper "UniXGen: A Unified Vision-Language Model for Multi-View Chest X-ray Generation and Report Generation" by Hyungyung Lee, Da Young Lee, Wonjae Kim, Jin-Hwa Kim, Tackeun Kim, Jihang Kim, Leonard Sunwoo, and Edward Choi.
This research is based on Performer, a linear attention-based transformer variant with a Fast Attention Via positive Orthogonal Random features approach (FAVOR+).
% UniXGen paper (arXiv preprint 2302.12172). The citation key is kept as-is so
% existing citations keep resolving; the arXiv identifier lives in the eprint
% fields rather than in a journal field, matching the Performer entry's layout.
@misc{kim2023unixgen,
  author        = {Lee, Hyungyung and Lee, Da Young and Kim, Wonjae and Kim, Jin-Hwa and Kim, Tackeun and Kim, Jihang and Sunwoo, Leonard and Choi, Edward},
  title         = {{UniXGen}: A Unified Vision-Language Model for Multi-View Chest {X-ray} Generation and Report Generation},
  year          = {2023},
  eprint        = {2302.12172},
  archivePrefix = {arXiv}
}
% Performer paper (arXiv preprint 2009.14794, class cs.LG).
@misc{choromanski2020rethinking,
  author        = {Choromanski, Krzysztof and Likhosherstov, Valerii and Dohan, David and Song, Xingyou and Gane, Andreea and Sarlos, Tamas and Hawkins, Peter and Davis, Jared and Mohiuddin, Afroz and Kaiser, Lukasz and Belanger, David and Colwell, Lucy and Weller, Adrian},
  title         = {Rethinking Attention with Performers},
  year          = {2020},
  eprint        = {2009.14794},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG}
}