% D3Net conference paper (ECCV 2022). The citation key keeps its original 2021
% suffix so that existing \cite commands continue to resolve.
@inproceedings{chen2021d3net,
    author    = {Chen, Dave Zhenyu and Wu, Qirui and Nie{\ss}ner, Matthias and Chang, Angel X.},
    title     = {{D3Net}: A Speaker-Listener Architecture for Semi-supervised Dense Captioning and Visual Grounding in {RGB-D} Scans},
    booktitle = {European Conference on Computer Vision ({ECCV})},
    year      = {2022},
}