{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,18]],"date-time":"2026-01-18T01:48:46Z","timestamp":1768700926450,"version":"3.49.0"},"publisher-location":"Cham","reference-count":48,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031734137","type":"print"},{"value":"9783031734144","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,25]],"date-time":"2024-10-25T00:00:00Z","timestamp":1729814400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,25]],"date-time":"2024-10-25T00:00:00Z","timestamp":1729814400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73414-4_13","type":"book-chapter","created":{"date-parts":[[2024,10,24]],"date-time":"2024-10-24T17:02:54Z","timestamp":1729789374000},"page":"218-234","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Modeling Label Correlations with\u00a0Latent Context for\u00a0Multi-label Recognition"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8955-1789","authenticated-orcid":false,"given":"Zhaomin","family":"Chen","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3428-4913","authenticated-orcid":false,"given":"Quan","family":"Cui","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5912-0152","authenticated-orcid":false,"given":"Ruoxi","family":"Deng","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3296-5459","authenticated-orcid":false,"given":"Jie","family":"Hu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6264-5854","authenticated-orcid":false,"given":"Guodao","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,25]]},"reference":[{"key":"13_CR1","unstructured":"Brown, T., et al.: Language models are few-shot learners. In: Advances in Neural Information Processing Systems, vol. 33, pp. 1877\u20131901 (2020)"},{"key":"13_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 213\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13"},{"key":"13_CR3","doi-asserted-by":"crossref","unstructured":"Chen, S.F., Chen, Y.C., Yeh, C.K., Wang, Y.C.F.: Order-free RNN with visual attention for multi-label classification. In: AAAI, pp. 6714\u20136721 (2018)","DOI":"10.1609\/aaai.v32i1.12230"},{"key":"13_CR4","unstructured":"Chen, T., Lin, L., Hui, X., Chen, R., Wu, H.: Knowledge-guided multi-label few-shot learning for general image recognition. IEEE TPAMI (2020, in press)"},{"key":"13_CR5","doi-asserted-by":"crossref","unstructured":"Chen, T., Pu, T., Wu, H., Xie, Y., Lin, L.: Structured semantic transfer for multi-label recognition with partial labels. In: AAAI, pp. 339\u2013346 (2022)","DOI":"10.1609\/aaai.v36i1.19910"},{"key":"13_CR6","first-page":"1827","volume":"23","author":"ZM Chen","year":"2020","unstructured":"Chen, Z.M., Cui, Q., Wei, X.S., Jin, X., Guo, Y.: Disentangling, embedding and ranking label cues for multi-label image recognition. IEEE TMM 23, 1827\u20131840 (2020)","journal-title":"IEEE TMM"},{"key":"13_CR7","first-page":"2570","volume":"31","author":"ZM Chen","year":"2022","unstructured":"Chen, Z.M., Cui, Q., Zhao, B., Song, R., Zhang, X., Yoshie, O.: SST: spatial and semantic transformers for multi-label image recognition. IEEE TIP 31, 2570\u20132583 (2022)","journal-title":"IEEE TIP"},{"key":"13_CR8","doi-asserted-by":"crossref","unstructured":"Chen, Z.M., Wei, X.S., Wang, P., Guo, Y.: Multi-label image recognition with graph convolutional networks. In: CVPR, pp. 5177\u20135186 (2019)","DOI":"10.1109\/CVPR.2019.00532"},{"key":"13_CR9","unstructured":"Chen, Z., Wei, X.S., Wang, P., Guo, Y.: Learning graph convolutional networks for multi-label recognition and applications. IEEE TPAMI (2021, in press)"},{"key":"13_CR10","doi-asserted-by":"crossref","unstructured":"Chua, T.S., Tang, J., Hong, R., Li, H., Luo, Z., Zheng, Y.: NUS-WIDE: a real-world web image database from national university of Singapore. In: Proceedings of Conseil Interprofessionnel des Vins du Roussillon, pp.\u00a01\u20139 (2009)","DOI":"10.1145\/1646396.1646452"},{"key":"13_CR11","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: CVPR, pp. 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"13_CR12","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"13_CR13","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"13_CR14","doi-asserted-by":"crossref","unstructured":"Durand, T., Mehrasa, N., Mori, G.: Learning a deep convnet for multi-label classification with partial labels. In: CVPR, pp. 647\u2013657 (2019)","DOI":"10.1109\/CVPR.2019.00074"},{"key":"13_CR15","doi-asserted-by":"crossref","unstructured":"Durand, T., Mordan, T., Thome, N., Cord, M.: Wildcat: weakly supervised learning of deep convnets for image classification, pointwise localization and segmentation. In: CVPR, pp. 642\u2013651 (2017)","DOI":"10.1109\/CVPR.2017.631"},{"key":"13_CR16","first-page":"5920","volume":"30","author":"BB Gao","year":"2021","unstructured":"Gao, B.B., Zhou, H.Y.: Learning to discover multi-class attentional regions for multi-label image recognition. IEEE TIP 30, 5920\u20135932 (2021)","journal-title":"IEEE TIP"},{"key":"13_CR17","unstructured":"Gong, Y., Jia, Y., Leung, T., Toshev, A., Ioffe, S.: Deep convolutional ranking for multi-label image annotation. arXiv preprint arXiv:1312.4894 pp.\u00a01\u20139 (2013)"},{"key":"13_CR18","doi-asserted-by":"crossref","unstructured":"Guo, H., Zheng, K., Fan, X., Yu, H., Wang, S.: Visual attention consistency under image transforms for multi-label image classification. In: CVPR, pp. 729\u2013739 (2019)","DOI":"10.1109\/CVPR.2019.00082"},{"key":"13_CR19","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"13_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"649","DOI":"10.1007\/978-3-030-58589-1_39","volume-title":"Computer Vision \u2013 ECCV 2020","author":"J Ye","year":"2020","unstructured":"Ye, J., He, J., Peng, X., Wu, W., Qiao, Yu.: Attention-driven dynamic graph convolutional network for multi-label image recognition. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12366, pp. 649\u2013665. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58589-1_39"},{"key":"13_CR21","doi-asserted-by":"crossref","unstructured":"Lanchantin, J., Wang, T., Ordonez, V., Qi, Y.: General multi-label image classification with transformers. In: CVPR, pp. 16478\u201316488 (2021)","DOI":"10.1109\/CVPR46437.2021.01621"},{"key":"13_CR22","first-page":"2726","volume":"31","author":"L Li","year":"2022","unstructured":"Li, L., Gao, X., Deng, J., Tu, Y., Zha, Z.J., Huang, Q.: Long short-term relation transformer with global gating for video captioning. IEEE TIP 31, 2726\u20132738 (2022)","journal-title":"IEEE TIP"},{"key":"13_CR23","doi-asserted-by":"crossref","unstructured":"Li, Y., Song, Y., Luo, J.: Improving pairwise ranking for multi-label image classification. In: CVPR, pp. 3617\u20133625 (2017)","DOI":"10.1109\/CVPR.2017.199"},{"key":"13_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"13_CR25","unstructured":"Liu, R., Huang, J., Thomas, H.L., Li, G.: Causality compensated attention for contextual biased visual recognition. In: ICLR, pp. 1\u201317 (2023)"},{"key":"13_CR26","doi-asserted-by":"crossref","unstructured":"Liu, R., Liu, H., Li, G., Hou, H., Yu, T., Yang, T.: Contextual debiasing for visual recognition with causal mechanisms. In: CVPR, pp. 12755\u201312765 (2022)","DOI":"10.1109\/CVPR52688.2022.01242"},{"key":"13_CR27","unstructured":"Liu, S., Zhang, L., Yang, X., Su, H., Zhu, J.: Query2label: a simple transformer way to multi-label classification. arXiv preprint arXiv:2107.10834 (2021)"},{"key":"13_CR28","unstructured":"Liu, Y., et al.: RoBERTa: a robustly optimized BERT pretraining approach. arXiv preprint arXiv:1907.11692, pp. 1\u201313 (2019)"},{"key":"13_CR29","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: ICCV, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"issue":"11","key":"13_CR30","first-page":"2579","volume":"9","author":"L Maaten","year":"2008","unstructured":"Maaten, L., Hinton, G.: Visualizing data using t-SNE. JMLR 9(11), 2579\u20132605 (2008)","journal-title":"JMLR"},{"key":"13_CR31","doi-asserted-by":"crossref","unstructured":"Nguyen, H.D., Vu, X.S., Le, D.T.: Modular graph transformer networks for multi-label image classification. In: AAAI, pp. 9092\u20139100 (2021)","DOI":"10.1609\/aaai.v35i10.17098"},{"key":"13_CR32","unstructured":"Renchun, Y., Zhiyao, G., Lei, C., Xiang, L., Yingze, B., Shilei, W.: Cross-modality attention with semantic graph embedding for multi-label classification. In: AAAI, pp.\u00a01\u20139 (2020)"},{"key":"13_CR33","doi-asserted-by":"crossref","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. In: ICLR, pp.\u00a01\u20138 (2015)","DOI":"10.1109\/ICCV.2015.314"},{"issue":"12","key":"13_CR34","first-page":"6025","volume":"27","author":"L Song","year":"2018","unstructured":"Song, L., et al.: A deep multi-modal CNN for multi-instance multi-label image classification. IEEE TIP 27(12), 6025\u20136038 (2018)","journal-title":"IEEE TIP"},{"issue":"56","key":"13_CR35","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava, N., Hinton, G., Krizhevsky, A., Sutskever, I., Salakhutdinov, R.: Dropout: a simple way to prevent neural networks from overfitting. JMLR 15(56), 1929\u20131958 (2014)","journal-title":"JMLR"},{"key":"13_CR36","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Vanhoucke, V., Ioffe, S., Shlens, J., Wojna, Z.: Rethinking the inception architecture for computer vision. In: CVPR, pp. 2818\u20132826 (2016)","DOI":"10.1109\/CVPR.2016.308"},{"key":"13_CR37","doi-asserted-by":"crossref","unstructured":"Tianshui, C., Muxin, X., Xiaolu, H., Hefeng, W., Liang, L.: Learning semantic-specific graph representation for multi-label image recognition. In: ICCV, pp. 522\u2013531 (2019)","DOI":"10.1109\/ICCV.2019.00061"},{"key":"13_CR38","unstructured":"Vacit, Oguz, Y., Abel, G.G., Arnau, R.: Orderless recurrent models for multi-label classification. In: CVPR, pp. 13440\u201313449 (2020)"},{"key":"13_CR39","unstructured":"Vaswani, A., et al.: Attention is all you need. In: NeurIPS, pp. 6000\u20136010 (2017)"},{"key":"13_CR40","doi-asserted-by":"crossref","unstructured":"Wang, J., Yang, Y., Mao, J., Huang, Z., Huang, C., Xu, W.: CNN-RNN: a unified framework for multi-label image classification. In: CVPR, pp. 2285\u20132294 (2016)","DOI":"10.1109\/CVPR.2016.251"},{"key":"13_CR41","doi-asserted-by":"crossref","unstructured":"Wang, Z., Chen, T., Li, G., Xu, R., Lin, L.: Multi-label image recognition by recurrently discovering attentional regions. In: ICCV, pp. 464\u2013472 (2017)","DOI":"10.1109\/ICCV.2017.58"},{"key":"13_CR42","unstructured":"Wei, Y., et al.: CNN: single-label to multi-label. arXiv preprint arXiv:1406.5726, pp. 1\u201314 (2014)"},{"key":"13_CR43","doi-asserted-by":"crossref","unstructured":"Wu, H., et al.: CVT: introducing convolutions to vision transformers. In: ICCV, pp. 22\u201331 (2021)","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"13_CR44","first-page":"1696","volume":"23","author":"J Xu","year":"2020","unstructured":"Xu, J., Tian, H., Wang, Z., Wang, Y., Kang, W., Chen, F.: Joint input and output space learning for multi-label image classification. IEEE TMM 23, 1696\u20131707 (2020)","journal-title":"IEEE TMM"},{"key":"13_CR45","first-page":"2839","volume":"31","author":"X Yang","year":"2022","unstructured":"Yang, X., Wang, H., Xie, D., Deng, C., Tao, D.: Object-agnostic transformers for video referring segmentation. IEEE TIP 31, 2839\u20132849 (2022)","journal-title":"IEEE TIP"},{"key":"13_CR46","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"818","DOI":"10.1007\/978-3-319-10590-1_53","volume-title":"Computer Vision \u2013 ECCV 2014","author":"MD Zeiler","year":"2014","unstructured":"Zeiler, M.D., Fergus, R.: Visualizing and understanding convolutional networks. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8689, pp. 818\u2013833. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10590-1_53"},{"key":"13_CR47","doi-asserted-by":"crossref","unstructured":"Zhao, J., Yan, K., Zhao, Y., Guo, X., Huang, F., Li, J.: Transformer-based dual relation graph for multi-label image recognition. In: ICCV, pp. 163\u2013172 (2021)","DOI":"10.1109\/ICCV48922.2021.00023"},{"key":"13_CR48","doi-asserted-by":"crossref","unstructured":"Zhu, F., Li, H., Ouyang, W., Yu, N., Wang, X.: Learning spatial regularization with image-level supervisions for multi-label image classification. In: CVPR, pp. 5513\u20135522 (2017)","DOI":"10.1109\/CVPR.2017.219"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73414-4_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T06:48:53Z","timestamp":1732949333000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73414-4_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,25]]},"ISBN":["9783031734137","9783031734144"],"references-count":48,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73414-4_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,25]]},"assertion":[{"value":"25 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}