{"id":"https:\/\/openalex.org\/W4414696231","doi":"https:\/\/doi.org\/10.48550\/arxiv.2507.09512","title":"Online Micro-gesture Recognition Using Data Augmentation and Spatial-Temporal Attention","display_name":"Online Micro-gesture Recognition Using Data Augmentation and Spatial-Temporal Attention","publication_year":2025,"publication_date":"2025-07-13","ids":{"openalex":"https:\/\/openalex.org\/W4414696231","doi":"https:\/\/doi.org\/10.48550\/arxiv.2507.09512"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2507.09512","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2507.09512","pdf_url":"https:\/\/arxiv.org\/pdf\/2507.09512","source":{"id":"https:\/\/openalex.org\/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https:\/\/arxiv.org\/pdf\/2507.09512","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https:\/\/openalex.org\/A5108144130","display_name":"Pengyu Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liu, Pengyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5100377578","display_name":"Kun Li","orcid":"https:\/\/orcid.org\/0000-0003-2326-0166"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Kun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5100455750","display_name":"Fei Wang","orcid":"https:\/\/orcid.org\/0000-0001-8432-0009"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Fei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5101143136","display_name":"Yanyan Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Yanyan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5101390823","display_name":"Junhui She","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"She, Junhui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https:\/\/openalex.org\/A5059530979","display_name":"Dan Guo","orcid":"https:\/\/orcid.org\/0000-0003-2594-254X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Dan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https:\/\/openalex.org\/A5108144130"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https:\/\/openalex.org\/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9986000061035156,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},"topics":[{"id":"https:\/\/openalex.org\/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9986000061035156,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T10812","display_name":"Human Pose and Action Recognition","score":0.9805999994277954,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T12290","display_name":"Human Motion and Animation","score":0.95169997215271,"subfield":{"id":"https:\/\/openalex.org\/subfields\/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https:\/\/openalex.org\/fields\/22","display_name":"Engineering"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https:\/\/openalex.org\/keywords\/task","display_name":"Task (project management)","score":0.6851999759674072},{"id":"https:\/\/openalex.org\/keywords\/action","display_name":"Action (physics)","score":0.5503000020980835},{"id":"https:\/\/openalex.org\/keywords\/action-recognition","display_name":"Action recognition","score":0.5228999853134155},{"id":"https:\/\/openalex.org\/keywords\/activity-recognition","display_name":"Activity recognition","score":0.4643999934196472},{"id":"https:\/\/openalex.org\/keywords\/task-analysis","display_name":"Task analysis","score":0.3727000057697296},{"id":"https:\/\/openalex.org\/keywords\/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3605000078678131}],"concepts":[{"id":"https:\/\/openalex.org\/C41008148","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q21198","display_name":"Computer science","level":0,"score":0.7095000147819519},{"id":"https:\/\/openalex.org\/C2780451532","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q759676","display_name":"Task (project management)","level":2,"score":0.6851999759674072},{"id":"https:\/\/openalex.org\/C154945302","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q11660","display_name":"Artificial intelligence","level":1,"score":0.633899986743927},{"id":"https:\/\/openalex.org\/C2780791683","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q846785","display_name":"Action (physics)","level":2,"score":0.5503000020980835},{"id":"https:\/\/openalex.org\/C2987834672","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q4677630","display_name":"Action recognition","level":3,"score":0.5228999853134155},{"id":"https:\/\/openalex.org\/C121687571","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q4677630","display_name":"Activity recognition","level":2,"score":0.4643999934196472},{"id":"https:\/\/openalex.org\/C119857082","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q2539","display_name":"Machine learning","level":1,"score":0.40369999408721924},{"id":"https:\/\/openalex.org\/C175154964","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q380077","display_name":"Task analysis","level":3,"score":0.3727000057697296},{"id":"https:\/\/openalex.org\/C153180895","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3605000078678131},{"id":"https:\/\/openalex.org\/C28490314","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q189436","display_name":"Speech recognition","level":1,"score":0.32850000262260437},{"id":"https:\/\/openalex.org\/C51632099","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q3985153","display_name":"Training set","level":2,"score":0.3084999918937683},{"id":"https:\/\/openalex.org\/C59656382","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q191536","display_name":"Conjunction (astronomy)","level":2,"score":0.3012999892234802},{"id":"https:\/\/openalex.org\/C26517878","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q228039","display_name":"Key (lock)","level":2,"score":0.2939999997615814},{"id":"https:\/\/openalex.org\/C89992363","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q5961558","display_name":"Track (disk drive)","level":2,"score":0.28049999475479126},{"id":"https:\/\/openalex.org\/C52622490","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1026626","display_name":"Feature extraction","level":2,"score":0.27160000801086426},{"id":"https:\/\/openalex.org\/C177454536","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q578290","display_name":"Emphasis (telecommunications)","level":2,"score":0.2646999955177307}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2507.09512","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2507.09512","pdf_url":"https:\/\/arxiv.org\/pdf\/2507.09512","source":{"id":"https:\/\/openalex.org\/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550\/arxiv.2507.09512","is_oa":true,"landing_page_url":"https:\/\/doi.org\/10.48550\/arxiv.2507.09512","pdf_url":null,"source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2507.09512","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2507.09512","pdf_url":"https:\/\/arxiv.org\/pdf\/2507.09512","source":{"id":"https:\/\/openalex.org\/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"this":[1,59],"paper,":[2],"we":[3,101],"introduce":[4],"the":[5,14,20,39,44,72,111,130,143],"latest":[6],"solution":[7,122],"developed":[8],"by":[9,133],"our":[10,138],"team,":[11],"HFUT-VUT,":[12],"for":[13],"Micro-gesture":[15,26,144],"Online":[16,27,145],"Recognition":[17,28,146],"track":[18],"of":[19,46,77,127],"IJCAI":[21],"2025":[22],"MiGA":[23],"Challenge.":[24],"The":[25],"task":[29,60],"is":[30],"a":[31,136],"highly":[32],"challenging":[33],"problem":[34],"that":[35],"aims":[36],"to":[37,54,109,114],"locate":[38],"temporal":[40,56],"positions":[41],"and":[42,69,74,106,116],"recognize":[43],"categories":[45,68],"multiple":[47],"micro-gesture":[48,67],"instances":[49],"in":[50,93,142],"untrimmed":[51],"videos.":[52],"Compared":[53],"traditional":[55],"action":[57],"detection,":[58],"places":[61],"greater":[62,88],"emphasis":[63],"on":[64],"distinguishing":[65],"between":[66],"precisely":[70],"identifying":[71],"start":[73],"end":[75],"times":[76],"each":[78],"instance.":[79],"Moreover,":[80],"micro-gestures":[81,118],"are":[82],"typically":[83],"spontaneous":[84],"human":[85,95],"actions,":[86],"with":[87],"differences":[89],"than":[90],"those":[91],"found":[92],"other":[94],"actions.":[96],"To":[97],"address":[98],"these":[99],"challenges,":[100],"propose":[102],"hand-crafted":[103],"data":[104],"augmentation":[105],"spatial-temporal":[107],"attention":[108],"enhance":[110],"model's":[112],"ability":[113],"classify":[115],"localize":[117],"more":[119],"accurately.":[120],"Our":[121],"achieved":[123],"an":[124],"F1":[125],"score":[126],"38.03,":[128],"outperforming":[129],"previous":[131],"state-of-the-art":[132],"37.9%.":[134],"As":[135],"result,":[137],"method":[139],"ranked":[140],"first":[141],"track.":[147]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}