Skip to content

Commit c271d8f

Browse files

File tree

1 file changed: +14 additions, -10 deletions

1 file changed: +14 additions, -10 deletions

label_studio/data_import/models.py

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -77,11 +77,11 @@ def content(self):
7777
def _detect_csv_separator(self):
7878
"""
7979
Detect the CSV separator by analyzing the first line of the file.
80-
80+
8181
This method implements a reliable heuristic:
8282
1. If semicolons are more frequent than commas in the first line, use semicolon
8383
2. Otherwise, default to comma
84-
84+
8585
Returns:
8686
str: The detected separator (',' or ';')
8787
"""
@@ -91,30 +91,34 @@ def _detect_csv_separator(self):
9191
first_line = f.readline()
9292
if isinstance(first_line, bytes):
9393
first_line = first_line.decode('utf-8')
94-
94+
9595
# Count potential separators
9696
comma_count = first_line.count(',')
9797
semicolon_count = first_line.count(';')
98-
98+
9999
# Use semicolon if it's clearly indicated by higher frequency
100100
if semicolon_count > comma_count:
101-
logger.debug(f'Detected semicolon separator (found {semicolon_count} semicolons vs {comma_count} commas)')
101+
logger.debug(
102+
f'Detected semicolon separator (found {semicolon_count} semicolons vs {comma_count} commas)'
103+
)
102104
return ';'
103105
else:
104-
logger.debug(f'Using default comma separator (found {comma_count} commas vs {semicolon_count} semicolons)')
106+
logger.debug(
107+
f'Using default comma separator (found {comma_count} commas vs {semicolon_count} semicolons)'
108+
)
105109
return ','
106110
except Exception as e:
107111
logger.warning(f'Failed to detect CSV separator, defaulting to comma: {e}')
108112
return ','
109-
113+
110114
def read_tasks_list_from_csv(self):
111115
"""
112116
Read tasks from a CSV file with automatic separator detection.
113-
117+
114118
The separator is automatically detected by analyzing the first line:
115119
- If semicolons are clearly indicated (more frequent than commas), use semicolon
116120
- Otherwise, use the default comma separator
117-
121+
118122
Returns:
119123
list: List of tasks in the format [{'data': {...}}, ...]
120124
"""
@@ -127,7 +131,7 @@ def read_tasks_list_from_csv(self):
127131
def read_tasks_list_from_tsv(self):
128132
"""
129133
Read tasks from a TSV (tab-separated values) file.
130-
134+
131135
Returns:
132136
list: List of tasks in the format [{'data': {...}}, ...]
133137
"""

0 commit comments

Comments (0)