Skip to content

Commit 27c6579

Browse files
committed
Improve handling of BOMs in table cells
Detect some Unicode BOMs and always treat data starting with a BOM as text. We might need to fine-tune this later but it should be an improvement already. In the Edit Dialog remove the BOM from the text editor but keep it in the hex editor. Also add it back to the text when saving changes in text mode. This way the BOM is out of the way for text edits but is not lost either when editing a cell.
1 parent 8f03124 commit 27c6579

File tree

4 files changed

+59
-7
lines changed

4 files changed

+59
-7
lines changed

src/Data.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44

55
bool isTextOnly(QByteArray data, const QString& encoding, bool quickTest)
66
{
7+
// If the data starts with a Unicode BOM, we always assume it is text
8+
if(startsWithBom(data))
9+
return true;
10+
711
// Truncate to the first couple of bytes for quick testing
812
if(quickTest)
913
data = data.left(512);
@@ -15,3 +19,35 @@ bool isTextOnly(QByteArray data, const QString& encoding, bool quickTest)
1519
// Perform check
1620
return QString(data).toUtf8() == data;
1721
}
22+
23+
bool startsWithBom(const QByteArray& data)
24+
{
25+
// Note that these aren't all possible BOMs. But they are probably the most common ones.
26+
27+
if(data.startsWith("\xEF\xBB\xBF") ||
28+
data.startsWith("\xFE\xFF") || data.startsWith("\xFF\xFE") ||
29+
data.startsWith("\x00\x00\xFE\xFF") || data.startsWith("\xFF\xFE\x00\x00"))
30+
return true;
31+
else
32+
return false;
33+
}
34+
35+
// Checks whether the data starts with a recognised Unicode BOM (UTF-8, UTF-16 BE/LE or
// UTF-32 BE/LE). If so, the BOM is removed from the byte array and returned to the caller.
// If the data does not start with a BOM, an empty byte array is returned and the data is left
// unmodified.
QByteArray removeBom(QByteArray& data)
{
    // The BOMs must be compared with an explicit length. QByteArray::startsWith(const char*)
    // treats its argument as a NUL-terminated string, so a literal beginning with "\x00" would
    // be an empty string and match any data, which would strip four bytes from BOM-less data.
    // fromRawData() is safe here because string literals have static storage duration.
    int bomLength = 0;

    // The 4-byte BOMs are tested first: the UTF-32 LE BOM (FF FE 00 00) begins with the
    // UTF-16 LE BOM (FF FE), so testing the shorter one first would leave two stray zero bytes.
    if(data.startsWith(QByteArray::fromRawData("\x00\x00\xFE\xFF", 4)) ||
            data.startsWith(QByteArray::fromRawData("\xFF\xFE\x00\x00", 4)))
        bomLength = 4;
    else if(data.startsWith(QByteArray::fromRawData("\xEF\xBB\xBF", 3)))
        bomLength = 3;
    else if(data.startsWith(QByteArray::fromRawData("\xFE\xFF", 2)) ||
            data.startsWith(QByteArray::fromRawData("\xFF\xFE", 2)))
        bomLength = 2;

    // No BOM found: leave the data untouched
    if(bomLength == 0)
        return QByteArray();

    // Split the BOM off the front of the data and hand it back to the caller
    QByteArray bom = data.left(bomLength);
    data.remove(0, bomLength);
    return bom;
}

src/Data.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,12 @@
99
// text but makes it less reliable
1010
bool isTextOnly(QByteArray data, const QString& encoding = QString(), bool quickTest = false);
1111

12+
// This function returns true if the data in the data parameter starts with one of the recognised Unicode BOMs (UTF-8, UTF-16 BE/LE or UTF-32 BE/LE). Otherwise it returns false.
13+
bool startsWithBom(const QByteArray& data);
14+
15+
// This function checks if the data in the data parameter starts with a Unicode BOM. If so, the BOM is removed from the
16+
// byte array and passed back to the caller separately as the return value of the function. If the data does not start
17+
// with a BOM an empty byte array is returned and the original data is not modified.
18+
QByteArray removeBom(QByteArray& data);
19+
1220
#endif

src/EditDialog.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ void EditDialog::loadData(const QByteArray& data)
9393
QImage img;
9494
QString textData;
9595

96+
// Clear previously removed BOM
97+
removedBom.clear();
98+
9699
// Determine the data type, saving that info in the class variable
97100
dataType = checkDataType(data);
98101

@@ -150,25 +153,28 @@ void EditDialog::loadData(const QByteArray& data)
150153

151154
case Text:
152155
case JSON:
153-
154156
// Set enabled any of the text widgets
155157
ui->editorText->setEnabled(true);
156158
jsonEdit->setEnabled(true);
157159

158160
switch (editMode) {
159161
case TextEditor:
162+
{
160163
// The text widget buffer is now the main data source
161164
dataSource = TextBuffer;
162165

163-
// Load the text into the text editor
164-
textData = QString::fromUtf8(data.constData(), data.size());
166+
// Load the text into the text editor, remove BOM first if there is one
167+
QByteArray dataWithoutBom = data;
168+
removedBom = removeBom(dataWithoutBom);
169+
170+
textData = QString::fromUtf8(dataWithoutBom.constData(), dataWithoutBom.size());
165171
ui->editorText->setPlainText(textData);
166172

167173
// Select all of the text by default
168174
ui->editorText->selectAll();
169175

170176
break;
171-
177+
}
172178
case JsonEditor:
173179
// The JSON widget buffer is now the main data source
174180
dataSource = JsonBuffer;
@@ -373,6 +379,7 @@ void EditDialog::setNull()
373379
hexEdit->setData(QByteArray());
374380
jsonEdit->clear();
375381
dataType = Null;
382+
removedBom.clear();
376383

377384
// Check if in text editor mode
378385
int editMode = ui->editorStack->currentIndex();
@@ -425,10 +432,10 @@ void EditDialog::accept()
425432
} else {
426433
// It's not NULL, so proceed with normal text string checking
427434
QString oldData = currentIndex.data(Qt::EditRole).toString();
428-
QString newData = ui->editorText->toPlainText();
435+
QString newData = removedBom + ui->editorText->toPlainText();
429436
if (oldData != newData)
430437
// The data is different, so commit it back to the database
431-
emit recordTextUpdated(currentIndex, newData.toUtf8(), false);
438+
emit recordTextUpdated(currentIndex, removedBom + newData.toUtf8(), false);
432439
}
433440
break;
434441
case JsonBuffer:
@@ -509,7 +516,7 @@ void EditDialog::editModeChanged(int newMode)
509516

510517
case HexEditor: // Switching to the hex editor
511518
// Convert the text widget buffer for the hex widget
512-
hexEdit->setData(ui->editorText->toPlainText().toUtf8());
519+
hexEdit->setData(removedBom + ui->editorText->toPlainText().toUtf8());
513520

514521
// The hex widget buffer is now the main data source
515522
dataSource = HexBuffer;

src/EditDialog.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ private slots:
5757
bool textNullSet;
5858
bool isReadOnly;
5959
bool mustIndentAndCompact;
60+
QByteArray removedBom;
6061

6162
enum DataSources {
6263
TextBuffer,

0 commit comments

Comments
 (0)