Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion contrib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ if (USE_INTERNAL_SSL_LIBRARY)
add_library(OpenSSL::SSL ALIAS ${OPENSSL_SSL_LIBRARY})
endif ()

if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY)
function(mysql_support)
set(CLIENT_PLUGIN_CACHING_SHA2_PASSWORD STATIC)
set(CLIENT_PLUGIN_SHA256_PASSWORD STATIC)
set(CLIENT_PLUGIN_REMOTE_IO OFF)
Expand All @@ -136,7 +136,15 @@ if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY)
if (GLIBC_COMPATIBILITY)
set(LIBM glibc-compatibility)
endif()
if (USE_INTERNAL_ZLIB_LIBRARY)
set(ZLIB_FOUND ON)
set(ZLIB_LIBRARY zlibstatic)
set(WITH_EXTERNAL_ZLIB ON)
endif()
add_subdirectory (mariadb-connector-c)
endfunction()
if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY)
mysql_support()
endif ()

if (USE_INTERNAL_RDKAFKA_LIBRARY)
Expand Down
146 changes: 146 additions & 0 deletions dbms/src/Functions/CRC.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#include <zlib.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringOrArrayToT.h>

namespace
{

// Lookup table for a byte-at-a-time CRC that consumes bits least-significant first.
//
// tab[b] is the value obtained by pushing the single byte `b` through eight
// shift-right / conditional-xor steps with `polynomial`. Because the register is
// shifted right, the polynomial has to be given in its bit-reversed form.
template <class T>
struct CRCBase
{
    T tab[256];

    // explicit: a bare integer must never silently convert into a whole table.
    explicit CRCBase(T polynomial)
    {
        for (size_t i = 0; i < 256; ++i)
        {
            // i is below 256, so this conversion is lossless for every T of at least 8 bits.
            T c = static_cast<T>(i);
            for (size_t j = 0; j < 8; ++j)
            {
                c = (c & 1) ? (polynomial ^ (c >> 1)) : (c >> 1);
            }
            tab[i] = c;
        }
    }
};

// Table-driven CRC of width sizeof(T) over a raw byte buffer.
//
// The running value starts at 0 and is returned as is (no final xor).
// `polynomial` is handed to CRCBase, which expects the bit-reversed form.
template <class T, T polynomial>
struct CRCImpl
{
    using ReturnType = T;

    // Returns the checksum of buf[0 .. size). An empty buffer yields 0.
    static T make_crc(const unsigned char * buf, size_t size)
    {
        // The table depends only on the template arguments; build it once on first use.
        static const CRCBase<ReturnType> base(polynomial);

        T crc = 0;
        // The index must be size_t, not T: with a 32-bit T an index of type T
        // wraps around before reaching a size of 2^32 or more, so the loop would
        // never terminate.
        for (size_t i = 0; i < size; ++i)
        {
            crc = base.tab[(crc ^ buf[i]) & 0xff] ^ (crc >> 8);
        }
        return crc;
    }
};

// CRC-64-ECMA polynomial constant passed to CRCImpl.
// Internal linkage comes from the enclosing anonymous namespace.
constexpr UInt64 CRC64_ECMA = 0xc96c5795d7870f42ULL;

// Implementation registered under the name "CRC64"; returns UInt64.
struct CRC64ECMAImpl : CRCImpl<UInt64, CRC64_ECMA>
{
    static constexpr auto name = "CRC64";
};

// CRC-32-IEEE 802.3 polynomial constant passed to CRCImpl.
// Internal linkage comes from the enclosing anonymous namespace.
constexpr UInt32 CRC32_IEEE = 0xedb88320;

// Implementation registered under the name "CRC32IEEE"; returns UInt32.
struct CRC32IEEEImpl : CRCImpl<UInt32, CRC32_IEEE>
{
    static constexpr auto name = "CRC32IEEE";
};

// Implementation registered under the name "CRC32"; the work is delegated to zlib.
struct CRC32ZLIBImpl
{
    using ReturnType = UInt32;
    static constexpr auto name = "CRC32";

    // Checksum of buf[0 .. size), computed by crc32_z starting from a running value of 0.
    static UInt32 make_crc(const unsigned char * buf, size_t size)
    {
        return crc32_z(0L, buf, size);
    }
};

} // anonymous namespace

namespace DB
{

namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}

// Adapts a CRC implementation (a type providing ReturnType, name and
// make_crc(const unsigned char *, size_t)) to the set of static entry points
// that FunctionStringOrArrayToT is instantiated with.
template <class Impl>
struct CRCFunctionWrapper
{
    // A checksum depends on the bytes of every individual row, so a fixed-width
    // string column must NOT be collapsed into a single constant; it has to go
    // through vector_fixed_to_vector instead.
    // NOTE(review): FunctionStringOrArrayToT is not visible from here; presumably
    // it picks vector_fixed_to_constant when this flag is true - confirm.
    static constexpr auto is_fixed_to_constant = false;
    using ReturnType = typename Impl::ReturnType;

    // Variable-length strings: row i occupies data[offsets[i - 1] .. offsets[i]).
    // The last byte of each row is left out of the checksum (presumably the
    // terminating zero stored by ColumnString - confirm).
    // res must already hold offsets.size() elements.
    static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<ReturnType> & res)
    {
        size_t size = offsets.size();

        ColumnString::Offset prev_offset = 0;
        for (size_t i = 0; i < size; ++i)
        {
            res[i] = do_crc(data, prev_offset, offsets[i] - prev_offset - 1);
            prev_offset = offsets[i];
        }
    }

    // Checksum of the first n bytes of data only. Kept so the wrapper still
    // offers the full set of entry points; with is_fixed_to_constant == false
    // it is not expected to be used.
    static void vector_fixed_to_constant(const ColumnString::Chars & data, size_t n, ReturnType & res) { res = do_crc(data, 0, n); }

    // Fixed-width strings: row i occupies data[i * n .. (i + 1) * n).
    // res must already hold data.size() / n elements.
    static void vector_fixed_to_vector(const ColumnString::Chars & data, size_t n, PaddedPODArray<ReturnType> & res)
    {
        size_t size = data.size() / n;

        for (size_t i = 0; i < size; ++i)
        {
            res[i] = do_crc(data, i * n, n);
        }
    }

    // Arrays are not supported: always throws ILLEGAL_TYPE_OF_ARGUMENT.
    [[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray<ReturnType> & /*res*/)
    {
        throw Exception("Cannot apply function " + std::string(Impl::name) + " to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

private:
    // Runs Impl::make_crc over buf[offset .. offset + size).
    static ReturnType do_crc(const ColumnString::Chars & buf, size_t offset, size_t size)
    {
        const unsigned char * p = reinterpret_cast<const unsigned char *>(&buf[0]) + offset;
        return Impl::make_crc(p, size);
    }
};

// Builds the SQL-level function type for a given CRC implementation.
template <typename Impl>
using FunctionCRC = FunctionStringOrArrayToT<CRCFunctionWrapper<Impl>, Impl, typename Impl::ReturnType>;

// CRC32 (the default variant): same polynomial as the IEEE variant, but with
// 0xffffffff as the initial value. Backed by zlib, which has an optimized version.
using FunctionCRC32ZLIB = FunctionCRC<CRC32ZLIBImpl>;

// CRC32IEEE: CRC-32-IEEE 802.3 polynomial.
using FunctionCRC32IEEE = FunctionCRC<CRC32IEEEImpl>;

// CRC64: CRC-64-ECMA polynomial.
using FunctionCRC64ECMA = FunctionCRC<CRC64ECMAImpl>;

// Registers one CRC function type in the factory under Function::name,
// with case-insensitive lookup.
template <typename Function>
void registerFunctionCRCImpl(FunctionFactory & factory)
{
    factory.template registerFunction<Function>(Function::name, FunctionFactory::CaseInsensitive);
}

// Registers every CRC variant of this file (CRC32, CRC32IEEE, CRC64), each under
// its own name and with case-insensitive lookup.
void registerFunctionCRC(FunctionFactory & factory)
{
    factory.registerFunction<FunctionCRC32ZLIB>(FunctionCRC32ZLIB::name, FunctionFactory::CaseInsensitive);
    factory.registerFunction<FunctionCRC32IEEE>(FunctionCRC32IEEE::name, FunctionFactory::CaseInsensitive);
    factory.registerFunction<FunctionCRC64ECMA>(FunctionCRC64ECMA::name, FunctionFactory::CaseInsensitive);
}

}
68 changes: 0 additions & 68 deletions dbms/src/Functions/CRC32.cpp

This file was deleted.

4 changes: 2 additions & 2 deletions dbms/src/Functions/registerFunctionsString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ void registerFunctionReverseUTF8(FunctionFactory &);
void registerFunctionsConcat(FunctionFactory &);
void registerFunctionFormat(FunctionFactory &);
void registerFunctionSubstring(FunctionFactory &);
void registerFunctionCRC32(FunctionFactory &);
void registerFunctionCRC(FunctionFactory &);
void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &);
void registerFunctionStartsWith(FunctionFactory &);
void registerFunctionEndsWith(FunctionFactory &);
Expand All @@ -47,7 +47,7 @@ void registerFunctionsString(FunctionFactory & factory)
registerFunctionLowerUTF8(factory);
registerFunctionUpperUTF8(factory);
registerFunctionReverse(factory);
registerFunctionCRC32(factory);
registerFunctionCRC(factory);
registerFunctionReverseUTF8(factory);
registerFunctionsConcat(factory);
registerFunctionFormat(factory);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,7 @@ qwerty string 55151997 2663297705
qqq aaa 3142898280 4027020077
zxcqwer 3358319860 0
aasq xxz 3369829874 4069886758
CRC32IEEE()
7332BC33
CRC64()
72D5B9EA0B70CE1E
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,8 @@ select CRC32(str1), CRC32(str2) from table1 order by CRC32(str1), CRC32(str2);
select str1, str2, CRC32(str1), CRC32(str2) from table1 order by CRC32(str1), CRC32(str2);

DROP TABLE table1;

SELECT 'CRC32IEEE()';
SELECT hex(CRC32IEEE('foo'));
SELECT 'CRC64()';
SELECT hex(CRC64('foo'));
15 changes: 14 additions & 1 deletion docs/en/query_language/functions/string_functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,20 @@ Returns a string that removes the whitespace characters on either side.

## CRC32(s)

Returns the CRC32 checksum of a string
Returns the CRC32 checksum of a string, using the CRC-32-IEEE 802.3 polynomial and initial value `0xffffffff` (zlib implementation).

The result type is UInt32.

## CRC32IEEE(s)

Returns the CRC32 checksum of a string, using the CRC-32-IEEE 802.3 polynomial and initial value `0` (unlike `CRC32`, which starts from `0xffffffff`).

The result type is UInt32.

## CRC64(s)

Returns the CRC64 checksum of a string, using CRC-64-ECMA polynomial.

The result type is UInt64.

[Original article](https://clickhouse.yandex/docs/en/query_language/functions/string_functions/) <!--hide-->
15 changes: 14 additions & 1 deletion docs/ru/query_language/functions/string_functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,20 @@ SELECT startsWith('Hello, world!', 'He');

## CRC32(s)

Возвращает чексумму CRC32 данной строки.
Возвращает чексумму CRC32 данной строки, используется CRC-32-IEEE 802.3 многочлен и начальное значение `0xffffffff` (т.к. используется реализация из zlib).

Тип результата - UInt32.

## CRC32IEEE(s)

Возвращает чексумму CRC32 данной строки, используется CRC-32-IEEE 802.3 многочлен.

Тип результата - UInt32.

## CRC64(s)

Возвращает чексумму CRC64 данной строки, используется CRC-64-ECMA многочлен.

Тип результата - UInt64.

[Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/string_functions/) <!--hide-->