mirror of
https://github.com/zebrajr/postgres.git
synced 2025-12-07 12:20:31 +01:00
Solaris 11.4 has built-in functions named b64_encode and b64_decode. Rename ours to something else to avoid the conflict (fortunately, ours are static so the impact is limited). One could wish for less duplication of code in this area, but that would be a larger patch and not very suitable for back-patching. Since this is a portability fix, we want to put it into all supported branches. Report and initial patch by Rainer Orth, reviewed and adjusted a bit by Michael Paquier Discussion: https://postgr.es/m/ydd372wk28h.fsf@CeBiTec.Uni-Bielefeld.DE
563 lines
11 KiB
C
563 lines
11 KiB
C
/*-------------------------------------------------------------------------
 *
 * encode.c
 *	  Various data encoding/decoding things.
 *
 * Copyright (c) 2001-2018, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/utils/adt/encode.c
 *
 *-------------------------------------------------------------------------
 */
|
|
#include "postgres.h"
|
|
|
|
#include <ctype.h>
|
|
|
|
#include "utils/builtins.h"
|
|
|
|
|
|
/*
 * Table of callbacks describing one encoding.
 *
 * The *_len callbacks are used by the SQL-level functions to size the
 * output buffer before the matching encode/decode callback fills it in;
 * encode/decode return the number of bytes they wrote to "res".
 */
struct pg_encoding
{
	/* estimate of the space needed to encode dlen bytes of data */
	unsigned	(*encode_len) (const char *data, unsigned dlen);

	/* estimate of the space needed to decode dlen bytes of data */
	unsigned	(*decode_len) (const char *data, unsigned dlen);

	/* encode dlen bytes of data into res; returns bytes written */
	unsigned	(*encode) (const char *data, unsigned dlen, char *res);

	/* decode dlen bytes of data into res; returns bytes written */
	unsigned	(*decode) (const char *data, unsigned dlen, char *res);
};
|
|
|
|
/* Look up an encoding by name; returns NULL when the name is not known. */
static const struct pg_encoding *pg_find_encoding(const char *name);
|
|
|
|
/*
 * SQL functions.
 */
|
|
|
|
Datum
|
|
binary_encode(PG_FUNCTION_ARGS)
|
|
{
|
|
bytea *data = PG_GETARG_BYTEA_PP(0);
|
|
Datum name = PG_GETARG_DATUM(1);
|
|
text *result;
|
|
char *namebuf;
|
|
int datalen,
|
|
resultlen,
|
|
res;
|
|
const struct pg_encoding *enc;
|
|
|
|
datalen = VARSIZE_ANY_EXHDR(data);
|
|
|
|
namebuf = TextDatumGetCString(name);
|
|
|
|
enc = pg_find_encoding(namebuf);
|
|
if (enc == NULL)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("unrecognized encoding: \"%s\"", namebuf)));
|
|
|
|
resultlen = enc->encode_len(VARDATA_ANY(data), datalen);
|
|
result = palloc(VARHDRSZ + resultlen);
|
|
|
|
res = enc->encode(VARDATA_ANY(data), datalen, VARDATA(result));
|
|
|
|
/* Make this FATAL 'cause we've trodden on memory ... */
|
|
if (res > resultlen)
|
|
elog(FATAL, "overflow - encode estimate too small");
|
|
|
|
SET_VARSIZE(result, VARHDRSZ + res);
|
|
|
|
PG_RETURN_TEXT_P(result);
|
|
}
|
|
|
|
Datum
|
|
binary_decode(PG_FUNCTION_ARGS)
|
|
{
|
|
text *data = PG_GETARG_TEXT_PP(0);
|
|
Datum name = PG_GETARG_DATUM(1);
|
|
bytea *result;
|
|
char *namebuf;
|
|
int datalen,
|
|
resultlen,
|
|
res;
|
|
const struct pg_encoding *enc;
|
|
|
|
datalen = VARSIZE_ANY_EXHDR(data);
|
|
|
|
namebuf = TextDatumGetCString(name);
|
|
|
|
enc = pg_find_encoding(namebuf);
|
|
if (enc == NULL)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("unrecognized encoding: \"%s\"", namebuf)));
|
|
|
|
resultlen = enc->decode_len(VARDATA_ANY(data), datalen);
|
|
result = palloc(VARHDRSZ + resultlen);
|
|
|
|
res = enc->decode(VARDATA_ANY(data), datalen, VARDATA(result));
|
|
|
|
/* Make this FATAL 'cause we've trodden on memory ... */
|
|
if (res > resultlen)
|
|
elog(FATAL, "overflow - decode estimate too small");
|
|
|
|
SET_VARSIZE(result, VARHDRSZ + res);
|
|
|
|
PG_RETURN_BYTEA_P(result);
|
|
}
|
|
|
|
|
|
/*
 * HEX
 */
|
|
|
|
/* Output digits for hex encoding, indexed by nibble value (lower case). */
static const char hextbl[] = "0123456789" "abcdef";
|
|
|
|
/*
 * Value of each 7-bit character when read as a hex digit, or -1 if the
 * character is not a hex digit.  Both upper- and lower-case letters map
 * to 10..15.
 */
static const int8 hexlookup[128] = {
	/* 0x00 - 0x0f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 - 0x1f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 - 0x2f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x30 - 0x3f: '0'..'9' */
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
	/* 0x40 - 0x4f: 'A'..'F' */
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x50 - 0x5f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x60 - 0x6f: 'a'..'f' */
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x70 - 0x7f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
|
|
|
|
unsigned
|
|
hex_encode(const char *src, unsigned len, char *dst)
|
|
{
|
|
const char *end = src + len;
|
|
|
|
while (src < end)
|
|
{
|
|
*dst++ = hextbl[(*src >> 4) & 0xF];
|
|
*dst++ = hextbl[*src & 0xF];
|
|
src++;
|
|
}
|
|
return len * 2;
|
|
}
|
|
|
|
static inline char
|
|
get_hex(char c)
|
|
{
|
|
int res = -1;
|
|
|
|
if (c > 0 && c < 127)
|
|
res = hexlookup[(unsigned char) c];
|
|
|
|
if (res < 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("invalid hexadecimal digit: \"%c\"", c)));
|
|
|
|
return (char) res;
|
|
}
|
|
|
|
unsigned
|
|
hex_decode(const char *src, unsigned len, char *dst)
|
|
{
|
|
const char *s,
|
|
*srcend;
|
|
char v1,
|
|
v2,
|
|
*p;
|
|
|
|
srcend = src + len;
|
|
s = src;
|
|
p = dst;
|
|
while (s < srcend)
|
|
{
|
|
if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r')
|
|
{
|
|
s++;
|
|
continue;
|
|
}
|
|
v1 = get_hex(*s++) << 4;
|
|
if (s >= srcend)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("invalid hexadecimal data: odd number of digits")));
|
|
|
|
v2 = get_hex(*s++);
|
|
*p++ = v1 | v2;
|
|
}
|
|
|
|
return p - dst;
|
|
}
|
|
|
|
/*
 * hex_enc_len
 *
 * Output size for hex-encoding srclen bytes: two digits per byte.  The
 * data itself is not examined.
 */
static unsigned
hex_enc_len(const char *src, unsigned srclen)
{
	return srclen * 2;
}
|
|
|
|
/*
 * hex_dec_len
 *
 * Output size estimate for hex-decoding srclen characters: one byte per
 * two characters, rounded down.  The data itself is not examined.
 */
static unsigned
hex_dec_len(const char *src, unsigned srclen)
{
	return srclen / 2;
}
|
|
|
|
/*
 * BASE64
 */
|
|
|
|
/* Output alphabet for base64 encoding, indexed by 6-bit value. */
static const char _base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789"
"+/";
|
|
|
|
/*
 * 6-bit value of each 7-bit character in the base64 alphabet, or -1 if the
 * character is not part of the alphabet.
 */
static const int8 b64lookup[128] = {
	/* 0x00 - 0x0f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 - 0x1f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 - 0x2f: '+' and '/' */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
	/* 0x30 - 0x3f: '0'..'9' */
	52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
	/* 0x40 - 0x4f: 'A'..'O' */
	-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
	/* 0x50 - 0x5f: 'P'..'Z' */
	15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
	/* 0x60 - 0x6f: 'a'..'o' */
	-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
	/* 0x70 - 0x7f: 'p'..'z' */
	41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
};
|
|
|
|
static unsigned
|
|
pg_base64_encode(const char *src, unsigned len, char *dst)
|
|
{
|
|
char *p,
|
|
*lend = dst + 76;
|
|
const char *s,
|
|
*end = src + len;
|
|
int pos = 2;
|
|
uint32 buf = 0;
|
|
|
|
s = src;
|
|
p = dst;
|
|
|
|
while (s < end)
|
|
{
|
|
buf |= (unsigned char) *s << (pos << 3);
|
|
pos--;
|
|
s++;
|
|
|
|
/* write it out */
|
|
if (pos < 0)
|
|
{
|
|
*p++ = _base64[(buf >> 18) & 0x3f];
|
|
*p++ = _base64[(buf >> 12) & 0x3f];
|
|
*p++ = _base64[(buf >> 6) & 0x3f];
|
|
*p++ = _base64[buf & 0x3f];
|
|
|
|
pos = 2;
|
|
buf = 0;
|
|
}
|
|
if (p >= lend)
|
|
{
|
|
*p++ = '\n';
|
|
lend = p + 76;
|
|
}
|
|
}
|
|
if (pos != 2)
|
|
{
|
|
*p++ = _base64[(buf >> 18) & 0x3f];
|
|
*p++ = _base64[(buf >> 12) & 0x3f];
|
|
*p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';
|
|
*p++ = '=';
|
|
}
|
|
|
|
return p - dst;
|
|
}
|
|
|
|
static unsigned
|
|
pg_base64_decode(const char *src, unsigned len, char *dst)
|
|
{
|
|
const char *srcend = src + len,
|
|
*s = src;
|
|
char *p = dst;
|
|
char c;
|
|
int b = 0;
|
|
uint32 buf = 0;
|
|
int pos = 0,
|
|
end = 0;
|
|
|
|
while (s < srcend)
|
|
{
|
|
c = *s++;
|
|
|
|
if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
|
|
continue;
|
|
|
|
if (c == '=')
|
|
{
|
|
/* end sequence */
|
|
if (!end)
|
|
{
|
|
if (pos == 2)
|
|
end = 1;
|
|
else if (pos == 3)
|
|
end = 2;
|
|
else
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("unexpected \"=\" while decoding base64 sequence")));
|
|
}
|
|
b = 0;
|
|
}
|
|
else
|
|
{
|
|
b = -1;
|
|
if (c > 0 && c < 127)
|
|
b = b64lookup[(unsigned char) c];
|
|
if (b < 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("invalid symbol \"%c\" while decoding base64 sequence", (int) c)));
|
|
}
|
|
/* add it to buffer */
|
|
buf = (buf << 6) + b;
|
|
pos++;
|
|
if (pos == 4)
|
|
{
|
|
*p++ = (buf >> 16) & 255;
|
|
if (end == 0 || end > 1)
|
|
*p++ = (buf >> 8) & 255;
|
|
if (end == 0 || end > 2)
|
|
*p++ = buf & 255;
|
|
buf = 0;
|
|
pos = 0;
|
|
}
|
|
}
|
|
|
|
if (pos != 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("invalid base64 end sequence"),
|
|
errhint("Input data is missing padding, is truncated, or is otherwise corrupted.")));
|
|
|
|
return p - dst;
|
|
}
|
|
|
|
|
|
/*
 * pg_base64_enc_len
 *
 * Upper bound on the output size for base64-encoding srclen bytes.  The
 * data itself is not examined.
 *
 * The arithmetic is done in 64 bits: in plain unsigned arithmetic the
 * "(srclen + 2) * 4" term wraps around once srclen gets close to 2^30,
 * which would yield an estimate far smaller than what the encoder writes.
 * The final value is narrowed back to the "unsigned" return type, so it is
 * still only meaningful while the true result fits in that type.
 */
static unsigned
pg_base64_enc_len(const char *src, unsigned srclen)
{
	unsigned long long wide = srclen;

	/* 3 bytes will be converted to 4, linefeed after 76 chars */
	return (unsigned) ((wide + 2) * 4 / 3 + wide / (76 * 3 / 4));
}
|
|
|
|
/*
 * pg_base64_dec_len
 *
 * Upper bound on the output size for base64-decoding srclen characters:
 * three bytes per four characters, rounded down.  The data itself is not
 * examined.
 *
 * The multiplication is done in 64 bits so that it cannot wrap around for
 * large srclen; the final result never exceeds srclen and therefore always
 * fits in the return type.
 */
static unsigned
pg_base64_dec_len(const char *src, unsigned srclen)
{
	return (unsigned) (((unsigned long long) srclen * 3) >> 2);
}
|
|
|
|
/*
 * Escape
 * Minimally escape bytea to text.
 * De-escape text to bytea.
 *
 * We must escape zero bytes and high-bit-set bytes to avoid generating
 * text that might be invalid in the current encoding, or that might
 * change to something else if passed through an encoding conversion
 * (leading to failing to de-escape to the original bytea value).
 * Also of course backslash itself has to be escaped.
 *
 * De-escaping processes \\ and any \### octal
 */
|
|
|
|
/* Numeric value of the digit character c. */
#define VAL(c)		((c) - '0')
/* Digit character for the small non-negative integer v. */
#define DIG(v)		((v) + '0')
|
|
|
|
/*
 * esc_encode
 *
 * Writes the "escape" text form of the srclen bytes at src into dst:
 * zero bytes and bytes with the high bit set become a backslash followed
 * by three octal digits, a backslash becomes two backslashes, and every
 * other byte is copied unchanged.
 *
 * Returns the number of bytes written to dst.
 */
static unsigned
esc_encode(const char *src, unsigned srclen, char *dst)
{
	const char *in;
	const char *in_end = src + srclen;
	char	   *out = dst;

	for (in = src; in < in_end; in++)
	{
		unsigned char byte = (unsigned char) *in;

		if (byte == '\\')
		{
			*out++ = '\\';
			*out++ = '\\';
		}
		else if (byte == '\0' || IS_HIGHBIT_SET(byte))
		{
			*out++ = '\\';
			*out++ = DIG(byte >> 6);
			*out++ = DIG((byte >> 3) & 7);
			*out++ = DIG(byte & 7);
		}
		else
			*out++ = byte;
	}

	return out - dst;
}
|
|
|
|
static unsigned
|
|
esc_decode(const char *src, unsigned srclen, char *dst)
|
|
{
|
|
const char *end = src + srclen;
|
|
char *rp = dst;
|
|
int len = 0;
|
|
|
|
while (src < end)
|
|
{
|
|
if (src[0] != '\\')
|
|
*rp++ = *src++;
|
|
else if (src + 3 < end &&
|
|
(src[1] >= '0' && src[1] <= '3') &&
|
|
(src[2] >= '0' && src[2] <= '7') &&
|
|
(src[3] >= '0' && src[3] <= '7'))
|
|
{
|
|
int val;
|
|
|
|
val = VAL(src[1]);
|
|
val <<= 3;
|
|
val += VAL(src[2]);
|
|
val <<= 3;
|
|
*rp++ = val + VAL(src[3]);
|
|
src += 4;
|
|
}
|
|
else if (src + 1 < end &&
|
|
(src[1] == '\\'))
|
|
{
|
|
*rp++ = '\\';
|
|
src += 2;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* One backslash, not followed by ### valid octal. Should never
|
|
* get here, since esc_dec_len does same check.
|
|
*/
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
errmsg("invalid input syntax for type %s", "bytea")));
|
|
}
|
|
|
|
len++;
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
/*
 * esc_enc_len
 *
 * Computes how many bytes the "escape" text form of the srclen bytes at
 * src occupies: 4 for a zero byte or a byte with the high bit set, 2 for a
 * backslash, 1 for anything else.
 *
 * The running total is kept in an unsigned variable, matching the return
 * type.  It can reach four times srclen, and accumulating that in a signed
 * int would be undefined behavior on overflow.
 */
static unsigned
esc_enc_len(const char *src, unsigned srclen)
{
	const char *end = src + srclen;
	unsigned	len = 0;

	while (src < end)
	{
		if (*src == '\0' || IS_HIGHBIT_SET(*src))
			len += 4;
		else if (*src == '\\')
			len += 2;
		else
			len++;

		src++;
	}

	return len;
}
|
|
|
|
static unsigned
|
|
esc_dec_len(const char *src, unsigned srclen)
|
|
{
|
|
const char *end = src + srclen;
|
|
int len = 0;
|
|
|
|
while (src < end)
|
|
{
|
|
if (src[0] != '\\')
|
|
src++;
|
|
else if (src + 3 < end &&
|
|
(src[1] >= '0' && src[1] <= '3') &&
|
|
(src[2] >= '0' && src[2] <= '7') &&
|
|
(src[3] >= '0' && src[3] <= '7'))
|
|
{
|
|
/*
|
|
* backslash + valid octal
|
|
*/
|
|
src += 4;
|
|
}
|
|
else if (src + 1 < end &&
|
|
(src[1] == '\\'))
|
|
{
|
|
/*
|
|
* two backslashes = backslash
|
|
*/
|
|
src += 2;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* one backslash, not followed by ### valid octal
|
|
*/
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
errmsg("invalid input syntax for type %s", "bytea")));
|
|
}
|
|
|
|
len++;
|
|
}
|
|
return len;
|
|
}
|
|
|
|
/*
 * Common
 */
|
|
|
|
static const struct
|
|
{
|
|
const char *name;
|
|
struct pg_encoding enc;
|
|
} enclist[] =
|
|
|
|
{
|
|
{
|
|
"hex",
|
|
{
|
|
hex_enc_len, hex_dec_len, hex_encode, hex_decode
|
|
}
|
|
},
|
|
{
|
|
"base64",
|
|
{
|
|
pg_base64_enc_len, pg_base64_dec_len, pg_base64_encode, pg_base64_decode
|
|
}
|
|
},
|
|
{
|
|
"escape",
|
|
{
|
|
esc_enc_len, esc_dec_len, esc_encode, esc_decode
|
|
}
|
|
},
|
|
{
|
|
NULL,
|
|
{
|
|
NULL, NULL, NULL, NULL
|
|
}
|
|
}
|
|
};
|
|
|
|
static const struct pg_encoding *
|
|
pg_find_encoding(const char *name)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; enclist[i].name; i++)
|
|
if (pg_strcasecmp(enclist[i].name, name) == 0)
|
|
return &enclist[i].enc;
|
|
|
|
return NULL;
|
|
}
|