34 #ifndef QORE_QORE_STRING_PRIVATE_H
35 #define QORE_QORE_STRING_PRIVATE_H
// NOTE(review): presumably scratch-buffer sizes for rendering numbers as text;
// none of the three is referenced in the visible part of this listing -- confirm
39 #define MAX_INT_STRING_LEN 48
40 #define MAX_BIGINT_STRING_LEN 48
41 #define MAX_FLOAT_STRING_LEN 48
// 64 bytes: lower bound on the extra space check_char() adds when it resizes
// the buffer, and the value concat(char) assigns to "allocated" before its malloc()
42 #define STR_CLASS_BLOCK (0x10 * 4)
// 48 bytes of slack added on top of the required length when the buffer is sized
// in the copy constructor, concat(const qore_string_private*) and vsprintf()
43 #define STR_CLASS_EXTRA (0x10 * 3)
// vsprintf() enlarges the buffer when fewer than this many bytes would remain
// after setting aside room for the format string itself
45 #define MIN_SPRINTF_BUFSIZE 64
// NOTE(review): not referenced in the visible part of this listing; meaning
// cannot be determined from here -- confirm against its users
49 #define QUS_FRAGMENT 2
51 typedef std::vector<int> intvec_t;
53 hashdecl qore_string_private {
62 DLLLOCAL qore_string_private() {
65 DLLLOCAL qore_string_private(
const qore_string_private &p) {
66 allocated = p.len + STR_CLASS_EXTRA;
67 allocated = (allocated / 0x10 + 1) * 0x10;
68 buf = (
char*)malloc(
sizeof(
char) * allocated);
71 memcpy(buf, p.buf, len);
73 encoding = p.getEncoding();
76 DLLLOCAL ~qore_string_private() {
81 DLLLOCAL
void check_char(
size_t i) {
84 allocated = i + (d < STR_CLASS_BLOCK ? STR_CLASS_BLOCK : d);
85 allocated = (allocated / 0x10 + 1) * 0x10;
86 buf = (
char*)realloc(buf, allocated *
sizeof(
char));
92 offset = len + offset;
93 return offset < 0 ? 0 : offset;
96 return ((
size_t)offset > len) ? len : offset;
100 n_offset = check_offset(offset);
103 num = len + num - n_offset;
122 if (!(p = strchr(buf + pos, c)))
136 const char* p = buf + pos;
154 if (!(p = strstr(buf + pos, str)))
168 const char* p = buf + pos;
170 for (
const char* t = str; *t; ++t) {
179 DLLLOCAL
static qore_offset_t index_simple(
const char* haystack,
size_t hlen,
const char* needle,
size_t nlen,
181 const char* start = haystack + pos;
182 void* ptr =
q_memmem(start, hlen - pos, needle, nlen);
186 return reinterpret_cast<const char*
>(ptr) - start + pos;
196 if (!getEncoding()->isMultiByte()) {
206 return index_simple(buf, len, needle->c_str(), needle->size(), pos);
210 if (findByteOffset(pos, xsink))
217 qore_offset_t ind = index_simple(buf + pos, len - pos, needle->c_str(), needle->size());
219 ind = getEncoding()->getCharPos(buf, buf + pos + ind, xsink);
228 if (needle.
strlen() + pos > len)
231 return bindex(needle.
c_str(), pos, needle.
size());
235 if (needle.size() + pos > len)
238 return bindex(needle.c_str(), pos, needle.size());
252 nsize = strlen(needle);
254 return index_simple(buf, len, needle, nsize, pos);
259 DLLLOCAL
static qore_offset_t rindex_simple(
const char* haystack,
size_t hlen,
const char* needle,
271 void* ptr =
q_memrmem(haystack, pos + 1, needle, nlen);
275 return static_cast<qore_offset_t>(
reinterpret_cast<const char*
>(ptr) -
reinterpret_cast<const char*
>(haystack));
281 assert(getEncoding()->isMultiByte());
287 size_t clen = getEncoding()->getLength(buf + start, buf + len, xsink);
293 pos = getEncoding()->getByteLen(buf + start, buf + len, pos, xsink);
294 return *xsink ? -1 : 0;
303 if (!getEncoding()->isMultiByte()) {
310 return rindex_simple(buf, len, needle->c_str(), needle->size(), pos);
314 if (findByteOffset(pos, xsink))
320 qore_offset_t ind = rindex_simple(buf, len, needle->c_str(), needle->size(), pos);
323 if (ind && ind != -1) {
324 ind = getEncoding()->getCharPos(buf, buf + ind, xsink);
337 return brindex(needle.c_str(), needle.size(), pos);
349 if (pos == -1 && !len && !needle_len) {
355 if (needle_len + (len - pos) > len)
358 return rindex_simple(buf, len, needle, needle_len, pos);
// Prefix test: compares up to ssize bytes of str against the beginning of buf
// with strncmp() and returns true when they compare equal (strncmp() == 0).
// strncmp() stops at the first NUL in either argument, so the comparison does
// not run past the terminator of a NUL-terminated buf.
// The closing brace of this function is not shown in this listing.
361 DLLLOCAL
bool startsWith(
const char* str,
size_t ssize)
const {
362 return !strncmp(str, buf, ssize);
365 DLLLOCAL
bool endsWith(
const char* str,
size_t ssize)
const {
369 return strncmp(str, buf + len - ssize, ssize);
// Walks the len bytes of buf and tests each one against the printable ASCII
// range 32..126. The action taken on a failing byte and the final return
// value are on lines not shown in this listing.
372 DLLLOCAL
bool isDataPrintableAscii()
const {
373 for (
size_t i = 0; i < len; ++i) {
// bytes >= 128 are also rejected: they are negative (hence < 32) where char is
// signed and > 126 where char is unsigned
374 if (buf[i] < 32 || buf[i] > 126)
// Walks the len bytes of buf and tests whether any byte has a value above 127,
// i.e. lies outside the 7-bit ASCII range. The action taken on such a byte and
// the final return value are on lines not shown in this listing.
380 DLLLOCAL
bool isDataAscii()
const {
381 for (
size_t i = 0; i < len; ++i) {
// the cast makes the comparison independent of whether plain char is signed
382 if ((
unsigned char)(buf[i]) > 127)
// Appends plen bytes starting at p to the string data.
// Lines between the signature and the check_char() call, and everything after
// the memcpy(), are not shown in this listing.
// NOTE(review): presumably len is advanced and the terminator rewritten on the
// hidden lines -- confirm against the full source.
388 DLLLOCAL
void concat_intern(
const char* p,
size_t plen) {
// ask check_char() (the routine that recomputes "allocated" and reallocs buf)
// for room for the combined length before writing
391 check_char(len + plen);
// copy the new bytes directly behind the current data
392 memcpy(buf + len, p, plen);
397 DLLLOCAL
void concat_simple(
const qore_string_private& str,
qore_offset_t pos) {
406 concat_intern(str.buf + pos, str.len - pos);
410 assert(str.getEncoding() == getEncoding());
412 if (!getEncoding()->isMultiByte()) {
413 concat_simple(str, pos);
419 if (str.findByteOffset(pos, xsink))
427 concat_intern(str.buf + pos, str.len - pos);
440 plen = str.len + plen;
446 concat_intern(str.buf + pos, plen);
450 assert(str.getEncoding() == getEncoding());
453 if (!getEncoding()->isMultiByte()) {
454 concat_simple(str, pos);
460 if (str.findByteOffset(pos, xsink))
469 if (str.findByteOffset(plen, xsink, pos))
476 concat_intern(str.buf + pos, plen);
484 rc = getEncoding()->getByteLen(buf, buf + len, i, xsink);
492 DLLLOCAL
void concat(
char c) {
500 allocated = STR_CLASS_BLOCK;
502 buf = (
char*)malloc(
sizeof(
char) * allocated);
507 DLLLOCAL
void concat(
const qore_string_private* str) {
508 assert(!str || (str->encoding == encoding) || !str->encoding);
511 if (str && str->len) {
513 check_char(str->len + len + STR_CLASS_EXTRA);
515 memcpy(buf + len, str->buf, str->len);
521 DLLLOCAL
void concat(
const char *str) {
530 buf[len++] = str[i++];
542 DLLLOCAL
int vsprintf(
const char *fmt, va_list args) {
543 size_t fmtlen = ::strlen(fmt);
545 if ((allocated - len - fmtlen) < MIN_SPRINTF_BUFSIZE) {
546 allocated += fmtlen + MIN_SPRINTF_BUFSIZE;
547 allocated = (allocated / 0x10 + 1) * 0x10;
549 buf = (
char*)realloc(buf, allocated *
sizeof(
char));
555 int i = ::vsnprintf(buf + len, free, fmt, args);
563 allocated += STR_CLASS_EXTRA;
564 allocated = (allocated / 0x10 + 1) * 0x10;
565 buf = (
char*)realloc(buf,
sizeof(
char) * allocated);
574 allocated = len + i + STR_CLASS_EXTRA;
575 allocated = (allocated / 0x10 + 1) * 0x10;
576 buf = (
char*)realloc(buf,
sizeof(
char) * allocated);
586 DLLLOCAL
int sprintf(
const char *fmt, ...) {
590 int rc = vsprintf(fmt, args);
598 DLLLOCAL
void concatUTF8FromUnicode(
unsigned code);
600 DLLLOCAL
int concatUnicode(
unsigned code,
ExceptionSink *xsink) {
603 concatUTF8FromUnicode(code);
608 tmp.concatUTF8FromUnicode(code);
609 TempString ns(tmp.convertEncoding(getEncoding(), xsink));
616 DLLLOCAL
void setRegexBaseOpts(QoreRegexBase& re,
int opts);
618 DLLLOCAL
void setRegexOpts(QoreRegexSubst& re,
int opts);
620 DLLLOCAL
void splice_simple(
size_t offset,
size_t length,
QoreString* extract =
nullptr);
621 DLLLOCAL
void splice_simple(
size_t offset,
size_t length,
const char* str,
size_t str_len,
634 DLLLOCAL
int trimLeading(
ExceptionSink* xsink,
const intvec_t& vec);
635 DLLLOCAL
int trimLeading(
ExceptionSink* xsink,
const qore_string_private* chars);
636 DLLLOCAL
int trimTrailing(
ExceptionSink* xsink,
const intvec_t& vec);
637 DLLLOCAL
int trimTrailing(
ExceptionSink* xsink,
const qore_string_private* chars);
639 DLLLOCAL
void terminate(
size_t size);
641 DLLLOCAL
int concatUnicode(
unsigned code);
643 DLLLOCAL
int concatDecodeUriIntern(
ExceptionSink* xsink,
const qore_string_private& str,
644 bool detect_query =
false);
646 DLLLOCAL
int concatEncodeUriRequest(
ExceptionSink* xsink,
const qore_string_private& str);
648 DLLLOCAL
unsigned int getUnicodePointFromBytePos(
size_t offset,
unsigned& len,
ExceptionSink* xsink)
const;
653 DLLLOCAL
int getUnicodeCharArray(intvec_t& vec,
ExceptionSink* xsink)
const {
657 int c = getUnicodePointFromBytePos((
qore_offset_t)j, clen, xsink);
// Grows the buffer so that it can hold at least requested_size bytes.
// The statement executed when the buffer is already large enough, the handling
// of a failed realloc() and the return statements are not shown in this listing.
666 DLLLOCAL
int allocate(
unsigned requested_size) {
// nothing to enlarge when the current capacity already covers the request
667 if ((
unsigned)allocated >= requested_size)
// round up to the next multiple of 16 that is strictly greater than the
// request (an exact multiple such as 16 becomes 32)
669 requested_size = (requested_size / 0x10 + 1) * 0x10;
// the result goes into a temporary first, so buf itself is not overwritten
// by the realloc() return value on this line
670 char* aux = (
char*)realloc(buf, requested_size *
sizeof(
char));
// record the new capacity
677 allocated = requested_size;
// Linear search: compares c against every element of vec in index order.
// The statements executed on a match and after the loop are not shown in this
// listing (NOTE(review): presumably "return true" / "return false" -- confirm).
687 DLLLOCAL
static bool inVector(
int c,
const intvec_t& vec) {
688 for (
unsigned j = 0; j < vec.size(); ++j) {
689 if ((
int)vec[j] == c)
695 DLLLOCAL
static qore_string_private* get(
QoreString& str) {
// Decodes a percent-escape of the form "%XY" located at p.
// When p points at '%' followed by two hexadecimal digits, the two digits are
// parsed as a base-16 number and returned (0..255).
// p is taken by reference; one line inside the branch and the return for the
// non-matching case are not shown in this listing.
// NOTE(review): presumably the hidden line advances p past the escape -- confirm.
699 DLLLOCAL
static int getHex(
const char*& p) {
// && short-circuits, so p[2] is only read once p[1] is known to be a hex digit
// (and therefore not the terminating NUL)
700 if (*p ==
'%' && isxdigit(*(p + 1)) && isxdigit(*(p + 2))) {
// copy the two digits into a NUL-terminated scratch buffer for strtol()
701 char x[3] = { *(p + 1), *(p + 2),
'\0' };
703 return strtol(x, 0, 16);
708 DLLLOCAL
static int convert_encoding_intern(
const char* src,
size_t src_len,
const QoreEncoding* from,
DLLEXPORT const QoreEncoding * QCS_UTF8
UTF-8 multi-byte encoding (only UTF-8 and UTF-16 are multi-byte encodings)
Definition: QoreEncoding.h:247
DLLEXPORT const QoreEncoding * QCS_USASCII
ascii encoding
Definition: QoreEncoding.h:246
DLLEXPORT void * q_memmem(const void *big, size_t big_len, const void *little, size_t little_len)
finds a memory sequence in a larger memory sequence
DLLEXPORT void * q_memrmem(const void *big, size_t big_len, const void *little, size_t little_len)
finds a memory sequence in a larger memory sequence searching from the end of the sequence
container for holding Qore-language exception information and also for registering a "thread_exit" ca...
Definition: ExceptionSink.h:48
defines string encoding functions in Qore
Definition: QoreEncoding.h:83
Qore's string type supported by the QoreEncoding class.
Definition: QoreString.h:93
DLLEXPORT size_t strlen() const
returns the number of bytes in the string (not including the null terminator)
DLLEXPORT size_t size() const
returns the number of bytes in the string (not including the null terminator)
hashdecl qore_string_private * priv
the private implementation of QoreString
Definition: QoreString.h:1006
DLLEXPORT const char * c_str() const
returns the string's buffer; this data should not be changed
DLLEXPORT const char * getBuffer() const
returns the string's buffer; this data should not be changed
use this class to manage strings where the character encoding must be specified and may be different ...
Definition: QoreString.h:1104
class used to hold a possibly temporary QoreString pointer, stack only, cannot be dynamically allocat...
Definition: QoreString.h:1045
intptr_t qore_offset_t
used for offsets that could be negative
Definition: common.h:76