34 #ifndef QORE_QORE_STRING_PRIVATE_H
35 #define QORE_QORE_STRING_PRIVATE_H
// NOTE(review): presumably upper bounds on the text length of a formatted
// int / bigint / float value; not referenced in the visible code - confirm at the use sites.
39 #define MAX_INT_STRING_LEN 48
40 #define MAX_BIGINT_STRING_LEN 48
41 #define MAX_FLOAT_STRING_LEN 48
// 64 bytes: minimum amount added by check_char() when the buffer has to grow, and the
// initial allocation size used by concat(char).
42 #define STR_CLASS_BLOCK (0x10 * 4)
// 48 bytes: slack added on top of the required size when a buffer is sized
// (copy constructor, concat(const qore_string_private*), vsprintf()).
43 #define STR_CLASS_EXTRA (0x10 * 3)
// vsprintf() enlarges the buffer first when the space left after accounting for the
// format string's length is below this many bytes.
45 #define MIN_SPRINTF_BUFSIZE 64
// NOTE(review): not referenced in the visible code; meaning to be confirmed at its use sites.
49 #define QUS_FRAGMENT 2
51 typedef std::vector<int> intvec_t;
53 hashdecl qore_string_private {
60 DLLLOCAL qore_string_private() {
63 DLLLOCAL qore_string_private(
const qore_string_private &p) {
64 allocated = p.len + STR_CLASS_EXTRA;
65 allocated = (allocated / 0x10 + 1) * 0x10;
66 buf = (
char*)malloc(
sizeof(
char) * allocated);
69 memcpy(buf, p.buf, len);
71 encoding = p.getEncoding();
74 DLLLOCAL ~qore_string_private() {
80 DLLLOCAL
void check_char(
size_t i) {
83 allocated = i + (d < STR_CLASS_BLOCK ? STR_CLASS_BLOCK : d);
84 allocated = (allocated / 0x10 + 1) * 0x10;
85 buf = (
char*)realloc(buf, allocated *
sizeof(
char));
91 offset = len + offset;
92 return offset < 0 ? 0 : offset;
95 return ((
size_t)offset > len) ? len : offset;
99 n_offset = check_offset(offset);
102 num = len + num - n_offset;
121 if (!(p = strchr(buf + pos, c)))
135 const char* p = buf + pos;
153 if (!(p = strstr(buf + pos, str)))
167 const char* p = buf + pos;
169 for (
const char* t = str; *t; ++t) {
178 DLLLOCAL
static qore_offset_t index_simple(
const char* haystack,
size_t hlen,
const char* needle,
size_t nlen,
180 const char* start = haystack + pos;
181 void* ptr =
q_memmem(start, hlen - pos, needle, nlen);
185 return reinterpret_cast<const char*
>(ptr) - start + pos;
195 if (!getEncoding()->isMultiByte()) {
205 return index_simple(buf, len, needle->c_str(), needle->size(), pos);
209 if (findByteOffset(pos, xsink))
216 qore_offset_t ind = index_simple(buf + pos, len - pos, needle->c_str(), needle->size());
218 ind = getEncoding()->getCharPos(buf, buf + pos + ind, xsink);
227 if (needle.
strlen() + pos > len)
230 return bindex(needle.
c_str(), pos, needle.
size());
234 if (needle.size() + pos > len)
237 return bindex(needle.c_str(), pos, needle.size());
251 nsize = strlen(needle);
253 return index_simple(buf, len, needle, nsize, pos);
258 DLLLOCAL
static qore_offset_t rindex_simple(
const char* haystack,
size_t hlen,
const char* needle,
270 void* ptr =
q_memrmem(haystack, pos + 1, needle, nlen);
274 return static_cast<qore_offset_t>(
reinterpret_cast<const char*
>(ptr) -
reinterpret_cast<const char*
>(haystack));
280 assert(getEncoding()->isMultiByte());
286 size_t clen = getEncoding()->getLength(buf + start, buf + len, xsink);
292 pos = getEncoding()->getByteLen(buf + start, buf + len, pos, xsink);
293 return *xsink ? -1 : 0;
302 if (!getEncoding()->isMultiByte()) {
309 return rindex_simple(buf, len, needle->c_str(), needle->size(), pos);
313 if (findByteOffset(pos, xsink))
319 qore_offset_t ind = rindex_simple(buf, len, needle->c_str(), needle->size(), pos);
322 if (ind && ind != -1) {
323 ind = getEncoding()->getCharPos(buf, buf + ind, xsink);
336 return brindex(needle.c_str(), needle.size(), pos);
348 if (pos == -1 && !len && !needle_len) {
354 if (needle_len + (len - pos) > len)
357 return rindex_simple(buf, len, needle, needle_len, pos);
360 DLLLOCAL
bool startsWith(
const char* str,
size_t ssize)
const {
361 return !strncmp(str, buf, ssize);
364 DLLLOCAL
bool endsWith(
const char* str,
size_t ssize)
const {
368 return strncmp(str, buf + len - ssize, ssize);
371 DLLLOCAL
bool isDataPrintableAscii()
const {
372 for (
size_t i = 0; i < len; ++i) {
373 if (buf[i] < 32 || buf[i] > 126)
379 DLLLOCAL
bool isDataAscii()
const {
380 for (
size_t i = 0; i < len; ++i) {
381 if ((
unsigned char)(buf[i]) > 127)
387 DLLLOCAL
void concat_intern(
const char* p,
size_t plen) {
390 check_char(len + plen);
391 memcpy(buf + len, p, plen);
396 DLLLOCAL
void concat_simple(
const qore_string_private& str,
qore_offset_t pos) {
405 concat_intern(str.buf + pos, str.len - pos);
409 assert(str.getEncoding() == getEncoding());
411 if (!getEncoding()->isMultiByte()) {
412 concat_simple(str, pos);
418 if (str.findByteOffset(pos, xsink))
426 concat_intern(str.buf + pos, str.len - pos);
439 plen = str.len + plen;
445 concat_intern(str.buf + pos, plen);
449 assert(str.getEncoding() == getEncoding());
452 if (!getEncoding()->isMultiByte()) {
453 concat_simple(str, pos);
459 if (str.findByteOffset(pos, xsink))
468 if (str.findByteOffset(plen, xsink, pos))
475 concat_intern(str.buf + pos, plen);
483 rc = getEncoding()->getByteLen(buf, buf + len, i, xsink);
491 DLLLOCAL
void concat(
char c) {
499 allocated = STR_CLASS_BLOCK;
501 buf = (
char*)malloc(
sizeof(
char) * allocated);
506 DLLLOCAL
void concat(
const qore_string_private* str) {
507 assert(!str || (str->encoding == encoding) || !str->encoding);
510 if (str && str->len) {
512 check_char(str->len + len + STR_CLASS_EXTRA);
514 memcpy(buf + len, str->buf, str->len);
520 DLLLOCAL
void concat(
const char *str) {
529 buf[len++] = str[i++];
541 DLLLOCAL
int vsprintf(
const char *fmt, va_list args) {
542 size_t fmtlen = ::strlen(fmt);
544 if ((allocated - len - fmtlen) < MIN_SPRINTF_BUFSIZE) {
545 allocated += fmtlen + MIN_SPRINTF_BUFSIZE;
546 allocated = (allocated / 0x10 + 1) * 0x10;
548 buf = (
char*)realloc(buf, allocated *
sizeof(
char));
554 int i = ::vsnprintf(buf + len, free, fmt, args);
562 allocated += STR_CLASS_EXTRA;
563 allocated = (allocated / 0x10 + 1) * 0x10;
564 buf = (
char*)realloc(buf,
sizeof(
char) * allocated);
573 allocated = len + i + STR_CLASS_EXTRA;
574 allocated = (allocated / 0x10 + 1) * 0x10;
575 buf = (
char*)realloc(buf,
sizeof(
char) * allocated);
585 DLLLOCAL
int sprintf(
const char *fmt, ...) {
589 int rc = vsprintf(fmt, args);
597 DLLLOCAL
void concatUTF8FromUnicode(
unsigned code);
599 DLLLOCAL
int concatUnicode(
unsigned code,
ExceptionSink *xsink) {
602 concatUTF8FromUnicode(code);
607 tmp.concatUTF8FromUnicode(code);
608 TempString ns(tmp.convertEncoding(getEncoding(), xsink));
615 DLLLOCAL
void setRegexBaseOpts(QoreRegexBase& re,
int opts);
617 DLLLOCAL
void setRegexOpts(QoreRegexSubst& re,
int opts);
619 DLLLOCAL
void splice_simple(
size_t offset,
size_t length,
QoreString* extract =
nullptr);
620 DLLLOCAL
void splice_simple(
size_t offset,
size_t length,
const char* str,
size_t str_len,
// Trim helpers, declared here and defined elsewhere. Each comes in two overloads: one
// receiving a vector of ints (intvec_t) and one receiving another qore_string_private.
// NOTE(review): presumably the second argument is the set of characters to strip from the
// start (trimLeading) or end (trimTrailing) of this string, with the int vector holding
// Unicode code points as produced by getUnicodeCharArray(); the meaning of the int return
// value and the use of xsink are not visible here - confirm against the definitions.
633 DLLLOCAL
int trimLeading(
ExceptionSink* xsink,
const intvec_t& vec);
634 DLLLOCAL
int trimLeading(
ExceptionSink* xsink,
const qore_string_private* chars);
635 DLLLOCAL
int trimTrailing(
ExceptionSink* xsink,
const intvec_t& vec);
636 DLLLOCAL
int trimTrailing(
ExceptionSink* xsink,
const qore_string_private* chars);
638 DLLLOCAL
void terminate(
size_t size);
640 DLLLOCAL
int concatUnicode(
unsigned code);
642 DLLLOCAL
int concatDecodeUriIntern(
ExceptionSink* xsink,
const qore_string_private& str,
643 bool detect_query =
false);
645 DLLLOCAL
int concatEncodeUriRequest(
ExceptionSink* xsink,
const qore_string_private& str);
647 DLLLOCAL
unsigned int getUnicodePointFromBytePos(
size_t offset,
unsigned& len,
ExceptionSink* xsink)
const;
652 DLLLOCAL
int getUnicodeCharArray(intvec_t& vec,
ExceptionSink* xsink)
const {
656 int c = getUnicodePointFromBytePos((
qore_offset_t)j, clen, xsink);
665 DLLLOCAL
int allocate(
unsigned requested_size) {
666 if ((
unsigned)allocated >= requested_size)
668 requested_size = (requested_size / 0x10 + 1) * 0x10;
669 char* aux = (
char*)realloc(buf, requested_size *
sizeof(
char));
676 allocated = requested_size;
686 DLLLOCAL
static bool inVector(
int c,
const intvec_t& vec) {
687 for (
unsigned j = 0; j < vec.size(); ++j) {
688 if ((
int)vec[j] == c)
694 DLLLOCAL
static qore_string_private* get(
QoreString& str) {
698 DLLLOCAL
static int getHex(
const char*& p) {
699 if (*p ==
'%' && isxdigit(*(p + 1)) && isxdigit(*(p + 2))) {
700 char x[3] = { *(p + 1), *(p + 2),
'\0' };
702 return strtol(x, 0, 16);
707 DLLLOCAL
static int convert_encoding_intern(
const char* src,
size_t src_len,
const QoreEncoding* from,
DLLEXPORT const QoreEncoding * QCS_UTF8
UTF-8 multi-byte encoding (only UTF-8 and UTF-16 are multi-byte encodings)
Definition: QoreEncoding.h:247
DLLEXPORT const QoreEncoding * QCS_USASCII
ascii encoding
Definition: QoreEncoding.h:246
DLLEXPORT void * q_memmem(const void *big, size_t big_len, const void *little, size_t little_len)
finds a memory sequence in a larger memory sequence
DLLEXPORT void * q_memrmem(const void *big, size_t big_len, const void *little, size_t little_len)
finds a memory sequence in a larger memory sequence searching from the end of the sequence
container for holding Qore-language exception information and also for registering a "thread_exit" callback
Definition: ExceptionSink.h:48
defines string encoding functions in Qore
Definition: QoreEncoding.h:83
Qore's string type supported by the QoreEncoding class.
Definition: QoreString.h:93
DLLEXPORT size_t strlen() const
returns the number of bytes in the string (not including the terminating null character)
DLLEXPORT size_t size() const
returns the number of bytes in the string (not including the terminating null character)
hashdecl qore_string_private * priv
the private implementation of QoreString
Definition: QoreString.h:1006
DLLEXPORT const char * c_str() const
returns the string's buffer; this data should not be changed
DLLEXPORT const char * getBuffer() const
returns the string's buffer; this data should not be changed
use this class to manage strings where the character encoding must be specified and may be different ...
Definition: QoreString.h:1104
class used to hold a possibly temporary QoreString pointer, stack only, cannot be dynamically allocated
Definition: QoreString.h:1045
intptr_t qore_offset_t
used for offsets that could be negative
Definition: common.h:76