Adapt Inkscape's PDF importer to the Poppler 26.4 string API.

Inspired by https://gitlab.com/inkscape/inkscape/-/work_items/6090

Code guarded by POPPLER_CHECK_VERSION(26,4,0) treats Object::getString() as
yielding a std::string instead of a GooString pointer.  This patch therefore:

  * gives PdfParser::doShowText() a std::string variant (taken by const
    reference so the text is not copied for every show-text operator);
  * lets InkFontDict::hashFontObject1() and the PDF debug dump read the
    std::string directly;
  * moves the UTF-16 -> UTF-8 conversion into getString(const std::string &)
    and makes the GooString* / std::unique_ptr<GooString> overloads forward
    to it.

Behaviour notes:

  * A string without a UTF-16 byte order mark is returned unchanged, as it
    was before this patch.  If g_convert() fails, a warning is logged and ""
    is returned; a std::string is never constructed from a null char*.
  * The fallback _POPPLER_HAS_UNICODE_BOM* macros (Poppler < 24.5) now accept
    a std::string as well, because their call site passes one.
    NOTE(review): assumes getString() is the only user of these macros --
    confirm with grep before merging.
  * NOTE(review): the indentation of context lines was reconstructed and
    whitespace-only changes were dropped; apply with `patch -p1 -l` and
    compare against the pristine tree.

--- inkscape-1.4.3_2025-12-25_0d15f75042/src/extension/internal/pdfinput/pdf-parser.cpp
+++ inkscape-1.4.3_2025-12-25_0d15f75042/src/extension/internal/pdfinput/pdf-parser.cpp
@@ -2268,7 +2268,9 @@
  * This adds a string from a PDF file that is contained in one command ('Tj', ''', '"')
  * or is one string in ShowSpacetext ('TJ').
  */
-#if POPPLER_CHECK_VERSION(0,64,0)
+#if POPPLER_CHECK_VERSION(26,4,0)
+void PdfParser::doShowText(const std::string &s) {
+#elif POPPLER_CHECK_VERSION(0,64,0)
 void PdfParser::doShowText(const GooString *s) {
 #else
 void PdfParser::doShowText(GooString *s) {
@@ -2276,7 +2278,11 @@
     auto font = state->getFont();
     _POPPLER_WMODE wMode = font->getWMode(); // Vertical/Horizontal/Invalid
 
+#if POPPLER_CHECK_VERSION(26,4,0)
+    builder->beginString(state, s.size());
+#else
     builder->beginString(state, get_goostring_length(*s));
+#endif
 
     // handle a Type 3 char
     if (font->getType() == fontType3) {
@@ -2286,8 +2292,13 @@
 
     double riseX, riseY;
     state->textTransformDelta(0, state->getRise(), &riseX, &riseY);
 
+#if POPPLER_CHECK_VERSION(26,4,0)
+    auto p = s.c_str(); // char* or const char*
+    int len = s.size();
+#else
     auto p = s->getCString(); // char* or const char*
     int len = get_goostring_length(*s);
+#endif
 
     while (len > 0) {
--- inkscape-1.4.3_2025-12-25_0d15f75042/src/extension/internal/pdfinput/pdf-parser.h
+++ inkscape-1.4.3_2025-12-25_0d15f75042/src/extension/internal/pdfinput/pdf-parser.h
@@ -283,12 +283,14 @@
     void opMoveShowText(Object args[], int numArgs);
     void opMoveSetShowText(Object args[], int numArgs);
     void opShowSpaceText(Object args[], int numArgs);
-#if POPPLER_CHECK_VERSION(0,64,0)
+#if POPPLER_CHECK_VERSION(26,4,0)
+    void doShowText(const std::string &s);
+#elif POPPLER_CHECK_VERSION(0,64,0)
     void doShowText(const GooString *s);
 #else
     void doShowText(GooString *s);
 #endif
 
 
     // XObject operators
     void opXObject(Object args[], int numArgs);
--- inkscape-1.4.3_2025-12-25_0d15f75042/src/extension/internal/pdfinput/poppler-transition-api.h
+++ inkscape-1.4.3_2025-12-25_0d15f75042/src/extension/internal/pdfinput/poppler-transition-api.h
@@ -72,8 +72,8 @@
 #endif
 
 #if POPPLER_CHECK_VERSION(24, 5, 0)
-#define _POPPLER_HAS_UNICODE_BOM(value) (hasUnicodeByteOrderMark(value->toStr()))
-#define _POPPLER_HAS_UNICODE_BOMLE(value) (hasUnicodeByteOrderMarkLE(value->toStr()))
+#define _POPPLER_HAS_UNICODE_BOM(value) (hasUnicodeByteOrderMark(value))
+#define _POPPLER_HAS_UNICODE_BOMLE(value) (hasUnicodeByteOrderMarkLE(value))
 #else
-#define _POPPLER_HAS_UNICODE_BOM(value) (value->hasUnicodeMarker())
-#define _POPPLER_HAS_UNICODE_BOMLE(value) (value->hasUnicodeMarkerLE())
+#define _POPPLER_HAS_UNICODE_BOM(value) ((value).size() >= 2 && (value)[0] == '\xfe' && (value)[1] == '\xff')
+#define _POPPLER_HAS_UNICODE_BOMLE(value) ((value).size() >= 2 && (value)[0] == '\xff' && (value)[1] == '\xfe')
--- inkscape-1.4.3_2025-12-25_0d15f75042/src/extension/internal/pdfinput/poppler-utils.cpp
+++ inkscape-1.4.3_2025-12-25_0d15f75042/src/extension/internal/pdfinput/poppler-utils.cpp
@@ -149,7 +149,11 @@
 
 void InkFontDict::hashFontObject1(const Object *obj, FNVHash *h)
 {
+#if POPPLER_CHECK_VERSION(26, 4, 0)
+    const std::string *s;
+#else
     const GooString *s;
+#endif
     const char *p;
     double r;
     int n, i;
@@ -171,8 +175,13 @@
             break;
         case objString:
             h->hash('s');
+#if POPPLER_CHECK_VERSION(26, 4, 0)
+            s = &obj->getString();
+            h->hash(s->c_str(), s->size());
+#else
             s = obj->getString();
             h->hash(s->c_str(), get_goostring_length(*s));
+#endif
             break;
         case objName:
             h->hash('n');
@@ -587,23 +596,47 @@
     if (!obj.isString()) {
         return "";
     }
     return getString(obj.getString());
 }
 
+std::string getString(const std::unique_ptr<GooString> &value)
+{
+    return getString(value.get());
+}
+
+std::string getString(const GooString *value)
+{
+    if (value) {
+        return getString(value->toStr());
+    }
+    return "";
+}
+
 /**
  * Convert PDF strings, which can be formatted as UTF8, UTF16BE or UTF16LE into
  * a predictable UTF8 string consistant with svg requirements.
  */
-std::string getString(const GooString *value)
+std::string getString(const std::string &value)
 {
+    char *str = nullptr;
+
     if (_POPPLER_HAS_UNICODE_BOM(value)) {
-        return g_convert(value->getCString () + 2, get_goostring_length(*value) - 2,
-                         "UTF-8", "UTF-16BE", NULL, NULL, NULL);
+        str = g_convert(value.c_str() + 2, value.size() - 2,
+                        "UTF-8", "UTF-16BE", NULL, NULL, NULL);
     } else if (_POPPLER_HAS_UNICODE_BOMLE(value)) {
-        return g_convert(value->getCString () + 2, get_goostring_length(*value) - 2,
-                         "UTF-8", "UTF-16LE", NULL, NULL, NULL);
+        str = g_convert(value.c_str() + 2, value.size() - 2,
+                        "UTF-8", "UTF-16LE", NULL, NULL, NULL);
+    } else {
+        // No UTF-16 byte order mark: nothing to convert, return the bytes unchanged.
+        return value;
+    }
+    if (str) {
+        std::string copy = str;
+        g_free(str);
+        return copy;
     }
-    return value->toStr();
+    g_warning("Couldn't parse text in PDF from UTF16.");
+    return "";
 }
 
 void pdf_debug_array(const Array *array, int depth, XRef *xref)
@@ -660,7 +693,11 @@
     } else if (obj->isArray()) {
         pdf_debug_array(obj->getArray(), depth, xref);
     } else if (obj->isString()) {
+#if POPPLER_CHECK_VERSION(26, 4, 0)
+        std::cout << " STR '" << obj->getString().c_str() << "'";
+#else
         std::cout << " STR '" << obj->getString()->getCString() << "'";
+#endif
     } else if (obj->isName()) {
         std::cout << " NAME '" << obj->getName() << "'";
     } else if (obj->isBool()) {
--- inkscape-1.4.3_2025-12-25_0d15f75042/src/extension/internal/pdfinput/poppler-utils.h
+++ inkscape-1.4.3_2025-12-25_0d15f75042/src/extension/internal/pdfinput/poppler-utils.h
@@ -84,4 +84,6 @@
 std::string getNameWithoutSubsetTag(std::string name);
 std::string getDictString(Dict *dict, const char *key);
+std::string getString(const std::string &value);
+std::string getString(const std::unique_ptr<GooString> &value);
 std::string getString(const GooString *value);
 std::string validateString(std::string const &in);