Skip to content

Commit e2759b4

Browse files
committed
v5.3.6
1 parent 0e3e273 commit e2759b4

File tree

12 files changed

+163
-40
lines changed

12 files changed

+163
-40
lines changed

.Rbuildignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,4 @@ vignettes/.*\.png$
2828
^vignettes/bondargentina_1\.png$
2929
^vignettes/bondargentina_2\.png$
3030
^pypkg$
31+
^tesseract-training$

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ src/Makevars
1313
configure.log
1414
\.vscode
1515
README.html
16+
tesseract-training

NEWS.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
# 5.3.6
2+
3+
- CRAN fixes (Mac, Windows, clang19):
4+
- Creates a platform-specific `tesseract_config.h` during the configure step
5+
- Performs API detection only once during configuration
6+
- Properly defines the API compatibility macro
7+
- Ensures the compiler knows which API to use consistently
8+
19
# 5.3.3
210

311
- This is a fork of the original tesseract package made by Jeroen Ooms. The

configure

Lines changed: 126 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -14,39 +14,123 @@ PKG_BREW_NAME="tesseract leptonica"
1414
PKG_CFLAGS="-I/usr/include/leptonica"
1515
PKG_LIBS="-ltesseract"
1616

17-
# Use pkg-config if available
18-
pkg-config --version >/dev/null 2>&1
19-
if [ $? -eq 0 ]; then
20-
PKGCONFIG_CFLAGS=`pkg-config --cflags --silence-errors ${PKG_CONFIG_NAME}`
21-
PKGCONFIG_LIBS=`pkg-config --libs ${PKG_CONFIG_NAME}`
22-
fi
23-
24-
# Note that cflags may be empty in case of success
17+
# Check for custom locations
2518
if [ "$INCLUDE_DIR" ] || [ "$LIB_DIR" ]; then
2619
echo "Found INCLUDE_DIR and/or LIB_DIR!"
2720
PKG_CFLAGS="-I$INCLUDE_DIR $PKG_CFLAGS"
2821
PKG_LIBS="-L$LIB_DIR $PKG_LIBS"
29-
elif [ "$PKGCONFIG_CFLAGS" ] || [ "$PKGCONFIG_LIBS" ]; then
30-
echo "Found pkg-config cflags and libs!"
31-
PKG_CFLAGS="${PKGCONFIG_CFLAGS}"
32-
PKG_LIBS="${PKGCONFIG_LIBS}"
22+
23+
# Use pkg-config if available
3324
else
25+
pkg-config --version >/dev/null 2>&1
26+
if [ $? -eq 0 ]; then
27+
# Query flags without --static, i.e. for linking against the shared libraries
28+
PKGCONFIG_CFLAGS=`pkg-config --cflags --silence-errors ${PKG_CONFIG_NAME}`
29+
PKGCONFIG_LIBS=`pkg-config --libs --silence-errors ${PKG_CONFIG_NAME}`
30+
31+
if [ "$PKGCONFIG_CFLAGS" ] || [ "$PKGCONFIG_LIBS" ]; then
32+
echo "Found pkg-config cflags and libs!"
33+
PKG_CFLAGS="${PKGCONFIG_CFLAGS}"
34+
PKG_LIBS="${PKGCONFIG_LIBS}"
35+
fi
36+
fi
37+
38+
# If pkg-config failed, try OS-specific fallbacks
39+
if [ -z "$PKG_LIBS" ] || [ "$PKG_LIBS" = "-ltesseract" ]; then
40+
# For macOS, check common Homebrew and MacPorts locations
41+
if [ $(uname) = "Darwin" ]; then
42+
echo "Checking MacOS-specific paths..."
43+
44+
# Homebrew ARM64 location (Apple Silicon)
45+
if [ -d "/opt/homebrew/include/tesseract" ] && [ -d "/opt/homebrew/lib" ]; then
46+
echo "Found Tesseract in Homebrew (ARM64)"
47+
PKG_CFLAGS="-I/opt/homebrew/include -I/opt/homebrew/include/leptonica"
48+
PKG_LIBS="-L/opt/homebrew/lib -ltesseract -llept"
49+
# Homebrew Intel location
50+
elif [ -d "/usr/local/include/tesseract" ] && [ -d "/usr/local/lib" ]; then
51+
echo "Found Tesseract in Homebrew (Intel)"
52+
PKG_CFLAGS="-I/usr/local/include -I/usr/local/include/leptonica"
53+
PKG_LIBS="-L/usr/local/lib -ltesseract -llept"
54+
# MacPorts location
55+
elif [ -d "/opt/local/include/tesseract" ] && [ -d "/opt/local/lib" ]; then
56+
echo "Found Tesseract in MacPorts"
57+
PKG_CFLAGS="-I/opt/local/include -I/opt/local/include/leptonica"
58+
PKG_LIBS="-L/opt/local/lib -ltesseract -llept"
59+
fi
60+
fi
61+
fi
62+
fi
63+
64+
# Check if shared libraries are available
65+
TESSERACT_LIB_PATH=$(echo $PKG_LIBS | grep -o -- "-L[^ ]*" | sed 's/-L//')
66+
FOUND_SHARED_LIB=0
67+
if [ -n "$TESSERACT_LIB_PATH" ]; then
68+
if [ -f "${TESSERACT_LIB_PATH}/libtesseract.so" ] || [ -f "${TESSERACT_LIB_PATH}/libtesseract.dylib" ]; then
69+
echo "Found shared tesseract library in ${TESSERACT_LIB_PATH}"
70+
FOUND_SHARED_LIB=1
71+
72+
# Force use of .so or .dylib
73+
if [ $(uname) = "Darwin" ]; then
74+
PKG_LIBS=$(echo "$PKG_LIBS" | sed 's/libtesseract\.a/libtesseract.dylib/g')
75+
else
76+
PKG_LIBS=$(echo "$PKG_LIBS" | sed 's/libtesseract\.a/libtesseract.so/g')
77+
fi
78+
fi
79+
fi
80+
81+
if [ $FOUND_SHARED_LIB -eq 0 ]; then
82+
# Common locations for shared libraries on Linux
83+
for LIB_DIR in /usr/lib/x86_64-linux-gnu /usr/lib /usr/lib64 /lib/x86_64-linux-gnu
84+
do
85+
if [ -f "${LIB_DIR}/libtesseract.so" ]; then
86+
echo "Found shared tesseract library in ${LIB_DIR}"
87+
FOUND_SHARED_LIB=1
88+
89+
# Update PKG_LIBS to use the found path
90+
PKG_LIBS="-L${LIB_DIR} -ltesseract"
91+
break
92+
fi
93+
done
94+
fi
95+
96+
if [ $FOUND_SHARED_LIB -eq 0 ]; then
97+
echo "Warning: Shared tesseract library not found, attempting to use static library"
98+
echo "If compilation fails with PIC errors, please install the shared version of tesseract"
99+
100+
# For distributions like Ubuntu, try to suggest the exact package
101+
if [ -f "/etc/lsb-release" ] && grep -q "Ubuntu" /etc/lsb-release; then
102+
echo "On Ubuntu, try: sudo apt-get install libtesseract-dev"
103+
fi
104+
fi
105+
106+
echo "Using PKG_CFLAGS=${PKG_CFLAGS}"
107+
echo "Using PKG_LIBS=${PKG_LIBS}"
108+
109+
# If we still haven't found the libraries, error out
110+
if [ -z "$PKG_LIBS" ] || [ "$PKG_LIBS" = "-ltesseract" ]; then
34111
echo "--------------------------- [ANTICONF] --------------------------------"
35-
echo "Configuration failed to find system libraries. Try installing:"
112+
echo "Configuration failed to find tesseract library. Try installing:"
36113
echo " * deb: $PKG_DEB_NAME (Debian, Ubuntu, etc)"
37114
echo " * rpm: $PKG_RPM_NAME (Fedora, CentOS, RHEL)"
38115
echo " * brew: $PKG_BREW_NAME (Mac OSX)"
39-
echo "If the libraries are already installed, check that 'pkg-config' is in your"
40-
echo "PATH and PKG_CONFIG_PATH contains the necessary .pc files. If pkg-config"
116+
echo "If tesseract is already installed, check that 'pkg-config' is in your"
117+
echo "PATH and PKG_CONFIG_PATH contains the tesseract.pc file. If pkg-config"
41118
echo "is unavailable you can set INCLUDE_DIR and LIB_DIR manually via:"
42119
echo "R CMD INSTALL --configure-vars='INCLUDE_DIR=... LIB_DIR=...'"
43120
echo "-------------------------- [ERROR MESSAGE] ---------------------------"
44121
exit 1
45122
fi
46123

47-
# Print debug information
48-
echo "PKG_CFLAGS: $PKG_CFLAGS"
49-
echo "PKG_LIBS: $PKG_LIBS"
124+
# Ensure we're using shared libraries not static ones
125+
# Replace any static library references with dynamic ones
126+
PKG_LIBS=$(echo "$PKG_LIBS" | sed 's/\.a/.so/g')
127+
128+
# For macOS, adjust the linker flags: .dylib suffix plus an rpath entry
129+
if [ $(uname) = "Darwin" ]; then
130+
# Use dylib instead of .so on macOS
131+
PKG_LIBS=$(echo "$PKG_LIBS" | sed 's/\.so/.dylib/g')
132+
PKG_LIBS="$PKG_LIBS -Wl,-rpath,$(dirname $(echo $PKG_LIBS | grep -o '\-L[^ ]*' | sed 's/-L//g'))"
133+
fi
50134

51135
# Check if the compiler is clang
52136
if [ "$CC" = "clang" ] || [ "$CXX" = "clang++" ]; then
@@ -59,33 +143,45 @@ fi
59143
CPPFLAGS=`${R_HOME}/bin/R CMD config CPPFLAGS`
60144
CXX11STD=`${R_HOME}/bin/R CMD config CXX11STD`
61145
if [ $? -eq 0 ]; then
62-
CXX11=`${R_HOME}/bin/R CMD config CXX11`
63-
CXX11CPP="$CXX11 -E $CXX11STD"
146+
CXX11=`${R_HOME}/bin/R CMD config CXX11`
147+
CXX11CPP="$CXX11 -E $CXX11STD"
148+
echo "Using CXX11CPP: ${CXX11CPP}"
64149
else
65-
CXX11CPP=`${R_HOME}/bin/R CMD config CXXCPP`
150+
CXX11CPP=`${R_HOME}/bin/R CMD config CXXCPP`
151+
echo "Using CXX11CPP: ${CXX11CPP}"
66152
fi
67153

68-
# Create temporary C++ files to test the compatibility with Tesseract
154+
# Create test file to check which API to use
69155
cat <<EOF > conftest.cpp
70156
#include <tesseract/baseapi.h>
71157
#include <string>
72158
73159
int main() {
74160
tesseract::TessBaseAPI api;
75161
std::string val;
76-
// This fails to link on older Tesseract
77162
api.GetVariableAsString("test", &val);
78163
return 0;
79164
}
80165
EOF
81166

82-
if ${CXX11} -std=gnu++11 -c conftest.cpp -o conftest.o ${PKG_CFLAGS}; then
83-
CXXFLAGS="$CXXFLAGS -DTESSERACT_HAS_GETVARIABLEASSTRING"
84-
echo "Using GetVariableAsString()"
167+
# Default: assume GetVariableAsString() is not available
168+
HAS_GET_VARIABLE_AS_STRING="#undef TESSERACT_HAS_GETVARIABLEASSTRING"
169+
170+
if ${CXX11} -std=gnu++11 -c conftest.cpp -o conftest.o ${PKG_CFLAGS} 2>/dev/null; then
171+
# If compilation succeeded, GetVariableAsString is available
172+
HAS_GET_VARIABLE_AS_STRING="#define TESSERACT_HAS_GETVARIABLEASSTRING 1"
173+
echo "Using GetVariableAsString() API"
85174
else
86-
echo "Using GetStringVariable()"
175+
echo "Using GetStringVariable() API (older version)"
87176
fi
88177

178+
# Create the tesseract config header
179+
cat <<EOF > src/tesseract_config.h
180+
// Auto-generated by configure
181+
${HAS_GET_VARIABLE_AS_STRING}
182+
EOF
183+
184+
# Test Tesseract C++11 compatibility
89185
cat <<EOF > conftest.cpp
90186
#include <tesseract/baseapi.h>
91187
#include <allheaders.h>
@@ -97,8 +193,7 @@ EOF
97193

98194
# Test Tesseract
99195
# Tesseract enforces C++11
100-
if ! ${CXX11} -std=gnu++11 -c conftest.cpp -o conftest.o ${PKG_CFLAGS}
101-
then
196+
if ! ${CXX11} -std=gnu++11 -c conftest.cpp -o conftest.o ${PKG_CFLAGS} 2>/dev/null; then
102197
echo "Tesseract requires C++11 support"
103198
rm -rf conftest.cpp conftest.o
104199
exit 1
@@ -109,5 +204,7 @@ fi
109204
# Write to Makevars
110205
sed -e "s|@cflags@|$PKG_CFLAGS|" -e "s|@libs@|$PKG_LIBS|" src/Makevars.in > src/Makevars
111206

207+
echo "CXX_STD = CXX11" >> src/Makevars
208+
112209
# Success
113210
exit 0

docs/articles/intro.html

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/pkgdown.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@ pkgdown: 2.1.1
33
pkgdown_sha: ~
44
articles:
55
intro: intro.html
6-
last_built: 2025-01-14T15:33Z
6+
last_built: 2025-04-13T20:06Z

docs/reference/tessdata.html

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/search.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

src/Makevars.in

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,7 @@
11
PKG_CPPFLAGS=@cflags@
2-
PKG_LIBS=@libs@
2+
3+
# Force shared library linking by explicitly changing .a to .so
4+
PKG_LIBS=$(subst .a,.so,@libs@)
5+
6+
# Set C++11 standard for compatibility
7+
CXX_STD=CXX11

src/Makevars.win

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,19 @@
33
RWINLIB = ../windows/tesseract
44
PKG_CPPFLAGS = -I${RWINLIB}/include -I${RWINLIB}/include/leptonica
55

6+
# CRAN note: the --exclude-libs,ALL flag hides all symbols from static
7+
# libraries, which prevents functions like std::cerr, std::cout, abort(), etc.
8+
# from being exposed in the DLL
9+
610
PKG_LIBS = -L${RWINLIB}/lib${subst gcc,,${COMPILED_BY}}${R_ARCH} \
711
-L${RWINLIB}/lib \
812
-ltesseract -lleptonica \
913
-ltiff -lopenjp2 -lwebp -lsharpyuv -ljpeg -lgif -lpng16 -lz \
10-
-lws2_32
14+
-lws2_32 \
15+
-Wl,--exclude-libs,ALL
16+
17+
# CRAN note: consistent C++11 standard usage
18+
CXX_STD = CXX11
1119

1220
# Compile
1321

0 commit comments

Comments
 (0)