Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jules #3203

Merged
merged 3 commits into from
Feb 23, 2024
Merged

Jules #3203

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions scripts/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@
If true (the default on non-OpenBSD systems), we let pip create and use
its own new venv to build PyMuPDF. Otherwise we force pip to use the
current venv.
--build-mupdf 0|1
Whether to rebuild mupdf when we build PyMuPDF. Default is 1.
--gdb 0|1
Run tests under gdb.
--timeout <seconds>
Expand Down Expand Up @@ -118,7 +120,9 @@ def main(argv):

build_isolation = None
valgrind = False
s = True
build_type = None
build_mupdf = True
gdb = False
implementations = None
test_names = list()
Expand Down Expand Up @@ -164,6 +168,8 @@ def main(argv):
timeout = float(next(args))
elif arg == '-v':
venv_quick = True
elif arg == '--build-mupdf':
build_mupdf = int(next(args))
elif arg == '--gdb':
gdb = int(next(args))
elif arg == '--valgrind':
Expand Down Expand Up @@ -198,6 +204,7 @@ def do_build():
build_type=build_type,
build_isolation=build_isolation,
venv_quick=venv_quick,
build_mupdf=build_mupdf,
)
def do_test():
test(
Expand Down Expand Up @@ -272,7 +279,13 @@ def venv_info(pytest_args=None):
return ret


def build(implementations=None, build_type=None, build_isolation=None, venv_quick=False):
def build(
implementations=None,
build_type=None,
build_isolation=None,
venv_quick=False,
build_mupdf=True,
):
'''
Args:
build_type:
Expand All @@ -281,7 +294,8 @@ def build(implementations=None, build_type=None, build_isolation=None, venv_quic
See top-level option `--build-isolation`.
venv_quick:
See top-level option `-v`.

build_mupdf:
See top-level option `--build-mupdf`.
'''
print(f'{build_type=}')
print(f'{build_isolation=}')
Expand Down Expand Up @@ -320,6 +334,8 @@ def build(implementations=None, build_type=None, build_isolation=None, venv_quic
if 'r' in implementations or 'R' in implementations:
v += 'b'
env_extra['PYMUPDF_SETUP_IMPLEMENTATIONS'] = v
if not build_mupdf:
env_extra['PYMUPDF_SETUP_MUPDF_REBUILD'] = '0'
if build_type:
env_extra['PYMUPDF_SETUP_MUPDF_BUILD_TYPE'] = build_type
gh_release.run(f'pip install{build_isolation_text} -vv {pymupdf_dir}', env_extra=env_extra)
Expand Down
6 changes: 1 addition & 5 deletions src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17435,11 +17435,7 @@ def JM_set_ocg_arrays_imp(arr, list_):
Works with dict like {"basestate":name, "on":list, "off":list, "rbg":list}
'''
pdf = mupdf.pdf_get_bound_document(arr)
for i, item in enumerate(list_):
xref = 0
if JM_INT_ITEM(list_, i)[0] == 1:
# Not found.
continue
for xref in list_:
obj = mupdf.pdf_new_indirect(pdf, xref, 0)
mupdf.pdf_array_push(arr, obj)

Expand Down
Binary file added tests/resources/test_3197.pdf
Binary file not shown.
28 changes: 28 additions & 0 deletions tests/test_textextract.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
No checks performed - just contribute to code coverage.
"""
import os
import sys

import fitz

Expand Down Expand Up @@ -239,3 +240,30 @@ def test_3186():
t = page.get_text()
texts.append(t)
assert texts == texts_expected, f'Unexpected output: {texts=}'


def test_3197():
    '''
    MuPDF's ActualText support fixes handling of test_3197.pdf.

    Extracts the text of each page of tests/resources/test_3197.pdf and
    checks that its UTF-8 encoding equals the expected bytes for that page.
    Nothing is checked on MuPDF versions before 1.24; in that case the test
    prints a note and returns.
    '''
    if fitz.mupdf_version_tuple < (1, 24):
        print(f'Not running on {fitz.mupdf_version_tuple=}.')
        return
    path = os.path.abspath(f'{__file__}/../../tests/resources/test_3197.pdf')

    # Expected text, one entry per page, compared as UTF-8 bytes so that
    # non-ASCII characters are unambiguous in this source file.
    text_utf8_expected = [
            b'NYSE - Nasdaq Real Time Price \xe2\x80\xa2 USD\nFord Motor Company (F)\n12.14 -0.11 (-0.90%)\nAt close: 4:00 PM EST\nAfter hours: 7:43 PM EST\nAll numbers in thousands\nAnnual\nQuarterly\nDownload\nSummary\nNews\nChart\nConversations\nStatistics\nHistorical Data\nProfile\nFinancials\nAnalysis\nOptions\nHolders\nSustainability\nInsights\nFollow\n12.15 +0.01 (+0.08%)\nIncome Statement\nBalance Sheet\nCash Flow\nSearch for news, symbols or companies\nNews\nFinance\nSports\nSign in\nMy Portfolio\nNews\nMarkets\nSectors\nScreeners\nPersonal Finance\nVideos\nFinance Plus\nBack to classic\nMore\n',
            b'Related Tickers\nTTM\n12/31/2023\n12/31/2022\n12/31/2021\n12/31/2020\n14,918,000\n14,918,000\n6,853,000\n15,787,000\n24,269,000\n-17,628,000\n-17,628,000\n-4,347,000\n2,745,000\n-18,615,000\n2,584,000\n2,584,000\n2,511,000\n-23,498,000\n2,315,000\n25,110,000\n25,110,000\n25,340,000\n20,737,000\n25,935,000\n-8,236,000\n-8,236,000\n-6,866,000\n-6,227,000\n-5,742,000\n51,659,000\n51,659,000\n45,470,000\n27,901,000\n65,900,000\n-41,965,000\n-41,965,000\n-45,655,000\n-54,164,000\n-60,514,000\n-335,000\n-335,000\n-484,000\n--\n--\n6,682,000\n6,682,000\n-13,000\n9,560,000\n18,527,000\n \nYahoo Finance Plus Essential\naccess required.\nUnlock Access\nBreakdown\nOperating Cash\nFlow\nInvesting Cash\nFlow\nFinancing Cash\nFlow\nEnd Cash Position\nCapital Expenditure\nIssuance of Debt\nRepayment of Debt\nRepurchase of\nCapital Stock\nFree Cash Flow\n12/31/2020 - 6/1/1972\nGM\nGeneral Motors Compa\xe2\x80\xa6\n39.49 +1.23%\n\xc2\xa0\nRIVN\nRivian Automotive, Inc.\n15.39 -3.15%\n\xc2\xa0\nNIO\nNIO Inc.\n5.97 +0.17%\n\xc2\xa0\nSTLA\nStellantis N.V.\n25.63 +0.91%\n\xc2\xa0\nLCID\nLucid Group, Inc.\n3.7000 +0.54%\n\xc2\xa0\nTSLA\nTesla, Inc.\n194.77 +0.52%\n\xc2\xa0\nTM\nToyota Motor Corporati\xe2\x80\xa6\n227.09 +0.14%\n\xc2\xa0\nXPEV\nXPeng Inc.\n9.08 +0.89%\n\xc2\xa0\nFSR\nFisker Inc.\n0.5579 -11.46%\n\xc2\xa0\nCopyright \xc2\xa9 2024 Yahoo.\nAll rights reserved.\nPOPULAR QUOTES\nTesla\nDAX Index\nKOSPI\nDow Jones\nS&P BSE SENSEX\nSPDR S&P 500 ETF Trust\nEXPLORE MORE\nCredit Score Management\nHousing Market\nActive vs. Passive Investing\nShort Selling\nToday\xe2\x80\x99s Mortgage Rates\nHow Much Mortgage Can You Afford\nABOUT\nData Disclaimer\nHelp\nSuggestions\nSitemap\n',
            ]

    with fitz.open(path) as document:
        for i, page in enumerate(document):
            text_utf8 = page.get_text().encode('utf8')
            # We returned early above for MuPDF < 1.24, so the expected text
            # always applies here; no second version check is needed.
            assert text_utf8 == text_utf8_expected[i], \
                    f'Unexpected text on page {i}: {text_utf8=}'
Loading