This commit is contained in:
sherlockforrest
2023-06-20 09:22:53 +08:00
commit bf2ed2e31f
621 changed files with 271599 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
diff --git a/node_modules/@opendocsg/pdf2md/lib/util/pdf.js b/node_modules/@opendocsg/pdf2md/lib/util/pdf.js
index 0ed1512..e491a0b 100644
--- a/node_modules/@opendocsg/pdf2md/lib/util/pdf.js
+++ b/node_modules/@opendocsg/pdf2md/lib/util/pdf.js
@@ -42,7 +42,7 @@ exports.parse = async function parse (docOptions, callbacks) {
}
}
- let pageNum = firstPage.pageNum
+ let pageNum = 0; // firstPage.pageNum
for (let j = 1; j <= pdfDocument.numPages; j++) {
const page = await pdfDocument.getPage(j)

View File

@@ -0,0 +1,43 @@
diff --git a/node_modules/@opendocsg/pdf2md/lib/util/pdf.js b/node_modules/@opendocsg/pdf2md/lib/util/pdf.js
index 0ed1512..2cf8408 100644
--- a/node_modules/@opendocsg/pdf2md/lib/util/pdf.js
+++ b/node_modules/@opendocsg/pdf2md/lib/util/pdf.js
@@ -5,7 +5,8 @@ const Page = require('../models/Page')
const NO_OP = () => {}
-exports.parse = async function parse (docOptions, callbacks) {
+exports.parse = async function parse (pdfDocument, callbacks) {
+
const { metadataParsed, pageParsed, fontParsed, documentParsed } = {
metadataParsed: NO_OP,
pageParsed: NO_OP,
@@ -13,10 +14,9 @@ exports.parse = async function parse (docOptions, callbacks) {
documentParsed: NO_OP,
...(callbacks || {}),
}
- const pdfDocument = await pdfjs.getDocument(docOptions).promise
+
const metadata = await pdfDocument.getMetadata()
metadataParsed(metadata)
-
const pages = [...Array(pdfDocument.numPages).keys()].map(
index => new Page({ index })
)
@@ -42,7 +42,7 @@ exports.parse = async function parse (docOptions, callbacks) {
}
}
- let pageNum = firstPage.pageNum
+ let pageNum = 0 // firstPage.pageNum
for (let j = 1; j <= pdfDocument.numPages; j++) {
const page = await pdfDocument.getPage(j)
@@ -90,6 +90,7 @@ exports.parse = async function parse (docOptions, callbacks) {
fontParsed(fonts)
}
}
+
}
return {
fonts,