在线时间:8:00-16:00
迪恩网络APP
随时随地掌握行业动态
扫描二维码
关注迪恩网络微信公众号
// Source: http://www.angusj.com/delphitips/pdfpagecount.php
type
  // One cross-reference table entry: an object number and the byte offset
  // recorded for it in the xref table.
  PPdfObj = ^TPdfObj;
  TPdfObj = record
    number, offset: integer;
  end;

// Returns the page count of the PDF file 'filename', or -1 on any error
// (unreadable file, malformed structure, unsupported layout).
//
// Side effects: shows the hourglass cursor while working and calls
// Application.ProcessMessages once before parsing.
//
// Summary of steps taken to parse PDF file for page count :-
//  1. Locate 'startxref' at end of file
//  2. get 'xref' offset and go to xref table
//  3. fill the pdfObj list with object numbers and offsets
//  4. handle subsections within xref table.
//  5. read 'trailer' section at end of each xref
//  6. store 'Root' object number if found in 'trailer'
//  7. if 'Prev' xref found in 'trailer' - loop back to step 2
//  8. locate Root in the full pdfObj list
//  9. locate 'Pages' object from Root
// 10. get Count from Pages.
//
// Only classic 'xref' tables are handled; a file whose startxref offset does
// not point at the 'xref' keyword (e.g. a cross-reference stream) yields -1.
//
// The file is parsed as raw bytes, so all buffer pointers are PAnsiChar and
// all keywords are AnsiString; this keeps the parser correct on compilers
// where 'string'/'pchar' are 16-bit.
function GetPdfPageCount(const filename: string): integer;
var
  ms: TMemoryStream;
  k, cnt, i, entryOffset, pagesNum, rootNum: integer;
  p, p2: PAnsiChar;           // p is the shared parse cursor
  bufStart, bufEnd: PAnsiChar; // valid bytes are [bufStart, bufEnd)
  PdfObj: PPdfObj;
  PdfObjList: TList;
  xrefSeen: array of integer;  // xref offsets already parsed (cycle guard)

  // True when the bytes at q equal str and lie wholly inside the buffer.
  function MatchAt(q: PAnsiChar; const str: AnsiString): boolean;
  var
    len: integer;
  begin
    len := length(str);
    result := (q >= bufStart) and (q <= bufEnd) and (bufEnd - q >= len) and
      CompareMem(q, PAnsiChar(str), len);
  end;

  // Moves the cursor to a byte offset in the file; fails if out of range.
  function SeekTo(offset: integer): boolean;
  begin
    result := (offset >= 0) and (offset < ms.Size);
    if result then p := bufStart + offset;
  end;

  // Advances the cursor over CR, LF, SPC and other control bytes.
  procedure SkipWhitespace;
  begin
    while (p < bufEnd) and (p^ < #33) do inc(p);
  end;

  // Skips leading whitespace, then reads an unsigned decimal integer.
  // Returns false (leaving num untouched) when no digit is present or the
  // value does not fit in an integer.
  function GetNumber(out num: integer): boolean;
  var
    value, digit, digits: integer;
  begin
    result := false;
    SkipWhitespace;
    value := 0;
    digits := 0;
    while (p < bufEnd) and (p^ in ['0'..'9']) do
    begin
      digit := ord(p^) - ord('0');
      if value > (MaxInt - digit) div 10 then exit; //would overflow
      value := value * 10 + digit;
      inc(digits);
      inc(p);
    end;
    if digits = 0 then exit;
    num := value;
    result := true;
  end;

  // Tests for str at the cursor; the cursor moves past str only on a match.
  function IsString(const str: AnsiString): boolean;
  begin
    result := MatchAt(p, str);
    if result then inc(p, length(str));
  end;

  // Scans forward through the next dictionary for the key 'str'.
  // PDF 'dictionaries' (assoc. arrays) open with '<<' and terminate with
  // '>>'; nesting is tracked so that an inline sub-dictionary neither ends
  // the search early nor contributes a match. On success the cursor is left
  // just after the key.
  function FindStrInDict(const str: AnsiString): boolean;
  var
    depth: integer;
  begin
    result := false;
    depth := 0;
    while p < bufEnd do
    begin
      if MatchAt(p, '<<') then
      begin
        inc(depth);
        inc(p, 2);
      end
      else if MatchAt(p, '>>') then
      begin
        dec(depth);
        inc(p, 2);
        if depth <= 0 then exit; //end of the outer dictionary
      end
      else if (depth = 1) and IsString(str) then
      begin
        result := true;
        exit;
      end
      else
        inc(p); //advance one byte only, so an adjacent key is never skipped
    end;
  end;

  // Positions the cursor just after the 'xref' keyword at 'offset'.
  // Fails when the offset is out of range, was already visited (a /Prev
  // cycle), or does not point at 'xref'.
  function GotoXref(offset: integer): boolean;
  var
    n: integer;
  begin
    result := false;
    for n := 0 to High(xrefSeen) do
      if xrefSeen[n] = offset then exit;
    SetLength(xrefSeen, Length(xrefSeen) + 1);
    xrefSeen[High(xrefSeen)] := offset;
    result := SeekTo(offset) and IsString('xref');
  end;

  // Finds the first list entry for objNum, moves the cursor to its offset
  // and confirms that the object number found there is objNum.
  function GotoObject(objNum: integer): boolean;
  var
    n, num: integer;
  begin
    result := false;
    for n := 0 to PdfObjList.Count - 1 do
      if PPdfObj(PdfObjList[n])^.number = objNum then
      begin
        result := SeekTo(PPdfObj(PdfObjList[n])^.offset) and
          GetNumber(num) and (num = objNum);
        exit;
      end;
  end;

begin
  //on error return -1 as page count
  result := -1;
  ms := nil;
  PdfObjList := nil;
  try
    screen.Cursor := crHourGlass;
    try
      application.ProcessMessages;
      ms := TMemoryStream.Create;
      PdfObjList := TList.Create;
      ms.LoadFromFile(filename);

      if ms.Size < 9 then exit; //too small to even hold 'startxref'
      bufStart := PAnsiChar(ms.Memory);
      bufEnd := bufStart + ms.Size;

      //find the last 'startxref'. Searching backwards from the very end
      //copes with '%%EOF' and with rubbish appended to the pdf.
      p := bufEnd - 9;
      while (p >= bufStart) and not MatchAt(p, 'startxref') do dec(p);
      if p < bufStart then exit;
      inc(p, 9);

      rootNum := -1; //ie flag not yet found

      //xref offset ==> k
      if not GetNumber(k) then exit;
      if not GotoXref(k) then exit;

      while true do //top of loop //////////////////////////////
      begin
        //get base object number ==> k
        if not GetNumber(k) then exit;
        //get object count ==> cnt
        if not GetNumber(cnt) then exit;
        //skip CR, LF (whitespace only, so an empty subsection cannot
        //run on past the 'trailer' keyword)
        SkipWhitespace;
        p2 := p;
        //add all objects in section to list ...
        for i := 0 to cnt - 1 do
        begin
          if not GetNumber(entryOffset) then exit;
          new(PdfObj);
          PdfObj^.number := k + i;
          PdfObj^.offset := entryOffset;
          PdfObjList.Add(PdfObj);
          inc(p2, 20); //each xref entry is exactly 20 bytes long
          p := p2;
        end;
        SkipWhitespace;
        //check for and process further subsections ...
        if (p < bufEnd) and (p^ in ['0'..'9']) then continue;

        // parse 'trailer dictionary' ...
        if not IsString('trailer') then exit;
        p2 := p;
        // get Root (aka /Catalog) ...
        if (rootNum = -1) and FindStrInDict('/Root') then
          if not GetNumber(rootNum) then exit;
        p := p2;
        if not FindStrInDict('/Prev') then break; //no more xrefs
        //next xref offset ==> k
        if not GetNumber(k) then exit;
        if not GotoXref(k) then exit;
      end; //bottom of loop /////////////////////////////////////

      //Make sure we've got the Root object number ...
      if rootNum < 0 then exit;
      //Find Root object in list, go to its offset and double check it ...
      if not GotoObject(rootNum) then exit;
      if not FindStrInDict('/Pages') then exit;
      //get Pages object number ==> pagesNum
      if not GetNumber(pagesNum) then exit;
      //Find Pages object in list, go to its offset and double check it ...
      if not GotoObject(pagesNum) then exit;
      if not FindStrInDict('/Count') then exit;
      if not GetNumber(cnt) then exit;

      //occasionally the 'count' value is an indirect object,
      //ie '/Count <objnum> <gennum> R'
      if GetNumber(k) then
      begin
        SkipWhitespace;
        if IsString('R') then
        begin
          //cnt holds the object number of the count value, so find the
          //obj, skip its generation number and 'obj' keyword, then read it
          if not GotoObject(cnt) then exit;
          if not GetNumber(k) then exit; //skip the generation num
          SkipWhitespace;
          if not IsString('obj') then exit;
          if not GetNumber(cnt) then exit;
        end;
      end;
      result := cnt;
    finally
      screen.Cursor := crDefault;
      if PdfObjList <> nil then
        for i := 0 to PdfObjList.Count - 1 do
          dispose(PPdfObj(PdfObjList[i]));
      PdfObjList.Free;
      ms.Free;
    end;
  except
    //nb: errors are flagged by returning -1
  end;
end;
2023-10-27
2022-08-15
2022-08-17
2022-09-23
2022-08-13
请发表评论