.text to pdf
XML/text to PDF from Java: iText => http://www.lowagie.com/iText/
Create PDF from C: libHaru => http://libharu.sourceforge.net/
.pdf to text
http://www.adobe.com/products/acrobat/access_onlinetools.html
pdf to text from java(範本在最下面),支援跨平台:
Apache PDFBox => http://incubator.apache.org/pdfbox/
.PDF format becomes ISO standard
http://www.iso.org/iso/pressrelease.htm?refid=Ref1141
http://www.iso.org/iso/iso_catalogue/catalogue_tc/catalogue_detail.htm?csnumber=51502
http://www.cogniview.com/convert-pdf-to-excel/post/pdf-editing-creation-50-open-sourcefree-alternatives-to-adobe-acrobat/
http://www.adobe.com/devnet/pdf/library/
好(php,pdf generator) http://www.fpdf.org/
免費 http://www.pdflib.com/
介紹 http://www.neo.com.tw/archives/896
■ Apache PDFBox Sample (pdf to text)
package org.pdfbox;
import java.io.*;
import org.apache.log4j.Category;
import org.apache.log4j.Logger;
import org.pdfbox.encryption.DecryptDocument;
import org.pdfbox.exceptions.InvalidPasswordException;
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;
public class ExtractText
{
// Logger used by main() for its timing messages. This declaration has no
// initializer, so the assignment must happen elsewhere in the class
// (not visible in this excerpt).
private static final Logger LOG;
// Initial value of main()'s encoding variable. While the encoding stays null,
// main() builds the file writer without an explicit charset name.
public static final String DEFAULT_ENCODING = null;
// Command-line option names. main() compares its arguments against these same
// literal strings.
private static final String PASSWORD = "-password";
private static final String ENCODING = "-encoding";
private static final String CONSOLE = "-console";
private static final String START_PAGE = "-startPage";
private static final String END_PAGE = "-endPage";
// NOTE(review): flagged as synthetic by the trailing comment; presumably a
// decompiler artifact that caches the class literal -- confirm before removing.
static Class class$org$pdfbox$ExtractText; /* synthetic field */
/**
 * Creates an instance. The constructor performs no work of its own.
 */
public ExtractText()
{
    super();
}
/**
 * Command-line entry point: parses a PDF file and writes its text either to a
 * file or to standard output.
 *
 * <p>Options that consume the following argument: <code>-password</code>,
 * <code>-encoding</code>, <code>-startPage</code>, <code>-endPage</code>.
 * <code>-console</code> sends the text to standard output. The first other
 * argument is the PDF file; any later one is the output text file (the last
 * one wins). Without an explicit output file the name is derived from the PDF
 * file name.
 *
 * <p>If the document is encrypted and cannot be decrypted, an error is printed
 * to standard error; when a password had been supplied the JVM exits with
 * status 2, otherwise usage() is called.
 *
 * @param args command-line arguments
 * @throws Exception if reading, parsing, decrypting or writing fails, or if a
 *         page-number option is not a valid integer
 */
public static void main(String args[])
    throws Exception
{
    boolean toConsole = false;
    // Tracks whether -password was actually given, so the error message below
    // does not have to guess from the argument count.
    boolean passwordSupplied = false;
    String password = "";
    String encoding = DEFAULT_ENCODING;
    PDFTextStripper stripper = new PDFTextStripper();
    String pdfFile = null;
    String textFile = null;
    int startPage = 1;
    int endPage = Integer.MAX_VALUE;

    for(int i = 0; i < args.length; i++)
    {
        String arg = args[i];
        boolean takesValue = arg.equals("-password")
            || arg.equals("-encoding")
            || arg.equals("-startPage")
            || arg.equals("-endPage");
        if(takesValue)
        {
            if(++i >= args.length)
            {
                usage();
                // usage() is not expected to return, but if it does we must
                // not index past the end of args.
                return;
            }
            String value = args[i];
            if(arg.equals("-password"))
            {
                password = value;
                passwordSupplied = true;
            }
            else if(arg.equals("-encoding"))
            {
                encoding = value;
            }
            else if(arg.equals("-startPage"))
            {
                startPage = Integer.parseInt(value);
            }
            else
            {
                endPage = Integer.parseInt(value);
            }
        }
        else if(arg.equals("-console"))
        {
            toConsole = true;
        }
        else if(pdfFile == null)
        {
            pdfFile = arg;
        }
        else
        {
            textFile = arg;
        }
    }

    if(pdfFile == null)
    {
        usage();
        // Guard against a null dereference should usage() return.
        return;
    }
    if(textFile == null)
    {
        // Names longer than four characters have their last four characters
        // (normally ".pdf") replaced by ".txt". Shorter names just get ".txt"
        // appended so the output name is never left null.
        if(pdfFile.length() > 4)
        {
            textFile = pdfFile.substring(0, pdfFile.length() - 4) + ".txt";
        }
        else
        {
            textFile = pdfFile + ".txt";
        }
    }

    InputStream input = null;
    Writer output = null;
    PDDocument document = null;
    try
    {
        input = new FileInputStream(pdfFile);
        long start = System.currentTimeMillis();
        document = parseDocument(input);
        long stop = System.currentTimeMillis();
        LOG.info("Time to parse time=" + (stop - start));

        if(document.isEncrypted())
        {
            try
            {
                DecryptDocument decryptor = new DecryptDocument(document);
                decryptor.decryptDocument(password);
            }
            catch(InvalidPasswordException e)
            {
                if(passwordSupplied)
                {
                    System.err.println("Error: The supplied password is incorrect.");
                    System.exit(2);
                }
                else
                {
                    System.err.println("Error: The document is encrypted.");
                    usage();
                }
                // Never try to extract text from a document that could not
                // be decrypted; the finally block still releases resources.
                return;
            }
        }

        if(toConsole)
        {
            output = new OutputStreamWriter(System.out);
        }
        else if(encoding != null)
        {
            output = new OutputStreamWriter(new FileOutputStream(textFile), encoding);
        }
        else
        {
            output = new OutputStreamWriter(new FileOutputStream(textFile));
        }

        start = System.currentTimeMillis();
        stripper.setStartPage(startPage);
        stripper.setEndPage(endPage);
        stripper.writeText(document, output);
        stop = System.currentTimeMillis();
        LOG.info("Time to extract text time=" + (stop - start));
    }
    finally
    {
        // Nested so that a failure while closing one resource cannot prevent
        // the remaining ones from being closed.
        try
        {
            if(input != null)
            {
                input.close();
            }
        }
        finally
        {
            try
            {
                if(output != null)
                {
                    output.close();
                }
            }
            finally
            {
                if(document != null)
                {
                    document.close();
                }
            }
        }
    }
}
/**
 * Runs a PDFParser over the given stream and returns the document it built.
 *
 * @param input stream positioned at the start of the PDF data; it is not
 *        closed here
 * @return the PDDocument obtained from the parser after parsing
 * @throws IOException propagated from the parser
 */
private static PDDocument parseDocument(InputStream input)
    throws IOException
{
    final PDFParser pdfParser = new PDFParser(input);
    pdfParser.parse();
    final PDDocument parsed = pdfParser.getPDDocument();
    return parsed;
}
private static void usage()
{
System.err.println("Usage: java org.pdfbox.ExtractText [OPTIONS]