Welcome, guest! Login / Register - Why register?
Psst.. new poll here.
[email protected] webmail now available. Want one? Go here.
Outlook/Hotmail/Live addresses cannot be used to register here, as they are blocking our mail servers. #microsoftdeez
Obey the Epel!

Paste

Pasted as Java by kasun ( 12 years ago )
else if (userPath.equals("/topkphrases")) {
    // Load the stored document bytes; on failure the error is logged and
    // pdfBytes stays null.
    byte[] pdfBytes = null;
    try {
        pdfBytes = getBLOB("2");
    } catch (Exception ex) {
        Logger.getLogger(ControlerServelet.class.getName()).log(Level.SEVERE, null, ex);
    }

    // The paste marked this call as ControlerServelet.java:585.
    ArrayList<String> sentenceList = pdfDocToSentence(pdfBytes);

    // Publish the sentences on the request under "allphrases".
    request.setAttribute("allphrases", sentenceList);
}




/**
 * Splits a block of text into sentences using {@code DocumentPreprocessor}.
 *
 * @param paragraph the text to segment; may be {@code null}
 * @return one entry per sentence found, each being the {@code toString()} of
 *         that sentence's token list; an empty list when {@code paragraph}
 *         is {@code null}
 */
public ArrayList<String> txtDocToSentence(String paragraph) {
    ArrayList<String> sentenceList = new ArrayList<String>();

    // StringReader rejects a null string with a NullPointerException, so
    // treat "no text" as "no sentences" instead.
    if (paragraph == null) {
        return sentenceList;
    }

    Reader reader = new StringReader(paragraph);
    for (List<HasWord> sentence : new DocumentPreprocessor(reader)) {
        sentenceList.add(sentence.toString());
    }
    return sentenceList;
}

    
public ArrayList<String> pdfDocToSentence(byte[] buf ) throws IOException {

    PDFParser parser;
    String parsedText;
    PDFTextStripper pdfStripper;
    PDDocument pdDoc = null ;
    COSDocument cosDoc = null ;
   
        try {
            parser = new PDFParser(new ByteArrayInputStream(buf));   
           // parser = new PDFParser(new FileInputStream(f));
        } catch (Exception e) {
           // System.out.println("Unable to open PDF Parser.");
            return null;
        }
        {
        try {
            parser.parse();
            cosDoc = parser.getDocument();
            pdfStripper = new PDFTextStripper();
            pdDoc = new PDDocument(cosDoc);
            parsedText = pdfStripper.getText(pdDoc);
            //System.out.println("An exception occured in parsing the PDF Document.");
        } catch (Exception e) {
            try {
                   if (cosDoc != null) cosDoc.close();
                   if (pdDoc != null) pdDoc.close();
               } catch (Exception e1) {
            }
            return null;
        }
        
                                          finally{
                                          cosDoc.close();
                                           pdDoc.close();
 Line-->(ControlerServelet.java:755)        }
        
}


        if (parsedText == null) {
         //System.out.println("PDF to Text Conversion failed.");
        }
        

return txtDocToSentence(parsedText);
        
    }

 

Revise this Paste

Children: 47396
Your Name: Code Language: