Welcome, guest! Login / Register - Why register?
Psst.. new poll here.
Psst.. new forums here.
Microsoft is blocking us again (TY IP Reputation!), so don't bother with any of their useless mail servers here; just use OAuth login instead. Thank the nice Russians for causing that. :)

Paste

Pasted as Java by kasun ( 14 years ago )
else if (userPath.equals("/topkphrases")) {
                        
              byte[] b = null;
           try {
               b = getBLOB("2");
           } catch (Exception ex) {
                Logger.getLogger(ControlerServelet.class.getName()).log(Level.SEVERE, null, ex);
           }
     
                                             ArrayList<String> sentenceList = new ArrayList<String>();      
 Line------>(ControlerServelet.java:585)     sentenceList=pdfDocToSentence(b);
         
                                              request.setAttribute("allphrases", sentenceList);
     
        } 




/**
 * Splits a block of text into sentences.
 *
 * <p>The text is wrapped in a {@code StringReader} and iterated with a
 * {@code DocumentPreprocessor}; every sentence it yields (a {@code List<HasWord>})
 * is stored as the result of its {@code toString()}.
 *
 * @param paragraph the text to segment
 * @return the string form of each sentence, in iteration order
 */
public ArrayList<String> txtDocToSentence(String paragraph) {
    Reader textSource = new StringReader(paragraph);
    DocumentPreprocessor segmenter = new DocumentPreprocessor(textSource);

    ArrayList<String> sentences = new ArrayList<String>();
    for (List<HasWord> tokens : segmenter) {
        String rendered = tokens.toString();
        sentences.add(rendered);
    }
    return sentences;
}

    
public ArrayList<String> pdfDocToSentence(byte[] buf ) throws IOException {

    PDFParser parser;
    String parsedText;
    PDFTextStripper pdfStripper;
    PDDocument pdDoc = null ;
    COSDocument cosDoc = null ;
   
        try {
            parser = new PDFParser(new ByteArrayInputStream(buf));   
           // parser = new PDFParser(new FileInputStream(f));
        } catch (Exception e) {
           // System.out.println("Unable to open PDF Parser.");
            return null;
        }
        {
        try {
            parser.parse();
            cosDoc = parser.getDocument();
            pdfStripper = new PDFTextStripper();
            pdDoc = new PDDocument(cosDoc);
            parsedText = pdfStripper.getText(pdDoc);
            //System.out.println("An exception occured in parsing the PDF Document.");
        } catch (Exception e) {
            try {
                   if (cosDoc != null) cosDoc.close();
                   if (pdDoc != null) pdDoc.close();
               } catch (Exception e1) {
            }
            return null;
        }
        
                                          finally{
 Line-->(ControlerServelet.java:755)      cosDoc.close();
                                           pdDoc.close();
         }
        
}


        if (parsedText == null) {
         //System.out.println("PDF to Text Conversion failed.");
        }
        

return txtDocToSentence(parsedText);
        
    }

 

Revise this Paste

Parent: 47395
Your Name: Code Language: