1   /**
2    * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
3    *
4    * This library is free software; you can redistribute it and/or modify it under
5    * the terms of the GNU Lesser General Public License as published by the Free
6    * Software Foundation; either version 2.1 of the License, or (at your option)
7    * any later version.
8    *
9    * This library is distributed in the hope that it will be useful, but WITHOUT
10   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11   * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12   * details.
13   */
14  
15  package com.liferay.portal.util;
16  
17  import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
18  import com.liferay.portal.kernel.io.unsync.UnsyncByteArrayOutputStream;
19  import com.liferay.portal.kernel.log.Log;
20  import com.liferay.portal.kernel.log.LogFactoryUtil;
21  import com.liferay.portal.kernel.util.ContentTypes;
22  import com.liferay.portal.kernel.util.FileComparator;
23  import com.liferay.portal.kernel.util.JavaProps;
24  import com.liferay.portal.kernel.util.MimeTypesUtil;
25  import com.liferay.portal.kernel.util.ServerDetector;
26  import com.liferay.portal.kernel.util.StreamUtil;
27  import com.liferay.portal.kernel.util.StringBundler;
28  import com.liferay.portal.kernel.util.StringPool;
29  import com.liferay.portal.kernel.util.StringUtil;
30  import com.liferay.portal.kernel.util.Time;
31  import com.liferay.portal.kernel.util.Validator;
32  import com.liferay.util.PwdGenerator;
33  import com.liferay.util.SystemProperties;
34  import com.liferay.util.lucene.JerichoHTMLTextExtractor;
35  
36  import java.io.BufferedInputStream;
37  import java.io.File;
38  import java.io.FileInputStream;
39  import java.io.FileOutputStream;
40  import java.io.FileReader;
41  import java.io.IOException;
42  import java.io.InputStream;
43  import java.io.OutputStreamWriter;
44  import java.io.Reader;
45  import java.io.StringReader;
46  import java.io.Writer;
47  
48  import java.util.ArrayList;
49  import java.util.Arrays;
50  import java.util.HashMap;
51  import java.util.List;
52  import java.util.Map;
53  import java.util.Properties;
54  
55  import org.apache.jackrabbit.extractor.MsExcelTextExtractor;
56  import org.apache.jackrabbit.extractor.MsPowerPointTextExtractor;
57  import org.apache.jackrabbit.extractor.MsWordTextExtractor;
58  import org.apache.jackrabbit.extractor.OpenOfficeTextExtractor;
59  import org.apache.jackrabbit.extractor.PdfTextExtractor;
60  import org.apache.jackrabbit.extractor.PlainTextExtractor;
61  import org.apache.jackrabbit.extractor.RTFTextExtractor;
62  import org.apache.jackrabbit.extractor.TextExtractor;
63  import org.apache.jackrabbit.extractor.XMLTextExtractor;
64  import org.apache.poi.POITextExtractor;
65  import org.apache.poi.extractor.ExtractorFactory;
66  
67  import org.mozilla.intl.chardet.nsDetector;
68  import org.mozilla.intl.chardet.nsPSMDetector;
69  
70  /**
71   * <a href="FileImpl.java.html"><b><i>View Source</i></b></a>
72   *
73   * @author Brian Wing Shun Chan
74   * @author Alexander Chow
75   */
76  public class FileImpl implements com.liferay.portal.kernel.util.File {
77  
78      public static FileImpl getInstance() {
79          return _instance;
80      }
81  
82      public FileImpl() {
83          Class<?>[] textExtractorClasses = new Class[] {
84              JerichoHTMLTextExtractor.class, MsExcelTextExtractor.class,
85              MsPowerPointTextExtractor.class, MsWordTextExtractor.class,
86              OpenOfficeTextExtractor.class, PdfTextExtractor.class,
87              PlainTextExtractor.class, RTFTextExtractor.class,
88              XMLTextExtractor.class
89          };
90  
91          for (Class<?> textExtractorClass : textExtractorClasses) {
92              try {
93                  TextExtractor textExtractor =
94                      (TextExtractor)textExtractorClass.newInstance();
95  
96                  String[] contentTypes = textExtractor.getContentTypes();
97  
98                  for (String contentType : contentTypes) {
99                      _textExtractors.put(contentType, textExtractor);
100                 }
101             }
102             catch (Exception e) {
103                 _log.error(e, e);
104             }
105         }
106     }
107 
108     public void copyDirectory(String sourceDirName, String destinationDirName) {
109         copyDirectory(new File(sourceDirName), new File(destinationDirName));
110     }
111 
112     public void copyDirectory(File source, File destination) {
113         if (source.exists() && source.isDirectory()) {
114             if (!destination.exists()) {
115                 destination.mkdirs();
116             }
117 
118             File[] fileArray = source.listFiles();
119 
120             for (int i = 0; i < fileArray.length; i++) {
121                 if (fileArray[i].isDirectory()) {
122                     copyDirectory(
123                         fileArray[i],
124                         new File(destination.getPath() + File.separator
125                             + fileArray[i].getName()));
126                 }
127                 else {
128                     copyFile(
129                         fileArray[i],
130                         new File(destination.getPath() + File.separator
131                             + fileArray[i].getName()));
132                 }
133             }
134         }
135     }
136 
137     public void copyFile(String source, String destination) {
138         copyFile(source, destination, false);
139     }
140 
141     public void copyFile(String source, String destination, boolean lazy) {
142         copyFile(new File(source), new File(destination), lazy);
143     }
144 
145     public void copyFile(File source, File destination) {
146         copyFile(source, destination, false);
147     }
148 
149     public void copyFile(File source, File destination, boolean lazy) {
150         if (!source.exists()) {
151             return;
152         }
153 
154         if (lazy) {
155             String oldContent = null;
156 
157             try {
158                 oldContent = read(source);
159             }
160             catch (Exception e) {
161                 return;
162             }
163 
164             String newContent = null;
165 
166             try {
167                 newContent = read(destination);
168             }
169             catch (Exception e) {
170             }
171 
172             if ((oldContent == null) || !oldContent.equals(newContent)) {
173                 copyFile(source, destination, false);
174             }
175         }
176         else {
177             if ((destination.getParentFile() != null) &&
178                 (!destination.getParentFile().exists())) {
179 
180                 destination.getParentFile().mkdirs();
181             }
182 
183             try {
184                 StreamUtil.transfer(
185                     new FileInputStream(source),
186                     new FileOutputStream(destination));
187             }
188             catch (IOException ioe) {
189                 _log.error(ioe.getMessage());
190             }
191         }
192     }
193 
194     public File createTempFile() {
195         return createTempFile(null);
196     }
197 
198     public File createTempFile(String extension) {
199         return new File(createTempFileName(extension));
200     }
201 
202     public String createTempFileName() {
203         return createTempFileName(null);
204     }
205 
206     public String createTempFileName(String extension) {
207         StringBundler sb = new StringBundler();
208 
209         sb.append(SystemProperties.get(SystemProperties.TMP_DIR));
210         sb.append(StringPool.SLASH);
211         sb.append(Time.getTimestamp());
212         sb.append(PwdGenerator.getPassword(PwdGenerator.KEY2, 8));
213 
214         if (Validator.isNotNull(extension)) {
215             sb.append(StringPool.PERIOD);
216             sb.append(extension);
217         }
218 
219         return sb.toString();
220     }
221 
222     public String decodeSafeFileName(String fileName) {
223         return StringUtil.replace(
224             fileName, _SAFE_FILE_NAME_2, _SAFE_FILE_NAME_1);
225     }
226 
227     public boolean delete(String file) {
228         return delete(new File(file));
229     }
230 
231     public boolean delete(File file) {
232         if ((file != null) && file.exists()) {
233             return file.delete();
234         }
235         else {
236             return false;
237         }
238     }
239 
240     public void deltree(String directory) {
241         deltree(new File(directory));
242     }
243 
244     public void deltree(File directory) {
245         if (directory.exists() && directory.isDirectory()) {
246             File[] fileArray = directory.listFiles();
247 
248             for (int i = 0; i < fileArray.length; i++) {
249                 if (fileArray[i].isDirectory()) {
250                     deltree(fileArray[i]);
251                 }
252                 else {
253                     fileArray[i].delete();
254                 }
255             }
256 
257             directory.delete();
258         }
259     }
260 
261     public String encodeSafeFileName(String fileName) {
262         return StringUtil.replace(
263             fileName, _SAFE_FILE_NAME_1, _SAFE_FILE_NAME_2);
264     }
265 
266     public boolean exists(String fileName) {
267         return exists(new File(fileName));
268     }
269 
270     public boolean exists(File file) {
271         return file.exists();
272     }
273 
274     public String extractText(InputStream is, String fileName) {
275         String text = null;
276 
277         try {
278             if (!is.markSupported()) {
279                 is = new BufferedInputStream(is);
280             }
281 
282             String contentType = MimeTypesUtil.getContentType(is, fileName);
283 
284             TextExtractor textExtractor = _textExtractors.get(contentType);
285 
286             if (textExtractor != null) {
287                 if (_log.isInfoEnabled()) {
288                     _log.info(
289                         "Using text extractor " +
290                             textExtractor.getClass().getName());
291                 }
292 
293                 StringBuilder sb = new StringBuilder();
294 
295                 Reader reader = null;
296 
297                 if (ServerDetector.isJOnAS() && JavaProps.isJDK6() &&
298                     contentType.equals(ContentTypes.APPLICATION_MSWORD)) {
299 
300                     if (_log.isWarnEnabled()) {
301                         _log.warn(
302                             "JOnAS 5 with JDK 6 has a known issue with text " +
303                                 "extraction of Word documents. Use JDK 5 if " +
304                                     "you require indexing of Word documents.");
305                     }
306 
307                     if (_log.isDebugEnabled()) {
308 
309                         // Execute code that will generate the error so it can
310                         // be fixed at a later date
311 
312                         reader = textExtractor.extractText(
313                             is, contentType, null);
314                     }
315                     else {
316                         reader = new StringReader(StringPool.BLANK);
317                     }
318                 }
319                 else {
320                     reader = textExtractor.extractText(
321                         is, contentType, null);
322                 }
323 
324                 try{
325                     char[] buffer = new char[1024];
326 
327                     int result = -1;
328 
329                     while ((result = reader.read(buffer)) != -1) {
330                         sb.append(buffer, 0, result);
331                     }
332                 }
333                 finally {
334                     try {
335                         reader.close();
336                     }
337                     catch (IOException ioe) {
338                     }
339                 }
340 
341                 text = sb.toString();
342             }
343             else {
344                 if (contentType.equals(ContentTypes.APPLICATION_ZIP) ||
345                     contentType.startsWith(
346                         "application/vnd.openxmlformats-officedocument.")) {
347 
348                     try {
349                         POITextExtractor poiTextExtractor =
350                             ExtractorFactory.createExtractor(is);
351 
352                         text = poiTextExtractor.getText();
353                     }
354                     catch (Exception e) {
355                         if (_log.isWarnEnabled()) {
356                             _log.warn(e, e);
357                         }
358                     }
359                 }
360 
361                 if ((text == null) && _log.isInfoEnabled()) {
362                     _log.info("No text extractor found for " + fileName);
363                 }
364             }
365         }
366         catch (Exception e) {
367             _log.error(e, e);
368         }
369 
370         if (_log.isDebugEnabled()) {
371             _log.debug("Extractor returned text:\n\n" + text);
372         }
373 
374         if (text == null) {
375             text = StringPool.BLANK;
376         }
377 
378         return text;
379     }
380 
381     public String getAbsolutePath(File file) {
382         return StringUtil.replace(
383             file.getAbsolutePath(), StringPool.BACK_SLASH, StringPool.SLASH);
384     }
385 
386     public byte[] getBytes(File file) throws IOException {
387         if ((file == null) || !file.exists()) {
388             return null;
389         }
390 
391         FileInputStream is = new FileInputStream(file);
392 
393         byte[] bytes = getBytes(is, (int)file.length());
394 
395         is.close();
396 
397         return bytes;
398     }
399 
400     public byte[] getBytes(InputStream is) throws IOException {
401         return getBytes(is, -1);
402     }
403 
404     public byte[] getBytes(InputStream inputStream, int bufferSize)
405         throws IOException {
406 
407         UnsyncByteArrayOutputStream unsyncByteArrayOutputStream =
408             new UnsyncByteArrayOutputStream();
409 
410         StreamUtil.transfer(
411             inputStream, unsyncByteArrayOutputStream, bufferSize);
412 
413         return unsyncByteArrayOutputStream.toByteArray();
414     }
415 
416     public String getExtension(String fileName) {
417         if (fileName == null) {
418             return null;
419         }
420 
421         int pos = fileName.lastIndexOf(StringPool.PERIOD);
422 
423         if (pos > 0) {
424             return fileName.substring(pos + 1, fileName.length()).toLowerCase();
425         }
426         else {
427             return StringPool.BLANK;
428         }
429     }
430 
431     public String getPath(String fullFileName) {
432         int pos = fullFileName.lastIndexOf(StringPool.SLASH);
433 
434         if (pos == -1) {
435             pos = fullFileName.lastIndexOf(StringPool.BACK_SLASH);
436         }
437 
438         String shortFileName = fullFileName.substring(0, pos);
439 
440         if (Validator.isNull(shortFileName)) {
441             return StringPool.SLASH;
442         }
443 
444         return shortFileName;
445     }
446 
447     public String getShortFileName(String fullFileName) {
448         int pos = fullFileName.lastIndexOf(StringPool.SLASH);
449 
450         if (pos == -1) {
451             pos = fullFileName.lastIndexOf(StringPool.BACK_SLASH);
452         }
453 
454         String shortFileName =
455             fullFileName.substring(pos + 1, fullFileName.length());
456 
457         return shortFileName;
458     }
459 
460     public boolean isAscii(File file) throws IOException {
461         boolean ascii = true;
462 
463         nsDetector detector = new nsDetector(nsPSMDetector.ALL);
464 
465         InputStream inputStream = new FileInputStream(file);
466 
467         byte[] buffer = new byte[1024];
468 
469         int len = 0;
470 
471         while ((len = inputStream.read(buffer, 0, buffer.length)) != -1) {
472 
473             if (ascii) {
474                 ascii = detector.isAscii(buffer, len);
475 
476                 if (!ascii) {
477                     break;
478                 }
479             }
480         }
481 
482         detector.DataEnd();
483 
484         inputStream.close();
485 
486         return ascii;
487     }
488 
489     public String[] listDirs(String fileName) {
490         return listDirs(new File(fileName));
491     }
492 
493     public String[] listDirs(File file) {
494         List<String> dirs = new ArrayList<String>();
495 
496         File[] fileArray = file.listFiles();
497 
498         for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
499             if (fileArray[i].isDirectory()) {
500                 dirs.add(fileArray[i].getName());
501             }
502         }
503 
504         return dirs.toArray(new String[dirs.size()]);
505     }
506 
507     public String[] listFiles(String fileName) {
508         if (Validator.isNull(fileName)) {
509             return new String[0];
510         }
511 
512         return listFiles(new File(fileName));
513     }
514 
515     public String[] listFiles(File file) {
516         List<String> files = new ArrayList<String>();
517 
518         File[] fileArray = file.listFiles();
519 
520         for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
521             if (fileArray[i].isFile()) {
522                 files.add(fileArray[i].getName());
523             }
524         }
525 
526         return files.toArray(new String[files.size()]);
527     }
528 
529     public void mkdirs(String pathName) {
530         File file = new File(pathName);
531 
532         file.mkdirs();
533     }
534 
535     public boolean move(String sourceFileName, String destinationFileName) {
536         return move(new File(sourceFileName), new File(destinationFileName));
537     }
538 
539     public boolean move(File source, File destination) {
540         if (!source.exists()) {
541             return false;
542         }
543 
544         destination.delete();
545 
546         return source.renameTo(destination);
547     }
548 
549     public String read(String fileName) throws IOException {
550         return read(new File(fileName));
551     }
552 
553     public String read(File file) throws IOException {
554         return read(file, false);
555     }
556 
557     public String read(File file, boolean raw) throws IOException {
558         FileInputStream fis = new FileInputStream(file);
559 
560         byte[] bytes = new byte[fis.available()];
561 
562         fis.read(bytes);
563 
564         fis.close();
565 
566         String s = new String(bytes, StringPool.UTF8);
567 
568         if (raw) {
569             return s;
570         }
571         else {
572             return StringUtil.replace(
573                 s, StringPool.RETURN_NEW_LINE, StringPool.NEW_LINE);
574         }
575     }
576 
577     public String replaceSeparator(String fileName) {
578         return StringUtil.replace(
579             fileName, StringPool.BACK_SLASH, StringPool.SLASH);
580     }
581 
582     public File[] sortFiles(File[] files) {
583         if (files == null) {
584             return null;
585         }
586 
587         Arrays.sort(files, new FileComparator());
588 
589         List<File> directoryList = new ArrayList<File>();
590         List<File> fileList = new ArrayList<File>();
591 
592         for (int i = 0; i < files.length; i++) {
593             if (files[i].isDirectory()) {
594                 directoryList.add(files[i]);
595             }
596             else {
597                 fileList.add(files[i]);
598             }
599         }
600 
601         directoryList.addAll(fileList);
602 
603         return directoryList.toArray(new File[directoryList.size()]);
604     }
605 
606     public String stripExtension(String fileName) {
607         if (fileName == null) {
608             return null;
609         }
610 
611         String ext = getExtension(fileName);
612 
613         if (ext.length() > 0) {
614             return fileName.substring(0, fileName.length() - ext.length() - 1);
615         }
616         else {
617             return fileName;
618         }
619     }
620 
621     public List<String> toList(Reader reader) {
622         List<String> list = new ArrayList<String>();
623 
624         try {
625             UnsyncBufferedReader unsyncBufferedReader =
626                 new UnsyncBufferedReader(reader);
627 
628             String line = null;
629 
630             while ((line = unsyncBufferedReader.readLine()) != null) {
631                 list.add(line);
632             }
633 
634             unsyncBufferedReader.close();
635         }
636         catch (IOException ioe) {
637         }
638 
639         return list;
640     }
641 
642     public List<String> toList(String fileName) {
643         try {
644             return toList(new FileReader(fileName));
645         }
646         catch (IOException ioe) {
647             return new ArrayList<String>();
648         }
649     }
650 
651     public Properties toProperties(FileInputStream fis) {
652         Properties props = new Properties();
653 
654         try {
655             props.load(fis);
656         }
657         catch (IOException ioe) {
658         }
659 
660         return props;
661     }
662 
663     public Properties toProperties(String fileName) {
664         try {
665             return toProperties(new FileInputStream(fileName));
666         }
667         catch (IOException ioe) {
668             return new Properties();
669         }
670     }
671 
672     public void write(String fileName, String s) throws IOException {
673         write(new File(fileName), s);
674     }
675 
676     public void write(String fileName, String s, boolean lazy)
677         throws IOException {
678 
679         write(new File(fileName), s, lazy);
680     }
681 
682     public void write(String fileName, String s, boolean lazy, boolean append)
683         throws IOException {
684 
685         write(new File(fileName), s, lazy, append);
686     }
687 
688     public void write(String pathName, String fileName, String s)
689         throws IOException {
690 
691         write(new File(pathName, fileName), s);
692     }
693 
694     public void write(String pathName, String fileName, String s, boolean lazy)
695         throws IOException {
696 
697         write(new File(pathName, fileName), s, lazy);
698     }
699 
700     public void write(
701             String pathName, String fileName, String s, boolean lazy,
702             boolean append)
703         throws IOException {
704 
705         write(new File(pathName, fileName), s, lazy, append);
706     }
707 
708     public void write(File file, String s) throws IOException {
709         write(file, s, false);
710     }
711 
712     public void write(File file, String s, boolean lazy)
713         throws IOException {
714 
715         write(file, s, lazy, false);
716     }
717 
718     public void write(File file, String s, boolean lazy, boolean append)
719         throws IOException {
720 
721         if (file.getParent() != null) {
722             mkdirs(file.getParent());
723         }
724 
725         if (lazy && file.exists()) {
726             String content = read(file);
727 
728             if (content.equals(s)) {
729                 return;
730             }
731         }
732 
733         Writer writer = new OutputStreamWriter(
734             new FileOutputStream(file, append), StringPool.UTF8);
735 
736         writer.write(s);
737 
738         writer.close();
739     }
740 
741     public void write(String fileName, byte[] bytes) throws IOException {
742         write(new File(fileName), bytes);
743     }
744 
745     public void write(File file, byte[] bytes) throws IOException {
746         write(file, bytes, 0, bytes.length);
747     }
748 
749     public void write(File file, byte[] bytes, int offset, int length)
750         throws IOException {
751 
752         if (file.getParent() != null) {
753             mkdirs(file.getParent());
754         }
755 
756         FileOutputStream fos = new FileOutputStream(file);
757 
758         fos.write(bytes, offset, length);
759 
760         fos.close();
761     }
762 
763     public void write(String fileName, InputStream is) throws IOException {
764         write(new File(fileName), is);
765     }
766 
767     public void write(File file, InputStream is) throws IOException {
768         if (file.getParent() != null) {
769             mkdirs(file.getParent());
770         }
771 
772         StreamUtil.transfer(is, new FileOutputStream(file));
773     }
774 
775     private static final String[] _SAFE_FILE_NAME_1 = {
776         StringPool.AMPERSAND, StringPool.CLOSE_PARENTHESIS,
777         StringPool.OPEN_PARENTHESIS, StringPool.SEMICOLON
778     };
779 
780     private static final String[] _SAFE_FILE_NAME_2 = {
781         "_AMP_", "_CP_", "_OP_", "_SEM_"
782     };
783 
784     private static Log _log = LogFactoryUtil.getLog(FileImpl.class);
785 
786     private static FileImpl _instance = new FileImpl();
787 
788     private Map<String, TextExtractor> _textExtractors =
789         new HashMap<String, TextExtractor>();
790 
791 }