1   /**
2    * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
3    *
4    * The contents of this file are subject to the terms of the Liferay Enterprise
5    * Subscription License ("License"). You may not use this file except in
6    * compliance with the License. You can obtain a copy of the License by
7    * contacting Liferay, Inc. See the License for the specific language governing
8    * permissions and limitations under the License, including but not limited to
9    * distribution rights of the Software.
10   *
11   *
12   *
13   */
14  
15  package com.liferay.portal.util;
16  
17  import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
18  import com.liferay.portal.kernel.io.unsync.UnsyncByteArrayOutputStream;
19  import com.liferay.portal.kernel.log.Log;
20  import com.liferay.portal.kernel.log.LogFactoryUtil;
21  import com.liferay.portal.kernel.util.CharPool;
22  import com.liferay.portal.kernel.util.ContentTypes;
23  import com.liferay.portal.kernel.util.FileComparator;
24  import com.liferay.portal.kernel.util.MimeTypesUtil;
25  import com.liferay.portal.kernel.util.StreamUtil;
26  import com.liferay.portal.kernel.util.StringBundler;
27  import com.liferay.portal.kernel.util.StringPool;
28  import com.liferay.portal.kernel.util.StringUtil;
29  import com.liferay.portal.kernel.util.Time;
30  import com.liferay.portal.kernel.util.Validator;
31  import com.liferay.util.PwdGenerator;
32  import com.liferay.util.SystemProperties;
33  import com.liferay.util.lucene.JerichoHTMLTextExtractor;
34  
35  import java.io.BufferedInputStream;
36  import java.io.File;
37  import java.io.FileInputStream;
38  import java.io.FileOutputStream;
39  import java.io.FileReader;
40  import java.io.IOException;
41  import java.io.InputStream;
42  import java.io.OutputStreamWriter;
43  import java.io.RandomAccessFile;
44  import java.io.Reader;
45  import java.io.Writer;
46  
47  import java.util.ArrayList;
48  import java.util.Arrays;
49  import java.util.HashMap;
50  import java.util.List;
51  import java.util.Map;
52  import java.util.Properties;
53  
54  import org.apache.jackrabbit.extractor.MsExcelTextExtractor;
55  import org.apache.jackrabbit.extractor.MsPowerPointTextExtractor;
56  import org.apache.jackrabbit.extractor.MsWordTextExtractor;
57  import org.apache.jackrabbit.extractor.OpenOfficeTextExtractor;
58  import org.apache.jackrabbit.extractor.PdfTextExtractor;
59  import org.apache.jackrabbit.extractor.PlainTextExtractor;
60  import org.apache.jackrabbit.extractor.RTFTextExtractor;
61  import org.apache.jackrabbit.extractor.TextExtractor;
62  import org.apache.jackrabbit.extractor.XMLTextExtractor;
63  import org.apache.poi.POITextExtractor;
64  import org.apache.poi.extractor.ExtractorFactory;
65  import org.apache.tools.ant.DirectoryScanner;
66  
67  import org.mozilla.intl.chardet.nsDetector;
68  import org.mozilla.intl.chardet.nsPSMDetector;
69  
70  /**
71   * <a href="FileImpl.java.html"><b><i>View Source</i></b></a>
72   *
73   * @author Brian Wing Shun Chan
74   * @author Alexander Chow
75   */
76  public class FileImpl implements com.liferay.portal.kernel.util.File {
77  
78      public static FileImpl getInstance() {
79          return _instance;
80      }
81  
82      public FileImpl() {
83          Class<?>[] textExtractorClasses = new Class[] {
84              JerichoHTMLTextExtractor.class, MsExcelTextExtractor.class,
85              MsPowerPointTextExtractor.class, MsWordTextExtractor.class,
86              OpenOfficeTextExtractor.class, PdfTextExtractor.class,
87              PlainTextExtractor.class, RTFTextExtractor.class,
88              XMLTextExtractor.class
89          };
90  
91          for (Class<?> textExtractorClass : textExtractorClasses) {
92              try {
93                  TextExtractor textExtractor =
94                      (TextExtractor)textExtractorClass.newInstance();
95  
96                  String[] contentTypes = textExtractor.getContentTypes();
97  
98                  for (String contentType : contentTypes) {
99                      _textExtractors.put(contentType, textExtractor);
100                 }
101             }
102             catch (Exception e) {
103                 _log.error(e, e);
104             }
105         }
106     }
107 
108     public void copyDirectory(String sourceDirName, String destinationDirName) {
109         copyDirectory(new File(sourceDirName), new File(destinationDirName));
110     }
111 
112     public void copyDirectory(File source, File destination) {
113         if (source.exists() && source.isDirectory()) {
114             if (!destination.exists()) {
115                 destination.mkdirs();
116             }
117 
118             File[] fileArray = source.listFiles();
119 
120             for (int i = 0; i < fileArray.length; i++) {
121                 if (fileArray[i].isDirectory()) {
122                     copyDirectory(
123                         fileArray[i],
124                         new File(destination.getPath() + File.separator
125                             + fileArray[i].getName()));
126                 }
127                 else {
128                     copyFile(
129                         fileArray[i],
130                         new File(destination.getPath() + File.separator
131                             + fileArray[i].getName()));
132                 }
133             }
134         }
135     }
136 
137     public void copyFile(String source, String destination) {
138         copyFile(source, destination, false);
139     }
140 
141     public void copyFile(String source, String destination, boolean lazy) {
142         copyFile(new File(source), new File(destination), lazy);
143     }
144 
145     public void copyFile(File source, File destination) {
146         copyFile(source, destination, false);
147     }
148 
149     public void copyFile(File source, File destination, boolean lazy) {
150         if (!source.exists()) {
151             return;
152         }
153 
154         if (lazy) {
155             String oldContent = null;
156 
157             try {
158                 oldContent = read(source);
159             }
160             catch (Exception e) {
161                 return;
162             }
163 
164             String newContent = null;
165 
166             try {
167                 newContent = read(destination);
168             }
169             catch (Exception e) {
170             }
171 
172             if ((oldContent == null) || !oldContent.equals(newContent)) {
173                 copyFile(source, destination, false);
174             }
175         }
176         else {
177             if ((destination.getParentFile() != null) &&
178                 (!destination.getParentFile().exists())) {
179 
180                 destination.getParentFile().mkdirs();
181             }
182 
183             try {
184                 StreamUtil.transfer(
185                     new FileInputStream(source),
186                     new FileOutputStream(destination));
187             }
188             catch (IOException ioe) {
189                 _log.error(ioe.getMessage());
190             }
191         }
192     }
193 
194     public File createTempFile() {
195         return createTempFile(null);
196     }
197 
198     public File createTempFile(String extension) {
199         return new File(createTempFileName(extension));
200     }
201 
202     public String createTempFileName() {
203         return createTempFileName(null);
204     }
205 
206     public String createTempFileName(String extension) {
207         StringBundler sb = new StringBundler();
208 
209         sb.append(SystemProperties.get(SystemProperties.TMP_DIR));
210         sb.append(StringPool.SLASH);
211         sb.append(Time.getTimestamp());
212         sb.append(PwdGenerator.getPassword(PwdGenerator.KEY2, 8));
213 
214         if (Validator.isNotNull(extension)) {
215             sb.append(StringPool.PERIOD);
216             sb.append(extension);
217         }
218 
219         return sb.toString();
220     }
221 
222     public String decodeSafeFileName(String fileName) {
223         return StringUtil.replace(
224             fileName, _SAFE_FILE_NAME_2, _SAFE_FILE_NAME_1);
225     }
226 
227     public boolean delete(String file) {
228         return delete(new File(file));
229     }
230 
231     public boolean delete(File file) {
232         if ((file != null) && file.exists()) {
233             return file.delete();
234         }
235         else {
236             return false;
237         }
238     }
239 
240     public void deltree(String directory) {
241         deltree(new File(directory));
242     }
243 
244     public void deltree(File directory) {
245         if (directory.exists() && directory.isDirectory()) {
246             File[] fileArray = directory.listFiles();
247 
248             for (int i = 0; i < fileArray.length; i++) {
249                 if (fileArray[i].isDirectory()) {
250                     deltree(fileArray[i]);
251                 }
252                 else {
253                     fileArray[i].delete();
254                 }
255             }
256 
257             directory.delete();
258         }
259     }
260 
261     public String encodeSafeFileName(String fileName) {
262         if (fileName == null) {
263             return StringPool.BLANK;
264         }
265 
266         return StringUtil.replace(
267             fileName, _SAFE_FILE_NAME_1, _SAFE_FILE_NAME_2);
268     }
269 
270     public boolean exists(String fileName) {
271         return exists(new File(fileName));
272     }
273 
274     public boolean exists(File file) {
275         return file.exists();
276     }
277 
278     public String extractText(InputStream is, String fileName) {
279         String text = null;
280 
281         try {
282             if (!is.markSupported()) {
283                 is = new BufferedInputStream(is);
284             }
285 
286             String contentType = MimeTypesUtil.getContentType(is, fileName);
287 
288             if (_log.isInfoEnabled()) {
289                 _log.info(
290                     "Attempting to extract text from " + fileName +
291                         " of type " + contentType);
292             }
293 
294             TextExtractor textExtractor = _textExtractors.get(contentType);
295 
296             if (textExtractor != null) {
297                 if (_log.isInfoEnabled()) {
298                     _log.info(
299                         "Using text extractor " +
300                             textExtractor.getClass().getName());
301                 }
302 
303                 StringBuilder sb = new StringBuilder();
304 
305                 Reader reader = textExtractor.extractText(
306                     is, contentType, null);
307 
308                 try{
309                     char[] buffer = new char[1024];
310 
311                     int result = -1;
312 
313                     while ((result = reader.read(buffer)) != -1) {
314                         sb.append(buffer, 0, result);
315                     }
316                 }
317                 finally {
318                     try {
319                         reader.close();
320                     }
321                     catch (IOException ioe) {
322                     }
323                 }
324 
325                 text = sb.toString();
326             }
327             else if (contentType.equals(ContentTypes.APPLICATION_ZIP) ||
328                 contentType.startsWith(
329                     "application/vnd.openxmlformats-officedocument.")) {
330 
331                 try {
332                     POITextExtractor poiTextExtractor =
333                         ExtractorFactory.createExtractor(is);
334 
335                     text = poiTextExtractor.getText();
336                 }
337                 catch (Exception e) {
338                     if (_log.isInfoEnabled()) {
339                         _log.info(e.getMessage());
340                     }
341                 }
342             }
343         }
344         catch (Exception e) {
345             _log.error(e);
346         }
347 
348         if (_log.isInfoEnabled()) {
349             if (text == null) {
350                 _log.info("No text extractor found for " + fileName);
351             }
352             else {
353                 _log.info("Text was extracted for " + fileName);
354             }
355         }
356 
357         if (_log.isDebugEnabled()) {
358             _log.debug("Extractor returned text:\n\n" + text);
359         }
360 
361         if (text == null) {
362             text = StringPool.BLANK;
363         }
364 
365         return text;
366     }
367 
368     public String[] find(String directory, String includes, String excludes) {
369         if (directory.length() > 0) {
370             directory = replaceSeparator(directory);
371 
372             if (directory.charAt(directory.length() - 1) == CharPool.SLASH) {
373                 directory = directory.substring(0, directory.length() - 1);
374             }
375         }
376 
377         DirectoryScanner directoryScanner = new DirectoryScanner();
378 
379         directoryScanner.setBasedir(directory);
380         directoryScanner.setExcludes(StringUtil.split(excludes));
381         directoryScanner.setIncludes(StringUtil.split(includes));
382 
383         directoryScanner.scan();
384 
385         String[] includedFiles = directoryScanner.getIncludedFiles();
386 
387         for (int i = 0; i < includedFiles.length; i++) {
388             includedFiles[i] =
389                 directory.concat(StringPool.SLASH).concat(
390                     replaceSeparator(includedFiles[i]));
391         }
392 
393         return includedFiles;
394     }
395 
396     public String getAbsolutePath(File file) {
397         return StringUtil.replace(
398             file.getAbsolutePath(), CharPool.BACK_SLASH, CharPool.SLASH);
399     }
400 
401     public byte[] getBytes(File file) throws IOException {
402         if ((file == null) || !file.exists()) {
403             return null;
404         }
405 
406         RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r");
407 
408         byte[] bytes = new byte[(int)randomAccessFile.length()];
409 
410         randomAccessFile.readFully(bytes);
411 
412         randomAccessFile.close();
413 
414         return bytes;
415     }
416 
417     public byte[] getBytes(InputStream is) throws IOException {
418         return getBytes(is, -1);
419     }
420 
421     public byte[] getBytes(InputStream inputStream, int bufferSize)
422         throws IOException {
423 
424         UnsyncByteArrayOutputStream unsyncByteArrayOutputStream =
425             new UnsyncByteArrayOutputStream();
426 
427         StreamUtil.transfer(
428             inputStream, unsyncByteArrayOutputStream, bufferSize);
429 
430         return unsyncByteArrayOutputStream.toByteArray();
431     }
432 
433     public String getExtension(String fileName) {
434         if (fileName == null) {
435             return null;
436         }
437 
438         int pos = fileName.lastIndexOf(CharPool.PERIOD);
439 
440         if (pos != -1) {
441             return fileName.substring(pos + 1, fileName.length()).toLowerCase();
442         }
443         else {
444             return null;
445         }
446     }
447 
448     public String getPath(String fullFileName) {
449         int pos = fullFileName.lastIndexOf(CharPool.SLASH);
450 
451         if (pos == -1) {
452             pos = fullFileName.lastIndexOf(CharPool.BACK_SLASH);
453         }
454 
455         String shortFileName = fullFileName.substring(0, pos);
456 
457         if (Validator.isNull(shortFileName)) {
458             return StringPool.SLASH;
459         }
460 
461         return shortFileName;
462     }
463 
464     public String getShortFileName(String fullFileName) {
465         int pos = fullFileName.lastIndexOf(CharPool.SLASH);
466 
467         if (pos == -1) {
468             pos = fullFileName.lastIndexOf(CharPool.BACK_SLASH);
469         }
470 
471         String shortFileName =
472             fullFileName.substring(pos + 1, fullFileName.length());
473 
474         return shortFileName;
475     }
476 
477     public boolean isAscii(File file) throws IOException {
478         boolean ascii = true;
479 
480         nsDetector detector = new nsDetector(nsPSMDetector.ALL);
481 
482         InputStream inputStream = new FileInputStream(file);
483 
484         byte[] buffer = new byte[1024];
485 
486         int len = 0;
487 
488         while ((len = inputStream.read(buffer, 0, buffer.length)) != -1) {
489             if (ascii) {
490                 ascii = detector.isAscii(buffer, len);
491 
492                 if (!ascii) {
493                     break;
494                 }
495             }
496         }
497 
498         detector.DataEnd();
499 
500         inputStream.close();
501 
502         return ascii;
503     }
504 
505     public String[] listDirs(String fileName) {
506         return listDirs(new File(fileName));
507     }
508 
509     public String[] listDirs(File file) {
510         List<String> dirs = new ArrayList<String>();
511 
512         File[] fileArray = file.listFiles();
513 
514         for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
515             if (fileArray[i].isDirectory()) {
516                 dirs.add(fileArray[i].getName());
517             }
518         }
519 
520         return dirs.toArray(new String[dirs.size()]);
521     }
522 
523     public String[] listFiles(String fileName) {
524         if (Validator.isNull(fileName)) {
525             return new String[0];
526         }
527 
528         return listFiles(new File(fileName));
529     }
530 
531     public String[] listFiles(File file) {
532         List<String> files = new ArrayList<String>();
533 
534         File[] fileArray = file.listFiles();
535 
536         for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
537             if (fileArray[i].isFile()) {
538                 files.add(fileArray[i].getName());
539             }
540         }
541 
542         return files.toArray(new String[files.size()]);
543     }
544 
545     public void mkdirs(String pathName) {
546         File file = new File(pathName);
547 
548         file.mkdirs();
549     }
550 
551     public boolean move(String sourceFileName, String destinationFileName) {
552         return move(new File(sourceFileName), new File(destinationFileName));
553     }
554 
555     public boolean move(File source, File destination) {
556         if (!source.exists()) {
557             return false;
558         }
559 
560         destination.delete();
561 
562         return source.renameTo(destination);
563     }
564 
565     public String read(String fileName) throws IOException {
566         return read(new File(fileName));
567     }
568 
569     public String read(File file) throws IOException {
570         return read(file, false);
571     }
572 
573     public String read(File file, boolean raw) throws IOException {
574         byte[] bytes = getBytes(file);
575 
576         if (bytes == null) {
577             return null;
578         }
579 
580         String s = new String(bytes, StringPool.UTF8);
581 
582         if (raw) {
583             return s;
584         }
585         else {
586             return StringUtil.replace(
587                 s, StringPool.RETURN_NEW_LINE, StringPool.NEW_LINE);
588         }
589     }
590 
591     public String replaceSeparator(String fileName) {
592         return StringUtil.replace(
593             fileName, CharPool.BACK_SLASH, CharPool.SLASH);
594     }
595 
596     public File[] sortFiles(File[] files) {
597         if (files == null) {
598             return null;
599         }
600 
601         Arrays.sort(files, new FileComparator());
602 
603         List<File> directoryList = new ArrayList<File>();
604         List<File> fileList = new ArrayList<File>();
605 
606         for (int i = 0; i < files.length; i++) {
607             if (files[i].isDirectory()) {
608                 directoryList.add(files[i]);
609             }
610             else {
611                 fileList.add(files[i]);
612             }
613         }
614 
615         directoryList.addAll(fileList);
616 
617         return directoryList.toArray(new File[directoryList.size()]);
618     }
619 
620     public String stripExtension(String fileName) {
621         if (fileName == null) {
622             return null;
623         }
624 
625         int pos = fileName.lastIndexOf(StringPool.PERIOD);
626 
627         if (pos != -1) {
628             return fileName.substring(0, pos);
629         }
630         else {
631             return fileName;
632         }
633     }
634 
635     public List<String> toList(Reader reader) {
636         List<String> list = new ArrayList<String>();
637 
638         try {
639             UnsyncBufferedReader unsyncBufferedReader =
640                 new UnsyncBufferedReader(reader);
641 
642             String line = null;
643 
644             while ((line = unsyncBufferedReader.readLine()) != null) {
645                 list.add(line);
646             }
647 
648             unsyncBufferedReader.close();
649         }
650         catch (IOException ioe) {
651         }
652 
653         return list;
654     }
655 
656     public List<String> toList(String fileName) {
657         try {
658             return toList(new FileReader(fileName));
659         }
660         catch (IOException ioe) {
661             return new ArrayList<String>();
662         }
663     }
664 
665     public Properties toProperties(FileInputStream fis) {
666         Properties properties = new Properties();
667 
668         try {
669             properties.load(fis);
670         }
671         catch (IOException ioe) {
672         }
673 
674         return properties;
675     }
676 
677     public Properties toProperties(String fileName) {
678         try {
679             return toProperties(new FileInputStream(fileName));
680         }
681         catch (IOException ioe) {
682             return new Properties();
683         }
684     }
685 
686     public void write(String fileName, String s) throws IOException {
687         write(new File(fileName), s);
688     }
689 
690     public void write(String fileName, String s, boolean lazy)
691         throws IOException {
692 
693         write(new File(fileName), s, lazy);
694     }
695 
696     public void write(String fileName, String s, boolean lazy, boolean append)
697         throws IOException {
698 
699         write(new File(fileName), s, lazy, append);
700     }
701 
702     public void write(String pathName, String fileName, String s)
703         throws IOException {
704 
705         write(new File(pathName, fileName), s);
706     }
707 
708     public void write(String pathName, String fileName, String s, boolean lazy)
709         throws IOException {
710 
711         write(new File(pathName, fileName), s, lazy);
712     }
713 
714     public void write(
715             String pathName, String fileName, String s, boolean lazy,
716             boolean append)
717         throws IOException {
718 
719         write(new File(pathName, fileName), s, lazy, append);
720     }
721 
722     public void write(File file, String s) throws IOException {
723         write(file, s, false);
724     }
725 
726     public void write(File file, String s, boolean lazy)
727         throws IOException {
728 
729         write(file, s, lazy, false);
730     }
731 
732     public void write(File file, String s, boolean lazy, boolean append)
733         throws IOException {
734 
735         if (file.getParent() != null) {
736             mkdirs(file.getParent());
737         }
738 
739         if (lazy && file.exists()) {
740             String content = read(file);
741 
742             if (content.equals(s)) {
743                 return;
744             }
745         }
746 
747         Writer writer = new OutputStreamWriter(
748             new FileOutputStream(file, append), StringPool.UTF8);
749 
750         writer.write(s);
751 
752         writer.close();
753     }
754 
755     public void write(String fileName, byte[] bytes) throws IOException {
756         write(new File(fileName), bytes);
757     }
758 
759     public void write(File file, byte[] bytes) throws IOException {
760         write(file, bytes, 0, bytes.length);
761     }
762 
763     public void write(File file, byte[] bytes, int offset, int length)
764         throws IOException {
765 
766         if (file.getParent() != null) {
767             mkdirs(file.getParent());
768         }
769 
770         FileOutputStream fos = new FileOutputStream(file);
771 
772         fos.write(bytes, offset, length);
773 
774         fos.close();
775     }
776 
777     public void write(String fileName, InputStream is) throws IOException {
778         write(new File(fileName), is);
779     }
780 
781     public void write(File file, InputStream is) throws IOException {
782         if (file.getParent() != null) {
783             mkdirs(file.getParent());
784         }
785 
786         StreamUtil.transfer(is, new FileOutputStream(file));
787     }
788 
789     private static final String[] _SAFE_FILE_NAME_1 = {
790         StringPool.AMPERSAND, StringPool.CLOSE_PARENTHESIS,
791         StringPool.OPEN_PARENTHESIS, StringPool.SEMICOLON
792     };
793 
794     private static final String[] _SAFE_FILE_NAME_2 = {
795         "_AMP_", "_CP_", "_OP_", "_SEM_"
796     };
797 
798     private static Log _log = LogFactoryUtil.getLog(FileImpl.class);
799 
800     private static FileImpl _instance = new FileImpl();
801 
802     private Map<String, TextExtractor> _textExtractors =
803         new HashMap<String, TextExtractor>();
804 
805 }