001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
018 import com.liferay.portal.kernel.io.unsync.UnsyncByteArrayOutputStream;
019 import com.liferay.portal.kernel.log.Log;
020 import com.liferay.portal.kernel.log.LogFactoryUtil;
021 import com.liferay.portal.kernel.util.CharPool;
022 import com.liferay.portal.kernel.util.ContentTypes;
023 import com.liferay.portal.kernel.util.FileComparator;
024 import com.liferay.portal.kernel.util.JavaProps;
025 import com.liferay.portal.kernel.util.MimeTypesUtil;
026 import com.liferay.portal.kernel.util.ServerDetector;
027 import com.liferay.portal.kernel.util.StreamUtil;
028 import com.liferay.portal.kernel.util.StringBundler;
029 import com.liferay.portal.kernel.util.StringPool;
030 import com.liferay.portal.kernel.util.StringUtil;
031 import com.liferay.portal.kernel.util.Time;
032 import com.liferay.portal.kernel.util.Validator;
033 import com.liferay.util.PwdGenerator;
034 import com.liferay.util.SystemProperties;
035 import com.liferay.util.lucene.JerichoHTMLTextExtractor;
036
037 import java.io.BufferedInputStream;
038 import java.io.File;
039 import java.io.FileInputStream;
040 import java.io.FileOutputStream;
041 import java.io.FileReader;
042 import java.io.IOException;
043 import java.io.InputStream;
044 import java.io.OutputStreamWriter;
045 import java.io.RandomAccessFile;
046 import java.io.Reader;
047 import java.io.StringReader;
048 import java.io.Writer;
049
050 import java.util.ArrayList;
051 import java.util.Arrays;
052 import java.util.HashMap;
053 import java.util.List;
054 import java.util.Map;
055 import java.util.Properties;
056
057 import org.apache.jackrabbit.extractor.MsExcelTextExtractor;
058 import org.apache.jackrabbit.extractor.MsPowerPointTextExtractor;
059 import org.apache.jackrabbit.extractor.MsWordTextExtractor;
060 import org.apache.jackrabbit.extractor.OpenOfficeTextExtractor;
061 import org.apache.jackrabbit.extractor.PdfTextExtractor;
062 import org.apache.jackrabbit.extractor.PlainTextExtractor;
063 import org.apache.jackrabbit.extractor.RTFTextExtractor;
064 import org.apache.jackrabbit.extractor.TextExtractor;
065 import org.apache.jackrabbit.extractor.XMLTextExtractor;
066 import org.apache.poi.POITextExtractor;
067 import org.apache.poi.extractor.ExtractorFactory;
068 import org.apache.tools.ant.DirectoryScanner;
069
070 import org.mozilla.intl.chardet.nsDetector;
071 import org.mozilla.intl.chardet.nsPSMDetector;
072
073
077 public class FileImpl implements com.liferay.portal.kernel.util.File {
078
079 public static FileImpl getInstance() {
080 return _instance;
081 }
082
083 public FileImpl() {
084 Class<?>[] textExtractorClasses = new Class[] {
085 JerichoHTMLTextExtractor.class, MsExcelTextExtractor.class,
086 MsPowerPointTextExtractor.class, MsWordTextExtractor.class,
087 OpenOfficeTextExtractor.class, PdfTextExtractor.class,
088 PlainTextExtractor.class, RTFTextExtractor.class,
089 XMLTextExtractor.class
090 };
091
092 for (Class<?> textExtractorClass : textExtractorClasses) {
093 try {
094 TextExtractor textExtractor =
095 (TextExtractor)textExtractorClass.newInstance();
096
097 String[] contentTypes = textExtractor.getContentTypes();
098
099 for (String contentType : contentTypes) {
100 _textExtractors.put(contentType, textExtractor);
101 }
102 }
103 catch (Exception e) {
104 _log.error(e, e);
105 }
106 }
107 }
108
109 public void copyDirectory(String sourceDirName, String destinationDirName) {
110 copyDirectory(new File(sourceDirName), new File(destinationDirName));
111 }
112
113 public void copyDirectory(File source, File destination) {
114 if (source.exists() && source.isDirectory()) {
115 if (!destination.exists()) {
116 destination.mkdirs();
117 }
118
119 File[] fileArray = source.listFiles();
120
121 for (int i = 0; i < fileArray.length; i++) {
122 if (fileArray[i].isDirectory()) {
123 copyDirectory(
124 fileArray[i],
125 new File(destination.getPath() + File.separator
126 + fileArray[i].getName()));
127 }
128 else {
129 copyFile(
130 fileArray[i],
131 new File(destination.getPath() + File.separator
132 + fileArray[i].getName()));
133 }
134 }
135 }
136 }
137
138 public void copyFile(String source, String destination) {
139 copyFile(source, destination, false);
140 }
141
142 public void copyFile(String source, String destination, boolean lazy) {
143 copyFile(new File(source), new File(destination), lazy);
144 }
145
146 public void copyFile(File source, File destination) {
147 copyFile(source, destination, false);
148 }
149
150 public void copyFile(File source, File destination, boolean lazy) {
151 if (!source.exists()) {
152 return;
153 }
154
155 if (lazy) {
156 String oldContent = null;
157
158 try {
159 oldContent = read(source);
160 }
161 catch (Exception e) {
162 return;
163 }
164
165 String newContent = null;
166
167 try {
168 newContent = read(destination);
169 }
170 catch (Exception e) {
171 }
172
173 if ((oldContent == null) || !oldContent.equals(newContent)) {
174 copyFile(source, destination, false);
175 }
176 }
177 else {
178 if ((destination.getParentFile() != null) &&
179 (!destination.getParentFile().exists())) {
180
181 destination.getParentFile().mkdirs();
182 }
183
184 try {
185 StreamUtil.transfer(
186 new FileInputStream(source),
187 new FileOutputStream(destination));
188 }
189 catch (IOException ioe) {
190 _log.error(ioe.getMessage());
191 }
192 }
193 }
194
195 public File createTempFile() {
196 return createTempFile(null);
197 }
198
199 public File createTempFile(String extension) {
200 return new File(createTempFileName(extension));
201 }
202
203 public String createTempFileName() {
204 return createTempFileName(null);
205 }
206
207 public String createTempFileName(String extension) {
208 StringBundler sb = new StringBundler();
209
210 sb.append(SystemProperties.get(SystemProperties.TMP_DIR));
211 sb.append(StringPool.SLASH);
212 sb.append(Time.getTimestamp());
213 sb.append(PwdGenerator.getPassword(PwdGenerator.KEY2, 8));
214
215 if (Validator.isNotNull(extension)) {
216 sb.append(StringPool.PERIOD);
217 sb.append(extension);
218 }
219
220 return sb.toString();
221 }
222
223 public String decodeSafeFileName(String fileName) {
224 return StringUtil.replace(
225 fileName, _SAFE_FILE_NAME_2, _SAFE_FILE_NAME_1);
226 }
227
228 public boolean delete(String file) {
229 return delete(new File(file));
230 }
231
232 public boolean delete(File file) {
233 if ((file != null) && file.exists()) {
234 return file.delete();
235 }
236 else {
237 return false;
238 }
239 }
240
241 public void deltree(String directory) {
242 deltree(new File(directory));
243 }
244
245 public void deltree(File directory) {
246 if (directory.exists() && directory.isDirectory()) {
247 File[] fileArray = directory.listFiles();
248
249 for (int i = 0; i < fileArray.length; i++) {
250 if (fileArray[i].isDirectory()) {
251 deltree(fileArray[i]);
252 }
253 else {
254 fileArray[i].delete();
255 }
256 }
257
258 directory.delete();
259 }
260 }
261
262 public String encodeSafeFileName(String fileName) {
263 if (fileName == null) {
264 return StringPool.BLANK;
265 }
266
267 return StringUtil.replace(
268 fileName, _SAFE_FILE_NAME_1, _SAFE_FILE_NAME_2);
269 }
270
271 public boolean exists(String fileName) {
272 return exists(new File(fileName));
273 }
274
275 public boolean exists(File file) {
276 return file.exists();
277 }
278
/**
 * Extracts the plain text of a document read from a stream.
 *
 * <p>
 * The content type is detected from the stream and file name. If an
 * extractor is registered for that type it is used; otherwise ZIP and
 * Office Open XML content is handed to the POI extractor factory. Any
 * failure is logged and results in a blank string.
 * </p>
 *
 * @param is the stream to read the document from; it is wrapped in a
 *        {@code BufferedInputStream} when it does not support mark/reset
 * @param fileName the file name, used for content type detection and log
 *        messages
 * @return the extracted text, or a blank string when no text could be
 *         extracted
 */
public String extractText(InputStream is, String fileName) {
	String text = null;

	try {

		// NOTE(review): presumably mark support is needed so content type
		// detection can rewind the stream -- confirm against MimeTypesUtil

		if (!is.markSupported()) {
			is = new BufferedInputStream(is);
		}

		String contentType = MimeTypesUtil.getContentType(is, fileName);

		if (_log.isInfoEnabled()) {
			_log.info(
				"Attempting to extract text from " + fileName +
					" of type " + contentType);
		}

		TextExtractor textExtractor = _textExtractors.get(contentType);

		if (textExtractor != null) {
			if (_log.isInfoEnabled()) {
				_log.info(
					"Using text extractor " +
						textExtractor.getClass().getName());
			}

			StringBuilder sb = new StringBuilder();

			Reader reader = null;

			if (ServerDetector.isJOnAS() && JavaProps.isJDK6() &&
				contentType.equals(ContentTypes.APPLICATION_MSWORD)) {

				if (_log.isWarnEnabled()) {
					_log.warn(
						"JOnAS 5 with JDK 6 has a known issue with text " +
							"extraction of Word documents. Use JDK 5 if " +
								"you require indexing of Word documents.");
				}

				if (_log.isDebugEnabled()) {

					// On this server/JDK combination the extraction is
					// only attempted when debug logging is enabled;
					// otherwise an empty reader is used and the resulting
					// text is blank

					reader = textExtractor.extractText(
						is, contentType, null);
				}
				else {
					reader = new StringReader(StringPool.BLANK);
				}
			}
			else {
				reader = textExtractor.extractText(
					is, contentType, null);
			}

			// Drain the reader into the buffer, always closing it

			try{
				char[] buffer = new char[1024];

				int result = -1;

				while ((result = reader.read(buffer)) != -1) {
					sb.append(buffer, 0, result);
				}
			}
			finally {
				try {
					reader.close();
				}
				catch (IOException ioe) {
				}
			}

			text = sb.toString();
		}
		else if (contentType.equals(ContentTypes.APPLICATION_ZIP) ||
				 contentType.startsWith(
					"application/vnd.openxmlformats-officedocument.")) {

			// No registered extractor: let POI try to pick one. A failure
			// here is only reported at info level and leaves text null.

			try {
				POITextExtractor poiTextExtractor =
					ExtractorFactory.createExtractor(is);

				text = poiTextExtractor.getText();
			}
			catch (Exception e) {
				if (_log.isInfoEnabled()) {
					_log.info(e.getMessage());
				}
			}
		}
	}
	catch (Exception e) {
		_log.error(e, e);
	}

	// text is also null when extraction failed with an exception, so the
	// "No text extractor found" message covers that case as well

	if (_log.isInfoEnabled()) {
		if (text == null) {
			_log.info("No text extractor found for " + fileName);
		}
		else {
			_log.info("Text was extracted for " + fileName);
		}
	}

	if (_log.isDebugEnabled()) {
		_log.debug("Extractor returned text:\n\n" + text);
	}

	// Callers never receive null

	if (text == null) {
		text = StringPool.BLANK;
	}

	return text;
}
394
395 public String[] find(String directory, String includes, String excludes) {
396 DirectoryScanner directoryScanner = new DirectoryScanner();
397
398 directoryScanner.setBasedir(directory);
399 directoryScanner.setExcludes(StringUtil.split(excludes));
400 directoryScanner.setIncludes(StringUtil.split(includes));
401
402 directoryScanner.scan();
403
404 return directoryScanner.getIncludedFiles();
405 }
406
407 public String getAbsolutePath(File file) {
408 return StringUtil.replace(
409 file.getAbsolutePath(), CharPool.BACK_SLASH, CharPool.SLASH);
410 }
411
412 public byte[] getBytes(File file) throws IOException {
413 if ((file == null) || !file.exists()) {
414 return null;
415 }
416
417 RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r");
418
419 byte[] bytes = new byte[(int)randomAccessFile.length()];
420
421 randomAccessFile.readFully(bytes);
422
423 randomAccessFile.close();
424
425 return bytes;
426 }
427
428 public byte[] getBytes(InputStream is) throws IOException {
429 return getBytes(is, -1);
430 }
431
432 public byte[] getBytes(InputStream inputStream, int bufferSize)
433 throws IOException {
434
435 UnsyncByteArrayOutputStream unsyncByteArrayOutputStream =
436 new UnsyncByteArrayOutputStream();
437
438 StreamUtil.transfer(
439 inputStream, unsyncByteArrayOutputStream, bufferSize);
440
441 return unsyncByteArrayOutputStream.toByteArray();
442 }
443
444 public String getExtension(String fileName) {
445 if (fileName == null) {
446 return null;
447 }
448
449 int pos = fileName.lastIndexOf(CharPool.PERIOD);
450
451 if (pos > 0) {
452 return fileName.substring(pos + 1, fileName.length()).toLowerCase();
453 }
454 else {
455 return StringPool.BLANK;
456 }
457 }
458
459 public String getPath(String fullFileName) {
460 int pos = fullFileName.lastIndexOf(CharPool.SLASH);
461
462 if (pos == -1) {
463 pos = fullFileName.lastIndexOf(CharPool.BACK_SLASH);
464 }
465
466 String shortFileName = fullFileName.substring(0, pos);
467
468 if (Validator.isNull(shortFileName)) {
469 return StringPool.SLASH;
470 }
471
472 return shortFileName;
473 }
474
475 public String getShortFileName(String fullFileName) {
476 int pos = fullFileName.lastIndexOf(CharPool.SLASH);
477
478 if (pos == -1) {
479 pos = fullFileName.lastIndexOf(CharPool.BACK_SLASH);
480 }
481
482 String shortFileName =
483 fullFileName.substring(pos + 1, fullFileName.length());
484
485 return shortFileName;
486 }
487
488 public boolean isAscii(File file) throws IOException {
489 boolean ascii = true;
490
491 nsDetector detector = new nsDetector(nsPSMDetector.ALL);
492
493 InputStream inputStream = new FileInputStream(file);
494
495 byte[] buffer = new byte[1024];
496
497 int len = 0;
498
499 while ((len = inputStream.read(buffer, 0, buffer.length)) != -1) {
500 if (ascii) {
501 ascii = detector.isAscii(buffer, len);
502
503 if (!ascii) {
504 break;
505 }
506 }
507 }
508
509 detector.DataEnd();
510
511 inputStream.close();
512
513 return ascii;
514 }
515
516 public String[] listDirs(String fileName) {
517 return listDirs(new File(fileName));
518 }
519
520 public String[] listDirs(File file) {
521 List<String> dirs = new ArrayList<String>();
522
523 File[] fileArray = file.listFiles();
524
525 for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
526 if (fileArray[i].isDirectory()) {
527 dirs.add(fileArray[i].getName());
528 }
529 }
530
531 return dirs.toArray(new String[dirs.size()]);
532 }
533
534 public String[] listFiles(String fileName) {
535 if (Validator.isNull(fileName)) {
536 return new String[0];
537 }
538
539 return listFiles(new File(fileName));
540 }
541
542 public String[] listFiles(File file) {
543 List<String> files = new ArrayList<String>();
544
545 File[] fileArray = file.listFiles();
546
547 for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
548 if (fileArray[i].isFile()) {
549 files.add(fileArray[i].getName());
550 }
551 }
552
553 return files.toArray(new String[files.size()]);
554 }
555
556 public void mkdirs(String pathName) {
557 File file = new File(pathName);
558
559 file.mkdirs();
560 }
561
562 public boolean move(String sourceFileName, String destinationFileName) {
563 return move(new File(sourceFileName), new File(destinationFileName));
564 }
565
566 public boolean move(File source, File destination) {
567 if (!source.exists()) {
568 return false;
569 }
570
571 destination.delete();
572
573 return source.renameTo(destination);
574 }
575
576 public String read(String fileName) throws IOException {
577 return read(new File(fileName));
578 }
579
580 public String read(File file) throws IOException {
581 return read(file, false);
582 }
583
584 public String read(File file, boolean raw) throws IOException {
585 byte[] bytes = getBytes(file);
586
587 if (bytes == null) {
588 return null;
589 }
590
591 String s = new String(bytes, StringPool.UTF8);
592
593 if (raw) {
594 return s;
595 }
596 else {
597 return StringUtil.replace(
598 s, StringPool.RETURN_NEW_LINE, StringPool.NEW_LINE);
599 }
600 }
601
602 public String replaceSeparator(String fileName) {
603 return StringUtil.replace(
604 fileName, CharPool.BACK_SLASH, CharPool.SLASH);
605 }
606
607 public File[] sortFiles(File[] files) {
608 if (files == null) {
609 return null;
610 }
611
612 Arrays.sort(files, new FileComparator());
613
614 List<File> directoryList = new ArrayList<File>();
615 List<File> fileList = new ArrayList<File>();
616
617 for (int i = 0; i < files.length; i++) {
618 if (files[i].isDirectory()) {
619 directoryList.add(files[i]);
620 }
621 else {
622 fileList.add(files[i]);
623 }
624 }
625
626 directoryList.addAll(fileList);
627
628 return directoryList.toArray(new File[directoryList.size()]);
629 }
630
631 public String stripExtension(String fileName) {
632 if (fileName == null) {
633 return null;
634 }
635
636 String ext = getExtension(fileName);
637
638 if (ext.length() > 0) {
639 return fileName.substring(0, fileName.length() - ext.length() - 1);
640 }
641 else {
642 return fileName;
643 }
644 }
645
646 public List<String> toList(Reader reader) {
647 List<String> list = new ArrayList<String>();
648
649 try {
650 UnsyncBufferedReader unsyncBufferedReader =
651 new UnsyncBufferedReader(reader);
652
653 String line = null;
654
655 while ((line = unsyncBufferedReader.readLine()) != null) {
656 list.add(line);
657 }
658
659 unsyncBufferedReader.close();
660 }
661 catch (IOException ioe) {
662 }
663
664 return list;
665 }
666
667 public List<String> toList(String fileName) {
668 try {
669 return toList(new FileReader(fileName));
670 }
671 catch (IOException ioe) {
672 return new ArrayList<String>();
673 }
674 }
675
676 public Properties toProperties(FileInputStream fis) {
677 Properties props = new Properties();
678
679 try {
680 props.load(fis);
681 }
682 catch (IOException ioe) {
683 }
684
685 return props;
686 }
687
688 public Properties toProperties(String fileName) {
689 try {
690 return toProperties(new FileInputStream(fileName));
691 }
692 catch (IOException ioe) {
693 return new Properties();
694 }
695 }
696
697 public void write(String fileName, String s) throws IOException {
698 write(new File(fileName), s);
699 }
700
701 public void write(String fileName, String s, boolean lazy)
702 throws IOException {
703
704 write(new File(fileName), s, lazy);
705 }
706
707 public void write(String fileName, String s, boolean lazy, boolean append)
708 throws IOException {
709
710 write(new File(fileName), s, lazy, append);
711 }
712
713 public void write(String pathName, String fileName, String s)
714 throws IOException {
715
716 write(new File(pathName, fileName), s);
717 }
718
719 public void write(String pathName, String fileName, String s, boolean lazy)
720 throws IOException {
721
722 write(new File(pathName, fileName), s, lazy);
723 }
724
725 public void write(
726 String pathName, String fileName, String s, boolean lazy,
727 boolean append)
728 throws IOException {
729
730 write(new File(pathName, fileName), s, lazy, append);
731 }
732
733 public void write(File file, String s) throws IOException {
734 write(file, s, false);
735 }
736
737 public void write(File file, String s, boolean lazy)
738 throws IOException {
739
740 write(file, s, lazy, false);
741 }
742
743 public void write(File file, String s, boolean lazy, boolean append)
744 throws IOException {
745
746 if (file.getParent() != null) {
747 mkdirs(file.getParent());
748 }
749
750 if (lazy && file.exists()) {
751 String content = read(file);
752
753 if (content.equals(s)) {
754 return;
755 }
756 }
757
758 Writer writer = new OutputStreamWriter(
759 new FileOutputStream(file, append), StringPool.UTF8);
760
761 writer.write(s);
762
763 writer.close();
764 }
765
766 public void write(String fileName, byte[] bytes) throws IOException {
767 write(new File(fileName), bytes);
768 }
769
770 public void write(File file, byte[] bytes) throws IOException {
771 write(file, bytes, 0, bytes.length);
772 }
773
774 public void write(File file, byte[] bytes, int offset, int length)
775 throws IOException {
776
777 if (file.getParent() != null) {
778 mkdirs(file.getParent());
779 }
780
781 FileOutputStream fos = new FileOutputStream(file);
782
783 fos.write(bytes, offset, length);
784
785 fos.close();
786 }
787
788 public void write(String fileName, InputStream is) throws IOException {
789 write(new File(fileName), is);
790 }
791
792 public void write(File file, InputStream is) throws IOException {
793 if (file.getParent() != null) {
794 mkdirs(file.getParent());
795 }
796
797 StreamUtil.transfer(is, new FileOutputStream(file));
798 }
799
// Characters replaced by encodeSafeFileName. Each entry corresponds to the
// placeholder at the same index in _SAFE_FILE_NAME_2.
private static final String[] _SAFE_FILE_NAME_1 = {
	StringPool.AMPERSAND, StringPool.CLOSE_PARENTHESIS,
	StringPool.OPEN_PARENTHESIS, StringPool.SEMICOLON
};

// Placeholder tokens written by encodeSafeFileName and turned back into
// the original characters by decodeSafeFileName
private static final String[] _SAFE_FILE_NAME_2 = {
	"_AMP_", "_CP_", "_OP_", "_SEM_"
};

// Declared before _instance on purpose: the constructor logs through _log,
// and static fields are initialized in textual order
private static Log _log = LogFactoryUtil.getLog(FileImpl.class);

// Instance handed out by getInstance()
private static FileImpl _instance = new FileImpl();

// Text extractors keyed by the content types they report; filled by the
// constructor and queried by extractText
private Map<String, TextExtractor> _textExtractors =
	new HashMap<String, TextExtractor>();
815
816 }