1
14
15 package com.liferay.portal.util;
16
17 import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
18 import com.liferay.portal.kernel.io.unsync.UnsyncByteArrayOutputStream;
19 import com.liferay.portal.kernel.log.Log;
20 import com.liferay.portal.kernel.log.LogFactoryUtil;
21 import com.liferay.portal.kernel.util.ContentTypes;
22 import com.liferay.portal.kernel.util.FileComparator;
23 import com.liferay.portal.kernel.util.JavaProps;
24 import com.liferay.portal.kernel.util.MimeTypesUtil;
25 import com.liferay.portal.kernel.util.ServerDetector;
26 import com.liferay.portal.kernel.util.StreamUtil;
27 import com.liferay.portal.kernel.util.StringBundler;
28 import com.liferay.portal.kernel.util.StringPool;
29 import com.liferay.portal.kernel.util.StringUtil;
30 import com.liferay.portal.kernel.util.Time;
31 import com.liferay.portal.kernel.util.Validator;
32 import com.liferay.util.PwdGenerator;
33 import com.liferay.util.SystemProperties;
34 import com.liferay.util.lucene.JerichoHTMLTextExtractor;
35
36 import java.io.BufferedInputStream;
37 import java.io.File;
38 import java.io.FileInputStream;
39 import java.io.FileOutputStream;
40 import java.io.FileReader;
41 import java.io.IOException;
42 import java.io.InputStream;
43 import java.io.OutputStreamWriter;
44 import java.io.Reader;
45 import java.io.StringReader;
46 import java.io.Writer;
47
48 import java.util.ArrayList;
49 import java.util.Arrays;
50 import java.util.HashMap;
51 import java.util.List;
52 import java.util.Map;
53 import java.util.Properties;
54
55 import org.apache.jackrabbit.extractor.MsExcelTextExtractor;
56 import org.apache.jackrabbit.extractor.MsPowerPointTextExtractor;
57 import org.apache.jackrabbit.extractor.MsWordTextExtractor;
58 import org.apache.jackrabbit.extractor.OpenOfficeTextExtractor;
59 import org.apache.jackrabbit.extractor.PdfTextExtractor;
60 import org.apache.jackrabbit.extractor.PlainTextExtractor;
61 import org.apache.jackrabbit.extractor.RTFTextExtractor;
62 import org.apache.jackrabbit.extractor.TextExtractor;
63 import org.apache.jackrabbit.extractor.XMLTextExtractor;
64 import org.apache.poi.POITextExtractor;
65 import org.apache.poi.extractor.ExtractorFactory;
66
67 import org.mozilla.intl.chardet.nsDetector;
68 import org.mozilla.intl.chardet.nsPSMDetector;
69
70
76 public class FileImpl implements com.liferay.portal.kernel.util.File {
77
78 public static FileImpl getInstance() {
79 return _instance;
80 }
81
82 public FileImpl() {
83 Class<?>[] textExtractorClasses = new Class[] {
84 JerichoHTMLTextExtractor.class, MsExcelTextExtractor.class,
85 MsPowerPointTextExtractor.class, MsWordTextExtractor.class,
86 OpenOfficeTextExtractor.class, PdfTextExtractor.class,
87 PlainTextExtractor.class, RTFTextExtractor.class,
88 XMLTextExtractor.class
89 };
90
91 for (Class<?> textExtractorClass : textExtractorClasses) {
92 try {
93 TextExtractor textExtractor =
94 (TextExtractor)textExtractorClass.newInstance();
95
96 String[] contentTypes = textExtractor.getContentTypes();
97
98 for (String contentType : contentTypes) {
99 _textExtractors.put(contentType, textExtractor);
100 }
101 }
102 catch (Exception e) {
103 _log.error(e, e);
104 }
105 }
106 }
107
108 public void copyDirectory(String sourceDirName, String destinationDirName) {
109 copyDirectory(new File(sourceDirName), new File(destinationDirName));
110 }
111
112 public void copyDirectory(File source, File destination) {
113 if (source.exists() && source.isDirectory()) {
114 if (!destination.exists()) {
115 destination.mkdirs();
116 }
117
118 File[] fileArray = source.listFiles();
119
120 for (int i = 0; i < fileArray.length; i++) {
121 if (fileArray[i].isDirectory()) {
122 copyDirectory(
123 fileArray[i],
124 new File(destination.getPath() + File.separator
125 + fileArray[i].getName()));
126 }
127 else {
128 copyFile(
129 fileArray[i],
130 new File(destination.getPath() + File.separator
131 + fileArray[i].getName()));
132 }
133 }
134 }
135 }
136
137 public void copyFile(String source, String destination) {
138 copyFile(source, destination, false);
139 }
140
141 public void copyFile(String source, String destination, boolean lazy) {
142 copyFile(new File(source), new File(destination), lazy);
143 }
144
145 public void copyFile(File source, File destination) {
146 copyFile(source, destination, false);
147 }
148
149 public void copyFile(File source, File destination, boolean lazy) {
150 if (!source.exists()) {
151 return;
152 }
153
154 if (lazy) {
155 String oldContent = null;
156
157 try {
158 oldContent = read(source);
159 }
160 catch (Exception e) {
161 return;
162 }
163
164 String newContent = null;
165
166 try {
167 newContent = read(destination);
168 }
169 catch (Exception e) {
170 }
171
172 if ((oldContent == null) || !oldContent.equals(newContent)) {
173 copyFile(source, destination, false);
174 }
175 }
176 else {
177 if ((destination.getParentFile() != null) &&
178 (!destination.getParentFile().exists())) {
179
180 destination.getParentFile().mkdirs();
181 }
182
183 try {
184 StreamUtil.transfer(
185 new FileInputStream(source),
186 new FileOutputStream(destination));
187 }
188 catch (IOException ioe) {
189 _log.error(ioe.getMessage());
190 }
191 }
192 }
193
194 public File createTempFile() {
195 return createTempFile(null);
196 }
197
198 public File createTempFile(String extension) {
199 return new File(createTempFileName(extension));
200 }
201
202 public String createTempFileName() {
203 return createTempFileName(null);
204 }
205
206 public String createTempFileName(String extension) {
207 StringBundler sb = new StringBundler();
208
209 sb.append(SystemProperties.get(SystemProperties.TMP_DIR));
210 sb.append(StringPool.SLASH);
211 sb.append(Time.getTimestamp());
212 sb.append(PwdGenerator.getPassword(PwdGenerator.KEY2, 8));
213
214 if (Validator.isNotNull(extension)) {
215 sb.append(StringPool.PERIOD);
216 sb.append(extension);
217 }
218
219 return sb.toString();
220 }
221
222 public String decodeSafeFileName(String fileName) {
223 return StringUtil.replace(
224 fileName, _SAFE_FILE_NAME_2, _SAFE_FILE_NAME_1);
225 }
226
227 public boolean delete(String file) {
228 return delete(new File(file));
229 }
230
231 public boolean delete(File file) {
232 if ((file != null) && file.exists()) {
233 return file.delete();
234 }
235 else {
236 return false;
237 }
238 }
239
240 public void deltree(String directory) {
241 deltree(new File(directory));
242 }
243
244 public void deltree(File directory) {
245 if (directory.exists() && directory.isDirectory()) {
246 File[] fileArray = directory.listFiles();
247
248 for (int i = 0; i < fileArray.length; i++) {
249 if (fileArray[i].isDirectory()) {
250 deltree(fileArray[i]);
251 }
252 else {
253 fileArray[i].delete();
254 }
255 }
256
257 directory.delete();
258 }
259 }
260
261 public String encodeSafeFileName(String fileName) {
262 return StringUtil.replace(
263 fileName, _SAFE_FILE_NAME_1, _SAFE_FILE_NAME_2);
264 }
265
266 public boolean exists(String fileName) {
267 return exists(new File(fileName));
268 }
269
270 public boolean exists(File file) {
271 return file.exists();
272 }
273
274 public String extractText(InputStream is, String fileName) {
275 String text = null;
276
277 try {
278 if (!is.markSupported()) {
279 is = new BufferedInputStream(is);
280 }
281
282 String contentType = MimeTypesUtil.getContentType(is, fileName);
283
284 TextExtractor textExtractor = _textExtractors.get(contentType);
285
286 if (textExtractor != null) {
287 if (_log.isInfoEnabled()) {
288 _log.info(
289 "Using text extractor " +
290 textExtractor.getClass().getName());
291 }
292
293 StringBuilder sb = new StringBuilder();
294
295 Reader reader = null;
296
297 if (ServerDetector.isJOnAS() && JavaProps.isJDK6() &&
298 contentType.equals(ContentTypes.APPLICATION_MSWORD)) {
299
300 if (_log.isWarnEnabled()) {
301 _log.warn(
302 "JOnAS 5 with JDK 6 has a known issue with text " +
303 "extraction of Word documents. Use JDK 5 if " +
304 "you require indexing of Word documents.");
305 }
306
307 if (_log.isDebugEnabled()) {
308
309
312 reader = textExtractor.extractText(
313 is, contentType, null);
314 }
315 else {
316 reader = new StringReader(StringPool.BLANK);
317 }
318 }
319 else {
320 reader = textExtractor.extractText(
321 is, contentType, null);
322 }
323
324 try{
325 char[] buffer = new char[1024];
326
327 int result = -1;
328
329 while ((result = reader.read(buffer)) != -1) {
330 sb.append(buffer, 0, result);
331 }
332 }
333 finally {
334 try {
335 reader.close();
336 }
337 catch (IOException ioe) {
338 }
339 }
340
341 text = sb.toString();
342 }
343 else {
344 if (contentType.equals(ContentTypes.APPLICATION_ZIP) ||
345 contentType.startsWith(
346 "application/vnd.openxmlformats-officedocument.")) {
347
348 try {
349 POITextExtractor poiTextExtractor =
350 ExtractorFactory.createExtractor(is);
351
352 text = poiTextExtractor.getText();
353 }
354 catch (Exception e) {
355 if (_log.isWarnEnabled()) {
356 _log.warn(e, e);
357 }
358 }
359 }
360
361 if ((text == null) && _log.isInfoEnabled()) {
362 _log.info("No text extractor found for " + fileName);
363 }
364 }
365 }
366 catch (Exception e) {
367 _log.error(e, e);
368 }
369
370 if (_log.isDebugEnabled()) {
371 _log.debug("Extractor returned text:\n\n" + text);
372 }
373
374 if (text == null) {
375 text = StringPool.BLANK;
376 }
377
378 return text;
379 }
380
381 public String getAbsolutePath(File file) {
382 return StringUtil.replace(
383 file.getAbsolutePath(), StringPool.BACK_SLASH, StringPool.SLASH);
384 }
385
386 public byte[] getBytes(File file) throws IOException {
387 if ((file == null) || !file.exists()) {
388 return null;
389 }
390
391 FileInputStream is = new FileInputStream(file);
392
393 byte[] bytes = getBytes(is, (int)file.length());
394
395 is.close();
396
397 return bytes;
398 }
399
400 public byte[] getBytes(InputStream is) throws IOException {
401 return getBytes(is, -1);
402 }
403
404 public byte[] getBytes(InputStream inputStream, int bufferSize)
405 throws IOException {
406
407 UnsyncByteArrayOutputStream unsyncByteArrayOutputStream =
408 new UnsyncByteArrayOutputStream();
409
410 StreamUtil.transfer(
411 inputStream, unsyncByteArrayOutputStream, bufferSize);
412
413 return unsyncByteArrayOutputStream.toByteArray();
414 }
415
416 public String getExtension(String fileName) {
417 if (fileName == null) {
418 return null;
419 }
420
421 int pos = fileName.lastIndexOf(StringPool.PERIOD);
422
423 if (pos > 0) {
424 return fileName.substring(pos + 1, fileName.length()).toLowerCase();
425 }
426 else {
427 return StringPool.BLANK;
428 }
429 }
430
431 public String getPath(String fullFileName) {
432 int pos = fullFileName.lastIndexOf(StringPool.SLASH);
433
434 if (pos == -1) {
435 pos = fullFileName.lastIndexOf(StringPool.BACK_SLASH);
436 }
437
438 String shortFileName = fullFileName.substring(0, pos);
439
440 if (Validator.isNull(shortFileName)) {
441 return StringPool.SLASH;
442 }
443
444 return shortFileName;
445 }
446
447 public String getShortFileName(String fullFileName) {
448 int pos = fullFileName.lastIndexOf(StringPool.SLASH);
449
450 if (pos == -1) {
451 pos = fullFileName.lastIndexOf(StringPool.BACK_SLASH);
452 }
453
454 String shortFileName =
455 fullFileName.substring(pos + 1, fullFileName.length());
456
457 return shortFileName;
458 }
459
460 public boolean isAscii(File file) throws IOException {
461 boolean ascii = true;
462
463 nsDetector detector = new nsDetector(nsPSMDetector.ALL);
464
465 InputStream inputStream = new FileInputStream(file);
466
467 byte[] buffer = new byte[1024];
468
469 int len = 0;
470
471 while ((len = inputStream.read(buffer, 0, buffer.length)) != -1) {
472
473 if (ascii) {
474 ascii = detector.isAscii(buffer, len);
475
476 if (!ascii) {
477 break;
478 }
479 }
480 }
481
482 detector.DataEnd();
483
484 inputStream.close();
485
486 return ascii;
487 }
488
489 public String[] listDirs(String fileName) {
490 return listDirs(new File(fileName));
491 }
492
493 public String[] listDirs(File file) {
494 List<String> dirs = new ArrayList<String>();
495
496 File[] fileArray = file.listFiles();
497
498 for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
499 if (fileArray[i].isDirectory()) {
500 dirs.add(fileArray[i].getName());
501 }
502 }
503
504 return dirs.toArray(new String[dirs.size()]);
505 }
506
507 public String[] listFiles(String fileName) {
508 if (Validator.isNull(fileName)) {
509 return new String[0];
510 }
511
512 return listFiles(new File(fileName));
513 }
514
515 public String[] listFiles(File file) {
516 List<String> files = new ArrayList<String>();
517
518 File[] fileArray = file.listFiles();
519
520 for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
521 if (fileArray[i].isFile()) {
522 files.add(fileArray[i].getName());
523 }
524 }
525
526 return files.toArray(new String[files.size()]);
527 }
528
529 public void mkdirs(String pathName) {
530 File file = new File(pathName);
531
532 file.mkdirs();
533 }
534
535 public boolean move(String sourceFileName, String destinationFileName) {
536 return move(new File(sourceFileName), new File(destinationFileName));
537 }
538
539 public boolean move(File source, File destination) {
540 if (!source.exists()) {
541 return false;
542 }
543
544 destination.delete();
545
546 return source.renameTo(destination);
547 }
548
549 public String read(String fileName) throws IOException {
550 return read(new File(fileName));
551 }
552
553 public String read(File file) throws IOException {
554 return read(file, false);
555 }
556
557 public String read(File file, boolean raw) throws IOException {
558 FileInputStream fis = new FileInputStream(file);
559
560 byte[] bytes = new byte[fis.available()];
561
562 fis.read(bytes);
563
564 fis.close();
565
566 String s = new String(bytes, StringPool.UTF8);
567
568 if (raw) {
569 return s;
570 }
571 else {
572 return StringUtil.replace(
573 s, StringPool.RETURN_NEW_LINE, StringPool.NEW_LINE);
574 }
575 }
576
577 public String replaceSeparator(String fileName) {
578 return StringUtil.replace(
579 fileName, StringPool.BACK_SLASH, StringPool.SLASH);
580 }
581
582 public File[] sortFiles(File[] files) {
583 if (files == null) {
584 return null;
585 }
586
587 Arrays.sort(files, new FileComparator());
588
589 List<File> directoryList = new ArrayList<File>();
590 List<File> fileList = new ArrayList<File>();
591
592 for (int i = 0; i < files.length; i++) {
593 if (files[i].isDirectory()) {
594 directoryList.add(files[i]);
595 }
596 else {
597 fileList.add(files[i]);
598 }
599 }
600
601 directoryList.addAll(fileList);
602
603 return directoryList.toArray(new File[directoryList.size()]);
604 }
605
606 public String stripExtension(String fileName) {
607 if (fileName == null) {
608 return null;
609 }
610
611 String ext = getExtension(fileName);
612
613 if (ext.length() > 0) {
614 return fileName.substring(0, fileName.length() - ext.length() - 1);
615 }
616 else {
617 return fileName;
618 }
619 }
620
621 public List<String> toList(Reader reader) {
622 List<String> list = new ArrayList<String>();
623
624 try {
625 UnsyncBufferedReader unsyncBufferedReader =
626 new UnsyncBufferedReader(reader);
627
628 String line = null;
629
630 while ((line = unsyncBufferedReader.readLine()) != null) {
631 list.add(line);
632 }
633
634 unsyncBufferedReader.close();
635 }
636 catch (IOException ioe) {
637 }
638
639 return list;
640 }
641
642 public List<String> toList(String fileName) {
643 try {
644 return toList(new FileReader(fileName));
645 }
646 catch (IOException ioe) {
647 return new ArrayList<String>();
648 }
649 }
650
651 public Properties toProperties(FileInputStream fis) {
652 Properties props = new Properties();
653
654 try {
655 props.load(fis);
656 }
657 catch (IOException ioe) {
658 }
659
660 return props;
661 }
662
663 public Properties toProperties(String fileName) {
664 try {
665 return toProperties(new FileInputStream(fileName));
666 }
667 catch (IOException ioe) {
668 return new Properties();
669 }
670 }
671
672 public void write(String fileName, String s) throws IOException {
673 write(new File(fileName), s);
674 }
675
676 public void write(String fileName, String s, boolean lazy)
677 throws IOException {
678
679 write(new File(fileName), s, lazy);
680 }
681
682 public void write(String fileName, String s, boolean lazy, boolean append)
683 throws IOException {
684
685 write(new File(fileName), s, lazy, append);
686 }
687
688 public void write(String pathName, String fileName, String s)
689 throws IOException {
690
691 write(new File(pathName, fileName), s);
692 }
693
694 public void write(String pathName, String fileName, String s, boolean lazy)
695 throws IOException {
696
697 write(new File(pathName, fileName), s, lazy);
698 }
699
700 public void write(
701 String pathName, String fileName, String s, boolean lazy,
702 boolean append)
703 throws IOException {
704
705 write(new File(pathName, fileName), s, lazy, append);
706 }
707
708 public void write(File file, String s) throws IOException {
709 write(file, s, false);
710 }
711
712 public void write(File file, String s, boolean lazy)
713 throws IOException {
714
715 write(file, s, lazy, false);
716 }
717
718 public void write(File file, String s, boolean lazy, boolean append)
719 throws IOException {
720
721 if (file.getParent() != null) {
722 mkdirs(file.getParent());
723 }
724
725 if (lazy && file.exists()) {
726 String content = read(file);
727
728 if (content.equals(s)) {
729 return;
730 }
731 }
732
733 Writer writer = new OutputStreamWriter(
734 new FileOutputStream(file, append), StringPool.UTF8);
735
736 writer.write(s);
737
738 writer.close();
739 }
740
741 public void write(String fileName, byte[] bytes) throws IOException {
742 write(new File(fileName), bytes);
743 }
744
745 public void write(File file, byte[] bytes) throws IOException {
746 write(file, bytes, 0, bytes.length);
747 }
748
749 public void write(File file, byte[] bytes, int offset, int length)
750 throws IOException {
751
752 if (file.getParent() != null) {
753 mkdirs(file.getParent());
754 }
755
756 FileOutputStream fos = new FileOutputStream(file);
757
758 fos.write(bytes, offset, length);
759
760 fos.close();
761 }
762
763 public void write(String fileName, InputStream is) throws IOException {
764 write(new File(fileName), is);
765 }
766
767 public void write(File file, InputStream is) throws IOException {
768 if (file.getParent() != null) {
769 mkdirs(file.getParent());
770 }
771
772 StreamUtil.transfer(is, new FileOutputStream(file));
773 }
774
775 private static final String[] _SAFE_FILE_NAME_1 = {
776 StringPool.AMPERSAND, StringPool.CLOSE_PARENTHESIS,
777 StringPool.OPEN_PARENTHESIS, StringPool.SEMICOLON
778 };
779
780 private static final String[] _SAFE_FILE_NAME_2 = {
781 "_AMP_", "_CP_", "_OP_", "_SEM_"
782 };
783
784 private static Log _log = LogFactoryUtil.getLog(FileImpl.class);
785
786 private static FileImpl _instance = new FileImpl();
787
788 private Map<String, TextExtractor> _textExtractors =
789 new HashMap<String, TextExtractor>();
790
791 }