001    /**
002     * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
003     *
004     * The contents of this file are subject to the terms of the Liferay Enterprise
005     * Subscription License ("License"). You may not use this file except in
006     * compliance with the License. You can obtain a copy of the License by
007     * contacting Liferay, Inc. See the License for the specific language governing
008     * permissions and limitations under the License, including but not limited to
009     * distribution rights of the Software.
010     *
011     *
012     *
013     */
014    
015    package com.liferay.portal.util;
016    
017    import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
018    import com.liferay.portal.kernel.io.unsync.UnsyncByteArrayOutputStream;
019    import com.liferay.portal.kernel.log.Log;
020    import com.liferay.portal.kernel.log.LogFactoryUtil;
021    import com.liferay.portal.kernel.util.CharPool;
022    import com.liferay.portal.kernel.util.ContentTypes;
023    import com.liferay.portal.kernel.util.FileComparator;
024    import com.liferay.portal.kernel.util.JavaProps;
025    import com.liferay.portal.kernel.util.MimeTypesUtil;
026    import com.liferay.portal.kernel.util.ServerDetector;
027    import com.liferay.portal.kernel.util.StreamUtil;
028    import com.liferay.portal.kernel.util.StringBundler;
029    import com.liferay.portal.kernel.util.StringPool;
030    import com.liferay.portal.kernel.util.StringUtil;
031    import com.liferay.portal.kernel.util.Time;
032    import com.liferay.portal.kernel.util.Validator;
033    import com.liferay.util.PwdGenerator;
034    import com.liferay.util.SystemProperties;
035    import com.liferay.util.lucene.JerichoHTMLTextExtractor;
036    
037    import java.io.BufferedInputStream;
038    import java.io.File;
039    import java.io.FileInputStream;
040    import java.io.FileOutputStream;
041    import java.io.FileReader;
042    import java.io.IOException;
043    import java.io.InputStream;
044    import java.io.OutputStreamWriter;
045    import java.io.RandomAccessFile;
046    import java.io.Reader;
047    import java.io.StringReader;
048    import java.io.Writer;
049    
050    import java.util.ArrayList;
051    import java.util.Arrays;
052    import java.util.HashMap;
053    import java.util.List;
054    import java.util.Map;
055    import java.util.Properties;
056    
057    import org.apache.jackrabbit.extractor.MsExcelTextExtractor;
058    import org.apache.jackrabbit.extractor.MsPowerPointTextExtractor;
059    import org.apache.jackrabbit.extractor.MsWordTextExtractor;
060    import org.apache.jackrabbit.extractor.OpenOfficeTextExtractor;
061    import org.apache.jackrabbit.extractor.PdfTextExtractor;
062    import org.apache.jackrabbit.extractor.PlainTextExtractor;
063    import org.apache.jackrabbit.extractor.RTFTextExtractor;
064    import org.apache.jackrabbit.extractor.TextExtractor;
065    import org.apache.jackrabbit.extractor.XMLTextExtractor;
066    import org.apache.poi.POITextExtractor;
067    import org.apache.poi.extractor.ExtractorFactory;
068    import org.apache.tools.ant.DirectoryScanner;
069    
070    import org.mozilla.intl.chardet.nsDetector;
071    import org.mozilla.intl.chardet.nsPSMDetector;
072    
073    /**
074     * @author Brian Wing Shun Chan
075     * @author Alexander Chow
076     */
077    public class FileImpl implements com.liferay.portal.kernel.util.File {
078    
079            public static FileImpl getInstance() {
080                    return _instance;
081            }
082    
083            public FileImpl() {
084                    Class<?>[] textExtractorClasses = new Class[] {
085                            JerichoHTMLTextExtractor.class, MsExcelTextExtractor.class,
086                            MsPowerPointTextExtractor.class, MsWordTextExtractor.class,
087                            OpenOfficeTextExtractor.class, PdfTextExtractor.class,
088                            PlainTextExtractor.class, RTFTextExtractor.class,
089                            XMLTextExtractor.class
090                    };
091    
092                    for (Class<?> textExtractorClass : textExtractorClasses) {
093                            try {
094                                    TextExtractor textExtractor =
095                                            (TextExtractor)textExtractorClass.newInstance();
096    
097                                    String[] contentTypes = textExtractor.getContentTypes();
098    
099                                    for (String contentType : contentTypes) {
100                                            _textExtractors.put(contentType, textExtractor);
101                                    }
102                            }
103                            catch (Exception e) {
104                                    _log.error(e, e);
105                            }
106                    }
107            }
108    
109            public void copyDirectory(String sourceDirName, String destinationDirName) {
110                    copyDirectory(new File(sourceDirName), new File(destinationDirName));
111            }
112    
113            public void copyDirectory(File source, File destination) {
114                    if (source.exists() && source.isDirectory()) {
115                            if (!destination.exists()) {
116                                    destination.mkdirs();
117                            }
118    
119                            File[] fileArray = source.listFiles();
120    
121                            for (int i = 0; i < fileArray.length; i++) {
122                                    if (fileArray[i].isDirectory()) {
123                                            copyDirectory(
124                                                    fileArray[i],
125                                                    new File(destination.getPath() + File.separator
126                                                            + fileArray[i].getName()));
127                                    }
128                                    else {
129                                            copyFile(
130                                                    fileArray[i],
131                                                    new File(destination.getPath() + File.separator
132                                                            + fileArray[i].getName()));
133                                    }
134                            }
135                    }
136            }
137    
138            public void copyFile(String source, String destination) {
139                    copyFile(source, destination, false);
140            }
141    
142            public void copyFile(String source, String destination, boolean lazy) {
143                    copyFile(new File(source), new File(destination), lazy);
144            }
145    
146            public void copyFile(File source, File destination) {
147                    copyFile(source, destination, false);
148            }
149    
150            public void copyFile(File source, File destination, boolean lazy) {
151                    if (!source.exists()) {
152                            return;
153                    }
154    
155                    if (lazy) {
156                            String oldContent = null;
157    
158                            try {
159                                    oldContent = read(source);
160                            }
161                            catch (Exception e) {
162                                    return;
163                            }
164    
165                            String newContent = null;
166    
167                            try {
168                                    newContent = read(destination);
169                            }
170                            catch (Exception e) {
171                            }
172    
173                            if ((oldContent == null) || !oldContent.equals(newContent)) {
174                                    copyFile(source, destination, false);
175                            }
176                    }
177                    else {
178                            if ((destination.getParentFile() != null) &&
179                                    (!destination.getParentFile().exists())) {
180    
181                                    destination.getParentFile().mkdirs();
182                            }
183    
184                            try {
185                                    StreamUtil.transfer(
186                                            new FileInputStream(source),
187                                            new FileOutputStream(destination));
188                            }
189                            catch (IOException ioe) {
190                                    _log.error(ioe.getMessage());
191                            }
192                    }
193            }
194    
195            public File createTempFile() {
196                    return createTempFile(null);
197            }
198    
199            public File createTempFile(String extension) {
200                    return new File(createTempFileName(extension));
201            }
202    
203            public String createTempFileName() {
204                    return createTempFileName(null);
205            }
206    
207            public String createTempFileName(String extension) {
208                    StringBundler sb = new StringBundler();
209    
210                    sb.append(SystemProperties.get(SystemProperties.TMP_DIR));
211                    sb.append(StringPool.SLASH);
212                    sb.append(Time.getTimestamp());
213                    sb.append(PwdGenerator.getPassword(PwdGenerator.KEY2, 8));
214    
215                    if (Validator.isNotNull(extension)) {
216                            sb.append(StringPool.PERIOD);
217                            sb.append(extension);
218                    }
219    
220                    return sb.toString();
221            }
222    
223            public String decodeSafeFileName(String fileName) {
224                    return StringUtil.replace(
225                            fileName, _SAFE_FILE_NAME_2, _SAFE_FILE_NAME_1);
226            }
227    
228            public boolean delete(String file) {
229                    return delete(new File(file));
230            }
231    
232            public boolean delete(File file) {
233                    if ((file != null) && file.exists()) {
234                            return file.delete();
235                    }
236                    else {
237                            return false;
238                    }
239            }
240    
241            public void deltree(String directory) {
242                    deltree(new File(directory));
243            }
244    
245            public void deltree(File directory) {
246                    if (directory.exists() && directory.isDirectory()) {
247                            File[] fileArray = directory.listFiles();
248    
249                            for (int i = 0; i < fileArray.length; i++) {
250                                    if (fileArray[i].isDirectory()) {
251                                            deltree(fileArray[i]);
252                                    }
253                                    else {
254                                            fileArray[i].delete();
255                                    }
256                            }
257    
258                            directory.delete();
259                    }
260            }
261    
262            public String encodeSafeFileName(String fileName) {
263                    if (fileName == null) {
264                            return StringPool.BLANK;
265                    }
266    
267                    return StringUtil.replace(
268                            fileName, _SAFE_FILE_NAME_1, _SAFE_FILE_NAME_2);
269            }
270    
271            public boolean exists(String fileName) {
272                    return exists(new File(fileName));
273            }
274    
275            public boolean exists(File file) {
276                    return file.exists();
277            }
278    
279            public String extractText(InputStream is, String fileName) {
280                    String text = null;
281    
282                    try {
283                            if (!is.markSupported()) {
284                                    is = new BufferedInputStream(is);
285                            }
286    
287                            String contentType = MimeTypesUtil.getContentType(is, fileName);
288    
289                            if (_log.isInfoEnabled()) {
290                                    _log.info(
291                                            "Attempting to extract text from " + fileName +
292                                                    " of type " + contentType);
293                            }
294    
295                            TextExtractor textExtractor = _textExtractors.get(contentType);
296    
297                            if (textExtractor != null) {
298                                    if (_log.isInfoEnabled()) {
299                                            _log.info(
300                                                    "Using text extractor " +
301                                                            textExtractor.getClass().getName());
302                                    }
303    
304                                    StringBuilder sb = new StringBuilder();
305    
306                                    Reader reader = null;
307    
308                                    if (ServerDetector.isJOnAS() && JavaProps.isJDK6() &&
309                                            contentType.equals(ContentTypes.APPLICATION_MSWORD)) {
310    
311                                            if (_log.isWarnEnabled()) {
312                                                    _log.warn(
313                                                            "JOnAS 5 with JDK 6 has a known issue with text " +
314                                                                    "extraction of Word documents. Use JDK 5 if " +
315                                                                            "you require indexing of Word documents.");
316                                            }
317    
318                                            if (_log.isDebugEnabled()) {
319    
320                                                    // Execute code that will generate the error so it can
321                                                    // be fixed at a later date
322    
323                                                    reader = textExtractor.extractText(
324                                                            is, contentType, null);
325                                            }
326                                            else {
327                                                    reader = new StringReader(StringPool.BLANK);
328                                            }
329                                    }
330                                    else {
331                                            reader = textExtractor.extractText(
332                                                    is, contentType, null);
333                                    }
334    
335                                    try{
336                                            char[] buffer = new char[1024];
337    
338                                            int result = -1;
339    
340                                            while ((result = reader.read(buffer)) != -1) {
341                                                    sb.append(buffer, 0, result);
342                                            }
343                                    }
344                                    finally {
345                                            try {
346                                                    reader.close();
347                                            }
348                                            catch (IOException ioe) {
349                                            }
350                                    }
351    
352                                    text = sb.toString();
353                            }
354                            else if (contentType.equals(ContentTypes.APPLICATION_ZIP) ||
355                                    contentType.startsWith(
356                                            "application/vnd.openxmlformats-officedocument.")) {
357    
358                                    try {
359                                            POITextExtractor poiTextExtractor =
360                                                    ExtractorFactory.createExtractor(is);
361    
362                                            text = poiTextExtractor.getText();
363                                    }
364                                    catch (Exception e) {
365                                            if (_log.isInfoEnabled()) {
366                                                    _log.info(e.getMessage());
367                                            }
368                                    }
369                            }
370                    }
371                    catch (Exception e) {
372                            _log.error(e, e);
373                    }
374    
375                    if (_log.isInfoEnabled()) {
376                            if (text == null) {
377                                    _log.info("No text extractor found for " + fileName);
378                            }
379                            else {
380                                    _log.info("Text was extracted for " + fileName);
381                            }
382                    }
383    
384                    if (_log.isDebugEnabled()) {
385                            _log.debug("Extractor returned text:\n\n" + text);
386                    }
387    
388                    if (text == null) {
389                            text = StringPool.BLANK;
390                    }
391    
392                    return text;
393            }
394    
395            public String[] find(String directory, String includes, String excludes) {
396                    DirectoryScanner directoryScanner = new DirectoryScanner();
397    
398                    directoryScanner.setBasedir(directory);
399                    directoryScanner.setExcludes(StringUtil.split(excludes));
400                    directoryScanner.setIncludes(StringUtil.split(includes));
401    
402                    directoryScanner.scan();
403    
404                    return directoryScanner.getIncludedFiles();
405            }
406    
407            public String getAbsolutePath(File file) {
408                    return StringUtil.replace(
409                            file.getAbsolutePath(), CharPool.BACK_SLASH, CharPool.SLASH);
410            }
411    
412            public byte[] getBytes(File file) throws IOException {
413                    if ((file == null) || !file.exists()) {
414                            return null;
415                    }
416    
417                    RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r");
418    
419                    byte[] bytes = new byte[(int)randomAccessFile.length()];
420    
421                    randomAccessFile.readFully(bytes);
422    
423                    randomAccessFile.close();
424    
425                    return bytes;
426            }
427    
428            public byte[] getBytes(InputStream is) throws IOException {
429                    return getBytes(is, -1);
430            }
431    
432            public byte[] getBytes(InputStream inputStream, int bufferSize)
433                    throws IOException {
434    
435                    UnsyncByteArrayOutputStream unsyncByteArrayOutputStream =
436                            new UnsyncByteArrayOutputStream();
437    
438                    StreamUtil.transfer(
439                            inputStream, unsyncByteArrayOutputStream, bufferSize);
440    
441                    return unsyncByteArrayOutputStream.toByteArray();
442            }
443    
444            public String getExtension(String fileName) {
445                    if (fileName == null) {
446                            return null;
447                    }
448    
449                    int pos = fileName.lastIndexOf(CharPool.PERIOD);
450    
451                    if (pos > 0) {
452                            return fileName.substring(pos + 1, fileName.length()).toLowerCase();
453                    }
454                    else {
455                            return StringPool.BLANK;
456                    }
457            }
458    
459            public String getPath(String fullFileName) {
460                    int pos = fullFileName.lastIndexOf(CharPool.SLASH);
461    
462                    if (pos == -1) {
463                            pos = fullFileName.lastIndexOf(CharPool.BACK_SLASH);
464                    }
465    
466                    String shortFileName = fullFileName.substring(0, pos);
467    
468                    if (Validator.isNull(shortFileName)) {
469                            return StringPool.SLASH;
470                    }
471    
472                    return shortFileName;
473            }
474    
475            public String getShortFileName(String fullFileName) {
476                    int pos = fullFileName.lastIndexOf(CharPool.SLASH);
477    
478                    if (pos == -1) {
479                            pos = fullFileName.lastIndexOf(CharPool.BACK_SLASH);
480                    }
481    
482                    String shortFileName =
483                            fullFileName.substring(pos + 1, fullFileName.length());
484    
485                    return shortFileName;
486            }
487    
488            public boolean isAscii(File file) throws IOException {
489                    boolean ascii = true;
490    
491                    nsDetector detector = new nsDetector(nsPSMDetector.ALL);
492    
493                    InputStream inputStream = new FileInputStream(file);
494    
495                    byte[] buffer = new byte[1024];
496    
497                    int len = 0;
498    
499                    while ((len = inputStream.read(buffer, 0, buffer.length)) != -1) {
500                            if (ascii) {
501                                    ascii = detector.isAscii(buffer, len);
502    
503                                    if (!ascii) {
504                                            break;
505                                    }
506                            }
507                    }
508    
509                    detector.DataEnd();
510    
511                    inputStream.close();
512    
513                    return ascii;
514            }
515    
516            public String[] listDirs(String fileName) {
517                    return listDirs(new File(fileName));
518            }
519    
520            public String[] listDirs(File file) {
521                    List<String> dirs = new ArrayList<String>();
522    
523                    File[] fileArray = file.listFiles();
524    
525                    for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
526                            if (fileArray[i].isDirectory()) {
527                                    dirs.add(fileArray[i].getName());
528                            }
529                    }
530    
531                    return dirs.toArray(new String[dirs.size()]);
532            }
533    
534            public String[] listFiles(String fileName) {
535                    if (Validator.isNull(fileName)) {
536                            return new String[0];
537                    }
538    
539                    return listFiles(new File(fileName));
540            }
541    
542            public String[] listFiles(File file) {
543                    List<String> files = new ArrayList<String>();
544    
545                    File[] fileArray = file.listFiles();
546    
547                    for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
548                            if (fileArray[i].isFile()) {
549                                    files.add(fileArray[i].getName());
550                            }
551                    }
552    
553                    return files.toArray(new String[files.size()]);
554            }
555    
556            public void mkdirs(String pathName) {
557                    File file = new File(pathName);
558    
559                    file.mkdirs();
560            }
561    
562            public boolean move(String sourceFileName, String destinationFileName) {
563                    return move(new File(sourceFileName), new File(destinationFileName));
564            }
565    
566            public boolean move(File source, File destination) {
567                    if (!source.exists()) {
568                            return false;
569                    }
570    
571                    destination.delete();
572    
573                    return source.renameTo(destination);
574            }
575    
576            public String read(String fileName) throws IOException {
577                    return read(new File(fileName));
578            }
579    
580            public String read(File file) throws IOException {
581                    return read(file, false);
582            }
583    
584            public String read(File file, boolean raw) throws IOException {
585                    byte[] bytes = getBytes(file);
586    
587                    if (bytes == null) {
588                            return null;
589                    }
590    
591                    String s = new String(bytes, StringPool.UTF8);
592    
593                    if (raw) {
594                            return s;
595                    }
596                    else {
597                            return StringUtil.replace(
598                                    s, StringPool.RETURN_NEW_LINE, StringPool.NEW_LINE);
599                    }
600            }
601    
602            public String replaceSeparator(String fileName) {
603                    return StringUtil.replace(
604                            fileName, CharPool.BACK_SLASH, CharPool.SLASH);
605            }
606    
607            public File[] sortFiles(File[] files) {
608                    if (files == null) {
609                            return null;
610                    }
611    
612                    Arrays.sort(files, new FileComparator());
613    
614                    List<File> directoryList = new ArrayList<File>();
615                    List<File> fileList = new ArrayList<File>();
616    
617                    for (int i = 0; i < files.length; i++) {
618                            if (files[i].isDirectory()) {
619                                    directoryList.add(files[i]);
620                            }
621                            else {
622                                    fileList.add(files[i]);
623                            }
624                    }
625    
626                    directoryList.addAll(fileList);
627    
628                    return directoryList.toArray(new File[directoryList.size()]);
629            }
630    
631            public String stripExtension(String fileName) {
632                    if (fileName == null) {
633                            return null;
634                    }
635    
636                    String ext = getExtension(fileName);
637    
638                    if (ext.length() > 0) {
639                            return fileName.substring(0, fileName.length() - ext.length() - 1);
640                    }
641                    else {
642                            return fileName;
643                    }
644            }
645    
646            public List<String> toList(Reader reader) {
647                    List<String> list = new ArrayList<String>();
648    
649                    try {
650                            UnsyncBufferedReader unsyncBufferedReader =
651                                    new UnsyncBufferedReader(reader);
652    
653                            String line = null;
654    
655                            while ((line = unsyncBufferedReader.readLine()) != null) {
656                                    list.add(line);
657                            }
658    
659                            unsyncBufferedReader.close();
660                    }
661                    catch (IOException ioe) {
662                    }
663    
664                    return list;
665            }
666    
667            public List<String> toList(String fileName) {
668                    try {
669                            return toList(new FileReader(fileName));
670                    }
671                    catch (IOException ioe) {
672                            return new ArrayList<String>();
673                    }
674            }
675    
676            public Properties toProperties(FileInputStream fis) {
677                    Properties props = new Properties();
678    
679                    try {
680                            props.load(fis);
681                    }
682                    catch (IOException ioe) {
683                    }
684    
685                    return props;
686            }
687    
688            public Properties toProperties(String fileName) {
689                    try {
690                            return toProperties(new FileInputStream(fileName));
691                    }
692                    catch (IOException ioe) {
693                            return new Properties();
694                    }
695            }
696    
697            public void write(String fileName, String s) throws IOException {
698                    write(new File(fileName), s);
699            }
700    
701            public void write(String fileName, String s, boolean lazy)
702                    throws IOException {
703    
704                    write(new File(fileName), s, lazy);
705            }
706    
707            public void write(String fileName, String s, boolean lazy, boolean append)
708                    throws IOException {
709    
710                    write(new File(fileName), s, lazy, append);
711            }
712    
713            public void write(String pathName, String fileName, String s)
714                    throws IOException {
715    
716                    write(new File(pathName, fileName), s);
717            }
718    
719            public void write(String pathName, String fileName, String s, boolean lazy)
720                    throws IOException {
721    
722                    write(new File(pathName, fileName), s, lazy);
723            }
724    
725            public void write(
726                            String pathName, String fileName, String s, boolean lazy,
727                            boolean append)
728                    throws IOException {
729    
730                    write(new File(pathName, fileName), s, lazy, append);
731            }
732    
733            public void write(File file, String s) throws IOException {
734                    write(file, s, false);
735            }
736    
737            public void write(File file, String s, boolean lazy)
738                    throws IOException {
739    
740                    write(file, s, lazy, false);
741            }
742    
743            public void write(File file, String s, boolean lazy, boolean append)
744                    throws IOException {
745    
746                    if (file.getParent() != null) {
747                            mkdirs(file.getParent());
748                    }
749    
750                    if (lazy && file.exists()) {
751                            String content = read(file);
752    
753                            if (content.equals(s)) {
754                                    return;
755                            }
756                    }
757    
758                    Writer writer = new OutputStreamWriter(
759                            new FileOutputStream(file, append), StringPool.UTF8);
760    
761                    writer.write(s);
762    
763                    writer.close();
764            }
765    
766            public void write(String fileName, byte[] bytes) throws IOException {
767                    write(new File(fileName), bytes);
768            }
769    
770            public void write(File file, byte[] bytes) throws IOException {
771                    write(file, bytes, 0, bytes.length);
772            }
773    
774            public void write(File file, byte[] bytes, int offset, int length)
775                    throws IOException {
776    
777                    if (file.getParent() != null) {
778                            mkdirs(file.getParent());
779                    }
780    
781                    FileOutputStream fos = new FileOutputStream(file);
782    
783                    fos.write(bytes, offset, length);
784    
785                    fos.close();
786            }
787    
788            public void write(String fileName, InputStream is) throws IOException {
789                    write(new File(fileName), is);
790            }
791    
792            public void write(File file, InputStream is) throws IOException {
793                    if (file.getParent() != null) {
794                            mkdirs(file.getParent());
795                    }
796    
797                    StreamUtil.transfer(is, new FileOutputStream(file));
798            }
799    
800            private static final String[] _SAFE_FILE_NAME_1 = {
801                    StringPool.AMPERSAND, StringPool.CLOSE_PARENTHESIS,
802                    StringPool.OPEN_PARENTHESIS, StringPool.SEMICOLON
803            };
804    
805            private static final String[] _SAFE_FILE_NAME_2 = {
806                    "_AMP_", "_CP_", "_OP_", "_SEM_"
807            };
808    
809            private static Log _log = LogFactoryUtil.getLog(FileImpl.class);
810    
811            private static FileImpl _instance = new FileImpl();
812    
813            private Map<String, TextExtractor> _textExtractors =
814                    new HashMap<String, TextExtractor>();
815    
816    }