/*
Copyright (C) 2000-2010  Ministere de la culture et de la communication (France), AJLSM
See LICENCE file
*/

package fr.gouv.culture.sdx.search.lucene.analysis;

import java.io.Reader;

import org.apache.avalon.framework.configuration.Configuration;
import org.apache.avalon.framework.configuration.ConfigurationException;
import org.apache.avalon.framework.logger.Logger;
import org.apache.lucene.analysis.TokenStream;

import gpl.pierrick.brihaye.aramorph.lucene.ArabicGrammaticalFilter;
import gpl.pierrick.brihaye.aramorph.lucene.ArabicStemmer;
import gpl.pierrick.brihaye.aramorph.lucene.ArabicTokenizer;

/** Analyzer for the Arabic language. This analyzer uses Tim Buckwalter's algorithm
 * (available at <a href="http://www.ldc.upenn.edu/Catalog/CatalogEntry.jsp?catalogId=LDC2002L49">LDC
 * Catalog</a>) to identify the morphological category of Arabic tokens.
 * The relevant categories are still to be determined, but the current list gives
 * good results.
 * Final tokens are a romanized canonical version of the word.
 * @author Pierrick Brihaye, 2003
 */
public final class Analyzer_ar extends AbstractAnalyzer {

	protected final static String ANALYZER_TYPE="Analyzer_ar";
	
    /** Configure the glosser.
     * @param configuration The configuration object
     * @throws ConfigurationException If a problem occurs during configuration
     */
    public void configure(Configuration configuration) throws ConfigurationException {
        super.configure(configuration);
    }

    /** Transmits a super.getLog() to the class.
     * @param logger The super.getLog()
     */
    public void enableLogging(Logger logger) {
        super.enableLogging(logger);
    }

    /** Returns a token stream of romanized arabic words whose morphological categories are found to be semantically relevant.
     * @return The token stream
     * @param reader The reader
     *@param fieldName The field
     */
    public final TokenStream tokenStream(String fieldName, Reader reader) {
        TokenStream result = null;
        try {
            result = new ArabicTokenizer(reader);
            result = new ArabicStemmer(result);
            result = new ArabicGrammaticalFilter(result);
        } catch (Exception e) {
            this.logger.error("Arabic analyzer error", e);
        }
        return result;
    }

	/**
	 * @see fr.gouv.culture.sdx.search.lucene.analysis.AbstractAnalyzer#getAnalyserType()
	 */
	protected String getAnalyzerType() {
		return Analyzer_ar.ANALYZER_TYPE;
	}

	/** Creates a TokenStream which tokenizes all the text in the provided
	 *  Reader. Provided for backward compatibility only.
	 * @deprecated use tokenStream(String, Reader) instead. 
	 * @see fr.gouv.culture.sdx.search.lucene.analysis.Analyzer#tokenStream(java.io.Reader)
	 * @author Malo Pichot, 2007
	 */
	public TokenStream tokenStream(Reader reader) {
		return tokenStream(null, reader);
	}
}

