package com.ibm.dltj.crf;

import com.ibm.dltj.DLT;
import com.ibm.dltj.DLTException;
import com.ibm.dltj.Dictionary;
import com.ibm.dltj.DictionaryInfo;
import com.ibm.dltj.Gloss;
import com.ibm.dltj.GlossCollection;
import com.ibm.dltj.crf.feature.handler.FeatureHandler;
import com.ibm.dltj.crf.feature.handler.FeatureHandlerFactory;
import com.ibm.dltj.crf.feature.template.FeatureTemplate;
import com.ibm.dltj.crf.feature.template.FeatureTemplateParser;
import com.ibm.dltj.fst.NetGenericDictionary;
import com.ibm.dltj.gloss.CRFLabelSet;
import com.ibm.dltj.gloss.CRFLabelSetGloss;
import com.ibm.dltj.gloss.CRFLearningRate;
import com.ibm.dltj.gloss.CRFLearningRateGloss;
import com.ibm.dltj.gloss.CRFTransitionFeatureGloss;
import com.ibm.dltj.gloss.StringArrayGloss;
import com.ibm.dltj.gloss.StringGloss;
import com.ibm.dltj.gloss.ZhLemmaGloss;
import com.ibm.dltj.util.ArrayUtils;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.RandomAccessFile;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

/* loaded from: input_file:dlt.jar:com/ibm/dltj/crf/CRFDictionary.class */
public class CRFDictionary {
    /**
     * Gloss type ids registered on every dictionary created by
     * {@link #newInstance(File, EnumMap)}; populated in the static initializer.
     */
    private static final int[] GLOSS_TYPES;
    /** Backing dictionary that stores all attributes; never null (checked by the constructor). */
    private final Dictionary _dic;
    /** State-feature mapping; set by {@link #load(File)} or by {@link #newInstance(File, EnumMap)}. */
    private CRFStateFeatureFSA _mapping;
    /** Cache filled by {@link #getFeatureTemplate()}: state templates followed by transition templates. */
    private FeatureTemplate[] _templates;
    /** Cache filled by {@link #getStateFeatureTemplate()}. */
    private FeatureTemplate[] _stateTemplates;
    /** Cache filled by {@link #getTransitionFeatureTemplate()}. */
    private FeatureTemplate[] _transitionTemplates;
    /** Cache filled by {@link #getLabelSet()}; null until that method has been called. */
    private CRFLabelSet _labelSet;
    /** Cache filled by {@link #getLearningRate()}; null until that method has been called. */
    private CRFLearningRate _learningRate;
    /** Cache filled by {@link #getFeatureHandlerList()}. */
    private FeatureHandler[] _featureHandlers;
    /** Cache filled by {@link #getFeaturePathList()}. */
    private String[] _featurePaths;
    /** Editable flag as last set through {@link #setEditable(boolean)}. */
    private boolean _editable = false;
    /** Value stored under {@code LEARNING_ALGORITHM} for newly created dictionaries. */
    private static final String DEFAULT_LEARNING_ALGORITHM = "L1-SGD";
    /** Regex stored under {@code PATTERN_NUMERIC} when no {@code FEATURE_REGEX_NUMERIC} property is supplied. */
    private static final String DEFAULT_PATTERN_NUMERIC = "[+-]?\\d[\\d,./]*";
    /** Regex stored under {@code PATTERN_SYMBOL} when no {@code FEATURE_REGEX_SYMBOL} property is supplied. */
    private static final String DEFAULT_PATTERN_SYMBOL = "[\\p{So}\\p{Sm}]+";
    /**
     * Decompiler rendering of the flag javac generates for {@code assert} statements;
     * true when assertions are disabled for this class (see the static initializer).
     */
    static final /* synthetic */ boolean $assertionsDisabled;

    /* loaded from: input_file:dlt.jar:com/ibm/dltj/crf/CRFDictionary$PropertyName.class */
    /**
     * Keys of the property map accepted by {@code CRFDictionary.newInstance(File, EnumMap)}.
     * The value types noted below are the casts that factory method applies.
     */
    public enum PropertyName {
        /** {@code String}: language passed when registering each gloss type; a default is used when absent. */
        DIC_LANGUAGE,
        /** {@code String}: copyright text handed to the {@code DictionaryInfo} constructor; a default is used when absent. */
        DIC_COPYRIGHT,
        /** {@code List}: labels wrapped in a {@code CRFLabelSetGloss}; an empty list is used when absent. */
        LABEL_SET,
        /** {@code String}: class name stored under {@code LABEL_CLASS}; ignored when null or empty. */
        LABEL_CLASSNAME,
        /** {@code List<String>}: feature template lines, routed to the state or transition list by their prefix. */
        FEATURE_TEMPLATE,
        /** {@code List<String>}: mapping lines, either {@code key<TAB>value} or a bare key; empty lines and lines starting with {@code #} are skipped. */
        FEATURE_MAPPING,
        /** {@code List<String>}: handler identifiers stored as a string array under {@code FEATURE_HANDLER_LIST}. */
        FEATURE_HANDLER,
        /** {@code String}: regex stored under {@code PATTERN_NUMERIC}; a built-in default is used when absent. */
        FEATURE_REGEX_NUMERIC,
        /** {@code String}: regex stored under {@code PATTERN_SYMBOL}; a built-in default is used when absent. */
        FEATURE_REGEX_SYMBOL,
        /** {@code List<String>}: feature paths stored as a string array under {@code FEATURE_PATH_LIST}. */
        FEATURE_PATH
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:dlt.jar:com/ibm/dltj/crf/CRFDictionary$ReservedWord.class */
    /**
     * Dictionary keys under which the CRF attributes are stored. The constant's
     * {@code toString()} value is the word added to, and looked up in, the backing
     * dictionary, so renaming a constant changes the stored key.
     */
    public enum ReservedWord {
        /** Learning algorithm name; written by the creating factory method. */
        LEARNING_ALGORITHM,
        /** The single label-set gloss read by {@code getLabelSet()}. */
        LABEL_SET,
        /** Optional class name resolved by {@code getLabelClass()}. */
        LABEL_CLASS,
        /** Template lines parsed by {@code getStateFeatureTemplate()}. */
        STATE_FEATURE_TEMPLATE_LIST,
        /** Template lines parsed by {@code getTransitionFeatureTemplate()}. */
        TRANSITION_FEATURE_TEMPLATE_LIST,
        /** The single learning-rate gloss read by {@code getLearningRate()}. */
        LEARNING_RATE,
        /** Handler identifiers read by {@code getFeatureHandlerList()}. */
        FEATURE_HANDLER_LIST,
        /** Feature paths read by {@code getFeaturePathList()}. */
        FEATURE_PATH_LIST,
        /** Regex compiled by {@code getNumericPattern()}. */
        PATTERN_NUMERIC,
        /** Regex compiled by {@code getSymbolPattern()}. */
        PATTERN_SYMBOL
    }

    /**
     * Returns the IBM copyright notice, padded with two newlines on each side.
     *
     * @return the fixed copyright text
     */
    static String getCopyright() {
        final String notice = "\n\n(C) Copyright IBM Corp. 2003, 2010.\n\n";
        return notice;
    }

    /**
     * Wraps an existing dictionary. Instances are obtained through the
     * {@code newInstance} factory methods.
     *
     * @param dictionary the backing dictionary; must not be null
     * @throws IllegalArgumentException if {@code dictionary} is null
     */
    private CRFDictionary(Dictionary dictionary) throws DLTException {
        if (dictionary != null) {
            this._dic = dictionary;
        } else {
            throw new IllegalArgumentException();
        }
    }

    /**
     * Opens an existing CRF dictionary file.
     *
     * @param file the dictionary file to read
     * @return a CRF dictionary backed by a fresh {@code Dictionary} loaded from {@code file}
     * @throws DLTException if loading fails in the dictionary layer
     * @throws IOException if the file cannot be read (including a null {@code file})
     */
    public static CRFDictionary newInstance(File file) throws DLTException, IOException {
        final CRFDictionary loaded = new CRFDictionary(new Dictionary());
        loaded.load(file);
        return loaded;
    }

    /**
     * Creates a new CRF dictionary at {@code file} and seeds it from the given properties.
     * Every property is optional; a missing list becomes an empty list, a missing
     * regex falls back to the built-in default, and a missing or empty label class
     * name is simply not stored.
     *
     * @param file    the file the new dictionary is created for
     * @param enumMap creation properties; values are cast to {@code String} or {@code List}
     *                depending on the key
     * @return the newly created CRF dictionary
     * @throws IllegalArgumentException if {@code file} or {@code enumMap} is null
     * @throws DLTException if the dictionary layer rejects any of the operations
     */
    public static CRFDictionary newInstance(File file, EnumMap<PropertyName, Object> enumMap) throws DLTException {
        if (!(file != null && enumMap != null)) {
            throw new IllegalArgumentException();
        }

        // Dictionary header.
        final Object copyrightProp = enumMap.get(PropertyName.DIC_COPYRIGHT);
        final String copyright = copyrightProp != null ? (String) copyrightProp : ZhLemmaGloss.ZHLEMMA_SAME;
        final DictionaryInfo info = new DictionaryInfo(117571585L, true, true, false, true, false, copyright, false, true, false, 7000);
        info.setDescription("IBM LanguageWare CRF Dictionary (" + DLT.getStrVersion() + ")");
        final Dictionary backing = Dictionary.createDictionary(file, info);

        // Register every gloss type this class stores.
        final Object languageProp = enumMap.get(PropertyName.DIC_LANGUAGE);
        final String language = languageProp != null ? (String) languageProp : ZhLemmaGloss.ZHLEMMA_SAME;
        for (int idx = 0; idx < GLOSS_TYPES.length; idx++) {
            backing.registerType(GLOSS_TYPES[idx], language);
        }

        final CRFDictionary created = new CRFDictionary(backing);
        created.setAttribute(ReservedWord.LEARNING_ALGORITHM, new StringGloss(DEFAULT_LEARNING_ALGORITHM));
        created.setAttribute(ReservedWord.LEARNING_RATE, new CRFLearningRateGloss());

        final Object mappingProp = enumMap.get(PropertyName.FEATURE_MAPPING);
        final List<String> mappingLines = mappingProp != null ? (List<String>) mappingProp : Collections.<String>emptyList();
        created.setMapping(mappingLines);

        // The label set must be stored before the feature FSA is built, because
        // the FSA is sized from getLabelSet().
        final Object labelProp = enumMap.get(PropertyName.LABEL_SET);
        final List labels = labelProp != null ? (List) labelProp : Collections.emptyList();
        created.setAttribute(ReservedWord.LABEL_SET, new CRFLabelSetGloss(labels));
        created.setFeatureMapping(new CRFStateFeatureFSA(created.getLabelSet().size(), ((NetGenericDictionary) backing.getNet()).getCharacterMap()));

        final Object templateProp = enumMap.get(PropertyName.FEATURE_TEMPLATE);
        final List<String> templateLines = templateProp != null ? (List<String>) templateProp : Collections.<String>emptyList();
        created.setFeatureTemplate(templateLines);

        final String labelClassName = (String) enumMap.get(PropertyName.LABEL_CLASSNAME);
        if (labelClassName != null && labelClassName.length() > 0) {
            created.setAttribute(ReservedWord.LABEL_CLASS, new StringGloss(labelClassName));
        }

        final Object pathProp = enumMap.get(PropertyName.FEATURE_PATH);
        final List<String> featurePaths = pathProp != null ? (List<String>) pathProp : Collections.<String>emptyList();
        created.setAttribute(ReservedWord.FEATURE_PATH_LIST, new StringArrayGloss(featurePaths));

        final Object handlerProp = enumMap.get(PropertyName.FEATURE_HANDLER);
        final List<String> handlerNames = handlerProp != null ? (List<String>) handlerProp : Collections.<String>emptyList();
        created.setAttribute(ReservedWord.FEATURE_HANDLER_LIST, new StringArrayGloss(handlerNames));

        final Object numericProp = enumMap.get(PropertyName.FEATURE_REGEX_NUMERIC);
        final String numericRegex = numericProp != null ? (String) numericProp : DEFAULT_PATTERN_NUMERIC;
        created.setAttribute(ReservedWord.PATTERN_NUMERIC, new StringGloss(numericRegex));

        final Object symbolProp = enumMap.get(PropertyName.FEATURE_REGEX_SYMBOL);
        final String symbolRegex = symbolProp != null ? (String) symbolProp : DEFAULT_PATTERN_SYMBOL;
        created.setAttribute(ReservedWord.PATTERN_SYMBOL, new StringGloss(symbolRegex));

        return created;
    }

    /**
     * Stores {@code gloss} in the backing dictionary under the word {@code k.toString()}.
     * A null gloss is ignored.
     *
     * @param k     the key object; its string form is the dictionary word
     * @param gloss the gloss to attach, or null to do nothing
     * @throws DLTException if the dictionary rejects the gloss or the word
     */
    private <K> void setAttribute(K k, Gloss gloss) throws DLTException {
        if (gloss != null) {
            final String word = k.toString();
            this._dic.addWord(word, this._dic.addGloss(gloss.getType(), gloss));
        }
    }

    /**
     * Stores feature-mapping lines as string attributes of the dictionary.
     * <p>
     * Each line is either {@code key<TAB>value} (only the first two tab-separated
     * fields are used) or a bare key that maps to itself. Null lines, empty lines
     * and lines starting with {@code #} are skipped.
     *
     * @param list the mapping lines; null or empty means nothing to store
     * @throws DLTException if the dictionary rejects an entry
     */
    private void setMapping(List<String> list) throws DLTException {
        if (list == null || list.isEmpty()) {
            return;
        }
        for (String line : list) {
            if (line == null || line.length() == 0 || line.startsWith("#")) {
                continue;
            }
            String key = line;
            String value = line;
            if (line.contains("\t")) {
                // A negative limit keeps trailing empty fields, so "key\t" yields
                // {"key", ""} instead of a one-element array whose [1] access would
                // throw ArrayIndexOutOfBoundsException.
                String[] fields = line.split("\t", -1);
                key = fields[0];
                value = fields[1];
            }
            setAttribute(key, new StringGloss(value));
        }
    }

    /**
     * Stores feature template lines in the dictionary, choosing the attribute by prefix:
     * lines starting with the unigram prefix go to the state template list, lines
     * starting with the bigram prefix go to the transition template list, and all
     * other lines are ignored.
     *
     * @param list the template lines; null or empty means nothing to store
     * @throws DLTException if the dictionary rejects an entry
     */
    private void setFeatureTemplate(List<String> list) throws DLTException {
        if (list == null || list.isEmpty()) {
            return;
        }
        for (String templateLine : list) {
            final ReservedWord target;
            if (templateLine.startsWith(FeatureTemplate.UNIGRAM_TEMPLATE_PREFIX)) {
                target = ReservedWord.STATE_FEATURE_TEMPLATE_LIST;
            } else if (templateLine.startsWith(FeatureTemplate.BIGRAM_TEMPLATE_PREFIX)) {
                target = ReservedWord.TRANSITION_FEATURE_TEMPLATE_LIST;
            } else {
                continue;
            }
            setAttribute(target, new StringGloss(templateLine));
        }
    }

    /**
     * Installs the state-feature mapping.
     *
     * @param cRFStateFeatureFSA the mapping to use; asserted non-null when assertions are enabled
     */
    private void setFeatureMapping(CRFStateFeatureFSA cRFStateFeatureFSA) {
        if ($assertionsDisabled || cRFStateFeatureFSA != null) {
            this._mapping = cRFStateFeatureFSA;
            return;
        }
        throw new AssertionError();
    }

    /**
     * Loads the backing dictionary from {@code file} and then reads the state-feature
     * mapping appended to the end of the same file.
     * <p>
     * The last 8 bytes of the file hold a {@code long} giving the length of the
     * appended section including those 8 bytes (this is what {@link #save()} writes),
     * so the mapping starts at {@code fileLength - sectionLength}.
     *
     * @param file the dictionary file
     * @throws FileNotFoundException if {@code file} is null
     * @throws IOException if the file cannot be read or its trailer is missing or inconsistent
     * @throws DLTException if the dictionary or the mapping cannot be decoded
     */
    public void load(File file) throws DLTException, IOException {
        if (file == null) {
            throw new FileNotFoundException();
        }
        this._dic.load(file);
        // try-with-resources closes the file exactly once and keeps the primary
        // exception if close() fails as well.
        try (RandomAccessFile in = new RandomAccessFile(file, "r")) {
            final long trailerSize = 8L;
            final long fileLength = in.length();
            if (fileLength < trailerSize) {
                throw new IOException("File is too short to contain a feature-mapping trailer: " + file);
            }
            in.seek(fileLength - trailerSize);
            final long sectionLength = in.readLong();
            // Reject corrupt trailers up front instead of failing later with an
            // opaque negative-seek or end-of-file error.
            if (sectionLength < trailerSize || sectionLength > fileLength) {
                throw new IOException("Invalid feature-mapping section length " + sectionLength + " in " + file);
            }
            in.seek(fileLength - sectionLength);
            this._mapping = new CRFStateFeatureFSA(in, ((NetGenericDictionary) this._dic.getNet()).getCharacterMap());
        }
    }

    /**
     * Saves the backing dictionary and appends the state-feature mapping to the same file.
     * <p>
     * After the mapping bytes, an 8-byte {@code long} trailer is written holding the
     * number of mapping bytes plus 8, which lets {@link #load(File)} locate the start
     * of the mapping from the end of the file.
     *
     * @throws DLTException if the dictionary layer or the mapping fails to save
     * @throws IOException if the file cannot be written
     */
    public void save() throws DLTException, IOException {
        this._dic.getSummary().addBuildHistoryStep(getClass().getSimpleName());
        this._dic.save();
        // Append mode: the mapping goes after whatever the dictionary just wrote.
        // try-with-resources flushes and closes exactly once and does not let a
        // failing close() mask an earlier exception.
        try (DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(this._dic.getFile(), true)))) {
            final int before = out.size();
            this._mapping.save(out);
            // Widen before adding the trailer size so the sum cannot overflow int.
            out.writeLong((long) (out.size() - before) + 8L);
        }
    }

    /**
     * Reports whether this dictionary is editable.
     * <p>
     * When assertions are enabled, the flag is checked against the feature mapping
     * and, if it has already been loaded, the label set.
     *
     * @return the editable flag last set through {@code setEditable}
     */
    public boolean isEditable() {
        if (!$assertionsDisabled) {
            if (this._editable != this._mapping.isEditable()) {
                throw new AssertionError();
            }
            // _labelSet is filled lazily by getLabelSet(); until then there is nothing
            // to compare, and dereferencing it would throw NullPointerException.
            if (this._labelSet != null && this._editable != this._labelSet.isEditable()) {
                throw new AssertionError();
            }
        }
        return this._editable;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Sets the editable flag on this dictionary, then on its feature mapping,
     * then on its label set.
     *
     * @param z the new editable state
     * @throws DLTException if the label set cannot be obtained or a component rejects the change
     */
    public void setEditable(boolean z) throws DLTException {
        this._editable = z;
        final CRFStateFeatureFSA mapping = getFeatureMapping();
        mapping.setEditable(z);
        final CRFLabelSet labels = getLabelSet();
        labels.setEditable(z);
    }

    /**
     * Returns the backing dictionary.
     *
     * @return the dictionary this instance wraps; never null
     */
    public Dictionary getDictionary() {
        final Dictionary backing = this._dic;
        return backing;
    }

    /**
     * Collects the string values stored under the word {@code k.toString()}.
     * Glosses of type 60 contribute their single value, glosses of type 86
     * contribute each of their strings in order, and other gloss types are ignored.
     *
     * @param k the key object; its string form is the dictionary word
     * @return the collected strings, or an immutable empty list when the word is absent
     * @throws DLTException if the dictionary lookup fails
     */
    private <K> List<String> getAttribute(K k) throws DLTException {
        final GlossCollection glosses = this._dic.get(k.toString());
        if (glosses == null) {
            return Collections.emptyList();
        }
        final List<String> values = new ArrayList<String>(glosses.size());
        for (Gloss entry : glosses) {
            if (entry.getType() == 60) {
                final StringGloss single = (StringGloss) entry;
                values.add(single.getValue());
            }
            if (entry.getType() == 86) {
                final StringArrayGloss many = (StringArrayGloss) entry;
                for (String item : many.getStrings()) {
                    values.add(item);
                }
            }
        }
        return values;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Returns the label set, reading it from the dictionary on first use and caching it.
     * When assertions are enabled, the {@code LABEL_SET} entry is asserted to hold
     * exactly one gloss.
     *
     * @return the label set stored under {@code LABEL_SET}
     * @throws IllegalStateException if the dictionary has no {@code LABEL_SET} entry
     * @throws DLTException if the dictionary lookup fails
     */
    public CRFLabelSet getLabelSet() throws DLTException {
        if (this._labelSet == null) {
            final String word = ReservedWord.LABEL_SET.toString();
            final GlossCollection glosses = this._dic.get(word);
            if (glosses == null) {
                // The lookup returns null for an unknown word; report that clearly
                // instead of failing with a bare NullPointerException below.
                throw new IllegalStateException("Dictionary has no " + word + " entry");
            }
            if (!$assertionsDisabled && glosses.size() != 1) {
                throw new AssertionError();
            }
            this._labelSet = (CRFLabelSet) glosses.iterator().next();
        }
        return this._labelSet;
    }

    /**
     * Returns all feature templates: the state templates followed by the transition
     * templates. The combined array is built once and cached.
     *
     * @return the cached combined template array
     * @throws DLTException if either template list cannot be read or parsed
     */
    FeatureTemplate[] getFeatureTemplate() throws DLTException {
        if (this._templates != null) {
            return this._templates;
        }
        final FeatureTemplate[] state = getStateFeatureTemplate();
        final FeatureTemplate[] transition = getTransitionFeatureTemplate();
        final FeatureTemplate[] combined = new FeatureTemplate[state.length + transition.length];
        System.arraycopy(state, 0, combined, 0, state.length);
        System.arraycopy(transition, 0, combined, state.length, transition.length);
        this._templates = combined;
        return this._templates;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Returns the state feature templates, parsing the lines stored under
     * {@code STATE_FEATURE_TEMPLATE_LIST} on first use and caching the result.
     *
     * @return the cached state templates
     * @throws DLTException if the attribute cannot be read or parsed
     */
    public FeatureTemplate[] getStateFeatureTemplate() throws DLTException {
        if (this._stateTemplates != null) {
            return this._stateTemplates;
        }
        final FeatureTemplateParser parser = new FeatureTemplateParser(getFeatureMapping());
        final List<String> lines = getAttribute(ReservedWord.STATE_FEATURE_TEMPLATE_LIST);
        final List<FeatureTemplate> parsed = parser.parse(lines);
        this._stateTemplates = (FeatureTemplate[]) parsed.toArray(new FeatureTemplate[parsed.size()]);
        return this._stateTemplates;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Returns the transition feature templates, parsing the lines stored under
     * {@code TRANSITION_FEATURE_TEMPLATE_LIST} on first use and caching the result.
     *
     * @return the cached transition templates
     * @throws DLTException if the attribute cannot be read or parsed
     */
    public FeatureTemplate[] getTransitionFeatureTemplate() throws DLTException {
        if (this._transitionTemplates != null) {
            return this._transitionTemplates;
        }
        final FeatureTemplateParser parser = new FeatureTemplateParser(getFeatureMapping());
        final List<String> lines = getAttribute(ReservedWord.TRANSITION_FEATURE_TEMPLATE_LIST);
        final List<FeatureTemplate> parsed = parser.parse(lines);
        this._transitionTemplates = (FeatureTemplate[]) parsed.toArray(new FeatureTemplate[parsed.size()]);
        return this._transitionTemplates;
    }

    /**
     * Returns the learning rate, reading it from the dictionary on first use and caching it.
     * When assertions are enabled, the {@code LEARNING_RATE} entry is asserted to hold
     * exactly one gloss.
     *
     * @return the learning rate stored under {@code LEARNING_RATE}
     * @throws IllegalStateException if the dictionary has no {@code LEARNING_RATE} entry
     * @throws DLTException if the dictionary lookup fails
     */
    public CRFLearningRate getLearningRate() throws DLTException {
        if (this._learningRate == null) {
            final String word = ReservedWord.LEARNING_RATE.toString();
            final GlossCollection glosses = this._dic.get(word);
            if (glosses == null) {
                // The lookup returns null for an unknown word; report that clearly
                // instead of failing with a bare NullPointerException below.
                throw new IllegalStateException("Dictionary has no " + word + " entry");
            }
            if (!$assertionsDisabled && glosses.size() != 1) {
                throw new AssertionError();
            }
            this._learningRate = (CRFLearningRate) glosses.iterator().next();
        }
        return this._learningRate;
    }

    /**
     * Compiles the regex stored under {@code PATTERN_NUMERIC}.
     *
     * @return the compiled pattern, or null when no pattern is stored or the
     *         dictionary lookup fails
     */
    public Pattern getNumericPattern() {
        final List<String> stored;
        try {
            stored = getAttribute(ReservedWord.PATTERN_NUMERIC);
        } catch (DLTException e) {
            // Best effort: a failed lookup is reported the same way as "no pattern".
            return null;
        }
        return stored.isEmpty() ? null : Pattern.compile(stored.get(0));
    }

    /**
     * Compiles the regex stored under {@code PATTERN_SYMBOL}.
     *
     * @return the compiled pattern, or null when no pattern is stored or the
     *         dictionary lookup fails
     */
    public Pattern getSymbolPattern() {
        final List<String> stored;
        try {
            stored = getAttribute(ReservedWord.PATTERN_SYMBOL);
        } catch (DLTException e) {
            // Best effort: a failed lookup is reported the same way as "no pattern".
            return null;
        }
        return stored.isEmpty() ? null : Pattern.compile(stored.get(0));
    }

    /**
     * Returns the feature handlers, creating one through {@code FeatureHandlerFactory}
     * for each identifier stored under {@code FEATURE_HANDLER_LIST}. The array is
     * built on first use and cached.
     *
     * @return the cached handlers; an empty array when none are configured
     * @throws DLTException if the attribute cannot be read or a handler cannot be created
     */
    public FeatureHandler[] getFeatureHandlerList() throws DLTException {
        if (this._featureHandlers != null) {
            return this._featureHandlers;
        }
        final List<String> handlerIds = getAttribute(ReservedWord.FEATURE_HANDLER_LIST);
        if (handlerIds == null || handlerIds.isEmpty()) {
            this._featureHandlers = new FeatureHandler[0];
            return this._featureHandlers;
        }
        final List<FeatureHandler> created = new ArrayList<FeatureHandler>(handlerIds.size());
        for (String handlerId : handlerIds) {
            created.add(FeatureHandlerFactory.createHandler(handlerId, this));
        }
        this._featureHandlers = created.toArray(new FeatureHandler[created.size()]);
        return this._featureHandlers;
    }

    /**
     * Returns the feature paths stored under {@code FEATURE_PATH_LIST}. The array is
     * built on first use and cached.
     *
     * @return the cached paths; the shared empty string array when none are configured
     * @throws DLTException if the attribute cannot be read
     */
    public String[] getFeaturePathList() throws DLTException {
        if (this._featurePaths != null) {
            return this._featurePaths;
        }
        final List<String> paths = getAttribute(ReservedWord.FEATURE_PATH_LIST);
        final boolean nothingStored = paths == null || paths.isEmpty();
        this._featurePaths = nothingStored
                ? ArrayUtils.EMPTY_STRING_ARRAY
                : (String[]) paths.toArray(new String[paths.size()]);
        return this._featurePaths;
    }

    /**
     * Returns the state-feature mapping.
     *
     * @return the mapping; asserted non-null when assertions are enabled
     */
    public CRFStateFeatureFSA getFeatureMapping() {
        if (!$assertionsDisabled && this._mapping == null) {
            throw new AssertionError();
        }
        return this._mapping;
    }

    /**
     * Resolves the class whose name is stored under {@code LABEL_CLASS}.
     *
     * @return the loaded class, or null when no name is stored, the dictionary
     *         lookup fails, or the class cannot be found
     */
    public Class<?> getLabelClass() {
        final List<String> classNames;
        try {
            classNames = getAttribute(ReservedWord.LABEL_CLASS);
        } catch (DLTException e) {
            // Best effort: a failed lookup is reported the same way as "no class".
            return null;
        }
        if (classNames.isEmpty()) {
            return null;
        }
        try {
            return Class.forName(classNames.get(0));
        } catch (ClassNotFoundException e) {
            // An unknown class name is reported the same way as "no class".
            return null;
        }
    }

    /**
     * Replaces the state-feature mapping with the result of its own {@code trimToSize()}.
     *
     * @return this dictionary, for call chaining
     * @throws DLTException if the mapping cannot be trimmed
     */
    public CRFDictionary trimToSize() throws DLTException {
        final CRFStateFeatureFSA trimmed = this._mapping.trimToSize();
        this._mapping = trimmed;
        return this;
    }

    /**
     * Writes a textual dump of the dictionary to {@code printWriter}.
     * <p>
     * For every entry of the dictionary net, the key is printed followed by
     * <code>={</code>. If the entry has a gloss collection, each gloss is printed on
     * its own tab-indented line (string glosses as their value, transition-feature
     * glosses as one row per label, anything else via string conversion), followed
     * by a closing <code>}</code> and a blank line. The state-feature mapping is
     * dumped last and the writer is flushed.
     *
     * @param printWriter the destination; asserted non-null when assertions are enabled
     * @throws DLTException if the label set or the mapping cannot be read
     */
    void dump(PrintWriter printWriter) throws DLTException {
        if (!$assertionsDisabled && printWriter == null) {
            throw new AssertionError();
        }
        final DecimalFormat weightFormat = new DecimalFormat("####0.0#########");
        final CRFLabelSet labels = getLabelSet();
        final int labelCount = labels.size();
        for (Map.Entry<String, Object> netEntry : this._dic.getNet()) {
            printWriter.print(netEntry.getKey());
            printWriter.println("={");
            final Collection<Gloss> glosses = (Collection) netEntry.getValue();
            if (glosses == null) {
                // No glosses: the opening line has already been written and nothing follows.
                continue;
            }
            for (Gloss current : glosses) {
                if (current.getType() == 60) {
                    printWriter.println("\t" + ((StringGloss) current).getValue());
                } else if (current.getType() == 52) {
                    dumpTransitionWeights(printWriter, weightFormat, labels, labelCount, ((CRFTransitionFeatureGloss) current).w());
                } else {
                    printWriter.println("\t" + current);
                }
            }
            printWriter.println("}");
            printWriter.println();
        }
        this._mapping.dump(printWriter, getLabelSet(), weightFormat);
        printWriter.flush();
    }

    /**
     * Prints one line per label: the label followed by the comma-separated values
     * {@code w[0][i] .. w[labelCount-1][i]} for that label's index {@code i}.
     */
    private void dumpTransitionWeights(PrintWriter out, DecimalFormat weightFormat, CRFLabelSet labels, int labelCount, float[][] w) {
        for (int col = 0; col < labelCount; col++) {
            out.print("\t" + labels.getLabel(col) + "\t{");
            for (int row = 0; row < labelCount; row++) {
                out.print(weightFormat.format(w[row][col]));
                if (row + 1 != labelCount) {
                    out.print(", ");
                }
            }
            out.println("}");
        }
    }

    static {
        // Decompiled form of the flag javac generates for assert statements:
        // true when assertions are NOT enabled for this class.
        $assertionsDisabled = !CRFDictionary.class.desiredAssertionStatus();
        // Gloss type ids registered on newly created dictionaries. Within this class,
        // 60 is read as StringGloss, 86 as StringArrayGloss and 52 as
        // CRFTransitionFeatureGloss; 50, 51 and 53 are not interpreted here
        // (presumably the remaining CRF gloss types; confirm against the gloss package).
        GLOSS_TYPES = new int[]{60, 86, 50, 51, 52, 53};
    }
}
