package com.ibm.datatools.metadata.discovery.algorithms.pattern;

import com.ibm.datatools.metadata.discovery.DiscoveryException;
import com.ibm.datatools.metadata.discovery.sampling.SampleManager;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.URL;
import java.util.Vector;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/* loaded from: input_file:com/ibm/datatools/metadata/discovery/algorithms/pattern/RegexPattern.class */
public class RegexPattern extends PatternBase implements ConstrainedResource {
    /** Pattern name; set from the constructor argument or the XML {@code name} attribute. */
    private String _name;
    /** Configuration built from the constructor arguments or the {@code PatternConfig} XML element. */
    private PatternConfig _config;
    /** Automaton that all sampling, matching and generation calls are delegated to. */
    private Automaton _automaton;
    /** Number of {@code sample(...)} calls; advanced by one per call. */
    private int _numSamples;
    /** Number of samples for which the automaton's {@code sample} call returned {@code true}. */
    private int _numNewSamples;
    /** Number of sampled strings whose length was zero. */
    private int _numEmptySamples;
    /** Cached unique-sample estimate; a negative value means it has not been computed yet. */
    private int _numUniqueSamples;
    /** Length of the shortest sampled string; {@code sample} treats a negative value as "no sample yet". */
    private int _minSampleLength;
    /** Length of the longest sampled string; {@code sample} treats a negative value as "no sample yet". */
    private int _maxSampleLength;
    /** Sum of the counts passed to {@code test(...)} for strings the automaton matched. */
    private int _numSamplesMatched;
    /** Sum of the counts passed to {@code test(...)}, matched or not. */
    private int _numSamplesTested;
    /** Receives every sampled string; supplies the unique-sample estimate via {@code combEst()}. */
    private DistinctCount _distinctCount;
    /** Reference timestamp (milliseconds, {@code System.currentTimeMillis()}) for elapsed-time calculations. */
    private long _startTime;
    /** Elapsed-time budget in milliseconds; values {@code <= 0} disable the time check in {@code resourceExceeded()}. */
    private long _maxElapsedTime;
    /** Latched result of {@code resourceExceeded()}; once {@code true} it is never reset in this class. */
    private boolean _resourcesExceeded;
    /** Largest state count observed by {@code resourceExceeded()}. */
    private int _maxNumStates;
    /** Sample count recorded at the most recent performance-log line. */
    private int _perfLogPrevNumSamples;
    /** State count recorded at the most recent performance-log line. */
    private int _perfLogPrevNumStates;
    /** Transition count recorded at the most recent performance-log line. */
    private int _perfLogPrevNumTransitions;
    // Lazily cached Class object for this class. NOTE(review): looks like a compiler-generated
    // class-literal cache surfaced by the decompiler -- confirm before removing.
    static Class class$0;
    /** Logger that receives the comma-separated performance records. */
    private static Logger plogger = Logger.getLogger("com.ibm.almaden.infosphere.performance");
    /** {@code sample} asks the automaton to merge whenever the new-sample count is a multiple of this value. */
    private static int _mergeFrequency = 1;
    /** State-count ceiling; {@code resourceExceeded()} reports {@code true} once the automaton has more states. */
    private static int _maxMaxNumStates = 10000;

    /**
     * Creates an empty pattern that is ready to learn from samples.
     *
     * <p>The three flags and three numeric settings are passed, in order, to
     * {@code PatternConfig}; their meaning is defined there.
     *
     * @param str name of the pattern, also handed to the automaton
     * @param z   first flag forwarded to {@code PatternConfig}
     * @param z2  second flag forwarded to {@code PatternConfig}
     * @param z3  third flag forwarded to {@code PatternConfig}
     * @param d   first numeric setting forwarded to {@code PatternConfig}
     * @param d2  second numeric setting forwarded to {@code PatternConfig}
     * @param d3  third numeric setting forwarded to {@code PatternConfig}
     * @param j   elapsed-time budget in milliseconds; a value {@code <= 0} disables the
     *            time check performed by {@link #resourceExceeded()}
     */
    public RegexPattern(String str, boolean z, boolean z2, boolean z3, double d, double d2, double d3, long j) {
        this._maxNumStates = 0;
        this._perfLogPrevNumSamples = 0;
        this._perfLogPrevNumStates = 0;
        this._perfLogPrevNumTransitions = 0;
        this._name = str;
        this._config = new PatternConfig(z, z2, z3, d, d2, d3);
        this._automaton = new RegexAutomaton(str, this._config, this);
        this._numSamples = 0;
        this._numNewSamples = 0;
        this._numEmptySamples = 0;
        // Negative values are the "not computed / no sample yet" sentinels that
        // getNumUniqueSamples() and sample() test for.
        this._numUniqueSamples = -1;
        this._minSampleLength = -1;
        // Was 1, which made sample() skip its "unset" branch and report a maximum
        // length of 1 when every sampled string was empty (or none was sampled).
        this._maxSampleLength = -1;
        this._numSamplesMatched = 0;
        this._numSamplesTested = 0;
        this._distinctCount = new DistinctCount();
        this._startTime = System.currentTimeMillis();
        this._maxElapsedTime = j;
        this._resourcesExceeded = false;
    }

    /**
     * Rebuilds a pattern from the XML form produced by {@link #toXML()}.
     *
     * <p>The element must be named {@code RegexPattern}, carry a {@code name} attribute and
     * contain exactly one {@code PatternStats}, one {@code PatternConfig} and one
     * {@code RegexAutomaton} descendant. Test counters start at zero. No time budget is
     * restored, so the elapsed-time limit in {@link #resourceExceeded()} stays disabled.
     *
     * @param element root {@code RegexPattern} element
     * @throws IllegalArgumentException if a required element or attribute is missing, or a
     *         statistic is not a valid integer ({@code NumberFormatException})
     */
    public RegexPattern(Element element) {
        this._maxNumStates = 0;
        this._perfLogPrevNumSamples = 0;
        this._perfLogPrevNumStates = 0;
        this._perfLogPrevNumTransitions = 0;
        if (!element.getNodeName().equals("RegexPattern")) {
            // The message previously named "Pattern", which is not the element being checked.
            throw new IllegalArgumentException("Missing element: RegexPattern");
        }
        if (!element.hasAttribute("name")) {
            throw new IllegalArgumentException("Missing attribute: name");
        }
        this._name = element.getAttribute("name");

        NodeList statsNodes = element.getElementsByTagName("PatternStats");
        if (statsNodes.getLength() != 1) {
            throw new IllegalArgumentException("Missing element: PatternStats");
        }
        Element stats = (Element) statsNodes.item(0);
        // Validate every required statistic, in the original order, before parsing any of them.
        String[] requiredStats = {
            "numSamples", "numEmptySamples", "numUniqueSamples", "minSampleLength", "maxSampleLength"
        };
        for (String attribute : requiredStats) {
            if (!stats.hasAttribute(attribute)) {
                throw new IllegalArgumentException("Missing attribute: " + attribute);
            }
        }
        this._numSamples = Integer.parseInt(stats.getAttribute("numSamples"));
        this._numNewSamples = 0;
        this._numEmptySamples = Integer.parseInt(stats.getAttribute("numEmptySamples"));
        this._numUniqueSamples = Integer.parseInt(stats.getAttribute("numUniqueSamples"));
        this._minSampleLength = Integer.parseInt(stats.getAttribute("minSampleLength"));
        this._maxSampleLength = Integer.parseInt(stats.getAttribute("maxSampleLength"));
        this._numSamplesMatched = 0;
        this._numSamplesTested = 0;
        this._distinctCount = new DistinctCount();
        // Give elapsed-time values in the performance log a real origin; previously this
        // field kept its default of 0, so the "elapsed" column was the time since the epoch.
        this._startTime = System.currentTimeMillis();

        NodeList configNodes = element.getElementsByTagName("PatternConfig");
        if (configNodes.getLength() != 1) {
            throw new IllegalArgumentException("Missing element: PatternConfig");
        }
        this._config = new PatternConfig((Element) configNodes.item(0));

        NodeList automatonNodes = element.getElementsByTagName("RegexAutomaton");
        if (automatonNodes.getLength() != 1) {
            throw new IllegalArgumentException("Missing element: RegexAutomaton");
        }
        this._automaton = new RegexAutomaton(this._config, this, (Element) automatonNodes.item(0));
    }

    /**
     * Returns the name this pattern was constructed or deserialized with.
     *
     * @return the pattern name
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public String getName() {
        return _name;
    }

    /**
     * Writes one comma-separated performance record (name, elapsed milliseconds, sample
     * count, empty-sample count, min/max sample length, state count, transition count) and
     * remembers the logged sample/state/transition counts for the next conditional check.
     */
    private void doPerformanceLog() {
        String patternName = getName();
        long elapsedMillis = System.currentTimeMillis() - _startTime;
        String record = patternName
                + "," + elapsedMillis
                + "," + _numSamples
                + "," + _numEmptySamples
                + "," + _minSampleLength
                + "," + _maxSampleLength
                + "," + getNumStates()
                + "," + getNumTransitions();
        plogger.debug(record);

        _perfLogPrevNumSamples = _numSamples;
        _perfLogPrevNumStates = getNumStates();
        _perfLogPrevNumTransitions = getNumTransitions();
    }

    /**
     * Writes a performance record only when the sample, state or transition count has
     * moved noticeably since the last record (see {@link #changedNoticeably(int, int)}).
     */
    private void doPerformanceLogConditional() {
        int states = getNumStates();
        int transitions = getNumTransitions();

        boolean worthLogging = changedNoticeably(_numSamples, _perfLogPrevNumSamples)
                || changedNoticeably(states, _perfLogPrevNumStates)
                || changedNoticeably(transitions, _perfLogPrevNumTransitions);

        if (worthLogging) {
            doPerformanceLog();
        }
    }

    /**
     * Tells whether a counter differs from its previously logged value and is either still
     * below 100 or has moved by at least 5% of the previous value.
     */
    private static boolean changedNoticeably(int current, int previous) {
        if (current == previous) {
            return false;
        }
        return current < 100 || Math.abs(current - previous) >= 0.05d * previous;
    }

    /**
     * Writes a final regular performance record followed by a terminator record whose six
     * counter fields are all {@code -1} and whose elapsed time is one millisecond larger.
     */
    protected void doPerformanceLogTerminate() {
        doPerformanceLog();
        String patternName = getName();
        long terminatorStamp = (System.currentTimeMillis() - _startTime) + 1;
        plogger.debug(patternName + "," + terminatorStamp + ",-1,-1,-1,-1,-1,-1");
    }

    /**
     * Reports whether this pattern has used up its time budget or grown past the state
     * limit. Once the answer is {@code true} it is latched and returned without further checks.
     *
     * <p>Each non-latched call also updates the state-count high-water mark and may emit a
     * performance record.
     *
     * @return {@code true} if the time budget (when positive) or the state limit is exceeded
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.ConstrainedResource
    public boolean resourceExceeded() {
        if (!_resourcesExceeded) {
            boolean outOfTime = _maxElapsedTime > 0
                    && System.currentTimeMillis() - _startTime > _maxElapsedTime;
            int states = getNumStates();
            boolean tooManyStates = states > _maxMaxNumStates;
            if (states > _maxNumStates) {
                _maxNumStates = states;
            }
            _resourcesExceeded = outOfTime || tooManyStates;
            doPerformanceLogConditional();
        }
        return _resourcesExceeded;
    }

    /**
     * Feeds one sample string to the automaton and updates the sample statistics.
     *
     * <p>The count {@code i} is forwarded to the automaton only; the counters kept here
     * advance by one per call. When the automaton reports the sample as new, a merge is
     * requested each time the new-sample count is a multiple of the merge frequency.
     *
     * @param str sample value; must not be {@code null}
     * @param i   count forwarded to the automaton
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public void sample(String str, int i) {
        boolean isNew = _automaton.sample(str, i);
        _numSamples++;

        int length = str.length();
        if (length == 0) {
            _numEmptySamples++;
        }
        // A negative bound means "nothing recorded yet", so the first length wins outright.
        _minSampleLength = _minSampleLength < 0 ? length : Math.min(_minSampleLength, length);
        _maxSampleLength = _maxSampleLength < 0 ? length : Math.max(_maxSampleLength, length);

        if (isNew) {
            _numNewSamples++;
            boolean mergeDue = _numNewSamples % _mergeFrequency == 0;
            if (mergeDue) {
                _automaton.merge(false);
            }
        }
        _distinctCount.addString(str);
    }

    /**
     * Feeds one sample string with a count of one.
     *
     * @param str sample value
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public void sample(String str) {
        final int singleOccurrence = 1;
        sample(str, singleOccurrence);
    }

    /**
     * Checks a string against the automaton and adds {@code i} to the tested counter and,
     * if the string matched, to the matched counter.
     *
     * @param str value to test
     * @param i   weight added to the counters
     * @throws DiscoveryException if the automaton's match check fails
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public void test(String str, int i) throws DiscoveryException {
        boolean accepted = _automaton.matches(str);
        if (accepted) {
            _numSamplesMatched += i;
        }
        _numSamplesTested += i;
    }

    /**
     * Checks a string against the automaton with a weight of one.
     *
     * @param str value to test
     * @throws DiscoveryException if the automaton's match check fails
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public void test(String str) throws DiscoveryException {
        final int singleOccurrence = 1;
        test(str, singleOccurrence);
    }

    /** Forwards the finish request to the automaton. */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public void finish() {
        _automaton.finish();
    }

    /**
     * Closes the automaton. The terminating performance records are written only on the
     * call that actually moves the automaton from open to closed.
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public void close() {
        boolean wasClosed = _automaton.isClosed();
        _automaton.close();
        if (!wasClosed && _automaton.isClosed()) {
            doPerformanceLogTerminate();
        }
    }

    /**
     * Returns the pattern expression produced by the automaton.
     *
     * @return the automaton's pattern expression
     * @throws DiscoveryException if the automaton cannot produce the expression
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public String getPatternExpression() throws DiscoveryException {
        String expression = _automaton.getPatternExpression();
        return expression;
    }

    /**
     * Closes the pattern and returns the support value reported by the automaton.
     *
     * @return the automaton's support value
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public double getSupportEst() {
        close();
        double estimate = _automaton.getSupport();
        return estimate;
    }

    /**
     * Closes the pattern and returns the ratio of the unique-sample estimate to the
     * automaton's language size, capped at {@code 1.0}.
     *
     * @return the capped ratio; {@code NaN} if the division itself yields {@code NaN}
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public double getConfidenceEst() {
        close();
        double ratio = getNumUniqueSamples() / getLanguageSize();
        // Math.min propagates NaN, matching a plain "ratio > 1.0" comparison.
        return Math.min(1.0d, ratio);
    }

    /**
     * Returns the fraction of tested weight that matched the pattern.
     *
     * @return matched / tested, or {@code SampleManager.ZERO_SAMPLING_RATE} when nothing
     *         has been tested yet
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public double getSupport() {
        if (_numSamplesTested == 0) {
            return SampleManager.ZERO_SAMPLING_RATE;
        }
        return (double) _numSamplesMatched / _numSamplesTested;
    }

    /**
     * Returns the same value as {@link #getConfidenceEst()}, which also closes the pattern.
     *
     * @return the confidence estimate
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public double getConfidence() {
        double confidence = getConfidenceEst();
        return confidence;
    }

    /**
     * Returns the number of samples recorded so far.
     *
     * @return the sample counter
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public int getNumSamples() {
        return _numSamples;
    }

    /**
     * Returns the number of zero-length samples recorded so far.
     *
     * @return the empty-sample counter
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public int getNumEmptySamples() {
        return _numEmptySamples;
    }

    /**
     * Returns the unique-sample estimate, computing it from the distinct-count tracker on
     * first use and caching it for as long as the stored value is non-negative.
     *
     * @return the cached or freshly computed estimate
     */
    public int getNumUniqueSamples() {
        if (_numUniqueSamples >= 0) {
            return _numUniqueSamples;
        }
        _numUniqueSamples = _distinctCount.combEst();
        return _numUniqueSamples;
    }

    /**
     * Returns the stored minimum sample length.
     *
     * @return the minimum length field as currently stored
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public int getMinSampleLength() {
        return _minSampleLength;
    }

    /**
     * Returns the stored maximum sample length.
     *
     * @return the maximum length field as currently stored
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public int getMaxSampleLength() {
        return _maxSampleLength;
    }

    /**
     * Returns the total weight passed to {@code test(...)} so far.
     *
     * @return the tested counter
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public int getNumSamplesTested() {
        return _numSamplesTested;
    }

    /**
     * Returns the total weight of tested strings that the automaton matched.
     *
     * @return the matched counter
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public int getNumSamplesMatched() {
        return _numSamplesMatched;
    }

    /**
     * Returns the automaton's current state count.
     *
     * @return number of states reported by the automaton
     */
    public int getNumStates() {
        return _automaton.getNumStates();
    }

    /**
     * Returns the automaton's current transition count.
     *
     * @return number of transitions reported by the automaton
     */
    public int getNumTransitions() {
        return _automaton.getNumTransitions();
    }

    /**
     * Returns the language size reported by the automaton.
     *
     * @return the automaton's language size
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public double getLanguageSize() {
        double size = _automaton.getLanguageSize();
        return size;
    }

    /**
     * Serializes this pattern as a {@code RegexPattern} XML element containing a
     * {@code PatternStats} element followed by whatever the configuration and the automaton
     * append for themselves.
     *
     * <p>Calling this method computes and caches the unique-sample estimate if it has not
     * been computed yet.
     *
     * @return the XML text
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public String toXML() {
        StringBuffer xml = new StringBuffer();

        xml.append("<RegexPattern xmlns=\"")
           .append("http://www.ibm.com/infosphere/schema")
           .append("\" name=\"")
           .append(XMLUtil.encodeString(_name))
           .append("\">");

        xml.append("<PatternStats numSamples=\"").append(getNumSamples());
        xml.append("\" numEmptySamples=\"").append(getNumEmptySamples());
        xml.append("\" numUniqueSamples=\"").append(getNumUniqueSamples());
        xml.append("\" minSampleLength=\"").append(getMinSampleLength());
        xml.append("\" maxSampleLength=\"").append(getMaxSampleLength());
        xml.append("\"/>");

        _config.toXML(xml);
        _automaton.toXML(xml);

        xml.append("</RegexPattern>");
        return xml.toString();
    }

    /** Forwards the remove-delimiters request to the automaton. */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public void removeDelimiters() {
        _automaton.removeDelimiters();
    }

    /**
     * Forwards the compress request to the automaton.
     *
     * @param d compression setting passed through unchanged
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public void compress(double d) {
        _automaton.compress(d);
    }

    /**
     * Forwards the remove-noise request to the automaton.
     *
     * @param d noise setting passed through unchanged
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public void removeNoise(double d) {
        _automaton.removeNoise(d);
    }

    /**
     * Asks the automaton to generate one sample string.
     *
     * @param z flag passed through unchanged to the automaton
     * @return the generated sample
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public String generateSample(boolean z) {
        String generated = _automaton.generateSample(z);
        return generated;
    }

    /**
     * Asks the automaton to generate samples into the given writer.
     *
     * @param printWriter destination passed through to the automaton
     * @param i           numeric argument passed through unchanged
     * @param z           first flag passed through unchanged
     * @param z2          second flag passed through unchanged
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public void generateSamples(PrintWriter printWriter, int i, boolean z, boolean z2) {
        _automaton.generateSamples(printWriter, i, z, z2);
    }

    /**
     * Computes a string similarity as one minus the Levenshtein distance divided by the
     * length of the longer string.
     *
     * @param str  first string; must not be {@code null}
     * @param str2 second string; must not be {@code null}
     * @return {@code 1 - distance / max(len(str), len(str2))}, using a divisor of 1 when
     *         both strings are empty
     */
    public static double similarityS(String str, String str2) {
        // The decompiled source divided by an undeclared register name (r0); the value it
        // stood for is the longer of the two lengths.
        int longest = Math.max(str.length(), str2.length());
        int divisor = longest > 0 ? longest : 1; // avoid dividing by zero for two empty strings
        return 1.0d - ((1.0d * StringUtil.levenshteinDistance(str, str2)) / divisor);
    }

    /**
     * Returns a fixed similarity of {@code 1.0}; the argument is not inspected.
     *
     * @param pattern ignored
     * @return always {@code 1.0}
     */
    public static double similarityD(Pattern pattern) {
        final double constantScore = 1.0d;
        return constantScore;
    }

    /**
     * Estimates similarity by cross-validation: copies of both patterns are rebuilt from
     * their XML, each copy generates {@code i} samples, and every sample is tested against
     * the other copy.
     *
     * @param pattern the pattern to compare with; must be a {@code RegexPattern}
     * @param i       number of samples generated per pattern; must be at least 1
     * @return matched tests from both directions divided by {@code 2 * i}
     * @throws IllegalArgumentException if {@code pattern} is not a {@code RegexPattern} or
     *         {@code i < 1}
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public double similarityV(Pattern pattern, int i) throws DiscoveryException, IOException, SAXException, ParserConfigurationException {
        if (!(pattern instanceof RegexPattern)) {
            throw new IllegalArgumentException("Pattern is not a regular expression");
        }
        RegexPattern other = (RegexPattern) pattern;
        if (i < 1) {
            throw new IllegalArgumentException("Invalid argument: " + i);
        }

        // Work on copies rebuilt from XML so neither original has its counters touched.
        Element ownXml = XMLUtil.parse(toXML()).getDocumentElement();
        Element otherXml = XMLUtil.parse(other.toXML()).getDocumentElement();
        RegexPattern ownCopy = new RegexPattern(ownXml);
        RegexPattern otherCopy = new RegexPattern(otherXml);

        // Generate all of this pattern's samples before any of the other pattern's.
        Vector<String> ownSamples = new Vector<String>();
        for (int k = 0; k < i; k++) {
            ownSamples.add(ownCopy.generateSample(false));
        }
        Vector<String> otherSamples = new Vector<String>();
        for (int k = 0; k < i; k++) {
            otherSamples.add(otherCopy.generateSample(false));
        }

        ownCopy._numSamplesMatched = 0;
        ownCopy._numSamplesTested = 0;
        otherCopy._numSamplesMatched = 0;
        otherCopy._numSamplesTested = 0;

        for (int k = 0; k < i; k++) {
            String fromOwn = ownSamples.get(k);
            String fromOther = otherSamples.get(k);
            ownCopy.test(fromOther, 1);
            otherCopy.test(fromOwn, 1);
        }

        int matchedBothWays = ownCopy._numSamplesMatched + otherCopy._numSamplesMatched;
        return (double) matchedBothWays / (2 * i);
    }

    /**
     * Estimates similarity by comparing confidence estimates of both patterns over
     * {@code i} compression levels spread evenly from {@code 1.0} up to (but not including)
     * {@code d}. At every level fresh copies of both patterns are rebuilt from XML and
     * compressed.
     *
     * @param pattern the pattern to compare with; must be a {@code RegexPattern}
     * @param d       upper compression bound; must be at least {@code 1.0}
     * @param i       number of compression levels; must be at least 1
     * @return one minus the mean absolute difference of the confidence estimates
     * @throws IllegalArgumentException if {@code pattern} is not a {@code RegexPattern},
     *         either pattern's configured {@code _maxNodeExpansion} is not {@code 1.0},
     *         {@code d < 1.0}, or {@code i < 1}
     */
    @Override // com.ibm.datatools.metadata.discovery.algorithms.pattern.Pattern
    public double similarityC(Pattern pattern, double d, int i) throws DiscoveryException, SAXException, IOException, ParserConfigurationException {
        if (!(pattern instanceof RegexPattern)) {
            throw new IllegalArgumentException("Pattern is not a regular expression");
        }
        RegexPattern other = (RegexPattern) pattern;
        if (_config._maxNodeExpansion != 1.0d || other._config._maxNodeExpansion != 1.0d) {
            throw new IllegalArgumentException("Invalid pattern");
        }
        if (d < 1.0d) {
            throw new IllegalArgumentException("Invalid argument: " + d);
        }
        if (i < 1) {
            throw new IllegalArgumentException("Invalid argument: " + i);
        }

        Element ownXml = XMLUtil.parse(toXML()).getDocumentElement();
        Element otherXml = XMLUtil.parse(other.toXML()).getDocumentElement();

        double totalDifference = 0.0d;
        int step = 0;
        while (step < i) {
            double level = 1.0d + ((step * (d - 1.0d)) / i);

            RegexPattern ownCopy = new RegexPattern(ownXml);
            ownCopy.compress(level);
            RegexPattern otherCopy = new RegexPattern(otherXml);
            otherCopy.compress(level);

            totalDifference += Math.abs(ownCopy.getConfidenceEst() - otherCopy.getConfidenceEst());
            step++;
        }
        return 1.0d - (totalDifference / i);
    }

    /* JADX WARN: Type inference failed for: r0v0, types: [java.io.PrintStream, java.lang.Throwable] */
    private static void usage() {
        ?? r0 = System.out;
        StringBuffer stringBuffer = new StringBuffer("Usage: java ");
        Class<?> cls = class$0;
        if (cls == null) {
            try {
                cls = Class.forName("com.ibm.datatools.metadata.discovery.algorithms.pattern.RegexPattern");
                class$0 = cls;
            } catch (ClassNotFoundException unused) {
                throw new NoClassDefFoundError(r0.getMessage());
            }
        }
        r0.println(stringBuffer.append(cls.getName()).append(" <noiseThreshold> <maxNodeExpansion>  < file").toString());
    }

    /**
     * Configures logging from {@code logconfig.xml}, looked up next to this class; does
     * nothing when the resource cannot be found.
     *
     * <p>The decompiled body called {@code getMessage()} on a {@code Class} inside its
     * {@code Class.forName} fallback, which does not compile; a class literal replaces it.
     */
    private static void doLogConfig() {
        URL resource = getResource(RegexPattern.class, "logconfig.xml");
        if (resource != null) {
            DOMConfigurator.configure(resource);
        }
    }

    /**
     * Looks up a resource stored in the package directory of {@code cls}, using the class
     * loader of {@code RegexPattern}.
     *
     * <p>The decompiled body called {@code getMessage()} on a {@code Class} inside its
     * {@code Class.forName} fallback, which does not compile; a class literal replaces it.
     *
     * @param cls class whose package determines the resource directory
     * @param str resource file name
     * @return the resource URL, or {@code null} (after a message on standard error) when
     *         the resource is not found
     */
    private static URL getResource(Class<?> cls, String str) {
        // getPackage() may return null (e.g. for the unnamed package); fall back to the bare name.
        Package pkg = cls.getPackage();
        String directory = pkg == null ? "" : pkg.getName().replace('.', '/') + "/";
        String path = directory + str;

        URL resource = RegexPattern.class.getClassLoader().getResource(path);
        if (resource == null) {
            System.err.println("Cannot find resource " + path);
        }
        return resource;
    }

    /**
     * Command-line driver: learns a pattern from the lines on standard input, then prints
     * the resulting expression followed by the estimated support and confidence.
     *
     * <p>Expects exactly two numeric arguments, named {@code <noiseThreshold>} and
     * {@code <maxNodeExpansion>} in the usage text; otherwise prints the usage line and
     * exits with status 1.
     *
     * @param strArr command-line arguments
     * @throws Exception on unparsable arguments, I/O failure, or pattern errors
     */
    public static void main(String[] strArr) throws Exception {
        doLogConfig();
        if (strArr.length != 2) {
            usage();
            System.exit(1);
        }
        // parseDouble replaces the deprecated Double(String) constructor; both throw
        // NumberFormatException on malformed input.
        double noiseThreshold = Double.parseDouble(strArr[0]);
        double maxNodeExpansion = Double.parseDouble(strArr[1]);

        // Standard input is left open on purpose; the JVM owns that stream.
        BufferedReader input = new BufferedReader(new InputStreamReader(System.in));
        RegexPattern learned =
                new RegexPattern("noname", true, true, true, maxNodeExpansion, 1000000.0d, noiseThreshold, -1L);

        String line;
        while ((line = input.readLine()) != null) {
            learned.sample(line);
        }

        // The expression is printed first: getSupportEst() closes the pattern.
        System.out.println("Regular expression is " + learned.getPatternExpression());
        System.out.println("Estimated support " + learned.getSupportEst()
                + ", confidence " + learned.getConfidenceEst());
    }
}
