/*
 * Decompiled with CFR 0.152.
 */
package com.wcohen.ss;

import com.wcohen.ss.AbstractSourcedTokenizedStringDistance;
import com.wcohen.ss.BagOfSourcedTokens;
import com.wcohen.ss.api.SourcedStringWrapperIterator;
import com.wcohen.ss.api.SourcedTokenizer;
import com.wcohen.ss.api.StringWrapper;
import com.wcohen.ss.api.StringWrapperIterator;
import com.wcohen.ss.api.Token;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;

/**
 * Base class for sourced, token-based string distances that rely on
 * statistics gathered from a training collection.
 *
 * <p>{@link #train(StringWrapperIterator)} records, for every token, the
 * number of training strings in which that token appears (its document
 * frequency), along with the number of strings seen and the total number of
 * token occurrences. Subclasses read these values through the protected
 * fields or {@link #getDocumentFrequency(Token)}.
 */
public abstract class AbstractSourcedStatisticalTokenDistance
extends AbstractSourcedTokenizedStringDistance {
    /** Maximum number of "not yet trained" warnings printed per instance. */
    private static final int MAX_WARNINGS = 10;

    /**
     * Maps each {@link Token} seen during training to an {@link Integer}
     * holding the number of training strings that contained it. Kept as a
     * raw {@code Map} so that existing subclasses continue to compile.
     */
    protected Map documentFrequency = new HashMap();
    /** Number of strings consumed by {@link #train(StringWrapperIterator)} so far. */
    protected int collectionSize = 0;
    /** Number of token occurrences seen during training, repeats included. */
    protected int totalTokenCount = 0;
    /** Number of "not yet trained" warnings printed so far; never exceeds {@link #MAX_WARNINGS}. */
    private int warningCounter = 0;

    /**
     * Creates a distance that uses the given tokenizer.
     *
     * @param tokenizer tokenizer handed to the superclass constructor
     */
    public AbstractSourcedStatisticalTokenDistance(SourcedTokenizer tokenizer) {
        super(tokenizer);
    }

    /** Creates a distance using the superclass's no-argument constructor. */
    public AbstractSourcedStatisticalTokenDistance() {
    }

    /**
     * Accumulates token statistics from the supplied strings. Statistics are
     * added to whatever has already been collected, so this method may be
     * called more than once.
     *
     * @param i0 the training strings; must actually be a
     *           {@link SourcedStringWrapperIterator}
     * @throws ClassCastException if {@code i0} is not a
     *         {@link SourcedStringWrapperIterator}
     */
    public void train(StringWrapperIterator i0) {
        SourcedStringWrapperIterator i = (SourcedStringWrapperIterator)i0;
        // Tokens already counted for the current string, so that each token
        // contributes at most one to its document frequency per string.
        HashSet<Token> seenTokens = new HashSet<Token>();
        while (i.hasNext()) {
            BagOfSourcedTokens bag = this.asBagOfSourcedTokens(i.nextSourcedStringWrapper());
            seenTokens.clear();
            Iterator j = bag.tokenIterator();
            while (j.hasNext()) {
                // Every occurrence counts towards the total, repeats included.
                ++this.totalTokenCount;
                Token tokj = (Token)j.next();
                // add() returns false when the token was already seen in this string.
                if (!seenTokens.add(tokj)) {
                    continue;
                }
                Integer df = (Integer)this.documentFrequency.get(tokj);
                int updated = df == null ? 1 : df + 1;
                // Integer.valueOf shares instances for small values, so the
                // common low counts do not allocate a fresh object each time.
                this.documentFrequency.put(tokj, Integer.valueOf(updated));
            }
            ++this.collectionSize;
        }
    }

    /**
     * Prints a warning to standard output when a similarity is requested
     * before any training string has been seen. At most
     * {@link #MAX_WARNINGS} warnings are printed per instance; the last one
     * is followed by a notice that no further warnings will appear.
     *
     * @param s first string of the comparison, used only in the message
     * @param t second string of the comparison, used only in the message
     */
    protected void checkTrainingHasHappened(StringWrapper s, StringWrapper t) {
        if (this.collectionSize != 0 || this.warningCounter >= MAX_WARNINGS) {
            return;
        }
        // The counter is only advanced while warnings are still being
        // printed, so it can never overflow and re-enable them.
        ++this.warningCounter;
        System.out.println("Warning: " + this.getClass() + " not yet trained for sim('" + s + "','" + t + "')");
        if (this.warningCounter == MAX_WARNINGS) {
            System.out.println("(By the way, that's the last warning you'll get about this.)");
        }
    }

    /**
     * Returns the number of training strings that contained the given token.
     *
     * @param tok the token to look up
     * @return the recorded document frequency, or 0 if the token has no entry
     */
    public int getDocumentFrequency(Token tok) {
        Integer freqInteger = (Integer)this.documentFrequency.get(tok);
        return freqInteger == null ? 0 : freqInteger.intValue();
    }
}

