package it.unimi.dsi.sux4j.mph;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import com.martiansoftware.jsap.stringparsers.FileStringParser;
import com.martiansoftware.jsap.stringparsers.ForNameStringParser;
import it.unimi.dsi.bits.BitVector;
import it.unimi.dsi.bits.Fast;
import it.unimi.dsi.bits.HuTuckerTransformationStrategy;
import it.unimi.dsi.bits.TransformationStrategies;
import it.unimi.dsi.bits.TransformationStrategy;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.AbstractLongBigList;
import it.unimi.dsi.fastutil.longs.LongBigList;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.FileLinesCollection;
import it.unimi.dsi.io.LineIterator;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.sux4j.io.ChunkedHashStore;
import it.unimi.dsi.sux4j.mph.GOV3Function;
import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.zip.GZIPInputStream;
import org.apache.commons.lang.CharEncoding;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:it/unimi/dsi/sux4j/mph/ZFastTrieDistributorMonotoneMinimalPerfectHashFunction.class */
/**
 * A monotone minimal perfect hash function that uses a {@link ZFastTrieDistributor} to assign
 * each key to a bucket and a {@link GOV3Function} to store the offset of each key inside its
 * bucket.
 *
 * <p>The value returned for a key is {@code (bucket << log2BucketSize) + offset}. Optionally, a
 * signature of each key is stored; in that case a lookup whose signature does not match returns
 * the default return value instead of a rank.
 *
 * <p>Instances are created through {@link Builder} or through the command-line entry point
 * {@link #main(String[])}.
 *
 * @param <T> the type of the keys.
 */
public class ZFastTrieDistributorMonotoneMinimalPerfectHashFunction<T> extends AbstractHashFunction<T> implements Serializable {
    public static final long serialVersionUID = 4;
    private static final Logger LOGGER = LoggerFactory.getLogger(ZFastTrieDistributorMonotoneMinimalPerfectHashFunction.class);

    /** The number of keys. */
    private final long size;
    /** The base-2 logarithm of the bucket size, or -1 if the function is empty. */
    private final int log2BucketSize;
    /** The strategy turning keys into bit vectors. */
    private final TransformationStrategy<? super T> transform;
    /** The distributor mapping a key to its bucket index, or {@code null} if the function is empty. */
    private final ZFastTrieDistributor<BitVector> distributor;
    /** The function mapping a key to its offset inside its bucket, or {@code null} if the function is empty. */
    private final GOV3Function<BitVector> offset;
    /** The seed of the hash store used at construction time; lookups hash keys with the same seed. */
    private long seed;
    /** The mask selecting the signature bits of a hash, or zero if no signatures are stored. */
    protected final long signatureMask;
    /** The per-key signatures, or {@code null} if no signatures are stored. */
    protected final LongBigList signatures;

    /**
     * A single-use builder for {@link ZFastTrieDistributorMonotoneMinimalPerfectHashFunction}.
     *
     * @param <T> the type of the keys.
     */
    public static class Builder<T> {
        /** The keys to hash. */
        protected Iterable<? extends T> keys;
        /** The strategy turning keys into bit vectors. */
        protected TransformationStrategy<? super T> transform;
        /** Kept for subclasses; {@link #build()} in this class does not read it. */
        protected long numKeys = -1;
        /** The signature width in bits; zero means that no signatures are stored. */
        protected int signatureWidth;
        /** The directory handed to the hash store for its temporary files. */
        protected File tempDir;
        /** Whether {@link #build()} has already been called. */
        protected boolean built;

        /**
         * Sets the keys to hash.
         *
         * @param keys the keys.
         * @return this builder.
         */
        public Builder<T> keys(Iterable<? extends T> keys) {
            this.keys = keys;
            return this;
        }

        /**
         * Sets the strategy turning keys into bit vectors.
         *
         * @param transform the transformation strategy.
         * @return this builder.
         */
        public Builder<T> transform(TransformationStrategy<? super T> transform) {
            this.transform = transform;
            return this;
        }

        /**
         * Sets the width of the signatures stored with the function.
         *
         * @param signatureWidth the signature width in bits; zero disables signatures.
         * @return this builder.
         */
        public Builder<T> signed(int signatureWidth) {
            this.signatureWidth = signatureWidth;
            return this;
        }

        /**
         * Sets the directory for temporary files.
         *
         * @param tempDir the temporary directory.
         * @return this builder.
         */
        public Builder<T> tempDir(File tempDir) {
            this.tempDir = tempDir;
            return this;
        }

        /**
         * Builds the function. The bucket size is always chosen automatically (a log2 bucket size
         * of -1 is passed to the constructor).
         *
         * @return a new function built from the current configuration.
         * @throws IllegalStateException if this builder has already been used.
         * @throws IOException if the construction fails with an I/O error.
         */
        public ZFastTrieDistributorMonotoneMinimalPerfectHashFunction<T> build() throws IOException {
            if (this.built) {
                throw new IllegalStateException("This builder has been already used");
            }
            this.built = true;
            return new ZFastTrieDistributorMonotoneMinimalPerfectHashFunction<>(this.keys, this.transform, -1, this.signatureWidth, this.tempDir);
        }
    }

    /**
     * Creates a new function for the given keys.
     *
     * @param keys the keys to hash.
     * @param transform the strategy turning keys into bit vectors.
     * @param requestedLog2BucketSize the base-2 logarithm of the bucket size, or -1 to derive it
     *        from the average key length and the number of keys.
     * @param signatureWidth the signature width in bits; zero means that no signatures are stored.
     * @param tempDir the directory handed to the hash store for its temporary files.
     * @throws IOException if the construction fails with an I/O error.
     */
    protected ZFastTrieDistributorMonotoneMinimalPerfectHashFunction(Iterable<? extends T> keys, TransformationStrategy<? super T> transform, int requestedLog2BucketSize, int signatureWidth, File tempDir) throws IOException {
        this.transform = transform;
        this.defRetValue = -1L;

        final XoRoShiRo128PlusRandomGenerator random = new XoRoShiRo128PlusRandomGenerator();
        // The store holds bit vectors (the transformed keys), not the keys themselves.
        final ChunkedHashStore<BitVector> store = new ChunkedHashStore<>(TransformationStrategies.identity(), tempDir);
        try {
            store.reset(random.nextLong());
            final Iterable<BitVector> bitVectors = TransformationStrategies.wrap(keys, transform);

            final ProgressLogger pl = new ProgressLogger(LOGGER);
            pl.displayLocalSpeed = true;
            pl.displayFreeMemory = true;
            pl.itemsName = "keys";

            long maxLength = 0;
            long totalLength = 0;
            pl.start("Scanning collection...");
            for (BitVector bv : bitVectors) {
                final long length = bv.length();
                maxLength = Math.max(maxLength, length);
                totalLength += length;
                store.add(bv);
                pl.lightUpdate();
            }
            pl.done();
            store.checkAndRetry(bitVectors);

            this.size = store.size();
            if (this.size == 0) {
                // Empty function: nothing to distribute, every lookup returns the default value.
                this.log2BucketSize = -1;
                this.distributor = null;
                this.offset = null;
                this.signatureMask = 0L;
                this.signatures = null;
                return;
            }

            // Average key length in bits, rounded up.
            final long averageLength = (totalLength + this.size - 1) / this.size;
            final long forecastBucketSize = (long) Math.ceil(10.5d
                    + (4.05d * Math.log(averageLength))
                    + (2.43d * Math.log(Math.log(this.size) + 1.0d))
                    + (2.43d * Math.log(Math.log(averageLength) + 1.0d)));
            this.log2BucketSize = requestedLog2BucketSize == -1 ? Fast.mostSignificantBit(forecastBucketSize) : requestedLog2BucketSize;

            LOGGER.debug("Average length: {}", averageLength);
            LOGGER.debug("Max length: {}", maxLength);
            // 1L: a plain int shift would overflow for exponents of 31 or more.
            LOGGER.debug("Bucket size: {}", 1L << this.log2BucketSize);

            LOGGER.info("Computing z-fast trie distributor...");
            this.distributor = new ZFastTrieDistributor<>(bitVectors, this.log2BucketSize, TransformationStrategies.identity(), store);

            LOGGER.info("Computing offsets...");
            this.offset = new GOV3Function.Builder<BitVector>().store(store).values(new AbstractLongBigList() {
                /** Selects the low {@code log2BucketSize} bits of an index, i.e., the offset inside a bucket. */
                private final long bucketSizeMask = (1L << ZFastTrieDistributorMonotoneMinimalPerfectHashFunction.this.log2BucketSize) - 1;

                @Override
                public long getLong(long index) {
                    return index & this.bucketSizeMask;
                }

                @Override
                public long size64() {
                    return ZFastTrieDistributorMonotoneMinimalPerfectHashFunction.this.size;
                }
            }, this.log2BucketSize).indirect().build();

            this.seed = store.seed();

            final double averageLengthLn2 = averageLength * Math.log(2.0d);
            final double forecastBitCost = (1.0d / forecastBucketSize) * (((-6.0d) * Fast.log2(Math.log(2.0d)))
                    + (5.0d * Fast.log2(averageLengthLn2))
                    + (2.0d * Fast.log2(forecastBucketSize))
                    + Fast.log2(Math.log(averageLengthLn2) - Math.log(Math.log(2.0d)))
                    + (6.0d * GOV3Function.C)
                    + (3.0d * Fast.log2(Math.E))
                    + (3.0d * Fast.log2(Math.log(3.0d * this.size)))
                    + 3.0d
                    + (GOV3Function.C * forecastBucketSize)
                    + (GOV3Function.C * forecastBucketSize * Fast.log2(forecastBucketSize)));
            LOGGER.info("Forecast bit cost per element: {}", forecastBitCost);
            // Divide as double: a long division would truncate the fractional bits per element.
            LOGGER.info("Actual bit cost per element: {}", (double) numBits() / this.size);

            if (signatureWidth != 0) {
                // Long shift: with an int literal the shift distance is reduced modulo 32 and the
                // mask is wrong for widths of 32 bits or more.
                this.signatureMask = -1L >>> (Long.SIZE - signatureWidth);
                this.signatures = store.signatures(signatureWidth, pl);
            } else {
                this.signatureMask = 0L;
                this.signatures = null;
            }
        } finally {
            // Release the store even when construction fails halfway.
            store.close();
        }
    }

    /**
     * Returns the value associated with the given key.
     *
     * @param obj the key to look up.
     * @return {@code (bucket << log2BucketSize) + offset} for the key; the default return value
     *         if the function is empty, if the computed value falls outside {@code [0, size)},
     *         or if signatures are stored and the signature of the key does not match.
     */
    @Override
    @SuppressWarnings("unchecked")
    public long getLong(Object obj) {
        if (this.size == 0) {
            return this.defRetValue;
        }
        // Unchecked by necessity: the functional interface takes an Object, the strategy a T.
        final BitVector bv = this.transform.toBitVector((T) obj).fast();
        final long[] state = Hashes.preprocessSpooky4(bv, this.seed);
        final long[] triple = new long[3];
        Hashes.spooky4(bv, bv.length(), this.seed, state, triple);

        final long bucket = this.distributor.getLongByBitVectorTripleAndState(bv, triple, state);
        final long rank = (bucket << this.log2BucketSize) + this.offset.getLongByTriple(triple);

        if (rank < 0 || rank >= this.size) {
            return this.defRetValue;
        }
        if (this.signatureMask != 0 && this.signatures.getLong(rank) != (triple[0] & this.signatureMask)) {
            return this.defRetValue;
        }
        return rank;
    }

    /**
     * Returns the number of keys of this function.
     *
     * @return the number of keys.
     */
    @Override
    public long size64() {
        return this.size;
    }

    /**
     * Returns the number of bits used by this structure: the sum of the bits reported by the
     * distributor, the offset function and the transformation strategy.
     *
     * @return the number of bits, or zero if the function is empty.
     */
    public long numBits() {
        if (this.size == 0) {
            return 0L;
        }
        return this.distributor.numBits() + this.offset.numBits() + this.transform.numBits();
    }

    /**
     * Command-line entry point: reads a newline-separated list of strings from a file or from
     * standard input, builds the function and serialises it to the given file.
     *
     * @param args the command-line arguments, parsed with JSAP.
     * @throws NoSuchMethodException declared for compatibility with existing callers.
     * @throws IOException if reading the strings or storing the function fails.
     * @throws JSAPException if the command-line parser cannot be configured.
     */
    public static void main(String[] args) throws NoSuchMethodException, IOException, JSAPException {
        final SimpleJSAP jsap = new SimpleJSAP(
                ZFastTrieDistributorMonotoneMinimalPerfectHashFunction.class.getName(),
                "Builds a monotone minimal perfect hash using a probabilistic z-fast trie as a distributor reading a newline-separated list of strings.",
                new Parameter[] {
                    new FlaggedOption("encoding", ForNameStringParser.getParser(Charset.class), CharEncoding.UTF_8, false, 'e', "encoding", "The string file encoding."),
                    new FlaggedOption("tempDir", FileStringParser.getParser(), JSAP.NO_DEFAULT, false, 'T', "temp-dir", "A directory for temporary files."),
                    new Switch("huTucker", 'h', "hu-tucker", "Use Hu-Tucker coding to reduce string length."),
                    new Switch("iso", 'i', "iso", "Use ISO-8859-1 coding internally (i.e., just use the lower eight bits of each character)."),
                    // (char) 0: this switch has no short flag.
                    new Switch("utf32", (char) 0, "utf-32", "Use UTF-32 internally (handles surrogate pairs)."),
                    new FlaggedOption("signatureWidth", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, false, 's', "signature-width", "If specified, the signature width in bits; if negative, the generated function will be a dictionary."),
                    new Switch("zipped", 'z', "zipped", "The string list is compressed in gzip format."),
                    new FlaggedOption("log2bucket", JSAP.INTEGER_PARSER, "-1", false, 'b', "log2bucket", "The base 2 logarithm of the bucket size (mainly for testing)."),
                    new UnflaggedOption("function", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, true, false, "The filename for the serialised monotone minimal perfect hash function."),
                    new UnflaggedOption("stringFile", JSAP.STRING_PARSER, "-", false, false, "The name of a file containing a newline-separated list of strings, or - for standard input; in the first case, strings will not be loaded into core memory.")
                });

        final JSAPResult options = jsap.parse(args);
        if (jsap.messagePrinted()) {
            return;
        }

        final String functionName = options.getString("function");
        final String stringFile = options.getString("stringFile");
        final int log2BucketSize = options.getInt("log2bucket");
        final Charset encoding = (Charset) options.getObject("encoding");
        final File tempDir = options.getFile("tempDir");
        final boolean zipped = options.getBoolean("zipped");
        final boolean iso = options.getBoolean("iso");
        final boolean utf32 = options.getBoolean("utf32");
        final boolean huTucker = options.getBoolean("huTucker");
        final int signatureWidth = options.getInt("signatureWidth", 0);

        final Collection<? extends CharSequence> lines;
        if ("-".equals(stringFile)) {
            // Standard input cannot be re-read, so all lines are loaded up front.
            final ProgressLogger pl = new ProgressLogger(LOGGER);
            pl.displayLocalSpeed = true;
            pl.displayFreeMemory = true;
            pl.start("Loading strings...");
            lines = new LineIterator(new FastBufferedReader(new InputStreamReader(zipped ? new GZIPInputStream(System.in) : System.in, encoding)), pl).allLines();
            pl.done();
        } else {
            lines = new FileLinesCollection(stringFile, encoding.toString(), zipped);
        }

        // Precedence of the coding switches: Hu-Tucker, then ISO-8859-1, then UTF-32, else UTF-16.
        final TransformationStrategy<CharSequence> strategy;
        if (huTucker) {
            strategy = new HuTuckerTransformationStrategy(lines, true);
        } else if (iso) {
            strategy = TransformationStrategies.prefixFreeIso();
        } else if (utf32) {
            strategy = TransformationStrategies.prefixFreeUtf32();
        } else {
            strategy = TransformationStrategies.prefixFreeUtf16();
        }

        BinIO.storeObject(new ZFastTrieDistributorMonotoneMinimalPerfectHashFunction<CharSequence>(lines, strategy, log2BucketSize, signatureWidth, tempDir), functionName);
        LOGGER.info("Completed.");
    }
}
