package xyz.felh.openai.jtokkit;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import xyz.felh.openai.jtokkit.api.Encoding;
import xyz.felh.openai.jtokkit.api.GptBytePairEncodingParams;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:xyz/felh/openai/jtokkit/EncodingFactory.class */
/**
 * Static factory for {@link Encoding} instances.
 *
 * <p>Provides the predefined encodings {@code r50k_base}, {@code p50k_base},
 * {@code p50k_edit} and {@code cl100k_base}, whose rank tables are read from
 * {@code *.tiktoken} classpath resources resolved relative to this class, as well as
 * {@link #fromParameters(GptBytePairEncodingParams)} for caller-supplied parameters.
 *
 * <p>Every call to one of the predefined factory methods re-reads the resource file and
 * recompiles the split pattern; nothing is cached in this class.
 */
public final class EncodingFactory {
    private static final String ENDOFTEXT = "<|endoftext|>";
    private static final String FIM_PREFIX = "<|fim_prefix|>";
    private static final String FIM_MIDDLE = "<|fim_middle|>";
    private static final String FIM_SUFFIX = "<|fim_suffix|>";
    private static final String ENDOFPROMPT = "<|endofprompt|>";
    private static final String IM_START = "<|im_start|>";
    private static final String IM_END = "<|im_end|>";
    private static final String IM_SEP = "<|im_sep|>";

    /** Split pattern shared by {@code r50k_base}, {@code p50k_base} and {@code p50k_edit}. */
    private static final String X50K_PATTERN =
            "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+";

    /** Split pattern used by {@code cl100k_base}. */
    private static final String CL100K_PATTERN =
            "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+";

    private static final Map<String, Integer> SPECIAL_TOKENS_X50K_BASE;
    private static final Map<String, Integer> SPECIAL_TOKENS_P50K_EDIT;
    private static final Map<String, Integer> SPECIAL_TOKENS_CL100K_BASE;

    static {
        Map<String, Integer> x50kBase = new HashMap<>();
        x50kBase.put(ENDOFTEXT, 50256);
        SPECIAL_TOKENS_X50K_BASE = Collections.unmodifiableMap(x50kBase);

        Map<String, Integer> p50kEdit = new HashMap<>();
        p50kEdit.put(ENDOFTEXT, 50256);
        p50kEdit.put(FIM_PREFIX, 50281);
        p50kEdit.put(FIM_MIDDLE, 50282);
        p50kEdit.put(FIM_SUFFIX, 50283);
        SPECIAL_TOKENS_P50K_EDIT = Collections.unmodifiableMap(p50kEdit);

        Map<String, Integer> cl100kBase = new HashMap<>();
        cl100kBase.put(ENDOFTEXT, 100257);
        cl100kBase.put(FIM_PREFIX, 100258);
        cl100kBase.put(FIM_MIDDLE, 100259);
        cl100kBase.put(FIM_SUFFIX, 100260);
        cl100kBase.put(ENDOFPROMPT, 100276);
        cl100kBase.put(IM_START, 100264);
        cl100kBase.put(IM_SEP, 100266);
        cl100kBase.put(IM_END, 100265);
        SPECIAL_TOKENS_CL100K_BASE = Collections.unmodifiableMap(cl100kBase);
    }

    private EncodingFactory() {
        // Static utility class; not instantiable.
    }

    /**
     * Creates the {@code r50k_base} encoding from the {@code r50k_base.tiktoken} resource.
     *
     * @return a new encoding instance
     * @throws IllegalStateException if the resource is missing, unreadable or malformed
     */
    public static Encoding r50kBase() {
        return fromPredefinedParameters("r50k_base", X50K_PATTERN, "r50k_base.tiktoken", SPECIAL_TOKENS_X50K_BASE);
    }

    /**
     * Creates the {@code p50k_base} encoding from the {@code p50k_base.tiktoken} resource.
     *
     * @return a new encoding instance
     * @throws IllegalStateException if the resource is missing, unreadable or malformed
     */
    public static Encoding p50kBase() {
        return fromPredefinedParameters("p50k_base", X50K_PATTERN, "p50k_base.tiktoken", SPECIAL_TOKENS_X50K_BASE);
    }

    /**
     * Creates the {@code p50k_edit} encoding. It reads the same {@code p50k_base.tiktoken}
     * rank file as {@link #p50kBase()} but registers the additional FIM special tokens.
     *
     * @return a new encoding instance
     * @throws IllegalStateException if the resource is missing, unreadable or malformed
     */
    public static Encoding p50kEdit() {
        return fromPredefinedParameters("p50k_edit", X50K_PATTERN, "p50k_base.tiktoken", SPECIAL_TOKENS_P50K_EDIT);
    }

    /**
     * Creates the {@code cl100k_base} encoding from the {@code cl100k_base.tiktoken} resource.
     *
     * @return a new encoding instance
     * @throws IllegalStateException if the resource is missing, unreadable or malformed
     */
    public static Encoding cl100kBase() {
        return fromPredefinedParameters("cl100k_base", CL100K_PATTERN, "cl100k_base.tiktoken", SPECIAL_TOKENS_CL100K_BASE);
    }

    /**
     * Creates an encoding from caller-supplied parameters.
     *
     * @param gptBytePairEncodingParams the parameters handed to the encoding implementation
     * @return a new {@code GptBytePairEncoding} built from the given parameters
     */
    public static Encoding fromParameters(GptBytePairEncodingParams gptBytePairEncodingParams) {
        return new GptBytePairEncoding(gptBytePairEncodingParams);
    }

    /**
     * Assembles the parameters of a predefined encoding and builds it.
     *
     * @param name          encoding name passed through to the parameters object
     * @param patternString regular expression source; compiled on every call
     * @param fileName      rank-file resource name, resolved relative to this class
     * @param specialTokens special-token-to-id map passed through to the parameters object
     * @return a new encoding instance
     */
    private static Encoding fromPredefinedParameters(
            String name, String patternString, String fileName, Map<String, Integer> specialTokens) {
        return fromParameters(new GptBytePairEncodingParams(
                name, Pattern.compile(patternString), loadMergeableRanks(fileName), specialTokens));
    }

    /**
     * Reads a rank file from the classpath.
     *
     * <p>Each line must consist of a Base64-encoded token, whitespace, and a decimal integer
     * rank. The resource stream is closed on every exit path, including failures.
     *
     * <p>Note that {@code byte[]} keys hash and compare by identity, so the returned map can
     * be iterated but not queried by array content.
     *
     * @param fileName resource name, resolved relative to this class
     * @return a mutable map from decoded token bytes to rank
     * @throws IllegalStateException    if the resource is missing, cannot be read, or contains
     *                                  a line that does not split into two parts
     * @throws IllegalArgumentException if a token is not valid Base64 or a rank is not a valid
     *                                  integer (the latter as {@link NumberFormatException})
     */
    private static Map<byte[], Integer> loadMergeableRanks(String fileName) {
        // try-with-resources guarantees the stream is released even when parsing fails midway.
        try (InputStream in = EncodingFactory.class.getResourceAsStream(fileName)) {
            if (in == null) {
                throw new IllegalStateException("Could not find " + fileName + " in resources");
            }
            Map<byte[], Integer> mergeableRanks = new HashMap<>();
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
            String line;
            while ((line = reader.readLine()) != null) {
                String[] parts = line.split("\\s+", 2);
                if (parts.length != 2) {
                    throw new IllegalStateException("Invalid line in " + fileName + ": " + line);
                }
                byte[] token = Base64.getDecoder().decode(parts[0].getBytes(StandardCharsets.UTF_8));
                mergeableRanks.put(token, Integer.valueOf(Integer.parseInt(parts[1])));
            }
            return mergeableRanks;
        } catch (IOException e) {
            throw new IllegalStateException("Could not load " + fileName + " from resources", e);
        }
    }
}
