/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.tools;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.ParseSegment;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.tools.AbstractCommonCrawlFormat;
import org.apache.nutch.tools.CommonCrawlConfig;
import org.apache.nutch.tools.WARCUtils;
import org.archive.format.warc.WARCConstants;
import org.archive.io.warc.WARCRecordInfo;
import org.archive.io.warc.WARCWriter;
import org.archive.io.warc.WARCWriterPoolSettings;
import org.archive.io.warc.WARCWriterPoolSettingsData;
import org.archive.uid.RecordIDGenerator;
import org.archive.uid.UUIDGenerator;
import org.archive.util.DateUtils;
import org.archive.util.anvl.ANVLRecord;

/**
 * {@link AbstractCommonCrawlFormat} implementation that writes WARC records
 * through a {@link WARCWriter} instead of producing JSON.
 *
 * <p>For every document a {@code response} record is written (downgraded to a
 * {@code resource} record when no HTTP response headers are available) and,
 * when the metadata carries a {@code _request_} entry, a {@code request}
 * record linked to it via {@code WARC-Concurrent-To}. All JSON-building hooks
 * inherited from the base class throw {@link NotImplementedException}.
 */
public class CommonCrawlFormatWARC
extends AbstractCommonCrawlFormat {
    public static final String MAX_WARC_FILE_SIZE = "warc.file.size.max";
    public static final String TEMPLATE = "${prefix}-${timestamp17}-${serialno}";
    /** Metadata key holding the fetch time that is parsed as a {@code long}. */
    private static final String FETCH_TIME_KEY = "nutch.fetch.time";
    /** Metadata key holding the raw HTTP request. */
    private static final String REQUEST_KEY = "_request_";
    /** Maximum WARC file size (1 GiB) used when the config gives no positive size. */
    private static final long DEFAULT_MAX_WARC_FILE_SIZE = 0x40000000L;
    private static final AtomicInteger SERIALNO = new AtomicInteger();
    private static final UUIDGenerator GENERATOR = new UUIDGenerator();
    private String outputDir = null;
    private WARCWriter writer;
    private ParseData parseData;

    /**
     * Creates a format instance without a current document; the document is
     * supplied later through
     * {@link #getJsonData(String, Content, Metadata, ParseData)}.
     *
     * @param nutchConf configuration used to build the warcinfo content
     * @param config    dump configuration (output directory, size, compression)
     * @throws IOException      declared by the underlying setup calls
     * @throws RuntimeException if {@code config} has no output directory
     */
    public CommonCrawlFormatWARC(Configuration nutchConf, CommonCrawlConfig config) throws IOException {
        super(null, null, null, nutchConf, config);
        this.writer = this.openWriter(nutchConf, config);
    }

    /**
     * Creates a format instance bound to one document.
     *
     * @param url       URL of the document
     * @param content   fetched content
     * @param metadata  metadata of the document
     * @param nutchConf configuration handed to the superclass
     * @param config    dump configuration (output directory, size, compression)
     * @param parseData parse data of the document
     * @throws IOException      declared by the underlying setup calls
     * @throws RuntimeException if {@code config} has no output directory
     */
    public CommonCrawlFormatWARC(String url, Content content, Metadata metadata, Configuration nutchConf, CommonCrawlConfig config, ParseData parseData) throws IOException {
        super(url, content, metadata, nutchConf, config);
        this.parseData = parseData;
        // The warcinfo content is built from the superclass' conf field, as before.
        this.openWriterInto(this.conf, config);
    }

    /** Assigns the writer built by {@link #openWriter} to {@link #writer}. */
    private void openWriterInto(Configuration warcInfoConf, CommonCrawlConfig config) throws IOException {
        this.writer = this.openWriter(warcInfoConf, config);
    }

    /**
     * Shared constructor logic: records the output directory and builds the
     * {@link WARCWriter} that all records are written through.
     *
     * @param warcInfoConf configuration passed to
     *                     {@link WARCUtils#getWARCInfoContent}
     * @param config       source of output directory, max size and compression
     * @return a new writer targeting the configured output directory
     * @throws RuntimeException if the output directory is not configured
     */
    private WARCWriter openWriter(Configuration warcInfoConf, CommonCrawlConfig config) throws IOException {
        ANVLRecord info = WARCUtils.getWARCInfoContent(warcInfoConf);
        List<String> warcInfoMetadata = Collections.singletonList(info.toString());
        this.outputDir = config.getOutputDir();
        if (this.outputDir == null) {
            throw new RuntimeException("Missing output directory configuration: " + this.outputDir);
        }
        File targetDir = new File(this.outputDir);
        // Only a positive configured size overrides the default.
        long maxSize = config.getWarcSize() > 0L ? config.getWarcSize() : DEFAULT_MAX_WARC_FILE_SIZE;
        WARCWriterPoolSettingsData settings = new WARCWriterPoolSettingsData("WEB", TEMPLATE, maxSize, config.isCompressed(), Arrays.asList(targetDir), warcInfoMetadata, (RecordIDGenerator)new UUIDGenerator());
        return new WARCWriter(SERIALNO, (WARCWriterPoolSettings)settings);
    }

    /**
     * Replaces the current document and writes its WARC record(s).
     *
     * @return always {@code null}; output goes to the WARC writer
     * @throws IOException if writing fails
     */
    @Override
    public String getJsonData(String url, Content content, Metadata metadata, ParseData parseData) throws IOException {
        this.url = url;
        this.content = content;
        this.metadata = metadata;
        this.parseData = parseData;
        return this.getJsonData();
    }

    /**
     * Writes the response record for the current document and, when the
     * metadata has a non-blank {@code _request_} entry, the request record.
     *
     * <p>A document whose fetch time cannot be parsed is logged and skipped.
     *
     * @return always {@code null}; output goes to the WARC writer
     * @throws IOException if writing fails
     */
    @Override
    public String getJsonData() throws IOException {
        long position = this.writer.getPosition();
        try {
            this.writer.checkSize();
            // NOTE(review): position is never read after this point; the calls are
            // kept in case getPosition() has side effects (e.g. flushing) - confirm.
            if (this.writer.getPosition() != position) {
                position = this.writer.getPosition();
            }
            URI responseId = this.writeResponse();
            if (StringUtils.isNotBlank(this.metadata.get(REQUEST_KEY))) {
                this.writeRequest(responseId);
            }
        }
        catch (ParseException e) {
            // Keep the exception so the offending value is visible in the log.
            LOG.error("Can't get a valid date from: {}", this.url, e);
        }
        return null;
    }

    /**
     * Formats the {@code nutch.fetch.time} metadata value as a 14-digit date.
     *
     * @return the value produced by {@link DateUtils#getLog14Date(long)}
     * @throws ParseException if the metadata value is missing or not a valid
     *                        {@code long}; the {@link NumberFormatException}
     *                        is attached as the cause
     */
    private String fetchTimeAs14DigitDate() throws ParseException {
        String rawFetchTime = this.metadata.get(FETCH_TIME_KEY);
        try {
            return DateUtils.getLog14Date(Long.parseLong(rawFetchTime));
        }
        catch (NumberFormatException e) {
            ParseException invalid = new ParseException("Invalid " + FETCH_TIME_KEY + " value: " + rawFetchTime, 0);
            invalid.initCause(e);
            throw invalid;
        }
    }

    /**
     * Builds and writes the response (or resource) record for the current
     * document. Nothing is written when the assembled payload is empty.
     *
     * @return the id assigned to the record
     * @throws IOException    if writing fails
     * @throws ParseException if the fetch time cannot be parsed
     */
    protected URI writeResponse() throws IOException, ParseException {
        WARCRecordInfo record = new WARCRecordInfo();
        record.setType(WARCConstants.WARCRecordType.response);
        record.setUrl(this.getUrl());
        record.setCreate14DigitDate(this.fetchTimeAs14DigitDate());
        record.setMimetype("application/http; msgtype=response");
        record.setRecordId(GENERATOR.getRecordID());
        String ipAddress = this.getResponseAddress();
        if (StringUtils.isNotBlank(ipAddress)) {
            record.addExtraHeader("WARC-IP-Address", ipAddress);
        }
        if (ParseSegment.isTruncated(this.content)) {
            record.addExtraHeader("WARC-Truncated", "unspecified");
        }
        ByteArrayOutputStream payload = new ByteArrayOutputStream();
        String httpHeaders = WARCUtils.fixHttpHeaders(this.metadata.get("_response.headers_"), this.content.getContent().length);
        if (StringUtils.isNotBlank(httpHeaders)) {
            // NOTE(review): getBytes() uses the platform default charset here and
            // below; left as is because the matching decode is not visible here.
            payload.write(httpHeaders.getBytes());
        } else {
            // Without HTTP headers the record is stored as a plain resource.
            record.setType(WARCConstants.WARCRecordType.resource);
            record.setMimetype(this.content.getContentType());
        }
        payload.write(this.getResponseContent().getBytes());
        record.setContentLength(payload.size());
        record.setContentStream(new ByteArrayInputStream(payload.toByteArray()));
        if (payload.size() > 0) {
            this.writer.writeRecord(record);
        }
        // NOTE(review): the id is returned even when the record was skipped above,
        // so a following request record may reference an unwritten record - confirm.
        return record.getRecordId();
    }

    /**
     * Builds and writes the request record from the {@code _request_}
     * metadata entry.
     *
     * @param id id of the related response record; when non-null it is added
     *           as a {@code WARC-Concurrent-To} header
     * @return the id assigned to the request record
     * @throws IOException    if writing fails
     * @throws ParseException if the fetch time cannot be parsed
     */
    protected URI writeRequest(URI id) throws IOException, ParseException {
        WARCRecordInfo record = new WARCRecordInfo();
        record.setType(WARCConstants.WARCRecordType.request);
        record.setUrl(this.getUrl());
        record.setCreate14DigitDate(this.fetchTimeAs14DigitDate());
        record.setMimetype("application/http; msgtype=request");
        record.setRecordId(GENERATOR.getRecordID());
        if (id != null) {
            ANVLRecord headers = new ANVLRecord();
            headers.addLabelValue("WARC-Concurrent-To", "<" + id.toString() + ">");
            record.setExtraHeaders(headers);
        }
        ByteArrayOutputStream payload = new ByteArrayOutputStream();
        payload.write(this.metadata.get(REQUEST_KEY).getBytes());
        record.setContentLength(payload.size());
        record.setContentStream(new ByteArrayInputStream(payload.toByteArray()));
        this.writer.writeRecord(record);
        return record.getRecordId();
    }

    /** Not used by the WARC format; always returns {@code null}. */
    @Override
    protected String generateJson() throws IOException {
        return null;
    }

    /** JSON hook, unsupported by the WARC format. */
    @Override
    protected void writeKeyValue(String key, String value) throws IOException {
        throw new NotImplementedException();
    }

    /** JSON hook, unsupported by the WARC format. */
    @Override
    protected void writeKeyNull(String key) throws IOException {
        throw new NotImplementedException();
    }

    /** JSON hook, unsupported by the WARC format. */
    @Override
    protected void startArray(String key, boolean nested, boolean newline) throws IOException {
        throw new NotImplementedException();
    }

    /** JSON hook, unsupported by the WARC format. */
    @Override
    protected void closeArray(String key, boolean nested, boolean newline) throws IOException {
        throw new NotImplementedException();
    }

    /** JSON hook, unsupported by the WARC format. */
    @Override
    protected void writeArrayValue(String value) throws IOException {
        throw new NotImplementedException();
    }

    /** JSON hook, unsupported by the WARC format. */
    @Override
    protected void startObject(String key) throws IOException {
        throw new NotImplementedException();
    }

    /** JSON hook, unsupported by the WARC format. */
    @Override
    protected void closeObject(String key) throws IOException {
        throw new NotImplementedException();
    }

    /**
     * Closes the WARC writer if one was created.
     *
     * @throws RuntimeException wrapping any {@link IOException} from the writer
     */
    @Override
    public void close() {
        if (this.writer == null) {
            return;
        }
        try {
            this.writer.close();
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}

