package us.codecraft.webmagic.processor;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.utils.UrlUtils;

/* loaded from: input_file:WEB-INF/lib/webmagic-core-0.5.3.jar:us/codecraft/webmagic/processor/SimplePageProcessor.class */
/**
 * A minimal {@link PageProcessor} that follows every link matching a
 * wildcard URL pattern and stores three fields for each processed page:
 * {@code title}, {@code html} and {@code content}.
 *
 * <p>The URL pattern is a glob-style expression in which {@code *} is the
 * only wildcard; every other character is matched literally.
 */
public class SimplePageProcessor implements PageProcessor {
    /** Regex substituted for each {@code *}: any run of characters except quotes and {@code #}. */
    private static final String WILDCARD_REGEX = "[^\"'#]*";

    /** Characters that have a special meaning in a regex and must be escaped to match literally. */
    private static final String REGEX_META_CHARS = "\\.[]{}()+?^$|";

    /** Regex (wrapped in one capturing group) used to select the links to follow. */
    private final String urlPattern;

    /** Site configuration built from the start URL. */
    private final Site site;

    /**
     * Creates a processor that starts at {@code startUrl} and follows links
     * matching {@code urlPattern}.
     *
     * @param startUrl   the first URL to fetch; the site's domain is derived from it
     * @param urlPattern glob-style pattern where {@code *} is a wildcard,
     *                   e.g. {@code http://example.com/blog/*}
     * @throws NullPointerException if either argument is {@code null}
     */
    public SimplePageProcessor(String startUrl, String urlPattern) {
        if (startUrl == null) {
            throw new NullPointerException("startUrl");
        }
        if (urlPattern == null) {
            throw new NullPointerException("urlPattern");
        }
        this.site = Site.me().addStartUrl(startUrl).setDomain(UrlUtils.getDomain(startUrl));
        this.urlPattern = compileWildcard(urlPattern);
    }

    /**
     * Translates a glob-style pattern into a regex wrapped in a single
     * capturing group.
     *
     * <p>Every regex metacharacter is escaped, not only {@code .}, so that
     * URLs containing characters such as {@code ?} or {@code +} are matched
     * literally instead of being interpreted as regex operators.
     */
    private static String compileWildcard(String glob) {
        StringBuilder regex = new StringBuilder(glob.length() + 16);
        regex.append('(');
        for (int i = 0; i < glob.length(); i++) {
            char c = glob.charAt(i);
            if (c == '*') {
                regex.append(WILDCARD_REGEX);
            } else {
                if (REGEX_META_CHARS.indexOf(c) >= 0) {
                    regex.append('\\');
                }
                regex.append(c);
            }
        }
        return regex.append(')').toString();
    }

    /**
     * Queues every link on the page that matches the URL pattern and records
     * the page's {@code title}, {@code html} and {@code content} fields.
     *
     * @param page the downloaded page to process
     */
    @Override
    public void process(Page page) {
        // Follow only the links that match the configured pattern.
        page.addTargetRequests(page.getHtml().links().regex(this.urlPattern).all());
        page.putField("title", page.getHtml().xpath("//title"));
        page.putField("html", page.getHtml().toString());
        page.putField("content", page.getHtml().smartContent());
    }

    /**
     * Returns the site configuration created in the constructor.
     *
     * @return the site settings for this processor
     */
    @Override
    public Site getSite() {
        return this.site;
    }
}
