• 大小: 10.32MB
    文件类型: .rar
    金币: 1
    下载: 0 次
    发布日期: 2023-10-16
  • 语言: 其他
  • 标签: nifi  processor  a  tag  html  

资源简介

这是一个自定义 NiFi Processor 的示例工程,其中提供了五个示例处理器,功能包括:提取 URL 页面中所有的 a 标签,以及通过 Redis 进行过滤、去重等。

资源截图

代码片段和文件信息

package hylanda.processors.getatag;

import java.io.BufferedOutputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.io.IOUtils;
import org.apache.nifi.annotation.behavior.ReadsAttribute;
import org.apache.nifi.annotation.behavior.ReadsAttributes;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.SeeAlso;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONobject;

import hylanda.processors.getatag.util.GenUrlCrc64;
import hylanda.processors.getatag.util.IpConfig;


@Tags({“hylandaxinwenduchaurljson“})
@CapabilityDescription(“新闻督察用同一个taskid下的多个结果url分解成jsonarray“)
@SeeAlso({})
@ReadsAttributes({@ReadsAttribute(attribute=““ description=““)})
@WritesAttributes({@WritesAttribute(attribute=““ description=““)})
public class XWDCAttributeToJSON extends AbstractProcessor{

public static final PropertyDescriptor LIST = new PropertyDescriptor
     .Builder().name(“list“)
     .displayName(“list“)
     .description(“url list!“)
     .required(true)
     .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
     .build();
public static final PropertyDescriptor TASK_ID = new PropertyDescriptor
     .Builder().name(“task_id“)
     .displayName(“task_id“)
     .description(“task id!“)
     .required(true)
     .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
     .build();

public static final PropertyDescriptor SOURCE_URL = new PropertyDescriptor
     .Builder().name(“source_url“)
     .displayName(“source_url“)
     .description(“source_url!“)
     .required(true)
     .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
     .build();
public static final PropertyDescriptor SITE_ID = new PropertyDescriptor
     .Builder().name(“site_id“)
     .displayName(“site_id“)
     .description(“site_id!“)
     .required(true)
     .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
     .build();

public static final Relationship SUCCESS = new Relationship.Builder

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----

     文件        445  2017-12-29 17:03  nifi-get-a-tag-processor\.classpath

     文件         30  2018-01-03 17:55  nifi-get-a-tag-processor\.git\COMMIT_EDITMSG

     文件        288  2018-01-03 14:44  nifi-get-a-tag-processor\.git\config

     文件         23  2018-01-03 14:09  nifi-get-a-tag-processor\.git\HEAD

     文件      14296  2018-01-03 17:55  nifi-get-a-tag-processor\.git\index

     文件        473  2018-01-03 17:55  nifi-get-a-tag-processor\.git\logs\HEAD

     文件        473  2018-01-03 17:55  nifi-get-a-tag-processor\.git\logs\refs\heads\master

     文件        280  2018-01-03 17:55  nifi-get-a-tag-processor\.git\logs\refs\remotes\origin\master

     文件        193  2018-01-03 17:55  nifi-get-a-tag-processor\.git\objects\00\e57a9597e86fe34f895a030a465b3ea28b225e

     文件      37332  2018-01-03 14:13  nifi-get-a-tag-processor\.git\objects\01\3626653a35f0ce72d2cd7253e49b83939a1bc4

     文件        218  2018-01-03 14:13  nifi-get-a-tag-processor\.git\objects\01\7d6e1bd764b633947a5e24278d19cbf174867b

     文件         50  2018-01-03 14:13  nifi-get-a-tag-processor\.git\objects\02\2a6083a236568e25bfb2a5dca37b9ca091cce8

     文件       1038  2018-01-03 14:13  nifi-get-a-tag-processor\.git\objects\02\b47211114674c02371b5cee27551dd1a2ed908

     文件       3359  2018-01-03 14:13  nifi-get-a-tag-processor\.git\objects\04\a77742fe62380edfe61818a77d4b15dbc079b2

     文件       1238  2017-12-28 16:56  nifi-get-a-tag-processor\.git\objects\04\b29fd9934e9eb21980b581cf65a8e0da014f81

     文件         91  2018-01-03 14:13  nifi-get-a-tag-processor\.git\objects\04\f424ef708fa7e07a7641ea2123672584f55995

     文件        159  2017-12-28 16:56  nifi-get-a-tag-processor\.git\objects\07\b0585d480d09cb06d87642f1e279bab66489b9

     文件         52  2018-01-03 17:55  nifi-get-a-tag-processor\.git\objects\08\33a204985d7e830ab31807c1b2bbfb6179e30f

     文件       2343  2018-01-03 14:13  nifi-get-a-tag-processor\.git\objects\09\b945da29d0794f8cc77e1aaf5a5a9e0c0666e0

     文件        159  2018-01-03 14:13  nifi-get-a-tag-processor\.git\objects\0b\96c41a7f2615b3719f2aa888328c0a10259ea3

     文件         50  2018-01-03 14:13  nifi-get-a-tag-processor\.git\objects\0b\bc16f746404e293fb8868cdbfe060f46e792a4

     文件        349  2018-01-03 14:13  nifi-get-a-tag-processor\.git\objects\0c\8d41dd940b459b97ce3ec07f4a0a6095f5113d

     文件        776  2018-01-03 14:13  nifi-get-a-tag-processor\.git\objects\0d\eb33ed65a45ed60c4b662e6102b5b8f399688b

     文件         63  2018-01-03 14:13  nifi-get-a-tag-processor\.git\objects\0e\5f38d43703317212bcbeb061795eb6f20a67d2

     文件         64  2017-12-28 16:56  nifi-get-a-tag-processor\.git\objects\0e\96ba8f9503894dcf53b6d47246f86e5412a6ba

     文件        184  2018-01-03 14:43  nifi-get-a-tag-processor\.git\objects\0e\d5a0cf3ef6e7d941013dee1c11fc7f297a7eb5

     文件         49  2018-01-03 14:13  nifi-get-a-tag-processor\.git\objects\0e\e831dfe236bf63051eddf373b2e71f779a671d

     文件        211  2018-01-03 14:43  nifi-get-a-tag-processor\.git\objects\0f\70175a6f469401a589726d9bd2aaa8b3324b8c

     文件     189890  2018-01-03 14:13  nifi-get-a-tag-processor\.git\objects\10\7b061f5fa84cecca645eae9bdefc98e084ce03

     文件        165  2018-01-03 14:43  nifi-get-a-tag-processor\.git\objects\12\207cce229a0441c9f93ac7a433124d707d8a89

……此处省略 596 个文件信息

评论

共有 条评论