资源简介

由于历史数据原因,库中存在诸多格式的手机号码,需要清洗成想要的格式。本资源包含各种正则表达式、处理这些问题的思路,以及详细的使用文档。

资源截图

代码片段和文件信息

package com.bigdata.std;

import jdk.nashorn.internal.runtime.regexp.joni.Regex;

import java.net.SocketPermission;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Cleans free-form phone-number cells into a canonical 11-digit mobile number.
 *
 * <p>Every value that cannot be turned into a valid mobile number by the rules in
 * {@link #cellValidator(String)} is replaced by the sentinel {@code "KUYU90"}.
 *
 * <p>The public pattern fields are kept (non-final, same types) for backward compatibility.
 * The validator itself uses copies of {@code pattern2}..{@code pattern4} that are compiled once
 * when the class is loaded, so the regexes are not recompiled for every cell.
 */
public class UtilClass {
    /** Sentinel returned for cells that cannot be cleaned into a valid mobile number. */
    private static final String INVALID = "KUYU90";

    /** Number of digits in a valid mobile number. */
    private static final int MOBILE_LENGTH = 11;

    /**
     * Matches characters to strip: CJK ideographs, any Unicode punctuation and a few extra
     * symbols. Non-ASCII symbols (left single quote, yen sign, multiplication sign) are written
     * as Unicode escapes so the source compiles regardless of the compiler's file encoding.
     */
    public static Pattern pattern1 = Pattern.compile("[\u4e00-\u9fa5]|[\\pP+~$\u2018^=|<>~`$^+=|<>\u00a5\u00d7]");
    /** Digits only. */
    public static String pattern2 = "[0-9]+";
    /** A valid 11-digit mobile number (known carrier prefix followed by 8 digits). */
    public static String pattern3 = "^((13[0-9])|(14[57])|(15[0-35-9])|(17[035-8])|(18[0-9])|166|198|199|(147))\\d{8}$";
    /** A single ASCII letter. */
    public static String pattern4 = "[a-zA-Z]";

    // Compiled once; must be declared after the string fields they are built from.
    private static final Pattern DIGITS_ONLY = Pattern.compile(pattern2);
    private static final Pattern VALID_MOBILE = Pattern.compile(pattern3);
    private static final Pattern ASCII_LETTER = Pattern.compile(pattern4);

    /**
     * Normalises one raw cell to a mobile number.
     *
     * <p>Rules, applied in order to the trimmed input:
     * <ol>
     *   <li>already a valid mobile number: returned as is;</li>
     *   <li>{@code null}, empty or the text "null" (any case): {@code "KUYU90"};</li>
     *   <li>digits only but not exactly 11 of them: {@code "KUYU90"};</li>
     *   <li>the first 11 characters form a valid mobile number (several numbers in one cell):
     *       those 11 characters;</li>
     *   <li>a space inside the first 11 characters: {@code "KUYU90"};</li>
     *   <li>any ASCII letter anywhere: {@code "KUYU90"};</li>
     *   <li>Chinese characters / punctuation present: they are removed and the remainder is
     *       returned if it is a valid mobile number, otherwise {@code "KUYU90"};</li>
     *   <li>dash-separated value whose first segment is shorter than 11 characters: segments
     *       are concatenated and returned if valid, otherwise {@code "KUYU90"};</li>
     *   <li>anything else is returned trimmed but otherwise unchanged.</li>
     * </ol>
     *
     * @param cell raw cell value; may be {@code null}
     * @return the cleaned mobile number, {@code "KUYU90"}, or the trimmed input when no rule applies
     */
    public static String cellValidator(String cell) {
        // A null cell is treated like the literal "null" instead of raising NullPointerException.
        if (cell == null) {
            return INVALID;
        }
        String ce = cell.trim();

        // Already a valid mobile number.
        if (VALID_MOBILE.matcher(ce).matches()) {
            return ce;
        }

        // 1.2 empty or textual "null"
        if (ce.isEmpty() || "null".equalsIgnoreCase(ce)) {
            return INVALID;
        }

        // 1.3 digits only, but not 11 of them
        if (ce.length() != MOBILE_LENGTH && DIGITS_ONLY.matcher(ce).matches()) {
            return INVALID;
        }

        // 1.4 several numbers in one cell: keep the first one.
        // The prefix length is capped so inputs shorter than 11 characters do not throw.
        String head = ce.substring(0, Math.min(MOBILE_LENGTH, ce.length())).trim();
        if (VALID_MOBILE.matcher(head).matches()) {
            return head;
        }

        // 1.5 a space inside the leading part
        if (head.contains(" ")) {
            return INVALID;
        }

        // 1.6 contains ASCII letters
        if (ASCII_LETTER.matcher(ce).find()) {
            return INVALID;
        }

        // 1.7 strip Chinese characters and punctuation, then re-validate.
        // The trimmed value is returned so no surrounding whitespace leaks into the result.
        Matcher noise = pattern1.matcher(ce);
        if (noise.find()) {
            String stripped = noise.replaceAll("").trim();
            return VALID_MOBILE.matcher(stripped).matches() ? stripped : INVALID;
        }

        // 1.8 values such as 0546-6443144 or 138-8766-2238: drop the dashes.
        // With the default pattern1, '-' is punctuation and is already handled by step 1.7,
        // so this branch only runs if pattern1 has been replaced by the caller.
        if (ce.length() > MOBILE_LENGTH && ce.contains("-")) {
            String[] segments = ce.split("-");
            // split() returns an empty array when the value consists of dashes only.
            if (segments.length > 0 && segments[0].trim().length() < MOBILE_LENGTH) {
                StringBuilder joined = new StringBuilder();
                for (int i = 0; i < segments.length; i++) {
                    joined.append(segments[i].trim());
                }
                String candidate = joined.toString().trim();
                return VALID_MOBILE.matcher(candidate).matches() ? candidate : INVALID;
            }
        }

        // NOTE(review): values that match none of the rules above (for example an 11-digit
        // string with an unknown prefix) are passed through unchanged, as in the original
        // code - confirm whether these should map to "KUYU90" instead.
        return ce;
    }
}

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2018-12-06 18:04  手机号清洗UDF(hiveimpala)\
     目录           0  2018-12-06 18:03  手机号清洗UDF(hiveimpala)\cellPhoneUDF\
     目录           0  2018-12-06 18:03  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\
     文件         636  2018-11-29 14:16  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\compiler.xml
     目录           0  2018-12-06 18:05  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\inspectionProfiles\
     目录           0  2018-12-06 18:03  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\
     文件         465  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__antlr_antlr_2_7_7.xml
     文件         517  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__aopalliance_aopalliance_1_0.xml
     文件         429  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__asm_asm_3_1.xml
     文件         548  2018-11-29 17:44  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__com_google_code_findbugs_jsr305_3_0_0.xml
     文件         516  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__com_google_guava_guava_14_0_1.xml
     文件         599  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__com_google_inject_extensions_guice_servlet_3_0.xml
     文件         499  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__com_google_inject_guice_3_0.xml
     文件         577  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__com_google_protobuf_protobuf_java_2_5_0.xml
     文件         548  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__com_jolbox_bonecp_0_8_0_RELEASE.xml
     文件         572  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__com_sun_jersey_contribs_jersey_guice_1_9.xml
     文件         543  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__com_sun_jersey_jersey_client_1_9.xml
     文件         529  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__com_sun_jersey_jersey_core_1_9.xml
     文件         529  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__com_sun_jersey_jersey_json_1_9.xml
     文件         543  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__com_sun_jersey_jersey_server_1_9.xml
     文件         551  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__com_sun_xml_bind_jaxb_impl_2_2_3_1.xml
     文件         563  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__com_thoughtworks_paranamer_paranamer_2_3.xml
     文件         622  2018-11-29 17:44  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__com_twitter_parquet_hadoop_bundle_1_6_0rc3.xml
     文件         517  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__commons_cli_commons_cli_1_2.xml
     文件         539  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__commons_codec_commons_codec_1_4.xml
     文件         605  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__commons_collections_commons_collections_3_1.xml
     文件         528  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__commons_dbcp_commons_dbcp_1_4.xml
     文件         608  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__commons_httpclient_commons_httpclient_3_0_1.xml
     文件         506  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__commons_io_commons_io_2_4.xml
     文件         528  2018-11-29 17:41  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__commons_lang_commons_lang_2_6.xml
     文件         575  2018-11-29 14:19  手机号清洗UDF(hiveimpala)\cellPhoneUDF\.idea\libraries\Maven__commons_logging_commons_logging_1_1_3.xml
............此处省略116个文件信息

评论

共有 条评论