好记性不如烂笔头31-java应用中的敏感词过滤实现(3)

好记性不如烂笔头31-java应用中的敏感词过滤实现(3)

敏感词过滤,国内混的同学看到这个都会会心一笑。其实敏感词过滤,在几乎所有国家都是存在的,只是表现的形式并不完全相同而已。

既然这个功能叫做敏感词过滤,那么把它做在过滤器(Filter)中,应该是一个好主意。

1、利用过滤器(Filter)在JAVA中实现敏感信息过滤

过滤器的JAVA代码:

package com.filter;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import javax.servlet.Filter;
import javax.servlet.FilterChain;
import javax.servlet.FilterConfig;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper;
import javax.servlet.http.HttpServletResponse;

/**

 *一个简单的敏感词过滤器,这里针对从GET的方式做了转码,如果用POST方式,将ISO8859-1字符转换为UTF-8下面一段要注释掉

 *@author 范芳铭

 */

public class EasyDirtyFilter implementsFilter{

   private FilterConfig config = null;

   

   @Override

   public void init(FilterConfig filterConfig) throws ServletException {

       System.out.println("----过滤器初始化----");

       this.config = filterConfig;

   }

 

   //过滤器功能在这里实现

   @Override

   public void doFilter(ServletRequest req, ServletResponse resp,

            FilterChain chain) throwsIOException, ServletException {

       HttpServletRequest request = (HttpServletRequest) req;

       HttpServletResponse response = (HttpServletResponse) resp;

       String charset = "UTF-8";

       request.setCharacterEncoding(charset);

       response.setCharacterEncoding(charset);

       response.setContentType("text/html;charset="+charset);

       

       DirtyRequest dirtyreq = new DirtyRequest(request);

       

       chain.doFilter(dirtyreq, response);

   }

 

   @Override

   public void destroy() {

       System.out.println("----过滤器销毁----");

   }

   

   private List<String> getDirtyWords(){

       List<String> dirtyWords = new ArrayList<String>();

       String dirtyWordPath = config.getInitParameter("dirtyword");

       InputStream inputStream =config.getServletContext().getResourceAsStream(dirtyWordPath);

       InputStreamReader is = null;

       try {

            is = newInputStreamReader(inputStream,"UTF-8");

       } catch (UnsupportedEncodingException e2) {

            e2.printStackTrace();

       }

       BufferedReader reader = new BufferedReader(is);

       String line;

       try {

           while ((line =reader.readLine())!= null) {//如果 line为空说明读完了

                dirtyWords.add(line);

            }

       } catch (IOException e) {

            e.printStackTrace();

       }

       return dirtyWords;

   }

 

   //使用Decorator模式包装request对象,实现敏感字符过滤功能

   class DirtyRequest extends HttpServletRequestWrapper{

 

       private List<String> dirtyWords = getDirtyWords();

       private HttpServletRequest request;

       public DirtyRequest(HttpServletRequest request) {

            super(request);

            this.request = request;

       }

       //重写getParameter方法,实现对敏感字符的过滤

       @Override

       public String getParameter(String name) {

            String value =this.request.getParameter(name);

            //如果get的方式提交表单,通过request.setCharacterEncoding("UTF-8");这种方式是解决不了中文乱码问题

            //参考:http://blog.csdn.net/ffm83/article/details/43229819

            if(value==null){

                return null;

            }

            //将ISO8859-1字符转换为UTF-8

            try {

                value=new String(value.getBytes("ISO8859-1"),"UTF-8") ;

            }catch (UnsupportedEncodingException e) {

                //TODO Auto-generated catch block

                e.printStackTrace();

            }

 

            for(String dirtyWord : dirtyWords){

               if(value.contains(dirtyWord)){

                    System.out.println("内容中包含敏感词:"+dirtyWord+",将会被替换成****");

                    //替换敏感字符

                    value =value.replace(dirtyWord, "****");

                }

            }

            return value;

       }

   }

}

2、将过滤器添加到web.xml

    <!--配置过滤器 -->

    <filter>

        <filter-name>easyFilter</filter-name>

        <filter-class>com.filter.EasyDirtyFilter2</filter-class>

     <!-- 配置要过滤的敏感字符文件 -->

     <init-param>

        <param-name>dirtyword</param-name>   

        <param-value>/WEB-INF/dirtyword.txt</param-value>

   </init-param>

    </filter>

 

    <!--映射过滤器 -->

    <filter-mapping>

        <filter-name>easyFilter</filter-name>

        <!--“/*”表示拦截所有的请求-->

        <url-pattern>/*</url-pattern>

    </filter-mapping>  

        <servlet>

        <servlet-name>dirty</servlet-name>

        <servlet-class>com.servlet.RequestDirty</servlet-class>

    </servlet>

 

    <servlet-mapping>

        <servlet-name>dirty</servlet-name>

        <url-pattern>/dirty</url-pattern>

    </servlet-mapping>

 

 

3、测试用的servlet源代码

package com.servlet;

 

import java.io.IOException;

import java.io.PrintWriter;

 

import javax.servlet.ServletException;

import javax.servlet.http.HttpServlet;

import javax.servlet.http.HttpServletRequest;

import javax.servlet.http.HttpServletResponse;

 

/**

 *从外部获取信息,如果有敏感词,那么需要过滤

 *@author 范芳铭

 */

public class RequestDirty extendsHttpServlet {

    publicvoid doGet(HttpServletRequest request, HttpServletResponse response)

            throwsServletException, IOException {     

        Stringinfo = request.getParameter("info");

        //在过滤器里进行了转码,这里就不要再进行转码

        //info=new String(info.getBytes("ISO8859-1"),"UTF-8") ;

        System.out.println(info);

       

        PrintWriterout = response.getWriter();

       out.write("获得信息如下:" + info);

 

    }

 

    publicvoid doPost(HttpServletRequest request, HttpServletResponse response)

            throwsServletException, IOException {

        doGet(request,response);

    }

}

 

4、其他

还需要一个敏感词文件 dirtyword.txt,放在WEB-INF目录下(UTF-8编码,每行一个敏感词)。

关键词文件如下:(仅供示例,无任何含义)

粗话

黑人

黑鬼

 

5、测试

在浏览器输入:

http://127.0.0.1:8080/webStudy/dirty?info=黑人是美国无产阶级的成员

页面输出结果:获得信息如下:****是美国无产阶级的成员

后台输出情况:

内容中包含敏感词:黑人,将会被替换成****

****是美国无产阶级的成员