使用SQL对数据进行整理

网上下的全国 省市区 数据比较乱(http://qq704855854.blog.163.com/blog/static/19111835520142319275411/)。导入后,进行整理。

SQL数据文件: http://files.cnblogs.com/newsea/省市五级原始数据.rar

新建两个函数:

---去除括号
CREATE function [dbo].[tidy](@Name varchar(500))
 returns varchar(500)
 as begin
 
    declare @startIndex as int ;
    declare @endIndex as int ;
 
    declare @len as int ;
    declare @reverse as varchar(500) ;
    declare @luanma as varchar(200);
     
    
    set @startIndex = CHARINDEX('(',@Name) ;
    if(  @startIndex =0) return @Name ;
    
    set @len = LEN(@name) ;
    set @reverse = Reverse( @Name );
    set @endIndex =  CHARINDEX(')', @reverse ) ;
    
    
    if( @endIndex <2 ) return left(@Name, @startIndex -1 ) ;
    
  
    if( CHARINDEX('(',@reverse) < @endIndex) return left(@Name,@startIndex-1 ) ;
    
    set @luanma =  right(@Name , @endIndex - 1 ) ;
    if( @luanma = '?') return left(@Name,@startIndex-1 ) ;
    return  left(@Name ,@StartIndex-1 ) + @luanma;
 end ;
GO


CREATE function [dbo].[TrimWord] (@Name varchar(500))
 returns varchar(500)
 as begin
    declare @ret as varchar(500)
    set @ret = @Name ;
    if( @ret like '*%') begin
        set @ret = right( @ret , len(@ret) - 1) ;
    end
    
    if( @ret like '%办事处') begin
        set @ret = LEFT( @ret , len(@ret) - 3) ;
    end    
    
    if( @ret like '%街道') begin
        set @ret = LEFT( @ret , len(@ret) - 2) ;
    end
    
    if( @ret like '%行政事务管理中心') begin
        set @ret = LEFT( @ret , len(@ret) - 8) ;
    end    
    
    if( @ret like '%社会事务管理处') begin
        set @ret = LEFT( @ret , len(@ret) - 7) ;
    end        
    
    if( @ret like '%生态管理委员会') begin
        set @ret = LEFT( @ret , len(@ret) - 7) ;
    end        
    if( @ret like '%管理委员会') begin
        set @ret = LEFT( @ret , len(@ret) - 5) ;
    end    
        
    if( @ret like '%建设委员会') begin
        set @ret = LEFT( @ret , len(@ret) - 5) ;
    end        
        
    if( @ret like '%街道办事处筹备组') begin
        set @ret = LEFT( @ret , len(@ret) - 8) ;
    end        
    if( @ret like '%管理*') begin
        set @ret = LEFT( @ret , len(@ret) - 4) ;
    end        

    if( @ret like '%管委会') begin
        set @ret = LEFT( @ret , len(@ret) - 3) ;
    end        
    if( @ret like '%生活区') begin
        set @ret = LEFT( @ret , len(@ret) - 3) ;
    end        
    if( @ret like '%工矿区') begin
        set @ret = LEFT( @ret , len(@ret) - 3) ;
    end        
        
    if( LEN(@ret) <5) return @ret ;
    
    if( @ret like '%县%') begin
        set @ret = Right( @ret , LEN(@ret) -  CHARINDEX('',@ret) ) ;
    end
    
    if( LEN(@ret) <5) return @ret ;
    
    if( @ret like '%市%') begin
        set @ret = Right( @ret , LEN(@ret) -  CHARINDEX('',@ret) ) ;
    end
     
    if( LEN(@ret) <5) return @ret ;
    
    if( @ret like '%省%') begin
        set @ret = Right( @ret , LEN(@ret) -  CHARINDEX('',@ret) ) ;
    end    
    

    return @ret ;
 end ;
GO

使用如下SQL查询:

select   dbo.TrimWord( dbo.tidy( name) ) as TName ,* 
into n_Town
from town
where LEN(name) >2

继续整理

update n_Town
set TName = REPLACE(TName,'镇镇','镇')
where TName like '%镇镇'

update n_Town
set TName = REPLACE(TName,'乡乡','乡')
where TName like '%乡乡'

delete n_Town
where TName like '%\%'

n_Town 就是整理好的。其中 TName 是正确的。