# Merging DataFrames: concatenating along an axis with concat

from pandas import DataFrame,Series
import pandas as pd
import numpy as np

# Build a 3x4 integer matrix to demonstrate concatenation at the NumPy level.
arr = np.arange(12).reshape(3, 4)
print(arr)
"""
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
"""
# Joining along axis=1 places the second copy to the right of the first.
side_by_side = np.concatenate((arr, arr), axis=1)
print(side_by_side)
"""
[[ 0  1  2  3  0  1  2  3]
 [ 4  5  6  7  4  5  6  7]
 [ 8  9 10 11  8  9 10 11]]
"""

# Three Series whose index labels do not overlap.
s1 = Series([0, 1], index=list('ab'))
s2 = Series([2, 3, 4], index=list('cde'))
s3 = Series([5, 6], index=list('fg'))
print(s1)
"""
a    0
b    1
dtype: int64
"""
print(s2)
"""
c    2
d    3
e    4
dtype: int64
"""
print(s3)
"""
f    5
g    6
dtype: int64
"""
# By default concat works along axis=0 and glues the pieces into one new Series.
stacked = pd.concat([s1, s2, s3])
print(stacked)
"""
a    0
b    1
c    2
d    3
e    4
f    5
g    6
dtype: int64
"""
# Passing axis=1 (the column axis) turns the result into a DataFrame instead:
# each input Series becomes one column, and labels missing from a Series show as NaN.
as_columns = pd.concat([s1, s2, s3], sort=True, axis=1)
print(as_columns)
"""
     0    1    2
a  0.0  NaN  NaN
b  1.0  NaN  NaN
c  NaN  2.0  NaN
d  NaN  3.0  NaN
e  NaN  4.0  NaN
f  NaN  NaN  5.0
g  NaN  NaN  6.0
"""

# Two Series whose indexes overlap on the labels 'a' and 'b'.
s4 = Series([0, 1], index=['a', 'b'])
s5 = Series([2, 3, 4], index=['a', 'b', 'c'])
print(pd.concat([s4, s5]))  # concatenation along axis=0; duplicate labels are kept
'''
a    0
b    1
a    2
b    3
c    4
dtype: int64
'''
print(pd.concat([s4, s5], axis=1, sort=True))  # outer join: union of the indexes
'''
     0  1
a  0.0  2
b  1.0  3
c  NaN  4
'''
print(pd.concat([s4, s5], axis=1, join='inner'))  # inner join: intersection of the indexes
'''
   0  1
a  0  2
b  1  3
'''
# Force a specific row index on the result.
# The old `join_axes=` keyword of pd.concat was deprecated in pandas 0.25 and
# removed in 1.0 (it now raises TypeError); reindexing the concatenated frame
# is the documented replacement and yields the same table.
aligned = pd.concat([s4, s5], axis=1).reindex(['a', 'b', 'c', 'd'])
print(aligned)
'''
     0    1
a  0.0  2.0
b  1.0  3.0
c  NaN  4.0
d  NaN  NaN
'''
# Supplying keys labels each concatenated piece, so the pieces remain
# distinguishable in the result (the keys form an outer index level).
pieces = [s1, s2, s3]
labels = ['A', 'B', 'C']
print(pd.concat(pieces, keys=labels))
"""
A  a    0
   b    1
B  c    2
   d    3
   e    4
C  f    5
   g    6
dtype: int64
"""
# With axis=1 the keys become the column headers of the resulting DataFrame.
print(pd.concat(pieces, axis=1, keys=labels, sort=True))
"""
     A    B    C
a  0.0  NaN  NaN
b  1.0  NaN  NaN
c  NaN  2.0  NaN
d  NaN  3.0  NaN
e  NaN  4.0  NaN
f  NaN  NaN  5.0
g  NaN  NaN  6.0
"""

# Two small frames: df2 shares row labels 'a' and 'c' with df1 but has different columns.
df1 = DataFrame(
    np.arange(6).reshape(3, 2),
    index=list('abc'),
    columns=['one', 'two'],
)
df2 = DataFrame(
    np.arange(4).reshape(2, 2),
    index=['a', 'c'],
    columns=['three', 'four'],
)
print(df1)
"""
   one  two
a    0    1
b    2    3
c    4    5
"""
print(df2)
"""
   three  four
a      0     1
c      2     3
"""
# Default axis=0: rows are stacked; columns absent from a frame are filled with NaN.
frames = [df1, df2]
print(pd.concat(frames, sort=True))
"""
   four  one  three  two
a   NaN  0.0    NaN  1.0
b   NaN  2.0    NaN  3.0
c   NaN  4.0    NaN  5.0
a   1.0  NaN    0.0  NaN
c   3.0  NaN    2.0  NaN
"""
# axis=1: frames are placed side by side, aligned on the row labels.
print(pd.concat(frames, sort=True, axis=1))
"""
   one  two  three  four
a    0    1    0.0   1.0
b    2    3    NaN   NaN
c    4    5    2.0   3.0
"""
# keys adds an outer column level per frame; names labels the two column levels.
print(
    pd.concat(
        frames,
        axis=1,
        keys=['level1', 'level2'],
        names=['upper', 'lower'],
        sort=True,
    )
)
"""
upper level1     level2     
lower    one two  three four
a          0   1    0.0  1.0
b          2   3    NaN  NaN
c          4   5    2.0  3.0
"""