因为习惯了使用pandas的DataFrame数据结构,而且pandas在计算和表操作方面优势十分显著,很多时候DataFrame甚至可以当作Excel来使用;相比之下,用Python操作GIS的shp文件很不顺畅,不太符合使用习惯。因此写了DataFrame与ArcGIS地理文件相互转换的函数,处理数据时可以节约大量的思考时间。
Shp转DataFrame:
import codecs
import glob
import re

import arcpy
import pandas as pd
def Shp2dataframe(path):
    """Read an ArcGIS table or feature class into a pandas DataFrame.

    Args:
        path: Dataset path, handed to ``arcpy.ListFields`` and
            ``arcpy.SearchCursor``.

    Returns:
        A ``pandas.DataFrame`` with one column per field (named after the
        field) and one row per record. Each cell holds whatever
        ``row.getValue`` returns for that field; per the original author's
        note, the Shape field yields a geometry object.
    """
    fields = arcpy.ListFields(path)
    fieldname = [field.name for field in fields]
    table = []

    # A cursor instance can only be iterated once; a second pass over the
    # same cursor yields nothing, so all rows are collected in this loop.
    row = None
    data = arcpy.SearchCursor(path)
    try:
        for row in data:
            table.append([row.getValue(name) for name in fieldname])
    finally:
        # Drop the row and cursor references explicitly so the dataset is
        # released even when reading fails part-way through.
        del row, data

    return pd.DataFrame(table, columns=fieldname)
DataFrame转Shp:
DataFrame转Shp采用了模板形式:新建要素类时通过模板继承字段、坐标系等属性,可以更快速地完成构建。
# Convert a pandas table (e.g. one produced by Shp2dataframe) into a feature
# class; `template` is the dataset whose schema is reused.
def Dataframe2ShpTemplate(df, outpath, geoType, template):
    """Write a pandas DataFrame to a new feature class created from a template.

    Args:
        df: DataFrame to write. It must contain a 'Shape' column. A
            'SHAPE@' column holding a copy of 'Shape' is added to ``df`` in
            place (behaviour kept from the original implementation), and
            every column of ``df`` is passed to the insert cursor as a field.
        outpath: Output path. It is split at its last '/' into the workspace
            (everything up to and including that '/') and the dataset name.
        geoType: Geometry type handed to ``CreateFeatureclass_management``,
            e.g. 'POINT', 'POLYLINE', 'POLYGON', 'MULTIPOINT'.
        template: Template dataset handed to
            ``CreateFeatureclass_management``; per the original author's
            note it supplies the fields and the coordinate system.
    """
    # Split at the LAST '/' only. The previous replace()-based approach
    # removed every occurrence of the dataset name, which corrupted the
    # workspace path whenever a folder shared that name.
    workspace, separator, out_name = outpath.rpartition('/')
    out_path = workspace + separator

    arcpy.CreateFeatureclass_management(out_path, out_name, geoType, template)

    # Nothing to insert for an empty table; skipping also avoids touching a
    # cursor that was never opened.
    if len(df.index) > 0:
        # Per the original author's note, 'Shape' has to be supplied under
        # the name 'SHAPE@' for the geometry to be written.
        df['SHAPE@'] = df['Shape']
        fields = list(df.columns)

        # One cursor for all rows instead of a fresh cursor per row.
        cursor = arcpy.da.InsertCursor(outpath, fields)
        try:
            for row in df.index:
                cursor.insertRow([df[field][row] for field in fields])
        finally:
            # Release the cursor even if an insert fails.
            del cursor

    print('Pandas to shp finish!')
实例应用:
下面写一段根据GPS公交站点的txt文件构建shp数据的代码,代码如下:
def readDataFile(filetype, filename, savefile):
    """Build a feature class from a GPS bus text file and return its table.

    Args:
        filetype: 'point' or 'line'. Selects the template dataset
            (``./dealing/temple.gdb/<filetype>``) and how the file is parsed.
            Point files hold space-separated ``name x y`` records, one per
            line; line files hold space-separated ``x y`` vertices, one per
            line.
        filename: Path of the gbk-encoded text file. It must contain at
            least one digit run, which becomes part of the output name.
            NOTE(review): the path itself is decoded with
            ``filename.decode('gbk')``, so a gbk byte string (as Python 2
            ``glob`` returns on a Chinese Windows system) is assumed --
            confirm against callers.
        savefile: Workspace the output is written to; the output dataset
            name is appended to it directly, so it should end with a
            separator.

    Returns:
        The ``pandas.DataFrame`` that was written through
        ``Dataframe2ShpTemplate``.

    Raises:
        ValueError: If ``filetype`` is neither 'point' nor 'line'.
    """
    if filetype not in ('point', 'line'):
        raise ValueError(
            "filetype must be 'point' or 'line', got %r" % (filetype,))

    # Read with the 'gbk' codec so every line arrives as unicode text.
    with codecs.open(filename, encoding='gbk') as datafile:
        rawLines = datafile.readlines()
    # Drop line endings and skip blank lines, which cannot be parsed.
    pointData = [line.strip('\r\n') for line in rawLines if line.strip()]

    outputFileName = 'bus' + re.findall('[0-9]*[0-9]', filename)[0] + filetype

    # Make the output name unique inside the target workspace: keep
    # appending '_1' until no existing child carries the same name.
    existingNames = set(
        child.name for child in arcpy.Describe(savefile).children)
    while outputFileName in existingNames:
        outputFileName = outputFileName + '_1'
    print('output path is %s' % (savefile + outputFileName))

    # Template dataset used for the output schema.
    template = u'./dealing/temple.gdb/%s' % filetype

    # Derive the bus-line name from the file's base name. str.strip() removes
    # a *set of characters* from both ends, not a prefix/suffix, so it could
    # eat leading or trailing characters of the real name; cut the directory
    # part and the '<filetype>.txt' suffix explicitly instead.
    baseName = re.split(r'[\\/]', filename.decode('gbk'))[-1]
    suffix = filetype + '.txt'
    if baseName.endswith(suffix):
        baseName = baseName[:-len(suffix)]
    linename = baseName.encode('utf-8')

    # NOTE(review): DataFrame.set_value only exists in older pandas
    # releases; it is kept because the surrounding code targets that
    # environment.
    df = pd.DataFrame(columns=Shp2dataframe(template).columns)
    if filetype == 'point':
        for num, line in enumerate(pointData):
            row = line.split(' ')
            df.set_value(num, 'name', row[0])
            df.set_value(num, 'x', row[1])
            df.set_value(num, 'y', row[2])
            df.set_value(num, 'line', linename)
            # Coordinates are parsed as floats, matching the 'line' branch.
            point = arcpy.PointGeometry(
                arcpy.Point(float(row[1]), float(row[2])))
            df.set_value(num, 'Shape', point)
    else:
        # Collect every vertex, then store one polyline feature in row 0.
        pointList = []
        for eachPoint in pointData:
            coord = eachPoint.split(' ')
            pointList.append(arcpy.Point(float(coord[0]), float(coord[1])))
        df.set_value(0, 'name', linename)
        df.set_value(0, 'Shape', arcpy.Polyline(arcpy.Array(pointList)))

    # NOTE(review): the geometry type is passed as '' exactly as before;
    # confirm the created feature class gets the intended geometry type.
    Dataframe2ShpTemplate(df, savefile + outputFileName, '', template)
    return df
-------sugar---------------------sugar--------------------sugar-------------------sugar----------------sugar----------
# Collect every '*point.txt' and '*line.txt' file under ./dealing, convert
# each one into a feature class, then merge all line feature classes.
_ROAD_GDB = u'dealing/广州市道路网.gdb/'

pointfiles = glob.glob('./dealing/*point.txt')
polylinefiles = glob.glob('./dealing/*line.txt')

for pf in pointfiles:
    print(pf)
    readDataFile('point', pf, _ROAD_GDB)

for pl in polylinefiles:
    print(pl)
    df = readDataFile('line', pl, _ROAD_GDB)

# Every child of the geodatabase whose name contains 'line' takes part in
# the merge.
lineshp = arcpy.Describe(_ROAD_GDB)
linelist = [
    _ROAD_GDB + child.name
    for child in lineshp.children
    if 'line' in child.name
]
arcpy.Merge_management(linelist, _ROAD_GDB + u'0allLine')
Kanonpy