def Cityidentification(querydict, name):
    """Normalize a county or minor place name to its city name.

    The lookup is a substring match: the first key of ``querydict`` (in
    dictionary order) that contains ``name`` decides the result.

    Args:
        querydict: Mapping whose keys are place names and whose values are
            the city names they should be normalized to.
        name: Place name to look up.

    Returns:
        The value of the first key containing ``name``; ``name`` itself when
        no key contains it or when ``name`` is the empty string.
    """
    # An empty string is a substring of every key, so without this guard it
    # would be mapped to whichever city happens to come first in querydict.
    if name == "":
        return name
    for place, city in querydict.items():
        if name in place:
            return city
    return name
# Clean and normalize the station names of each line (各线路站点名称清洗、归一化)
def _normalize_station(station):
    """Drop a trailing 东/南/西/北 from names that are at least 3 characters long."""
    if len(station) >= 3 and station[-1] in ('东', '南', '西', '北'):
        return station[:-1]
    return station


# Split the '沿途主要车站' column on '、' and strip the trailing compass
# character from each station name (shorter names are left untouched).
data['station'] = data['沿途主要车站'].apply(
    lambda x: [_normalize_station(i) for i in x.split('、')]
)
# Map every station name through Cityidentification using citydict.
data['station'] = data['station'].apply(
    lambda x: [Cityidentification(citydict, i) for i in x]
)


def cleancity(lst):
    """Keep only the entries of ``lst`` that appear among ``citydict``'s values.

    Args:
        lst: Iterable of names to filter.

    Returns:
        A list with the retained entries in their original order
        (duplicates are kept).
    """
    # A set gives O(1) membership tests; the values were already required to
    # be hashable by the original set() call.
    cities = set(citydict.values())
    return [i for i in lst if i in cities]
def getedgelist(data):
    """Build a weighted, directed edge list from station sequences.

    Every pair of consecutive entries in a sequence forms a link; links whose
    two ends are equal are discarded. Identical ordered pairs are merged and
    counted.

    Args:
        data: Iterable of station sequences (the column of stations along
            each line). Entries must be hashable and mutually sortable.

    Returns:
        A pandas DataFrame with columns ``'from'``, ``'to'`` and ``'weight'``,
        one row per distinct ordered pair, sorted ascending by (from, to),
        where ``weight`` is the number of times the pair occurs.
    """
    # Local import: the file's import block is not visible from this chunk.
    from collections import Counter

    # Consecutive pairs; sequences with fewer than two entries yield nothing.
    links = sorted(
        (a, b)
        for route in data
        for a, b in zip(route, route[1:])
        if a != b
    )
    # Counter keeps first-seen order, so rows stay in sorted order.
    counts = Counter(links)
    rows = [[a, b, n] for (a, b), n in counts.items()]
    return pd.DataFrame(rows, columns=['from', 'to', 'weight'])