1、读取文本文件时,一条记录被换行拆成多行的情况
关键是发现规律:每条完整记录按制表符(\t)切分后都恰好有 39 个字段;字段数不是 39 的行是被拆开的片段,需要与相邻行拼接。
import csv
import glob

# Every complete record splits into exactly this many tab-separated fields.
# A physical line with any other field count is treated as a fragment of a
# record whose text contains embedded line breaks.
FIELD_COUNT = 39


def _merge_split_records(lines, field_count=FIELD_COUNT):
    """Rebuild logical records from the physical lines of a tab-separated file.

    Args:
        lines: Iterable of raw text lines (line terminators are kept, so the
            last field of each record still ends with its newline).
        field_count: Number of fields a complete record must have.

    Returns:
        A list of records, each a list of ``field_count`` strings, in the
        order they were completed.
    """
    records = []
    pending = []  # fields of the record currently being stitched together
    for line in lines:
        fields = line.split("\t")
        if len(fields) == field_count:
            # Complete record on a single physical line; a partially stitched
            # record (if any) is left untouched and keeps accumulating.
            records.append(fields)
            continue

        if pending:
            # The line break fell inside a field: glue the first piece of this
            # line onto the last pending field, the rest are new fields.
            pending[-1] += fields[0]
            pending.extend(fields[1:])
        else:
            pending = fields

        if len(pending) == field_count:
            # Flush as soon as the record is complete, so a record finished by
            # the very last line of the file is not lost.
            records.append(pending)
            pending = []
        elif len(pending) > field_count:
            # Overshot the expected width: the record is malformed. Discard it
            # so that later fragments are not silently swallowed with it.
            print(f"discarding malformed record with {len(pending)} fields")
            pending = []

    if pending:
        print(f"discarding incomplete trailing record with {len(pending)} fields")
    return records


# NOTE(review): the glob pattern and output file name below are kept exactly
# as found; they look like redacted placeholders, confirm the real paths.
aaaaa = glob.glob(r'C:\User*****化\归档_album\*')
all_datas = []
for path in aaaaa:
    print(path)
    # The drive prefix is stripped before opening, as in the original code.
    with open(path.replace("C:", ""), 'r', encoding='UTF-8') as f:
        all_datas.extend(_merge_split_records(f))

with open(r"raw_***.csv", 'w', encoding='utf-8-sig', newline="") as f:
    f_csv = csv.writer(f, dialect="excel")
    # NOTE(review): `headers` is not defined in this snippet; it must be
    # defined before this point (presumably the 39 column names).
    f_csv.writerow(headers)
    f_csv.writerows(all_datas)
2、STX 异常字符处理(STX 即 ASCII 控制字符 \x02),按该字符切分:
split("\x02")
3、将 pandas DataFrame.info() 的输出转换成可读取、可操作的格式
import io

# DataFrame.info() is pointed at an in-memory text buffer so that the report
# can be captured as a string.
buf = io.StringIO()
all_datas.info(buf=buf)

# Load the captured report into a one-column DataFrame, one row per text line.
# NOTE(review): the name `re` would shadow the stdlib `re` module if that
# module is imported elsewhere in this file.
re = buf.getvalue()
df = pd.DataFrame(re.split("\n"), columns=['info'])

# Drop the first three and the last three lines of the report (the .loc slice
# is label-based and inclusive), then split every remaining line once on
# whitespace into two columns, labelled 0 and 1.
# NOTE(review): these offsets are hard-coded to one particular layout of the
# info() report; confirm they match the installed pandas version.
df_info = (
    df.loc[3:len(df)-4, 'info']
    .str.split(n=1, expand=True)
    .reset_index(drop=True)
)

# For each line keep the first whitespace-separated token of column 1.
att = [remainder.split()[0] for remainder in df_info[1]]
pandas 布尔索引:多条件筛选
# Boolean-mask row selection: keep the rows where type == 1 AND status == 1
# AND channelid is one of 1, 2, 4, 5 AND vendor is one of 11, 12, 19.
# Series.isin expresses "equals any of these values" in one call instead of a
# chain of `==` comparisons joined with `|`.
datas[
    (datas['type'] == 1)
    & (datas['status'] == 1)
    & datas['channelid'].isin([1, 2, 4, 5])
    & datas['vendor'].isin([11, 12, 19])
]
4、str 在指定位置替换或添加字符
参考:https://www.cnblogs.com/dapenson/p/14164461.html
def replace_char(old_string, char, index):
    """Return ``old_string`` with the character at ``index`` replaced by ``char``.

    Args:
        old_string: Value to edit; it is converted with ``str()`` first.
        char: Replacement text (may be longer than one character).
        index: Position of the character to replace. Negative values count
            from the end of the string, as with normal indexing.

    Returns:
        The new string. An index past either end does not raise: because the
        result is built from slices, ``char`` is appended when
        ``index >= len`` and prepended when ``index < -len``.
    """
    text = str(old_string)
    # index == -1 needs special handling: ``text[index + 1:]`` would become
    # ``text[0:]`` (the whole string) rather than the empty remainder that
    # follows the last character.
    tail = "" if index == -1 else text[index + 1:]
    return text[:index] + char + tail
def add_char(old_string, char, index):
    """Return ``old_string`` with ``char`` inserted in front of position ``index``.

    The input is converted with ``str()`` first; nothing is removed from it.
    """
    text = str(old_string)
    # Cut the text at the insertion point and put the new text in between.
    head, tail = text[:index], text[index:]
    return head + char + tail