Add logic for writing data to a CSV file
This commit is contained in:
parent 7fcc377a27
commit 6fe643638e
@@ -1,4 +1,5 @@
 import os
+import csv
 import pandas as pd
 from dbfread import DBF
 from itertools import islice
@@ -13,11 +14,11 @@ class DbfToPostgresCtllogPipeline(BasePipeline):
     def __init__(self, config):
         super().__init__(config)
         # todo: enable for local debugging
-        # self.data_root = 'D:\disney_test'
-        # self.mapping_file = 'D:\disney_test\disney-mapping.xlsx'
+        self.data_root = 'D:\disney_test'
+        self.mapping_file = 'D:\disney_test\disney-mapping-v2.xlsx'
         # todo: enable for local debugging
-        self.data_root = os.getenv('DATA_PVC_MOUNT_PATH', '/data')
-        self.mapping_file = os.getenv('MAPPING_FILE')
+        # self.data_root = os.getenv('DATA_PVC_MOUNT_PATH', '/data')
+        # self.mapping_file = os.getenv('MAPPING_FILE')

         self.dbf_dir = os.getenv('DBF_INPUT_DIR', os.path.join(self.data_root, 'dbf-input'))

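Note on the hard-coded paths: 'D:\disney_test' only works because \d happens not to be a recognized escape sequence; newer Python versions emit a deprecation warning for such strings, and a path segment starting with \t or \n would be silently corrupted. A minimal sketch of the safer raw-string form, using the same local-debug paths as above (purely illustrative, not part of the commit):

    # Raw strings keep every backslash literal, so the path cannot be
    # mangled by escape processing.
    self.data_root = r'D:\disney_test'
    self.mapping_file = r'D:\disney_test\disney-mapping-v2.xlsx'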
@@ -46,7 +47,7 @@ class DbfToPostgresCtllogPipeline(BasePipeline):
         # Clean the data
         mapping_df = mapping_df.dropna(subset=['AS_SERIAL', 'ID', 'data_field_sequence_id'])
         mapping_df = mapping_df[['AS_SERIAL', 'ID', 'data_field_sequence_id',
-                                 'control_group_controller_id', 'controller_point_id']]
+                                 'control_group_controller_id', 'controller_point_id', 'control_group_name', 'control_group_id']]

         # Build the mapping dictionary
         mapping_dict = {}
@@ -58,7 +59,9 @@ class DbfToPostgresCtllogPipeline(BasePipeline):
                 mapping_dict[key].append({
                     'seq_id': int(row['data_field_sequence_id']),
                     'control_group_controller_id': int(row['control_group_controller_id']),
-                    'point_id': int(row['controller_point_id'])
+                    'point_id': int(row['controller_point_id']),
+                    'control_group_name': row['control_group_name'],
+                    'control_group_id': int(row['control_group_id'])
                 })

         self.logger.info(f"Loaded {len(mapping_dict)} mapping entries")
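For context, the loop around this append lies outside the hunk; it presumably iterates the cleaned mapping_df and groups rows by a key. A minimal sketch of that grouping pattern, assuming (this is a guess, not shown in the diff) that the key is built from the AS_SERIAL and ID columns kept by the selection above:

    from collections import defaultdict

    mapping_dict = defaultdict(list)
    for _, row in mapping_df.iterrows():
        # Assumed key shape; the real key construction is outside this hunk.
        key = (str(row['AS_SERIAL']), int(row['ID']))
        mapping_dict[key].append({
            'seq_id': int(row['data_field_sequence_id']),
            'control_group_controller_id': int(row['control_group_controller_id']),
            'point_id': int(row['controller_point_id']),
            'control_group_name': row['control_group_name'],
            'control_group_id': int(row['control_group_id']),
        })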
@@ -73,10 +76,18 @@ class DbfToPostgresCtllogPipeline(BasePipeline):

         # Connect to the database
         # todo: enable when debugging locally
-        db_config = self.config.get_database_config()
-        self.db = Database(**db_config)
+        # db_config = self.config.get_database_config()
+        # self.db = Database(**db_config)
         # todo: enable when debugging locally
+
+        self.csv_file_path = 'D:\disney_test\debug_controller_log.csv'
+        # Initialize the CSV file
+        if not os.path.exists(self.csv_file_path):
+            with open(self.csv_file_path, 'w') as f:
+                csv.writer(f).writerow(
+                    ['created', 'control_group_controller_id', 'point_id', 'real_value', 'control_group_name',
+                     'control_group_id'])

         # Process the files
         total_processed = 0
         for filename in os.listdir(self.dbf_dir):
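One detail worth noting: the header row is written without newline='', while the appends later in this diff pass it. The csv module's documentation asks for newline='' on any file handed to a writer; without it, Windows can produce an extra \r in the header row. A sketch of a consistent initializer with the same columns as the commit:

    # Create the file with a header row only once; newline='' lets the
    # csv module control line endings on every platform.
    if not os.path.exists(self.csv_file_path):
        with open(self.csv_file_path, 'w', newline='') as f:
            csv.writer(f).writerow(
                ['created', 'control_group_controller_id', 'point_id',
                 'real_value', 'control_group_name', 'control_group_id'])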
@@ -88,7 +99,7 @@ class DbfToPostgresCtllogPipeline(BasePipeline):

         # Close the database connection
         # todo: enable when debugging locally
-        self.db.disconnect()
+        # self.db.disconnect()
         # todo: enable when debugging locally
         return total_processed

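Because run() pairs a connect at the top with this disconnect at the bottom, an exception raised while a DBF file is being processed would skip the disconnect and leak the connection. A sketch of the usual try/finally guard, using the Database helper as constructed earlier (process_files is a hypothetical stand-in for the file loop, not a method from this repo):

    db_config = self.config.get_database_config()
    self.db = Database(**db_config)
    try:
        total_processed = self.process_files()  # hypothetical stand-in for the loop body
    finally:
        self.db.disconnect()  # runs even if a DBF file raises mid-batch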
@@ -146,17 +157,24 @@ class DbfToPostgresCtllogPipeline(BasePipeline):
                     formatted_time,
                     mapping['control_group_controller_id'],
                     mapping['point_id'],
-                    float(value)
+                    float(value),
+                    mapping['control_group_name'],
+                    mapping['control_group_id']
                 ))

                 # Batch insert
                 if len(batch_data) >= batch_size:
                     # todo: commented out for local debugging
-                    self.db.execute_batch(
-                        "INSERT INTO controller_log (created, control_group_controller_id, point_id, real_value) VALUES (%s, %s, %s, %s)",
-                        batch_data
-                    )
+                    # self.db.execute_batch(
+                    #     "INSERT INTO controller_log (created, control_group_controller_id, point_id, real_value) VALUES (%s, %s, %s, %s)",
+                    #     batch_data
+                    # )
                     # todo: commented out for local debugging
+
+                    # Append the batch to the CSV file
+                    with open(self.csv_file_path, 'a', newline='') as f:
+                        csv.writer(f).writerows(batch_data)

                     processed_records += len(batch_data)
                     self.logger.debug(f"Processed {processed_records} records from {os.path.basename(file_path)}")
                     batch_data = []
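One thing to flag before the database path is re-enabled: each batch_data row now carries six values, but the commented-out INSERT still names four columns and four %s placeholders, so restoring it unchanged would fail with a parameter-count error. A sketch of the statement widened to match, assuming controller_log has (or will gain) the two extra columns:

    self.db.execute_batch(
        "INSERT INTO controller_log "
        "(created, control_group_controller_id, point_id, real_value, "
        "control_group_name, control_group_id) "
        "VALUES (%s, %s, %s, %s, %s, %s)",
        batch_data
    )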
@@ -168,11 +186,16 @@ class DbfToPostgresCtllogPipeline(BasePipeline):
             # Insert the remaining records
             if batch_data:
                 # todo: commented out for local debugging
-                self.db.execute_batch(
-                    "INSERT INTO controller_log (created, control_group_controller_id, point_id, real_value) VALUES (%s, %s, %s, %s)",
-                    batch_data
-                )
+                # self.db.execute_batch(
+                #     "INSERT INTO controller_log (created, control_group_controller_id, point_id, real_value) VALUES (%s, %s, %s, %s)",
+                #     batch_data
+                # )
                 # todo: commented out for local debugging
+
+                # Append the remaining records to the CSV file
+                with open(self.csv_file_path, 'a', newline='') as f:
+                    csv.writer(f).writerows(batch_data)
+
                 processed_records += len(batch_data)
                 self.logger.debug(f"Processed {processed_records} records from {os.path.basename(file_path)}")

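Finally, the batch-flush logic (CSV append, counter update, debug log) is now duplicated between the in-loop flush and this tail flush. A small helper would keep the two paths identical; a sketch, not part of the commit:

    def _flush_batch(self, batch_data, file_path, processed_records):
        # Single place for the CSV append so the in-loop and tail
        # flushes cannot drift apart.
        with open(self.csv_file_path, 'a', newline='') as f:
            csv.writer(f).writerows(batch_data)
        processed_records += len(batch_data)
        self.logger.debug(f"Processed {processed_records} records from {os.path.basename(file_path)}")
        return processed_records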