这是连续波形数据下载 Python 脚本的更新版本。

import os
import time
import socket
import numpy as np
from obspy import UTCDateTime, Stream
from obspy.clients.fdsn import Client
from concurrent.futures import ThreadPoolExecutor, as_completed
from requests.exceptions import ConnectionError, Timeout

# Parameter settings
client = Client("IRIS")               # FDSN data center to download from
output_dir = "global_data"            # one MiniSEED file per day is written here
os.makedirs(output_dir, exist_ok=True)
sta_file = "station.lst"              # whitespace-separated "NET STA" list, '#' comments allowed
start_date = UTCDateTime("2013-01-01")
end_date = UTCDateTime("2024-01-01")  # inclusive: this day is downloaded too
thread_workers = 20                   # concurrent per-station download threads per day
exception_log = "exceptions.txt"      # per-station download failures are appended here
timing_log = "download_time.txt"      # per-day wall-clock download durations
max_retries = 5                       # maximum download attempts per day
print(f"📁 当前工作目录: {os.getcwd()}")
print(f"📁 波形保存路径: {os.path.abspath(output_dir)}")

# Read the station list: collect (network, station) pairs,
# skipping blank lines and '#' comment lines.
sta_list = []
with open(sta_file, "r") as sf:
    for raw in sf:
        fields = raw.split()
        if not fields or fields[0].startswith("#"):
            continue
        if len(fields) >= 2:
            sta_list.append((fields[0], fields[1]))

def download_station(net, sta, day):
    """
    Download one station-day of LH? waveforms, remove the instrument
    response (output velocity), and return the resulting Stream.
    """
    window_start = UTCDateTime(day)
    stream = client.get_waveforms(net, sta, "*", "LH?",
                                  window_start, window_start + 86400,
                                  attach_response=True)
    stream.remove_response(output="VEL",
                           pre_filt=(0.008, 0.01, 0.3, 0.4),
                           zero_mean=True, taper=True, taper_fraction=0.05)
    return stream
def download_day(day):
    """
    Download waveforms for every station in sta_list for one day.

    Fans the per-station downloads out over a thread pool and merges the
    results into a single Stream.

    Parameters
    ----------
    day : UTCDateTime
        Start of the 24-hour window to download.

    Returns
    -------
    tuple
        (merged Stream,
         list of (net, sta, 1|0) success flags,
         list of formatted error lines for the exception log)
    """
    stream_day = Stream()
    daily_log = []
    log_lines = []

    with ThreadPoolExecutor(max_workers=thread_workers) as executor:
        # One future per station; map each future back to its (net, sta) pair.
        futures = {executor.submit(download_station, net, sta, day): (net, sta)
                   for net, sta in sta_list}

        for future in as_completed(futures):
            net, sta = futures[future]
            try:
                st = future.result()
                stream_day += st
                print(f"✅ {net}.{sta} 下载成功({len(st)} traces)")
                daily_log.append((net, sta, 1))
            except Exception as e:
                print(f"❌ {net}.{sta} 下载失败: {e}")
                daily_log.append((net, sta, 0))
                # Bug fix: the original f-string ran the station code and the
                # exception text together with no separator ("NET.STAerror").
                log_lines.append(f"{day.date} {net}.{sta}: {e}")

    return stream_day, daily_log, log_lines

def is_network_error(e):
    """
    Heuristically decide whether an exception is network-related, either
    by type or by telltale words in its message.
    """
    if isinstance(e, (ConnectionError, Timeout, socket.timeout, socket.error)):
        return True
    message = str(e).lower()
    return "timed out" in message or "connection" in message

# Main loop: one MiniSEED file per day, with retry on failure.
current_day = start_date
while current_day <= end_date:
    filename = f"{current_day.strftime('%Y%m%d')}.mseed"
    filepath = os.path.join(output_dir, filename)

    # Skip days whose output file already exists and is non-empty.
    if os.path.exists(filepath) and os.path.getsize(filepath) > 0:
        print(f"\n📆 {current_day.date} 文件已存在且非空,跳过下载。")
        current_day += 86400
        continue

    print(f"\n📆 正在处理日期: {current_day.date}")

    attempt = 0
    success = False
    day_start_time = time.time()

    while attempt < max_retries and not success:
        attempt += 1
        print(f"🔄 尝试第 {attempt} 次下载 {current_day.date} ...")

        try:
            stream_day, daily_log, log_lines = download_day(current_day)

            # An empty day counts as a failure so it goes through retry.
            if len(stream_day) == 0:
                print(f"⚠️ {current_day.date} 没有下载到数据,准备重试...")
                raise ValueError("下载数据为空")

            # Save the merged daily waveform.
            stream_day.write(filepath, format="MSEED")
            # Bug fix: the success message printed the literal placeholder
            # "(unknown)" instead of the file that was actually written.
            print(f"💾 {filename} 保存成功(共 {len(stream_day)} traces)")
            success = True

            # Append per-station failures to the exception log.
            if log_lines:
                with open(exception_log, "a") as elog:
                    elog.write("\n".join(log_lines) + "\n")

        except Exception as e:
            print(f"❌ 下载异常: {e}")

            # Back off longer for network-related failures.
            if is_network_error(e):
                print("🌐 网络异常,等待5秒后重试...")
                time.sleep(5)
            else:
                print("⚠️ 非网络异常,仍将重试...")
                time.sleep(3)

    # Record how long this day took, whether it succeeded or not.
    day_duration = time.time() - day_start_time
    with open(timing_log, "a") as tlog:
        tlog.write(f"{current_day.date}: {day_duration:.2f} seconds\n")

    if not success:
        print(f"❌ {current_day.date} 下载失败,超过最大重试次数。请检查网络或日志。")

    current_day += 86400

  这个脚本实现的功能包括: * 下载台站列表station.lst的2013-01-01到2024-01-01,LH?数据。 * 每天的数据存储为global_data/YYYYMMDD.mseed。 * 去仪器响应,保留VEL,滤波频率为0.008, 0.01, 0.3, 0.4。 * 记录每天数据下载的耗时,保存在"download_time.txt"中。 * 每天最多重试5次下载:网络错误等待5秒后重试,其他错误等待3秒后重试。 * 判断当天数据是否已经被下载,如果文件不存在或者大小是0则开始下载。 * 将错误输出到exceptions.txt中。 * 每天数据下载时,启用20个线程并发下载各台站。