SEISAMUSE

Jun Xie's Blog

  After installing seisman's HinetPy, I hit this problem when running the data download script:

from HinetPy import Client, win32
File "/home/junxie/work/Snet/HinetPy-main/HinetPy/__init__.py", line 23, in <module>
from .client import Client
File "/home/junxie/work/Snet/HinetPy-main/HinetPy/client.py", line 22, in <module>
from urllib3.util import create_urllib3_context
ImportError: cannot import name 'create_urllib3_context' from 'urllib3.util' (/home/junxie/.conda/envs/py3/lib/python3.9/site-packages/urllib3/util/__init__.py)

How to fix it?
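
  A likely cause: HinetPy's client.py imports create_urllib3_context from urllib3.util, which only newer urllib3 releases export, so the urllib3 in the conda environment is too old. Upgrading urllib3 in that environment (e.g. pip install -U urllib3) should fix it. Alternatively, a guarded import in client.py works with both old and new urllib3; this is my workaround sketch, not HinetPy's official fix:

# Workaround sketch for HinetPy/client.py (an assumption, not the official fix):
# try the public export first, then fall back to where older urllib3 defines it.
try:
    from urllib3.util import create_urllib3_context  # urllib3 >= 2.x
except ImportError:
    from urllib3.util.ssl_ import create_urllib3_context  # urllib3 1.26.x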


  Once the data are downloaded, we need to tally what we actually got. Here is the Python script.

import os
from obspy import read
from datetime import datetime as dt
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.dates as mdates
from collections import defaultdict
from matplotlib.transforms import blended_transform_factory

# ===== Configuration =====
data_dir = "daily_waveforms"   # directory holding YYYYMMDD.mseed files
station_file = "station.list"  # station list, two columns: NET STA
components = ["LHZ", "LHN", "LHE"]
start_year = 2020
end_year = 2020

# ===== Read the station list =====
stations = []
with open(station_file, "r") as f:
    for line in f:
        if line.strip():
            net, sta = line.strip().split()[:2]
            stations.append((net, sta))

station_set = set(f"{net}.{sta}" for net, sta in stations)

# ===== Initialization =====
station_dates = {comp: defaultdict(set) for comp in components}  # comp -> {net.sta -> set(date)}
station_file_sizes = defaultdict(int)  # total file size per station
total_files = 0
total_file_size = 0

# ===== Loop over all mseed files =====
all_files = sorted(f for f in os.listdir(data_dir) if f.endswith(".mseed"))
for filename in all_files:
    filepath = os.path.join(data_dir, filename)

    size = os.path.getsize(filepath)
    total_files += 1
    total_file_size += size

    try:
        st = read(filepath)
    except Exception as e:
        print(f"Failed to read {filename}: {e}")
        continue

    # Parse the file date (YYYYMMDD)
    try:
        file_date = dt.strptime(filename[:8], "%Y%m%d").date()
    except Exception as e:
        print(f"Cannot parse date from {filename}: {e}")
        continue

    for tr in st:
        net = tr.stats.network
        sta = tr.stats.station
        comp = tr.stats.channel[-3:]

        key = f"{net}.{sta}"
        if key not in station_set:
            continue
        if comp not in components:
            continue

        station_dates[comp][key].add(file_date)
        station_file_sizes[key] += size

print(f"Files read: {total_files}")
print(f"Total size of all files: {total_file_size / (1024**2):.2f} MB")

# ===== Drop stations with no valid data =====
for comp in components:
    keys_to_remove = [k for k, dates in station_dates[comp].items() if len(dates) == 0]
    for k in keys_to_remove:
        del station_dates[comp][k]

# ===== Write the per-station day counts =====
for comp in components:
    with open(f"station_day_count_{comp}.txt", "w") as f:
        for key in sorted(station_dates[comp].keys()):
            f.write(f"{key} {len(station_dates[comp][key])}\n")

# ===== Network color map =====
all_nets = sorted(set(net for net, sta in stations))
net_color_map = {net: cm.get_cmap("tab20")(i / max(len(all_nets) - 1, 1)) for i, net in enumerate(all_nets)}

# ===== Plotting function =====
def plot_component_availability(comp, station_dates_comp):
    valid_stations = sorted(station_dates_comp.keys(), key=lambda x: (x.split(".")[0], x.split(".")[1]))
    fig, ax = plt.subplots(figsize=(14, max(6, 0.3 * len(valid_stations))))

    def to_datetime(d):
        return dt(d.year, d.month, d.day)

    for idx, key in enumerate(valid_stations):
        net = key.split(".")[0]
        dates = sorted(station_dates_comp[key])
        x_vals = [to_datetime(d) for d in dates]
        ax.plot(x_vals, [idx] * len(dates), ".", color=net_color_map[net], markersize=2)

    ax.set_xlabel("Time")
    ax.set_ylabel("Station")
    ax.set_yticks(range(len(valid_stations)))
    ax.set_yticklabels(valid_stations, fontsize=5)

    ax.grid(True, linestyle=":", alpha=0.5)

    x_start = dt(start_year, 1, 1)
    x_end = dt(end_year, 12, 31)
    ax.set_xlim(x_start, x_end)

    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))

    plt.tight_layout(rect=[0, 0, 0.85, 1])

    # Label network names outside the right edge of the axes
    trans = blended_transform_factory(ax.transAxes, ax.transData)
    net_indices = defaultdict(list)
    for idx, key in enumerate(valid_stations):
        net = key.split(".")[0]
        net_indices[net].append(idx)

    for net, indices in net_indices.items():
        mid_idx = indices[len(indices) // 2]
        color = net_color_map[net]
        ax.text(1.02, mid_idx, net, color=color, fontsize=8, fontweight="bold",
                verticalalignment="center", horizontalalignment="left",
                transform=trans)

    outfile = f"{comp}_availability_{start_year}_{end_year}.png"
    plt.savefig(outfile, dpi=300, bbox_inches="tight")
    plt.close()
    print(f"Saved figure: {outfile}")

# ===== Plot the three components =====
for comp in components:
    plot_component_availability(comp, station_dates[comp])

# ===== Network statistics =====
network_stats = defaultdict(lambda: {"total": 0, "valid": 0, "day_counts": []})

for net, sta in stations:
    network_stats[net]["total"] += 1

all_valid_keys = set()
for comp in components:
    all_valid_keys.update(station_dates[comp].keys())

for key in all_valid_keys:
    net = key.split(".")[0]
    days_list = [len(station_dates[comp].get(key, [])) for comp in components]
    valid_days = max(days_list)
    network_stats[net]["valid"] += 1
    network_stats[net]["day_counts"].append(valid_days)

print("\nNetwork summary:")
print(f"{'Net':<6} {'Total':>6} {'Valid':>6} {'AvgDays':>10}")
print("-" * 32)
with open("network_summary_all_components.txt", "w") as f:
    f.write(f"{'Net':<6} {'Total':>6} {'Valid':>6} {'AvgDays':>10}\n")
    for net in sorted(network_stats.keys()):
        total = network_stats[net]["total"]
        valid = network_stats[net]["valid"]
        avg_days = np.mean(network_stats[net]["day_counts"]) if network_stats[net]["day_counts"] else 0
        line = f"{net:<6} {total:>6} {valid:>6} {avg_days:>10.1f}"
        print(line)
        f.write(line + "\n")

# ===== Summary =====
print(f"\nTotal number of files: {total_files}")
print(f"Total size: {total_file_size/(1024**2):.2f} MB ({total_file_size/(1024**3):.2f} GB)")

  This script does the following (a sample station.list follows the list):

  • Tallies the download coverage from start_year to end_year.
  • Computes the statistics separately for LHZ, LHN, and LHE.
  • Visualizes the results.
  • The x axis is time and the y axis is station name; each network is drawn in its own color, with the network names labeled on the right side of the figure.
  • Writes network_summary_all_components.txt, listing each network's station count, valid station count, and average number of days.
  • Writes station_day_count_{comp}.txt, listing each station's name and number of valid days.
  • Prints the total number of files and their total size.
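
  For reference, station.list is plain text with two columns, NET STA, as the script's configuration expects; a hypothetical example:

IU ANMO
IU COLA
II BFO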

  Below is an updated version of the seismic data download script.

import os
from obspy import UTCDateTime
from obspy.clients.fdsn import Client
from obspy.taup import TauPyModel
from obspy.geodetics import locations2degrees
from concurrent.futures import ThreadPoolExecutor, as_completed

client = Client("IRIS")
model = TauPyModel("iasp91")

output_dir = "processed_sac"
os.makedirs(output_dir, exist_ok=True)

event_file = "event.lst"
sta_file = "sta.lst"
exception_log = "exceptions.txt"
thread_workers = 4

# Read station info: net, sta, lat, lon, elev
with open(sta_file, "r") as sf:
    sta_lines = [line.strip() for line in sf if line.strip()]
sta_list = []
for line in sta_lines:
    parts = line.split("|")
    if len(parts) >= 5:
        net = parts[0].strip()
        sta = parts[1].strip()
        lat = float(parts[2])
        lon = float(parts[3])
        elev = float(parts[4])
        sta_list.append((net, sta, lat, lon, elev))

# Truncate the exception log
with open(exception_log, "w") as elog:
    elog.write("")

def process_station(event_id, origin_time, magnitude, ev_lat, ev_lon, ev_dep,
                    net, sta, sta_lat, sta_lon, sta_elev):
    try:
        # Epicentral distance and theoretical P arrival
        dist_deg = locations2degrees(ev_lat, ev_lon, sta_lat, sta_lon)
        arrivals = model.get_travel_times(source_depth_in_km=ev_dep,
                                          distance_in_degree=dist_deg,
                                          phase_list=["P"])
        if not arrivals:
            raise Exception("No P arrival")

        p_arrival = origin_time + arrivals[0].time
        start_dl = p_arrival - 50
        end_dl = p_arrival + 150

        # Download three-component data
        st = client.get_waveforms(network=net, station=sta, location="*", channel="BH?",
                                  starttime=start_dl, endtime=end_dl,
                                  attach_response=True)

        # Remove the instrument response
        st.remove_response(output="VEL", pre_filt=(0.01, 0.02, 8, 10),
                           taper=True, zero_mean=True, taper_fraction=0.05)

        # Trim to the analysis window
        st.trim(starttime=p_arrival - 10, endtime=p_arrival + 60)

        # Require all three components
        comps = [tr.stats.channel[-1] for tr in st]
        if not all(comp in comps for comp in ["N", "E", "Z"]):
            raise Exception("Incomplete 3-component data")

        # Fill SAC headers and write one file per trace
        for tr in st:
            tr.stats.sac = tr.stats.get("sac", {})
            tr.stats.sac.stla = sta_lat
            tr.stats.sac.stlo = sta_lon
            tr.stats.sac.stel = sta_elev
            tr.stats.sac.evla = ev_lat
            tr.stats.sac.evlo = ev_lon
            tr.stats.sac.evdp = ev_dep
            tr.stats.sac.mag = float(magnitude) if magnitude != "NA" else 0.0

            time_tag = origin_time.strftime("%Y%m%dT%H%M%S")
            chan = tr.stats.channel
            filename = f"{time_tag}_M{magnitude}_{net}.{sta}.{chan}.sac"
            filepath = os.path.join(output_dir, filename)
            tr.write(filepath, format="SAC")

        return f"{net}.{sta} ✅"

    except Exception as e:
        with open(exception_log, "a") as elog:
            elog.write(f"{event_id} {net}.{sta} ❌ {str(e)}\n")
        return f"{net}.{sta} ❌ {str(e)}"

def process_event(line):
    results = []
    parts = line.split("|")
    if len(parts) < 10:
        return ["Skipping malformed line"]

    evid = parts[0].strip()
    time_str = parts[1].strip()
    ev_lat = float(parts[2].strip())
    ev_lon = float(parts[3].strip())
    ev_dep = float(parts[4].strip())
    mag_info = parts[8].strip()
    magnitude = mag_info.split(",")[1] if "," in mag_info else "NA"

    origin_time = UTCDateTime(time_str)
    print(f"\n🟡 Processing event {evid} M{magnitude} @ {origin_time} ({ev_lat}, {ev_lon}, {ev_dep}km)")

    try:
        task_list = []
        with ThreadPoolExecutor(max_workers=thread_workers) as executor:
            for net, sta, sta_lat, sta_lon, sta_elev in sta_list:
                task = executor.submit(process_station, evid, origin_time, magnitude,
                                       ev_lat, ev_lon, ev_dep, net, sta, sta_lat, sta_lon, sta_elev)
                task_list.append(task)

            for task in as_completed(task_list):
                results.append(task.result())

    except Exception as e:
        with open(exception_log, "a") as elog:
            elog.write(f"{evid} ERROR: {str(e)}\n")
        results.append(f"⚠️ Failed to process event {evid}: {e}")
    return results

# Read the event list and run
with open(event_file, "r") as f:
    event_lines = [line.strip() for line in f if line.strip()]

for line in event_lines:
    results = process_event(line)
    for r in results:
        print(r)

  The difference from the earlier script (see the post 下载地震数据练习, "seismic data download practice") is the extra file sta.lst, whose format is:
TA|140A|32.6408|-93.573997|56.0|
network|station|latitude|longitude|elevation
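
  A sta.lst like this can be generated with an FDSN station query; a minimal sketch, where the TA network and the time span are assumptions to adjust to your own case:

# Sketch: build sta.lst from an FDSN station query (network and dates are assumptions).
from obspy import UTCDateTime
from obspy.clients.fdsn import Client

client = Client("IRIS")
inv = client.get_stations(network="TA", channel="BH?", level="station",
                          starttime=UTCDateTime("2006-01-01"),
                          endtime=UTCDateTime("2007-05-01"))
with open("sta.lst", "w") as f:
    for net in inv:
        for sta in net:
            f.write(f"{net.code}|{sta.code}|{sta.latitude}|{sta.longitude}|{sta.elevation}|\n")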

  Sometimes you need to grab the data points from a figure in someone else's paper. How? Ask the author, of course! But sometimes that is not an option (you cannot reach the author, the author has lost the data, the author will not share, or the author tells you to extract it yourself), or you do not actually need the exact values, in which case you can pick the points off the figure yourself. I used to have a Perl script for this but cannot find it anymore (more proof that organizing files and taking notes matters). These days Python makes it easy: just use pynput's Listener. The script to record mouse clicks:

from pynput.mouse import Button, Listener
import logging

# Configure logging
logging.basicConfig(
    filename="mouse_clicks.log",
    level=logging.INFO,
    format="%(asctime)s %(message)s",
    datefmt="%Y-%m-%dT%H:%M:%S"
)

def on_click(x, y, button, pressed):
    if pressed:
        message = f"{x} {y}"
        print(message)         # echo to the screen
        logging.info(message)  # append to the log file
        if button == Button.right:
            # Stop the listener
            return False

# Start the mouse listener
with Listener(on_click=on_click) as listener:
    try:
        listener.join()
    except KeyboardInterrupt:
        print("Listener interrupted by user.")

  This script records the position of each left click and writes it to mouse_clicks.log, which has three columns:

2025-05-07T17:01:33 1977 1576

the click time and the x and y coordinates. Note that y increases downward (larger at the bottom of the screen, smaller at the top). Right-click or press Ctrl+C to stop recording. My usual procedure: click the lower-left corner of the figure to get (x0, y0), the lower-right corner to get (x1, y0), the upper-left corner to get (x0, y1), and then the points I actually want. With mouse_clicks.log in hand, plotting goes like this (GMT):

dat=mouse_clicks.log
# Reference points x0,y0,x1,y1 from the first three clicks
x0=`awk 'NR==1{print $2}' ${dat}`
y0=`awk 'NR==1{print $3}' ${dat}`
x1=`awk 'NR==2{print $2}' ${dat}`
y1=`awk 'NR==3{print $3}' ${dat}`
# Axis lengths in screen (pixel) units
xs=`echo "$x1-$x0" |bc`
ys=`echo "$y0-$y1" |bc`

awk -v x0=$x0 -v y0=$y0 -v xs=$xs -v ys=$ys 'NR>3{print ($2-x0)/xs*2000-1000,(y0-$3)/ys*1500}' ${dat} | gmt psxy -R -J -O -K -W3p,black >>$ps
# Here (2000,1500) is the actual axis extent, and -1000 shifts x to its actual starting value.

  Note that this method only works for linear Cartesian axes; it fails for every other kind of projection.
  From now on I should append this line: scripts/programs come with no guarantee of correctness (no warranty, use at your own risk).
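
  The same pixel-to-data mapping is easy to do directly in Python. A minimal sketch, assuming clicks 1-3 are the lower-left, lower-right, and upper-left corners; the data ranges (here x in [-1000, 1000] and y in [0, 1500], matching the awk line above) must be adjusted to the figure being digitized:

# Sketch: convert logged pixel clicks to data coordinates (linear axes only).
# Assumes clicks 1-3 are the lower-left, lower-right, and upper-left corners.
XMIN, XMAX = -1000.0, 1000.0  # assumed x-axis range of the original figure
YMIN, YMAX = 0.0, 1500.0      # assumed y-axis range

with open("mouse_clicks.log") as f:
    rows = [line.split() for line in f if line.strip()]

pix = [(float(r[1]), float(r[2])) for r in rows]     # (x_pixel, y_pixel)
(x0, y0), (x1, _), (_, y1) = pix[0], pix[1], pix[2]  # LL, LR, UL corners
xs, ys = x1 - x0, y0 - y1  # axis lengths in pixels (screen y grows downward)

for px, py in pix[3:]:
    x = XMIN + (px - x0) / xs * (XMAX - XMIN)
    y = YMIN + (y0 - py) / ys * (YMAX - YMIN)
    print(f"{x:.3f} {y:.3f}")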

  Drawing arrows with GMT comes up fairly often. The GMT documentation and the GMT中文手册 are great, but they are no help when the network is down, so I am noting it here where I can find it. The arrow symbol is Sv (or SV, which reads an azimuth measured from north instead of a Cartesian angle); tweak the modifiers after Sv and watch the effect. The input columns are x, y, angle, length: x, y is the arrow's start point; angle is the direction it points, counterclockwise from the positive x axis; and length is just the length.

echo 0 0 60 0.5i | gmt psxy -R -J -O -K -Sv0.25c+e0.2c -W0.5p -Gblack >>$ps

  Receiver functions and other methods need seismic waveform data. Here is an example that combines FetchEvent and ObsPy for downloading.
  FetchEvent, written in Perl, can be downloaded from https://github.com/EarthScope/fetch-scripts. An example run:

./FetchEvent -s 2006-01-01 -e 2007-05-01 --radius 5:12:95:28 --mag 5:10 -o event.lst

  This requests events occurring between 2006-01-01 and 2007-05-01, located 28-95 degrees from the point (5, 12) (lat, lon), with magnitude 5-10, and saves the event information to event.lst.
  The downloaded event.lst looks like:

8037834 |2006/01/23 20:50:46.19Z |  6.9168| -77.7951|  24.0355|ISC|||MS,5.86,ISC|mb,6.16,ISC|Near west coast of Colombia

  The fields are ID|time|latitude|longitude|depth|catalog|||magtype,magnitude,catalog|magtype,magnitude,catalog|location.
  The event information evidently comes from the ISC catalog; searching ISC directly is also quite convenient.
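
  If you would rather stay in Python, roughly the same query can be done with ObsPy's FDSN client; a sketch (note it returns an ObsPy Catalog object rather than a FetchEvent-style text file):

# Sketch: the equivalent event query via ObsPy's FDSN client.
from obspy import UTCDateTime
from obspy.clients.fdsn import Client

client = Client("IRIS")
catalog = client.get_events(starttime=UTCDateTime("2006-01-01"),
                            endtime=UTCDateTime("2007-05-01"),
                            latitude=5, longitude=12,
                            minradius=28, maxradius=95,
                            minmagnitude=5, maxmagnitude=10)
print(catalog)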

  Next, for every station in the XB network, download the three-component data from 50 s before to 150 s after the theoretical P arrival, computed with taup. Then remove the instrument response and finally cut from 10 s before to 60 s after the P arrival. Save as SAC, one file per event, station, and channel, with the event time, magnitude, and station name in the filename. ThreadPoolExecutor provides multithreaded speedup. The script:

import os
from obspy import UTCDateTime
from obspy.clients.fdsn import Client
from obspy.taup import TauPyModel
from obspy.geodetics import locations2degrees
from concurrent.futures import ThreadPoolExecutor, as_completed

client = Client("IRIS")
model = TauPyModel("iasp91")

output_dir = "processed_sac"
os.makedirs(output_dir, exist_ok=True)

event_file = "event.lst"
exception_log = "exceptions.txt"
thread_workers = 4  # number of worker threads, tune as needed

with open(event_file, "r") as f:
    event_lines = [line.strip() for line in f if line.strip()]

# Truncate the exception log
with open(exception_log, "w") as elog:
    elog.write("")

def process_station(event_id, origin_time, magnitude, ev_lat, ev_lon, ev_dep, net, sta, sta_lat, sta_lon, sta_elev):
    try:
        # Epicentral distance via locations2degrees
        dist_deg = locations2degrees(ev_lat, ev_lon, sta_lat, sta_lon)
        print(dist_deg)  # debug output
        arrivals = model.get_travel_times(source_depth_in_km=ev_dep,
                                          distance_in_degree=dist_deg,
                                          phase_list=["P"])
        if not arrivals:
            raise Exception("No P arrival")

        p_arrival = origin_time + arrivals[0].time
        start_dl = p_arrival - 50
        end_dl = p_arrival + 150

        # Download three-component data
        st = client.get_waveforms(network=net, station=sta, location="*", channel="BH?",
                                  starttime=start_dl, endtime=end_dl,
                                  attach_response=True)

        # Remove the instrument response
        st.remove_response(output="VEL", pre_filt=(0.01, 0.02, 8, 10), taper=True,
                           zero_mean=True, taper_fraction=0.05)

        # Trim to the analysis window
        st.trim(starttime=p_arrival - 10, endtime=p_arrival + 60)

        # Check that all three components are present
        if not all(comp in [tr.stats.channel[-1] for tr in st] for comp in ["N", "E", "Z"]):
            raise Exception("Incomplete 3-component data")

        # Write SAC files with header fields filled in
        for tr in st:
            tr.stats.sac = tr.stats.get("sac", {})
            tr.stats.sac.stla = sta_lat
            tr.stats.sac.stlo = sta_lon
            tr.stats.sac.stel = sta_elev
            tr.stats.sac.evla = ev_lat
            tr.stats.sac.evlo = ev_lon
            tr.stats.sac.evdp = ev_dep
            tr.stats.sac.mag = float(magnitude) if magnitude != "NA" else 0.0

            time_tag = origin_time.strftime("%Y%m%dT%H%M%S")
            chan = tr.stats.channel
            filename = f"{time_tag}_M{magnitude}_{net}.{sta}.{chan}.sac"
            filepath = os.path.join(output_dir, filename)
            tr.write(filepath, format="SAC")
        return f"{net}.{sta} ✅"
    except Exception as e:
        with open(exception_log, "a") as elog:
            elog.write(f"{event_id} {net}.{sta} ❌ {str(e)}\n")
        return f"{net}.{sta} ❌ {str(e)}"

def process_event(line):
    results = []
    parts = line.split("|")
    if len(parts) < 10:
        return ["Skipping malformed line"]

    evid = parts[0].strip()
    time_str = parts[1].strip()
    ev_lat = float(parts[2].strip())
    ev_lon = float(parts[3].strip())
    ev_dep = float(parts[4].strip())
    mag_info = parts[8].strip()
    magnitude = mag_info.split(",")[1] if "," in mag_info else "NA"

    origin_time = UTCDateTime(time_str)
    print(f"\n🟡 Processing event {evid} M{magnitude} @ {origin_time} ({ev_lat}, {ev_lon}, {ev_dep}km)")

    try:
        # All XB stations with BH? channels operating at the event time
        inventory = client.get_stations(network="XB", starttime=origin_time,
                                        endtime=origin_time + 3600,
                                        level="station", channel="BH?")
        task_list = []
        with ThreadPoolExecutor(max_workers=thread_workers) as executor:
            for network in inventory:
                for station in network:
                    sta_lat = station.latitude
                    sta_lon = station.longitude
                    sta_elev = station.elevation
                    sta = station.code
                    net = network.code
                    task = executor.submit(process_station, evid, origin_time, magnitude,
                                           ev_lat, ev_lon, ev_dep, net, sta, sta_lat, sta_lon, sta_elev)
                    task_list.append(task)

            for task in as_completed(task_list):
                results.append(task.result())

    except Exception as e:
        with open(exception_log, "a") as elog:
            elog.write(f"{evid} XB ERROR: {str(e)}\n")
        results.append(f"⚠️ Failed to process event {evid}: {e}")
    return results

# Process all events
for line in event_lines:
    results = process_event(line)
    for r in results:
        print(r)