import os
from collections import defaultdict
from datetime import datetime as dt

import matplotlib.cm as cm
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.transforms import blended_transform_factory
from obspy import read
# --- Configuration ---------------------------------------------------------
data_dir = "daily_waveforms"        # directory holding the daily miniSEED files
station_file = "station.list"       # text file with one "NET STA ..." pair per line
components = ["LHZ", "LHN", "LHE"]  # channel codes whose availability is evaluated
start_year = 2020                   # first year shown on the availability plots
end_year = 2020                     # last year shown on the availability plots
# Load the (network, station) pairs to analyse from the station list file.
# Blank lines and lines with fewer than two whitespace-separated fields are
# skipped (the original unpack `net, sta = line.strip().split()[:2]` raised
# ValueError on a one-token line).
stations = []
with open(station_file, "r") as f:
    for line in f:
        fields = line.split()
        if len(fields) < 2:
            continue  # blank or malformed line
        stations.append((fields[0], fields[1]))

# Fast O(1) membership test for "NET.STA" keys seen in the data files.
station_set = {f"{net}.{sta}" for net, sta in stations}
# --- Accumulators ----------------------------------------------------------
# station_dates[comp]["NET.STA"] -> set of dates on which that station has
# data for that component.
station_dates = {comp: defaultdict(set) for comp in components}
# station_file_sizes["NET.STA"] -> cumulative size (bytes) of files that
# contained at least one matching trace for the station.
station_file_sizes = defaultdict(int)
total_files = 0      # number of .mseed files scanned
total_file_size = 0  # cumulative size of scanned files, in bytes
# Scan every daily miniSEED file and record, per component, the set of
# calendar days on which each requested station has data.
all_files = sorted(f for f in os.listdir(data_dir) if f.endswith(".mseed"))
for filename in all_files:
    filepath = os.path.join(data_dir, filename)
    size = os.path.getsize(filepath)
    # NOTE: totals deliberately count every .mseed file, even ones that
    # later fail to parse — this mirrors the original accounting.
    total_files += 1
    total_file_size += size

    # Parse the YYYYMMDD filename prefix first: if the name is malformed
    # there is no point paying for the (much slower) miniSEED read below.
    # strptime raises ValueError on a bad prefix, so catch exactly that.
    try:
        file_date = dt.strptime(filename[:8], "%Y%m%d").date()
    except ValueError as e:
        print(f"无法解析日期 {filename}: {e}")
        continue

    # headonly=True reads only the trace headers (network/station/channel),
    # which is all this availability scan needs — the waveform samples are
    # never touched, so skipping them is a pure speed-up.
    try:
        st = read(filepath, headonly=True)
    except Exception as e:
        # Error message now names the offending file (was a "(unknown)"
        # placeholder in the original).
        print(f"读取文件失败 {filename}: {e}")
        continue

    for tr in st:
        net = tr.stats.network
        sta = tr.stats.station
        comp = tr.stats.channel[-3:]
        key = f"{net}.{sta}"
        # Keep only stations from the station list and requested components.
        if key not in station_set or comp not in components:
            continue
        station_dates[comp][key].add(file_date)
        station_file_sizes[key] += size

print(f"共计读取文件数: {total_files}")
print(f"所有文件总大小: {total_file_size / (1024**2):.2f} MB")
# Drop stations whose date set ended up empty. Defensive only: entries are
# created exactly when a date is added, so this is normally a no-op.
for comp in components:
    comp_dates = station_dates[comp]
    for key in [k for k, days in comp_dates.items() if not days]:
        del comp_dates[key]

# Emit one "NET.STA day_count" summary file per component.
for comp in components:
    with open(f"station_day_count_{comp}.txt", "w") as f:
        for key in sorted(station_dates[comp]):
            f.write(f"{key} {len(station_dates[comp][key])}\n")
# One distinct colour per network, sampled evenly across the tab20 colormap.
# plt.get_cmap replaces matplotlib.cm.get_cmap, which was removed in
# matplotlib 3.9; the colormap lookup is also hoisted out of the
# comprehension (the original re-fetched it once per network).
all_nets = sorted({net for net, sta in stations})
_tab20 = plt.get_cmap("tab20")
net_color_map = {
    net: _tab20(i / max(len(all_nets) - 1, 1)) for i, net in enumerate(all_nets)
}
def plot_component_availability(comp, station_dates_comp):
    """Plot per-station daily data availability for one component.

    Draws one horizontal row of dots per station (one dot per day with
    data), coloured by network, labels each network at the right edge, and
    saves the figure as ``{comp}_availability_{start_year}_{end_year}.png``.

    Parameters
    ----------
    comp : str
        Component/channel code (e.g. "LHZ"); used only in the output name.
    station_dates_comp : dict
        Mapping "NET.STA" -> set of ``datetime.date`` with data for *comp*.
    """
    # Sort by network first, then station, so each network's rows group.
    valid_stations = sorted(
        station_dates_comp.keys(),
        key=lambda x: (x.split(".")[0], x.split(".")[1]),
    )
    fig, ax = plt.subplots(figsize=(14, max(6, 0.3 * len(valid_stations))))

    def to_datetime(d):
        # matplotlib date plotting wants datetimes, not datetime.date.
        return dt(d.year, d.month, d.day)

    for idx, key in enumerate(valid_stations):
        net = key.split(".")[0]
        dates = sorted(station_dates_comp[key])
        x_vals = [to_datetime(d) for d in dates]
        ax.plot(x_vals, [idx] * len(dates), ".",
                color=net_color_map[net], markersize=2)

    ax.set_xlabel("时间")
    ax.set_ylabel("台站")
    ax.set_yticks(range(len(valid_stations)))
    ax.set_yticklabels(valid_stations, fontsize=5)
    ax.grid(True, linestyle=":", alpha=0.5)

    ax.set_xlim(dt(start_year, 1, 1), dt(end_year, 12, 31))
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))

    # Leave room on the right for the network labels.
    plt.tight_layout(rect=[0, 0, 0.85, 1])

    # Label each network once, at the vertical midpoint of its station rows,
    # just outside the right axis edge (x in axes coords, y in data coords).
    trans = blended_transform_factory(ax.transAxes, ax.transData)
    net_indices = defaultdict(list)
    for idx, key in enumerate(valid_stations):
        net_indices[key.split(".")[0]].append(idx)
    for net, indices in net_indices.items():
        mid_idx = indices[len(indices) // 2]
        ax.text(1.02, mid_idx, net, color=net_color_map[net], fontsize=8,
                fontweight='bold', verticalalignment='center',
                horizontalalignment='left', transform=trans)

    # BUG FIX: the output name was hard-coded to "2013_2022" regardless of
    # the configured year range; derive it from start_year/end_year instead.
    outfile = f"{comp}_availability_{start_year}_{end_year}.png"
    plt.savefig(outfile, dpi=300, bbox_inches='tight')
    plt.close()
    print(f"已保存图像: {outfile}")


for comp in components:
    plot_component_availability(comp, station_dates[comp])
# Per-network roll-up: total listed stations, stations with any data, and
# the best (max across components) day count for each station with data.
network_stats = defaultdict(lambda: {"total": 0, "valid": 0, "day_counts": []})

for net, _sta in stations:
    network_stats[net]["total"] += 1

# Every "NET.STA" key that has data in at least one component.
all_valid_keys = set().union(*(station_dates[c].keys() for c in components))

for key in all_valid_keys:
    net = key.split(".")[0]
    # A station's "valid days" is its best coverage over all components.
    valid_days = max(len(station_dates[c].get(key, ())) for c in components)
    stats = network_stats[net]
    stats["valid"] += 1
    stats["day_counts"].append(valid_days)
# Print the per-network summary table and mirror it to a text file.
header = f"{'Net':<6} {'Total':>6} {'Valid':>6} {'AvgDays':>10}"
print("\n台网统计汇总:")
print(header)
print("-" * 32)
with open("network_summary_all_components.txt", "w") as f:
    f.write(header + "\n")
    for net in sorted(network_stats):
        entry = network_stats[net]
        day_counts = entry["day_counts"]
        avg_days = np.mean(day_counts) if day_counts else 0
        line = f"{net:<6} {entry['total']:>6} {entry['valid']:>6} {avg_days:>10.1f}"
        print(line)
        f.write(line + "\n")

print(f"\n文件总数: {total_files}")
print(f"文件总大小: {total_file_size/(1024**2):.2f} MB ({total_file_size/(1024**3):.2f} GB)")