first commit

This commit is contained in:
2026-03-02 23:22:33 +08:00
parent 1c5822d16b
commit c5ae56c463
22 changed files with 606 additions and 462 deletions

View File

@@ -22,6 +22,8 @@ BASE_URL = (
"examples/calibration/calibration_data.txt"
)
BLOCK_SPLIT_RE = re.compile(r"\n\s*\n")
SCRIPT_DIR = Path(__file__).resolve().parent
ROOT_DIR = SCRIPT_DIR.parent
def split_blocks(text: str) -> list[str]:
@@ -130,15 +132,21 @@ def ensure_cached_blocks(
def main() -> int:
parser = argparse.ArgumentParser()
parser.add_argument("--seed", type=int, default=42)
parser.add_argument("--base-file", default="calibration_data_v5_rc.txt")
parser.add_argument("--output", default="calibration_data_v5_rc_code.txt")
parser.add_argument("--data-dir", default="data")
parser.add_argument("--base-file", default="calibration/calibration_data_v5_rc.txt")
parser.add_argument("--output", default="calibration/calibration_data_v5_rc_code.txt")
parser.add_argument("--data-dir", default="calibration/sources")
parser.add_argument("--force-refresh", action="store_true")
args = parser.parse_args()
base_file = Path(args.base_file)
output_file = Path(args.output)
data_dir = Path(args.data_dir)
def resolve_path(path_text: str) -> Path:
    """Turn a path given on the command line into a concrete ``Path``.

    Absolute paths are returned as-is. Relative paths are anchored at
    ``ROOT_DIR`` (the parent of this script's directory) rather than at the
    current working directory.
    """
    candidate = Path(path_text)
    return candidate if candidate.is_absolute() else ROOT_DIR / candidate
base_file = resolve_path(args.base_file)
output_file = resolve_path(args.output)
data_dir = resolve_path(args.data_dir)
code_cache = data_dir / "code74k_2000.txt"
openhermes_cache = data_dir / "openhermes_coding_chosen_1000.txt"

49
scripts/upload_to_modelscope.sh Executable file → Normal file
View File

@@ -1,25 +1,58 @@
#!/usr/bin/env bash
set -euo pipefail
# 用法:
# ./upload_to_modelscope.sh <repo_id> <token>
# 示例:
# ./upload_to_modelscope.sh your_username/your_repo_name ms-xxxxxxxx
# Usage:
# ./scripts/upload_to_modelscope.sh <repo_id> <token> [upload_dir] [mode] [commit_message]
#
# Examples:
# ./scripts/upload_to_modelscope.sh your_user/your_repo ms-xxxx
# ./scripts/upload_to_modelscope.sh your_user/your_repo ms-xxxx modelscope_upload proxy
#
# mode:
# direct (default): unset proxy vars for direct connection
# proxy: keep current proxy environment
REPO_ID="${1:-}"
TOKEN="${2:-}"
UPLOAD_DIR_ARG="${3:-modelscope_upload}"
MODE="${4:-direct}"
COMMIT_MESSAGE="${5:-Upload model artifacts}"
if [[ -z "${REPO_ID}" || -z "${TOKEN}" ]]; then
echo "Usage: $0 <repo_id> <token>"
echo "Usage: $0 <repo_id> <token> [upload_dir] [mode] [commit_message]"
exit 1
fi
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
UPLOAD_DIR="${UPLOAD_DIR_ARG}"
if [[ "${UPLOAD_DIR}" != /* ]]; then
UPLOAD_DIR="${ROOT_DIR}/${UPLOAD_DIR}"
fi
"${ROOT_DIR}/.venv/bin/modelscope" login --token "${TOKEN}"
"${ROOT_DIR}/.venv/bin/modelscope" upload "${REPO_ID}" "${SCRIPT_DIR}" . \
if [[ ! -d "${UPLOAD_DIR}" ]]; then
echo "Upload directory does not exist: ${UPLOAD_DIR}"
exit 2
fi
MODELSCOPE_BIN="${ROOT_DIR}/.venv/bin/modelscope"
if [[ ! -x "${MODELSCOPE_BIN}" ]]; then
echo "modelscope CLI not found at ${MODELSCOPE_BIN}"
exit 3
fi
# Build RUN_CMD, the command prefix used to invoke the modelscope CLI.
#   direct: wrap the CLI in `env -u ...` so the proxy variables are removed
#           from the CLI's environment only (the calling shell is untouched).
#           Both upper- and lower-case spellings are cleared: many HTTP
#           clients read http_proxy/https_proxy/all_proxy/no_proxy as well,
#           so unsetting only the upper-case names would leave a proxy active.
#   proxy:  invoke the CLI with the current environment unchanged.
# Any other MODE value is rejected with exit status 4.
if [[ "${MODE}" == "direct" ]]; then
  RUN_CMD=(
    env
    -u HTTP_PROXY -u HTTPS_PROXY -u ALL_PROXY -u NO_PROXY
    -u http_proxy -u https_proxy -u all_proxy -u no_proxy
    "${MODELSCOPE_BIN}"
  )
elif [[ "${MODE}" == "proxy" ]]; then
  RUN_CMD=("${MODELSCOPE_BIN}")
else
  echo "Unsupported mode: ${MODE} (use direct|proxy)"
  exit 4
fi
"${RUN_CMD[@]}" login --token "${TOKEN}"
"${RUN_CMD[@]}" upload "${REPO_ID}" "${UPLOAD_DIR}" . \
--repo-type model \
--commit-message "Upload Qwen3.5-27B quantized GGUF weights"
--commit-message "${COMMIT_MESSAGE}"
echo "Upload finished: ${REPO_ID}"