首页
学习
活动
专区
圈层
工具
发布
社区首页 >专栏 >#腾讯云AI代码助手# tencent-deepseekR1 模型体验心得

#腾讯云AI代码助手# tencent-deepseekR1 模型体验心得

原创
作者头像
用户3836902
发布2025-02-26 15:21:05
发布2025-02-26 15:21:05
2424
举报

#腾讯云AI代码助手# 无须自己动手,使用腾讯云AI代码助手帮我实现自动扒取数据的需求,简单又快捷,自己手动小改下就能用了,给#腾讯云AI代码助手# 点赞!

代码语言:txt
复制
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"sort"
	"strings"
	"time"
)

const (
	// limit is the page size sent as the "limit" query parameter and the
	// step by which the fetch loops advance their "offset".
	limit      = 100
	// maxResults is the maximum number of entries main prints; main stops
	// collecting once it has gathered twice this many.
	maxResults = 5000
)

// Model holds the fields of one model entry that are decoded from the JSON
// list returned by the models endpoint.
type Model struct {
	// ID is read from the "modelId" JSON field.
	ID        string `json:"modelId"`
	// Downloads is read from the "downloads" JSON field.
	Downloads int    `json:"downloads"`
	// License is excluded from JSON decoding; extractLicense fills it in.
	License   string `json:"-"`
	// CardData carries the license declared under "cardData", if any.
	CardData  struct {
		License string `json:"license"`
	} `json:"cardData"`
	// Tags is scanned by extractLicense for a "license:" entry when
	// CardData.License is empty.
	Tags []string `json:"tags"`
}

// Dataset holds the fields of one dataset entry that are decoded from the
// JSON list returned by the datasets endpoint.
type Dataset struct {
	// ID is read from the "id" JSON field.
	ID        string `json:"id"`
	// Downloads is read from the "downloads" JSON field.
	Downloads int    `json:"downloads"`
	// License is excluded from JSON decoding; extractLicense fills it in.
	License   string `json:"-"`
	// CardData carries the license declared under "cardData", if any.
	CardData  struct {
		License string `json:"license"`
	} `json:"cardData"`
	// Tags is scanned by extractLicense for a "license:" entry when
	// CardData.License is empty.
	Tags []string `json:"tags"`
}

// Result is the common record the fetchers send to main for sorting and
// printing.
type Result struct {
	// ID is the identifier copied from the fetched Model or Dataset.
	ID        string
	// Type is "model" or "dataset", depending on which fetcher produced it.
	Type      string
	// Downloads is the download count main sorts by, highest first.
	Downloads int
}

// main runs fetchModels and fetchDatasets concurrently, gathers the Results
// they send, sorts them by download count in descending order and prints at
// most maxResults of them, one per line.
func main() {
	results := make(chan Result)
	done := make(chan struct{})

	// Each producer reports on done when it returns, so that the channel can
	// be closed on the sending side once nobody will send on it any more.
	go func() {
		fetchModels(results)
		done <- struct{}{}
	}()
	go func() {
		fetchDatasets(results)
		done <- struct{}{}
	}()

	// Close results only after both producers have finished. Without this the
	// range loop below would block forever when the producers run out of
	// pages before the collection cap is reached. If main stops collecting
	// early instead, the producers stay blocked on their sends and this
	// goroutine never closes the channel; the process simply exits when main
	// returns.
	go func() {
		<-done
		<-done
		close(results)
	}()

	var items []Result
	for item := range results {
		items = append(items, item)
		// Collect up to twice maxResults before stopping, so the sort below
		// has more candidates than will finally be printed.
		if len(items) >= maxResults*2 {
			break
		}
	}

	sort.Slice(items, func(i, j int) bool {
		return items[i].Downloads > items[j].Downloads
	})

	if len(items) > maxResults {
		items = items[:maxResults]
	}

	for _, item := range items {
		fmt.Printf("%s (%s) - Downloads: %d\n", item.ID, item.Type, item.Downloads)
	}
}

// fetchModels requests the models list page by page (limit entries per page,
// advancing offset by limit each time) and sends a Result on results for
// every model whose extracted license passes isValidLicense.
//
// The loop ends when a request fails, the response status is not 200 OK, the
// body cannot be decoded, or a page comes back empty. Failures are logged
// through handleError. The function pauses 500ms between pages and never
// closes results.
func fetchModels(results chan<- Result) {
	client := &http.Client{Timeout: 10 * time.Second}

	for offset := 0; ; offset += limit {
		url := fmt.Sprintf("https://huggingface.co/api/models?search=license:mit+OR+license:apache-2.0&sort=downloads&direction=-1&limit=%d&offset=%d",
			limit, offset)

		resp, err := client.Get(url)
		if handleError(err, "models") {
			break
		}

		// Reject non-200 responses up front so the failure is reported by its
		// HTTP status rather than as a JSON decode error.
		if resp.StatusCode != http.StatusOK {
			resp.Body.Close()
			handleError(fmt.Errorf("unexpected status %s", resp.Status), "models")
			break
		}

		var models []Model
		err = json.NewDecoder(resp.Body).Decode(&models)
		// The body is no longer needed whether or not decoding succeeded.
		resp.Body.Close()
		if handleError(err, "models") {
			break
		}

		if len(models) == 0 {
			break
		}

		for _, m := range models {
			m.extractLicense()
			if isValidLicense(m.License) {
				results <- Result{
					ID:        m.ID,
					Type:      "model",
					Downloads: m.Downloads,
				}
			}
		}

		// Pause between pages to space out the requests.
		time.Sleep(500 * time.Millisecond)
	}
}

// fetchDatasets requests the datasets list page by page (limit entries per
// page, advancing offset by limit each time) and sends a Result on results
// for every dataset whose extracted license passes isValidLicense.
//
// The loop ends when a request fails, the response status is not 200 OK, the
// body cannot be decoded, or a page comes back empty. Failures are logged
// through handleError. The function pauses 500ms between pages and never
// closes results.
func fetchDatasets(results chan<- Result) {
	client := &http.Client{Timeout: 10 * time.Second}

	for offset := 0; ; offset += limit {
		url := fmt.Sprintf("https://huggingface.co/api/datasets?search=license:mit+OR+license:apache-2.0&sort=downloads&direction=-1&limit=%d&offset=%d",
			limit, offset)

		resp, err := client.Get(url)
		if handleError(err, "datasets") {
			break
		}

		// Reject non-200 responses up front so the failure is reported by its
		// HTTP status rather than as a JSON decode error.
		if resp.StatusCode != http.StatusOK {
			resp.Body.Close()
			handleError(fmt.Errorf("unexpected status %s", resp.Status), "datasets")
			break
		}

		var datasets []Dataset
		err = json.NewDecoder(resp.Body).Decode(&datasets)
		// The body is no longer needed whether or not decoding succeeded.
		resp.Body.Close()
		if handleError(err, "datasets") {
			break
		}

		if len(datasets) == 0 {
			break
		}

		for _, d := range datasets {
			d.extractLicense()
			if isValidLicense(d.License) {
				results <- Result{
					ID:        d.ID,
					Type:      "dataset",
					Downloads: d.Downloads,
				}
			}
		}

		// Pause between pages to space out the requests.
		time.Sleep(500 * time.Millisecond)
	}
}

// extractLicense sets m.License to the lower-cased license of the model.
// The value in CardData.License wins when it is non-empty; otherwise the
// first tag starting with "license:" supplies it. License is left empty when
// neither source provides one.
func (m *Model) extractLicense() {
	const tagPrefix = "license:"

	if declared := strings.ToLower(m.CardData.License); declared != "" {
		m.License = declared
		return
	}

	m.License = ""
	for i := range m.Tags {
		if !strings.HasPrefix(m.Tags[i], tagPrefix) {
			continue
		}
		// Only the first matching tag is considered.
		m.License = strings.ToLower(m.Tags[i][len(tagPrefix):])
		return
	}
}

// extractLicense sets d.License to the lower-cased license of the dataset.
// The value in CardData.License wins when it is non-empty; otherwise the
// first tag starting with "license:" supplies it. License is left empty when
// neither source provides one.
func (d *Dataset) extractLicense() {
	const tagPrefix = "license:"

	if declared := strings.ToLower(d.CardData.License); declared != "" {
		d.License = declared
		return
	}

	d.License = ""
	for i := range d.Tags {
		if !strings.HasPrefix(d.Tags[i], tagPrefix) {
			continue
		}
		// Only the first matching tag is considered.
		d.License = strings.ToLower(d.Tags[i][len(tagPrefix):])
		return
	}
}

// isValidLicense reports whether license is exactly "mit" or "apache-2.0".
// The comparison is case-sensitive.
func isValidLicense(license string) bool {
	switch license {
	case "mit", "apache-2.0":
		return true
	default:
		return false
	}
}

// handleError reports whether err is non-nil. A non-nil err is also logged
// together with the name of the resource that was being fetched.
func handleError(err error, resource string) bool {
	if err == nil {
		return false
	}
	log.Printf("Error fetching %s: %v", resource, err)
	return true
}

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档