发布
社区首页 >问答首页 >总结过去一个月Reddit帖子的脚本

总结过去一个月Reddit帖子的脚本
EN

Code Review用户
提问于 2017-07-21 01:40:43
回答 1查看 219关注 0票数 4

这个脚本用来处理我在某个 subreddit 上举办的每月活动中的繁琐工作。它会搜索自上一期活动帖发布以来所有与该活动相关的帖子,并生成下个月活动帖的大部分内容。

我最希望得到的是针对代码组织方式的批评。我的函数杂乱无章,很难理清自己都写了些什么,所以希望有人能建议更好的组织方法。

在问题领域,Piece这个名字并不像看上去那么模糊。当然,如果你知道这一点,但仍然认为这是一个可怕的名字,我欢迎你的想法。

代码语言:python
代码运行次数:0
复制
import configparser
import datetime
import logging
import re

import pickle
from typing import Optional

import praw
import praw.models

# Marker on which the body of a Jam post is split into sections
DELIMITER = '---'  # type: str
# Shared praw.Reddit client; stays None until get_reddit() creates it
REDDIT = None
# Username compared against post authors when looking for the previous Jam post
JAM_MAINTAINER = 'G01denW01f11'


def init_reddit(config_pathname: str) -> praw.Reddit:
    """
    Build a Reddit client from credentials stored in an INI-style config file
    :param config_pathname: Path to a config file containing a [RedditParams] section
    :return: A new praw.Reddit instance; storing it is left to the caller
    """
    parser = configparser.ConfigParser()
    parser.read(config_pathname)
    params = parser['RedditParams']
    return praw.Reddit(client_id=params['client_id'],
                       client_secret=params['client_secret'],
                       user_agent=params['user_agent'])


def get_reddit() -> praw.Reddit:
    """
    Return the shared Reddit client, creating it from 'config.ini' on first use
    :return: The module-wide praw.Reddit instance
    """
    global REDDIT
    if REDDIT:
        return REDDIT
    REDDIT = init_reddit('config.ini')
    return REDDIT


class Piece(object):
    """
    A piece to be listed in the piano jam

    Two pieces compare equal when their composer and title match; the URLs and
    the category play no part in equality. Because __eq__ is defined without
    __hash__, instances are not hashable.
    """

    def __init__(self, composer: Optional[str] = None, title: Optional[str] = None,
                 video_url: Optional[str] = None, score_url: Optional[str] = None,
                 category: Optional[str] = None):
        """
        :param composer: Name of the composer
        :param title: Title of the piece
        :param video_url: Link to a video of the piece
        :param score_url: Link to the sheet music
        :param category: Category the piece is listed under
        """
        self.composer = composer  # type: Optional[str]
        self.title = title  # type: Optional[str]
        self.video_url = video_url  # type: Optional[str]
        self.score_url = score_url  # type: Optional[str]
        self.category = category  # type: Optional[str]

    def __eq__(self, other: object) -> bool:
        # Let Python fall back to its default comparison for foreign types
        # instead of failing with AttributeError on a missing attribute.
        if not isinstance(other, Piece):
            return NotImplemented
        return self.composer == other.composer and self.title == other.title

    def __ne__(self, other: object) -> bool:
        return not self == other

    def __str__(self) -> str:
        """
        Render the piece as 'Composer: [Title](video) | [Sheet Music](score)'
        Both URLs must be set; a ')' inside a URL is backslash-escaped so that it
        does not terminate the Markdown link.
        """
        return '{}: [{}]({}) | [Sheet Music]({})'.format(
            self.composer,
            self.title,
            self.video_url.replace(')', '\\)'),
            self.score_url.replace(')', '\\)'))


class Submission(object):
    """
    A submission to the month's Jam

    Two submissions compare equal when they share a username and a piece; the
    url and the post title are not compared.
    """

    def __init__(self, username: Optional[str] = None, url: Optional[str] = None,
                 title: Optional[str] = None, piece: Optional['Piece'] = None):
        """
        :param username: Author of the submission
        :param url: Link to the submission post
        :param title: Title of the submission post
        :param piece: The piece being played, if already known
        """
        self.username = username  # type: Optional[str]
        self.url = url  # type: Optional[str]
        self.title = title  # type: Optional[str]
        self.piece = piece  # type: Optional[Piece]

    def __eq__(self, other: object) -> bool:
        # Let Python fall back to its default comparison for foreign types
        # instead of failing with AttributeError on a missing attribute.
        if not isinstance(other, Submission):
            return NotImplemented
        return self.username == other.username and self.piece == other.piece

    def __ne__(self, other: object) -> bool:
        return not self == other

    def __str__(self) -> str:
        """Render as "Composer's Title by [/u/username](url)"; requires piece to be set"""
        return '{}\'s {} by [/u/{}]({})'.format(self.piece.composer, self.piece.title, self.username, self.url)

    def set_piece(self, pieces: 'list[Piece]') -> None:
        """
        From a list of valid pieces, set the one that matches this submission's title
        Logs a warning and leaves self.piece as None when nothing matches.
        :param pieces: A list of pieces to choose from
        """
        self.piece = find_piece_matching_title(pieces, self.title)
        if self.piece is None:
            logging.warning('Could not find piece for %s | %s', self.title, self.url)


def find_piece_matching_title(pieces: [Piece], title: str) -> Optional[Piece]:
    """
    Guess which piece a submission plays by looking for the longest word of each
    piece's title inside the submission title (case-insensitive)
    :param pieces: Pieces to choose from, checked in order
    :param title: Submission title
    :return: The first piece whose longest title word occurs in title, else None
    """
    matches = (candidate for candidate in pieces
               if biggest_word_in_line(candidate.title).lower() in title.lower())
    return next(matches, None)


def format_title(section_title: str) -> str:
    """
    Wrap a section title in double asterisks
    :param section_title: The title of a section to be formatted
    :return: The title surrounded by '**' on both sides
    """
    marker = '**'
    return '{0}{1}{0}'.format(marker, section_title)


class Jam(object):
    """
    A Piano Jam posting

    Holds the submissions and pieces of one Jam together with an outline text
    into which both lists are formatted by __str__.
    """

    CATEGORIES = ['Jazz', 'Classical', 'Ragtime', 'Video Game / Anime / Film']  # type: [str]

    def __init__(self, outline_pathname: str = 'jam_outline.txt'):
        """
        Create a Piano Jam instance from a given outline file
        :param outline_pathname: pathname to file with default jam contents
        """
        self.filename = ''  # type: str
        self.submissions = []  # type: [Submission]
        self.pieces = []  # type: [Piece]
        with open(outline_pathname, 'r') as f:
            self.text = f.read()

    def __str__(self) -> str:
        """
        Fill the outline text with the submissions (first placeholder) and the
        pieces (second placeholder), each entry followed by a blank line
        """
        submissions_str = ''.join(str(submission) + '\n\n' for submission in self.submissions)
        pieces_str = ''.join(str(piece) + '\n\n' for piece in self.pieces)
        return self.text.format(submissions_str, pieces_str)

    def add_submission(self, submission: 'Submission') -> None:
        """
        Add a submission to the Jam. A repeat of the same piece by the same user
        is not added again; a warning is logged when the repeat has a different url
        :param submission: Submission to the Piano Jam
        :return: None
        """
        for prior_submission in self.submissions:
            # Compare against the PRIOR submission's piece; comparing the new
            # submission with itself would treat every piece as a duplicate.
            is_duplicate = (submission.username == prior_submission.username and
                            submission.piece == prior_submission.piece)
            if not is_duplicate:
                continue
            if submission.url != prior_submission.url:
                logging.warning('User {0} attempted to submit a piece multiple times'.format(submission.username))
            return
        self.submissions.append(submission)

    def add_piece(self, piece: 'Piece') -> None:
        """
        Add a piece unless an equal piece is already listed
        :param piece: Piece to list in the Jam
        """
        if piece not in self.pieces:
            self.pieces.append(piece)

    def save(self, filename: str = '') -> None:
        """
        Pickle this Jam to disk
        :param filename: Where to save; when empty, the previously used filename is reused
        :raises ValueError: if no filename was given now or earlier
        """
        if filename:
            self.filename = filename
        if not self.filename:
            raise ValueError('No filename to save to!')
        with open(self.filename, 'wb') as f:
            pickle.dump(self, f)

    @classmethod
    def load(cls, filename: str) -> 'Jam':
        """
        Load a Jam previously written by save()
        :param filename: Path of the pickle file
        :return: The unpickled Jam
        :raises TypeError: if the file does not contain a Jam
        """
        with open(filename, 'rb') as f:
            # NOTE: unpickling can execute arbitrary code; only load files that
            # this script wrote itself.
            jam = pickle.load(f)  # type: Jam
        if not isinstance(jam, Jam):
            raise TypeError('Tried to load a Jam. Got {}'.format(type(jam)))
        # The pickled object remembers the path it was saved under
        assert jam.filename == filename
        return jam


def parse_piece(piece_text: str) -> Piece:
    """
    Construct a Piece from its string representation.
    Expected format: Composer: [Title](url) | [Sheet Music](sheetUrl)
    A ')' inside a url may be written as '\\)'; it is read back as a plain ')'.
    :param piece_text: Line from Piano Jam specifying a Piece to learn
    :return: Piece with composer, title, video_url and score_url filled in
    :raises ValueError: if the line contains no ':'
    :raises IndexError: if the line lacks a bracketed title or two parenthesised urls
    """
    piece = Piece()
    piece.composer = piece_text[:piece_text.index(':')]
    piece.title = re.findall(r'\[(.*?)\]', piece_text)[0]  # type: str
    # A url runs up to the first unescaped ')'; a backslash escapes the next
    # character so that an escaped '\)' does not cut the url short.
    urls = [url.replace('\\)', ')')
            for url in re.findall(r'\(((?:\\.|[^\\)])*)\)', piece_text)]
    piece.video_url = urls[0]  # type: str
    piece.score_url = urls[1]  # type: str
    return piece


def parse_pieces(section_text: str) -> [Piece]:
    """
    Lazily parse every non-blank line of a section into a Piece
    :param section_text: Text of one section; its first line (the category) is skipped
    :return: Generator of parsed pieces
    """
    lines = section_text.split('\n')
    body = lines[1:]  # drop the category heading
    return (parse_piece(line) for line in body if line.strip())


def get_pieces_from_jam(jam_text: str) -> [Piece]:
    """
    Collect the pieces listed in a Jam post
    Only sections that begin with a formatted category title are parsed.
    :param jam_text: The contents of a Piano Jam posting
    :return: List of pieces to be used for the Jam
    """
    chunks = [chunk.strip() for chunk in jam_text.split(DELIMITER)]
    headings = tuple(format_title(name) for name in Jam.CATEGORIES)
    category_chunks = [chunk for chunk in chunks if chunk.startswith(headings)]
    found = []
    for chunk in category_chunks:
        found.extend(parse_pieces(chunk))
    return found


def get_selections_from_url(url: str) -> [Piece]:
    """
    Parse all the pieces from a jam, given its url
    :param url: URL to a Piano Jam post
    :return: List of pieces to be used for the Jam
    :raises KeyError: if building the submission from url raises KeyError
    """
    # Obtain the client outside the try block: a KeyError raised while reading
    # the config must not be reported as an unrecognized url.
    reddit = get_reddit()
    try:
        post = praw.models.Submission(reddit, url=url)
    except KeyError as err:
        raise KeyError('Could not recognize url {0}'.format(url)) from err
    return get_pieces_from_jam(post.selftext)


def search_for_submissions():
    """
    Search the 'piano' subreddit for '[Piano Jam]', sorted by new, within the past month
    :return: Generator over the search results
    """
    piano = get_reddit().subreddit('piano')
    hits = piano.search('[Piano Jam]', sort='new', time_filter='month')
    return (hit for hit in hits)


def filter_submissions(submissions: 'Iterable[praw.models.Submission]',
                       jam: 'praw.models.Submission') -> list:
    """
    Keep the posts tagged '[piano jam]' that were created after the given Jam post
    :param submissions: Candidate posts
    :param jam: The Jam post that serves as the cut-off
    :return: List of posts whose title contains '[piano jam]' (case-insensitive)
             and whose creation time is later than the Jam's
    """
    # Compare the raw timestamps directly: converting them to naive local
    # datetimes first is not order-preserving across a DST fall-back.
    return [submission for submission in submissions
            if '[piano jam]' in submission.title.lower() and
            submission.created > jam.created]


def find_last_jam() -> praw.models.Submission:
    """
    Locate the previous Jam post among the search results
    :return: The first search result authored by JAM_MAINTAINER whose title has no '['
    :raises ValueError: if no search result qualifies
    """
    official_posts = (post for post in search_for_submissions()
                      if post.author == JAM_MAINTAINER and '[' not in post.title)
    latest = next(official_posts, None)
    if latest is None:
        raise ValueError('Could not find last Piano Jam')
    return latest


def biggest_word_in_line(line: str) -> str:
    """
    Return the longest whitespace-separated word of a line
    When several words share the maximum length, the earliest one wins.
    :param line: Text to inspect; must contain at least one word
    :return: The longest word
    """
    longest = max(line.split(), key=len, default=None)
    assert longest
    return longest


def create_jam() -> [Submission]:
    """
    Build the next Jam from the posts made since the previous Jam
    Posts that cannot be matched to a piece of the previous Jam are skipped
    (set_piece logs a warning for each); the rest are added to a new Jam,
    which is pickled to 'current_jam.txt'.
    NOTE(review): nothing is returned even though the annotation names a list
    of Submission - confirm which one is intended.
    """
    last_jam = find_last_jam()
    recent_posts = filter_submissions(search_for_submissions(), last_jam)
    candidates = []
    for post in recent_posts:
        candidates.append(Submission(post.author, post.shortlink, post.title))
    known_pieces = get_pieces_from_jam(last_jam.selftext)
    upcoming = Jam()
    for candidate in candidates:
        candidate.set_piece(known_pieces)
        if not candidate.piece:
            continue
        upcoming.add_submission(candidate)
    upcoming.save('current_jam.txt')
EN

回答 1

Code Review用户

回答已采纳

发布于 2017-07-21 18:28:46

  1. 不接受任何参数、却去修改全局对象的函数没有什么意义。正因为如此,您的 init_reddit 函数比 get_reddit 函数更好。
  2. 依我拙见,您应该重新考虑一下:为什么有些函数里的注释比代码还多?也许有更地道的方式来表达同样的意思。(参见 find_piece_matching_title 和 format_title)
  3. 类是个好东西;请考虑创建一个 Reddit 类,它要么继承自 praw.Reddit,要么把您的 reddit 实例作为成员变量。您可以把 search_for_submissions 和 filter_submissions 放进这个类里。
  4. 您的 parse_piece、parse_pieces、get_pieces_from_jam 等函数应该成为 Piece 或 Jam 对象的一部分。如果您用对象来保存数据,那么让操作这些数据的函数成为对象的方法才是合理的。

总的来说,我在您的代码中看到了大量顶级函数和对象,而没有清楚地说明它们应该如何协同工作。编写代码的困难部分不一定是编写单个片段,而是找出最简单(最不复杂)的交互方式。

票数 1
EN
页面原文内容由Code Review提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://codereview.stackexchange.com/questions/169811

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档