社区首页 >问答首页 >总结过去一个月Reddit帖子的脚本

问总结过去一个月Reddit帖子的脚本
EN

Code Review用户

提问于 2017-07-20 17:40:43

回答 1查看 219关注 0票数 4

这个脚本用来处理我在某个subreddit上举办的每月活动中的繁琐工作。它会搜索自上次发帖以来与该活动相关的所有帖子，并生成下个月帖子的大部分内容。

我最希望得到的是代码组织层面的批评意见。我的函数杂乱无章，很难理清自己都写了哪些，所以希望有人能建议一种更好的组织方式。

在这个问题领域里，Piece（曲目）这个名字并不像看上去那么含糊。当然，如果你了解这一点后仍然认为这个名字很糟糕，欢迎提出你的想法。

import configparser
import datetime
import logging
import re

import pickle
from typing import Optional

import praw
import praw.models

# Separator used to split the body of a Piano Jam post into sections
DELIMITER = '---'  # type: str
# Shared praw.Reddit client; stays None until get_reddit() creates it
REDDIT = None
# Username compared against a post's author when looking for the last Jam
JAM_MAINTAINER = 'G01denW01f11'


def init_reddit(config_pathname: str) -> praw.Reddit:
    """
    Build a Reddit client from credentials stored in an INI file
    :param config_pathname: Path to an INI file whose RedditParams section
        provides client_id, client_secret and user_agent
    :return: A newly constructed praw.Reddit instance
    """
    parser = configparser.ConfigParser()
    parser.read(config_pathname)
    params = parser['RedditParams']
    return praw.Reddit(client_id=params['client_id'],
                       client_secret=params['client_secret'],
                       user_agent=params['user_agent'])


def get_reddit() -> praw.Reddit:
    """Return the shared Reddit client, building it from config.ini on first use"""
    global REDDIT
    if REDDIT:
        return REDDIT
    REDDIT = init_reddit('config.ini')
    return REDDIT


class Piece(object):
    """
    A piece to be listed in the piano jam

    Two pieces compare equal when their composer and title match; the URLs
    and the category take no part in the comparison.
    """

    # Instances are mutable and compare by value, so they are explicitly unhashable.
    __hash__ = None

    def __init__(self, composer: str = None, title: str = None, video_url: str = None, score_url: str = None,
                 category: str = None):
        """
        :param composer: Name of the composer
        :param title: Title of the piece
        :param video_url: Link the title points to when rendered
        :param score_url: Link to the sheet music
        :param category: Category the piece is listed under
        """
        self.composer = composer  # type: str
        self.title = title  # type: str
        self.video_url = video_url  # type: str
        self.score_url = score_url  # type: str
        self.category = category  # type: str

    def __eq__(self, other: object) -> bool:
        """Compare by composer and title; anything that is not a Piece is never equal"""
        if not isinstance(other, Piece):
            # Let Python fall back to its default handling (e.g. `piece == None` is
            # simply False) instead of failing on a missing attribute.
            return NotImplemented
        return self.composer == other.composer and self.title == other.title

    def __ne__(self, other: object) -> bool:
        return not self == other

    def __str__(self) -> str:
        """
        Render the piece as a Markdown line: composer, title link, sheet music link.
        Closing parentheses in both URLs are backslash-escaped so they do not end
        the Markdown link early. Both URLs must be set (not None).
        """
        return '{}: [{}]({}) | [Sheet Music]({})'.format(self.composer, self.title,
                                                         self.video_url.replace(')', '\\)'),
                                                         self.score_url.replace(')', '\\)'))


class Submission(object):
    """
    A submission to the month's Jam

    Two submissions compare equal when the username and the piece match; the
    url and the post title take no part in the comparison.
    """

    # Instances are mutable and compare by value, so they are explicitly unhashable.
    __hash__ = None

    def __init__(self, username: str = None, url: str = None, title: str = None, piece: Piece = None):
        """
        :param username: Name of the user who made the submission
        :param url: Link to the submission post
        :param title: Title of the submission post
        :param piece: The piece that was played, if already known
        """
        self.username = username  # type: str
        self.url = url  # type: str
        self.title = title  # type: str
        self.piece = piece  # type: Piece

    def __eq__(self, other: object) -> bool:
        """Compare by username and piece; anything that is not a Submission is never equal"""
        if not isinstance(other, Submission):
            # Defer to Python's default handling rather than failing on a
            # missing attribute when compared with None or an unrelated object.
            return NotImplemented
        return self.username == other.username and self.piece == other.piece

    def __ne__(self, other: object) -> bool:
        return not self == other

    def __str__(self) -> str:
        """Render as "<composer>'s <title> by [/u/<username>](<url>)"; requires piece to be set"""
        return '{}\'s {} by [/u/{}]({})'.format(self.piece.composer, self.piece.title, self.username, self.url)

    def set_piece(self, pieces: [Piece]) -> None:
        """
        From a list of valid pieces, set the one that matches this submission's title.
        Leaves piece unset and logs a warning when nothing matches.
        :param pieces: A list of pieces to choose from
        """
        self.piece = find_piece_matching_title(pieces, self.title)
        if not self.piece:
            logging.warning('Could not find piece for {} | {}'.format(self.title, self.url))


def find_piece_matching_title(pieces: [Piece], title: str) -> Optional[Piece]:
    """
    Guess which piece a submission refers to by looking at its title.
    A piece matches when the longest word of its own title occurs
    (case-insensitively) anywhere in the submission title.
    :param pieces: Pieces to choose from, checked in order
    :param title: Submission title
    :return: The first matching piece, or None when no piece matches
    """
    matches = (candidate for candidate in pieces
               if biggest_word_in_line(candidate.title).lower() in title.lower())
    return next(matches, None)


def format_title(section_title: str) -> str:
    """
    Wrap a section title in Markdown bold markers
    :param section_title: Plain title of a section
    :return: The title surrounded by double asterisks
    """
    bold_template = '**{}**'
    return bold_template.format(section_title)


class Jam(object):
    """
    A Piano Jam posting

    Holds the outline text of the post together with the submissions and
    pieces that are rendered into it, and can pickle itself to disk.
    """

    CATEGORIES = ['Jazz', 'Classical', 'Ragtime', 'Video Game / Anime / Film']  # type: [str]

    def __init__(self, outline_pathname: str = 'jam_outline.txt'):
        """
        Create a Piano Jam instance from a given outline file
        :param outline_pathname: pathname to file with default jam contents
        """
        self.filename = ''  # type: str
        self.submissions = []  # type: [Submission]
        self.pieces = []  # type: [Piece]
        with open(outline_pathname, 'r') as f:
            self.text = f.read()

    def __str__(self) -> str:
        """
        Render the posting: the outline text is used as a format template whose
        first field receives the submissions and whose second receives the pieces,
        each entry followed by a blank line.
        """
        submissions_str = ''.join(str(submission) + '\n\n' for submission in self.submissions)
        pieces_str = ''.join(str(piece) + '\n\n' for piece in self.pieces)
        return self.text.format(submissions_str, pieces_str)

    def add_submission(self, submission: 'Submission') -> None:
        """
        Add a submission to the Jam. A second submission by the same user for the
        same piece is not added; a warning is logged when it points to a different url.
        :param submission: Submission to the Piano Jam
        :return: None
        """
        for prior_submission in self.submissions:
            # Compare against the stored submission's piece: a user may submit
            # several different pieces, only repeats of the same piece are dropped.
            if submission.username == prior_submission.username and submission.piece == prior_submission.piece:
                if submission.url != prior_submission.url:
                    logging.warning('User {0} attempted to submit a piece multiple times'.format(submission.username))
                return
        self.submissions.append(submission)

    def add_piece(self, piece: 'Piece') -> None:
        """Add a piece to the Jam unless an equal piece is already listed"""
        if piece not in self.pieces:
            self.pieces.append(piece)

    def save(self, filename: str = '') -> None:
        """
        Pickle this Jam to disk
        :param filename: Where to save; when empty, the filename remembered from
            an earlier save is reused
        :raises ValueError: if no filename was given now or earlier
        """
        if filename:
            self.filename = filename
        if not self.filename:
            raise ValueError('No filename to save to!')
        with open(self.filename, 'wb') as f:
            pickle.dump(self, f)

    @classmethod
    def load(cls, filename: str) -> 'Jam':
        """
        Load a Jam previously written by save()
        :param filename: Path of the pickle file
        :raises TypeError: if the file does not contain a Jam
        """
        # NOTE: unpickling can execute arbitrary code; only load files that
        # were produced by save() and come from a trusted location.
        with open(filename, 'rb') as f:
            jam = pickle.load(f)  # type: Jam
        if not isinstance(jam, Jam):
            raise TypeError('Tried to load a Jam. Got {}'.format(type(jam)))
        # Sanity check: the Jam remembers the name it was saved under.
        assert jam.filename == filename
        return jam


def parse_piece(piece_text: str) -> Piece:
    """
    Construct a Piece from its string representation.
    Expected format: Composer: [Title](url) | [Sheet Music](sheetUrl)
    A closing parenthesis inside a url may be escaped with a backslash; the
    escape is removed so the stored url is the plain one.
    :param piece_text: Line from Piano Jam specifying a Piece to learn
    :return: Piece with composer, title, video_url and score_url filled in
    :raises ValueError: if the line contains no colon
    :raises IndexError: if the title or either url cannot be found
    """
    composer = piece_text[:piece_text.index(':')]
    title = re.findall(r'\[(.*?)\]', piece_text)[0]  # type: str
    # A backslash-escaped ")" is treated as part of the url rather than its end.
    raw_urls = re.findall(r'\(((?:\\\)|[^)\n])*)\)', piece_text)
    urls = [raw_url.replace('\\)', ')') for raw_url in raw_urls]
    piece = Piece()
    piece.composer = composer
    piece.title = title
    piece.video_url = urls[0]  # type: str
    piece.score_url = urls[1]  # type: str
    return piece


def parse_pieces(section_text: str) -> [Piece]:
    """
    Parse every piece listed in one section of a Jam.
    The first line (the category heading) is skipped, as are blank lines.
    The result is a lazy generator: each line is parsed only when consumed.
    """
    _heading, *body_lines = section_text.split('\n')
    return (parse_piece(line) for line in body_lines if line.strip())


def get_pieces_from_jam(jam_text: str) -> [Piece]:
    """
    Collect the pieces listed in the body of a Piano Jam post.
    The text is split on DELIMITER; only sections that begin with the
    formatted title of one of Jam.CATEGORIES are parsed.
    :param jam_text: The contents of a Piano Jam posting
    :return: List of pieces to be used for the Jam
    """
    headings = tuple(format_title(category) for category in Jam.CATEGORIES)
    found = []
    for chunk in jam_text.split(DELIMITER):
        chunk = chunk.strip()
        if chunk.startswith(headings):
            found.extend(parse_pieces(chunk))
    return found


def get_selections_from_url(url: str) -> [Piece]:
    """
    Parse all the pieces from a jam, given its url
    :param url: URL to a Piano Jam post
    :return: List of pieces to be used for the Jam
    :raises KeyError: if building the praw Submission from the url raises KeyError
    """
    # Obtain the client outside the try block: a KeyError caused by a missing
    # config section must not be reported as an unrecognised url.
    reddit = get_reddit()
    try:
        post = praw.models.Submission(reddit, url=url)
    except KeyError as err:
        raise KeyError('Could not recognize url {0}'.format(url)) from err
    return get_pieces_from_jam(post.selftext)


def search_for_submissions():
    """
    Search the piano subreddit for '[Piano Jam]', newest first, limited to
    the past month
    :return: Generator over the search results (post objects, not urls)
    """
    piano = get_reddit().subreddit('piano')
    listing = piano.search('[Piano Jam]', sort='new', time_filter='month')
    return (post for post in listing)


def filter_submissions(submissions: [praw.models.Submission], jam: praw.models.Submission):
    """
    Keep the posts that are entries to the given Jam
    :param submissions: Candidate posts
    :param jam: The Jam post the entries must be newer than
    :return: List of posts whose title contains '[piano jam]' (any case) and
        whose creation time is later than the Jam's
    """
    kept = []
    for post in submissions:
        if '[piano jam]' not in post.title.lower():
            continue
        posted_at = datetime.datetime.fromtimestamp(post.created)
        jam_posted_at = datetime.datetime.fromtimestamp(jam.created)
        if posted_at > jam_posted_at:
            kept.append(post)
    return kept


def find_last_jam() -> praw.models.Submission:
    """
    Locate the previous Piano Jam post
    :return: The first result of search_for_submissions() that was authored by
        JAM_MAINTAINER and has no '[' in its title
    :raises ValueError: if no search result qualifies
    """
    for post in search_for_submissions():
        if not post.author == JAM_MAINTAINER:
            continue
        if '[' in post.title:
            continue
        return post
    raise ValueError('Could not find last Piano Jam')


def biggest_word_in_line(line: str) -> str:
    """
    Return the longest whitespace-separated word of a line
    :param line: Text to examine; must contain at least one word
    :return: The longest word; the earliest one wins when several tie
    """
    biggest_word = max(line.split(), key=len, default=None)
    assert biggest_word
    return biggest_word


def create_jam() -> [Submission]:
    """
    Build the new Jam from everything submitted since the last Jam post.
    Each matching post becomes a Submission and is paired with a piece from
    the previous Jam; posts without a matching piece are left out (set_piece
    logs a warning for them). The resulting Jam is pickled to
    'current_jam.txt'. Despite the annotation, nothing is returned.
    """
    last_jam = find_last_jam()
    recent_posts = filter_submissions(search_for_submissions(), last_jam)
    submissions = []
    for post in recent_posts:
        submissions.append(Submission(post.author, post.shortlink, post.title))
    known_pieces = get_pieces_from_jam(last_jam.selftext)
    jam = Jam()
    for submission in submissions:
        submission.set_piece(known_pieces)
        if not submission.piece:
            continue
        jam.add_submission(submission)
    jam.save('current_jam.txt')

python

回答 1

Code Review用户

回答已采纳

发布于 2017-07-21 10:28:46

不带参数却修改全局对象的函数并不可取。正因为如此，您的init_reddit函数比get_reddit函数更好。
依我看（IMHO），您应该重新考虑一下，为什么函数里的注释比代码还多。也许有一种更地道的方式来表达这些内容。(见find_piece_matching_title，format_title)
类很好；请考虑创建一个Reddit类，该类要么继承自praw.Reddit，要么将您的reddit实例作为成员变量。你可以把search_for_submissions和filter_submissions放进去。
您的parse_piece、parse_pieces、get_pieces_from_jam等函数应该是Piece或Jam对象的一部分。如果您使用对象来包含您的数据，那么让函数作为方法操作该数据是有意义的。