YouTubeから動画をダウンロードするescript

ついカッとなってYouTubeから動画をダウンロードするescriptをErlangで書いてみた.後悔はしていない.Pythonで書こうと思ったら,HTTP HEADでのアクセスが面倒くさかったので,Erlangになった.


Python製のツールとしてはyoutube-dl.pyがあるので,そちらを使えばよいでしょう.youtube-dl.pyのほうが認証にも対応していて高機能です.


使い方は,実行権限をつけて保存して

$ ./dlyoutube.erl http://jp.youtube.com/watch?v=rwUdH036LEM

とかすればいいだけ.もちろん,先にErlangをインストールしておく必要有り.

#!/usr/bin/env escript

%% escript entry point. Each command-line argument is treated as a URL
%% and handed to start/1 in order. A failure for one URL is printed and
%% the remaining URLs are still processed. Returns ok.
main(URLs) ->
    lists:foreach(fun download_or_report/1, URLs).

%% Runs start/1 for one URL; any exception (throw, error or exit) is
%% reported on standard output instead of propagating.
download_or_report(URL) ->
    try start(URL) of
        _ -> ok
    catch
        _:Reason ->
            io:format("failed to download.~n"),
            io:format("Reason: ~p~n", [Reason])
    end.

%% Downloads the video referenced by URL.
%%
%% Starts inets, checks the URL with is_youtube/1, then derives the
%% host, the video id and the "t" token from the URL and passes them to
%% download/4 together with the list of formats to try.
%% Throws "invalid URL" when is_youtube/1 does not return true.
start(URL) ->
    inets:start(),
    io:format("checking...~n"),
    fetch_if_youtube(is_youtube(URL), URL).

%% Second stage of start/1, dispatched on the result of is_youtube/1.
fetch_if_youtube(true, URL) ->
    Host = to_domain(URL),
    Id = to_video_id(URL),
    Token = get_t(URL),

    %% Formats are listed from most to least preferred:
    %%   &fmt=22 : 1280x720 MP4 HD
    %%   &fmt=18 : 480x360  MP4 iPod
    %%   &fmt=6  : 448x336  FLV High Quality
    %%   (none)  : 320x240  FLV (normal)
    Preferred = ["&fmt=22", "&fmt=18", "&fmt=6", ""],

    download(Host, Id, Token, Preferred);
fetch_if_youtube(_, _URL) ->
    throw("invalid URL").

%% Returns true when URL is an http:// URL whose host is a subdomain of
%% youtube.com (for example www.youtube.com or jp.youtube.com), and
%% false otherwise.
%%
%% re:run/3 signals a failed match with the atom nomatch. With the
%% {capture, none} option a successful match is the bare atom match, so
%% the result maps directly onto a boolean.
is_youtube(URL) ->
    %% Anchored at the start and restricted to the host part, so that a
    %% ".youtube.com" appearing in the path or query of another site, or
    %% as a prefix of a longer host name, is not accepted.
    Pattern = "^http://[^/?#]*\\.youtube\\.com(?:[:/?#]|$)",
    re:run(URL, Pattern, [{capture, none}]) =:= match.

%% Extracts the host from URL. The URL is split on "/" (empty pieces are
%% dropped by string:tokens/2) and the second piece is returned, which
%% for "http://host/path" is "host". When there are fewer than two
%% pieces the default "www.youtube.com" is returned.
to_domain(URL) ->
    case string:tokens(URL, "/") of
        [_Scheme, Host | _] -> Host;
        _ -> "www.youtube.com"
    end.

%% Extracts the video id from the query string of URL.
%%
%% The URL is split on "?"; the second piece is taken as the query
%% string, split on "&", and handed to get_video_id/1, whose result is
%% returned as is. Throws "invalid URL" when the URL has no query part.
to_video_id(URL) ->
    case string:tokens(URL, "?") of
        [_Path, Query | _] ->
            get_video_id(string:tokens(Query, "&"));
        _ ->
            throw("invalid URL")
    end.

%% Scans a list of "key=value" strings and returns the value of the
%% first one whose key is "v" (everything after the "v=" prefix).
%% Returns false when no element starts with "v=".
get_video_id([]) ->
    false;
get_video_id(["v=" ++ Id | _]) ->
    Id;
get_video_id([_Other | Rest]) ->
    get_video_id(Rest).

%% Fetches the watch page at URL and returns the value of the "t" entry
%% embedded in it, i.e. the text between `"t": "` and the next `",`.
%%
%% Throws "colud not found 't' of argument" (spelling kept as is, since
%% callers see this exact term) when the request does not return HTTP
%% 200 or the page contains no such entry.
%%
%% Uses httpc, the inets HTTP client; the older `http` module this code
%% was written against has been removed from OTP.
get_t(URL) ->
    Header = [{"User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.1) Gecko/2008070208 Firefox/3.0.1"},
              {"Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.7"},
              {"Accept", "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"},
              {"Accept-Language", "en-us,en;q=0.5"}],
    case httpc:request(get, {URL, Header}, [], []) of
        {ok, {{_, 200, _}, _, Body}} ->
            %% ungreedy makes ".*" stop at the first closing `",`; the
            %% capture group returns the value itself, so no offset
            %% arithmetic on the match position is needed.
            case re:run(Body, "\"t\": \"(.*)\",",
                        [ungreedy, {capture, [1], list}]) of
                {match, [Token]} ->
                    Token;
                _ ->
                    throw("colud not found 't' of argument")
            end;
        _ ->
            throw("colud not found 't' of argument")
    end.

%% Downloads the video VideoID from Domain, trying each format suffix in
%% the given list in order until one is available.
%%
%% For each format a HEAD request is sent to the get_video URL; the
%% first format answering HTTP 200 is fetched with GET and streamed to
%% "<VideoID>.mp4" (fmt 22 and 18) or "<VideoID>.flv" (anything else) in
%% the current directory.
%%
%% Throws:
%%   "invalid URL"            - VideoID or ArgT is false
%%   "could not found file"   - no format in the list answered 200
%%   {download_failed, Term}  - the GET did not end with the body saved
%%                              to the file; Term is httpc's result
%%
%% Uses httpc, the inets HTTP client; the older `http` module this code
%% was written against has been removed from OTP.
download(_, VideoID, ArgT, _)
  when VideoID =:= false; ArgT =:= false ->
    throw("invalid URL");
download(Domain, VideoID, ArgT, [Fmt | T]) ->
    Header = [{"User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.1) Gecko/2008070208 Firefox/3.0.1"},
              {"Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.7"},
              {"Accept", "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"}],

    URL = "http://" ++ Domain ++ "/get_video?video_id=" ++ VideoID ++
        "&t=" ++ ArgT ++ Fmt,

    %% HEAD first: it tells us whether this format exists without
    %% transferring the video body.
    case httpc:request(head, {URL, Header}, [], []) of
        {ok, {{_, 200, _}, Head, _}} ->
            File = VideoID ++ video_extension(Fmt),
            announce_download(Head),
            save_video(URL, Header, File);
        _ ->
            download(Domain, VideoID, ArgT, T)
    end;
download(_, _, _, []) ->
    throw("could not found file").

%% Maps a format suffix to the file extension of the resulting video.
video_extension("&fmt=22") -> ".mp4";
video_extension("&fmt=18") -> ".mp4";
video_extension(_) -> ".flv".

%% Prints the start message. The size is informational only, so a
%% response without a Content-Length header must not abort the download.
announce_download(Head) ->
    case proplists:get_value("content-length", Head) of
        undefined ->
            io:format("start downloading...~n");
        Len ->
            io:format("start downloading (~s bytes)...~n", [Len])
    end.

%% Streams URL into File and reports success only if httpc confirms the
%% body was written; any other outcome is raised to the caller.
save_video(URL, Header, File) ->
    case httpc:request(get, {URL, Header}, [], [{stream, File}]) of
        {ok, saved_to_file} ->
            io:format("finished.~n"),
            io:format("saved to ~p.~n", [File]);
        Other ->
            throw({download_failed, Other})
    end.