点击这里给我发消息 点击这里给我发消息

QQ 空间文章抓取

添加时间:2010-1-5
    相关阅读: 技术 HTML

前几天为自己的论坛写的,可以同步会员 QQ 空间的文章。就技术而言没什么意思,只因 blog 新开,一时想不到写什么好,就拿来充数。

/// <summary>
  /// Entity describing a user's request to have their own articles
  /// synchronized into a particular forum board.
  /// </summary>
  public class QQZone
  {
    // Backing storage for the public properties below.
    private int _id;
    private string _qq;
    private string _username;
    private Guid _userId;
    private int _forumId;

    /// <summary>
    /// Creates a fully populated sync request.
    /// </summary>
    /// <param name="id">Value exposed through <see cref="Id"/>.</param>
    /// <param name="qq">Value exposed through <see cref="QQ"/>.</param>
    /// <param name="username">Value exposed through <see cref="Username"/>.</param>
    /// <param name="userId">Value exposed through <see cref="UserId"/>.</param>
    /// <param name="forumId">Value exposed through <see cref="ForumId"/>.</param>
    public QQZone(int id, string qq, string username, Guid userId, int forumId)
    {
      _id = id;
      _qq = qq;
      _username = username;
      _userId = userId;
      _forumId = forumId;
    }

    /// <summary>Identifier of this sync request.</summary>
    public int Id
    {
      get { return _id; }
      set { _id = value; }
    }

    /// <summary>QQ number of the user.</summary>
    public string QQ
    {
      get { return _qq; }
      set { _qq = value; }
    }

    /// <summary>User name of the user.</summary>
    public string Username
    {
      get { return _username; }
      set { _username = value; }
    }

    /// <summary>Id of the user.</summary>
    public Guid UserId
    {
      get { return _userId; }
      set { _userId = value; }
    }

    /// <summary>Id of the forum the articles are synchronized into.</summary>
    public int ForumId
    {
      get { return _forumId; }
      set { _forumId = value; }
    }
  }
  /// <summary>
  /// Holds one article fetched from QQ Zone.
  /// </summary>
  public class QQZoneBlog
  {
    // Text fields start out empty rather than null.
    private string _subject = string.Empty;
    private string _body = string.Empty;
    private string _tag = string.Empty;
    private List<string> _replies = new List<string>();

    /// <summary>Article subject.</summary>
    public string Subject
    {
      get { return _subject; }
      set { _subject = value; }
    }

    /// <summary>Article body.</summary>
    public string Body
    {
      get { return _body; }
      set { _body = value; }
    }

    /// <summary>Article tag text.</summary>
    public string Tag
    {
      get { return _tag; }
      set { _tag = value; }
    }

    /// <summary>
    /// Reply texts fetched along with the article. The repliers' names are not
    /// fetched; in the author's forum all replies are shown as the anonymous user.
    /// </summary>
    public List<string> ReplayContent
    {
      get { return _replies; }
      set { _replies = value; }
    }
  }

下面的代码根据 QQ 号码获取文章 Id:

 /// <summary>
    /// Downloads the QQ Zone static index for a QQ number and returns the blog ids listed in it.
    /// </summary>
    /// <param name="qq">QQ number substituted into the <c>uin</c> query parameter of the request URL.</param>
    /// <returns>
    /// The text found between each <c>{"blogid":</c> marker and the next comma (or closing brace),
    /// in response order; an empty list when the response contains no <c>[{ ... }]</c> section.
    /// </returns>
    public static List<string> GetBlogId(string qq)
    {
      string url = string.Format(@"http://u.cnc.qzone.qq.com/cgi-bin/cgi_qqzone_static.cgi?uin={0}&flag=1426064064&property=GoRE&vuin=0&t=1196748211",qq);
      StringBuilder sb = new StringBuilder();
      // The client, the response stream and the reader are all released, even if the download fails part-way.
      using (System.Net.WebClient client = new System.Net.WebClient())
      using (System.IO.Stream stream = client.OpenRead(url))
      using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding("gb2312")))
      {
        // Lines are concatenated without separators, so the payload is scanned as one string.
        string line;
        while ((line = reader.ReadLine()) != null)
        {
          sb.Append(line);
        }
      }
      return ParseBlogIds(sb.ToString());
    }

    /// <summary>
    /// Extracts the blog ids from the first <c>[{ ... }]</c> section of <paramref name="payload"/>.
    /// </summary>
    /// <param name="payload">Response text with line breaks already removed.</param>
    /// <returns>One entry per <c>{"blogid":</c> marker found; empty when the section is missing.</returns>
    private static List<string> ParseBlogIds(string payload)
    {
      // NOTE(review): the published snippet lost its escape sequences, so the original
      // Replace/Split separators are unknown. This scan keeps the visible intent: for every
      // object that starts with {"blogid": take the value up to the next delimiter.
      const string marker = "{\"blogid\":";
      List<string> ids = new List<string>();

      int open = payload.IndexOf("[{", StringComparison.Ordinal);
      if (open == -1)
      {
        return ids;
      }
      // Look for the terminator after the opener so a stray "}]" earlier in the text cannot
      // produce a negative length.
      int close = payload.IndexOf("}]", open, StringComparison.Ordinal);
      if (close == -1)
      {
        return ids;
      }

      // Spans from the first '{' through the last '}' of the array.
      string list = payload.Substring(open + 1, close - open);
      char[] delimiters = new char[] { ',', '}' };
      int pos = list.IndexOf(marker, 0, StringComparison.Ordinal);
      while (pos != -1)
      {
        int valueStart = pos + marker.Length;
        int valueEnd = list.IndexOfAny(delimiters, valueStart);
        if (valueEnd == -1)
        {
          break;
        }
        ids.Add(list.Substring(valueStart, valueEnd - valueStart));
        pos = list.IndexOf(marker, valueEnd, StringComparison.Ordinal);
      }
      return ids;
    }

有了 blogId,就可以抓取文章了:

   /// <summary>
    /// Downloads one QQ Zone article and re-posts it, followed by its replies, into the forum
    /// configured on <paramref name="zone"/>.
    /// </summary>
    /// <param name="zone">Sync request supplying the QQ number, the target forum id and the posting user's id.</param>
    /// <param name="blogId">Id of the article to fetch.</param>
    public static void CreatePost(QQZone zone, string blogId)
    {
      // Fetch the raw response, kept as individual lines because each field is matched by its line prefix.
      List<string> lines = new List<string>();
      string urlFormat = "http://b.cnc.qzone.qq.com/cgi-bin/blognew/blog_get_data?uin={0}&numperpage=15&blogid={1}&arch=0&pos=0&direct=1&r=0.339366103963674";
      using (System.Net.WebClient client = new System.Net.WebClient())
      using (Stream stream = client.OpenRead(string.Format(urlFormat,zone.QQ,blogId)))
      using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding("gb2312")))
      {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
          lines.Add(line);
        }
      }

      // Extract the fields we need.
      // NOTE(review): the published snippet lost its backslash escapes; the quoted prefixes and
      // the "\\n" pattern below are reconstructed. Strings returned by ReadLine never contain a
      // real line break, so the two-character sequence backslash + 'n' is the only form of the
      // pattern that can match - confirm against a live response.
      QQZoneBlog blog = new QQZoneBlog();
      foreach (string s in lines)
      {
        if (s.StartsWith("\"category\":"))
        {
          blog.Tag = "会员QQ空间," + ExtractQuotedValue(s, "\"category\":\"");
        }
        if (s.StartsWith("\"title\":"))
        {
          blog.Subject = ExtractQuotedValue(s, "\"title\":\"");
        }
        if (s.StartsWith("\"content\":"))
        {
          blog.Body = ExtractQuotedValue(s, "\"content\":\"").Replace("\\n", "<br/>");
          // Append a link back to the original article.
          blog.Body = blog.Body + string.Format("<br/><br/><p>[url=http://user.qzone.qq.com/{0}/blog/{1}]查看原文[/url]</p>", zone.QQ, blogId);
        }
        if (s.StartsWith("\"replycontent\":"))
        {
          blog.ReplayContent.Add(ExtractQuotedValue(s, "\"replycontent\":\"").Replace("\\n", "<br/>"));
        }
      }

      // Articles whose subject contains "[No]" are not synchronized.
      if (blog.Subject.IndexOf("[No]") == -1)
      {
        // Insert the post.
        ForumPost post = new ForumPost();
        post.ForumId = zone.ForumId;
        post.Subject = blog.Subject;
        post.Body = blog.Body;
        post.TagsText = blog.Tag;
        post.PostType = PostType.HTML;
        ForumPost newPost = ForumPostManager.AddPost(post, UserManager.GetUser(zone.UserId));
        DownloadImages(newPost);
        if (blog.ReplayContent.Count > 0)
        {
          // Replies are posted under the anonymous user as children of the new post.
          // NOTE(review): the same ForumPost instance is re-submitted for every reply with only
          // Body changed - confirm AddPost tolerates being handed an already-added instance.
          ForumPost replaypost = new ForumPost();
          User user = UserManager.GetAnonymousUser(true);
          replaypost.ForumId = zone.ForumId;
          replaypost.Subject = "Re:" + blog.Subject;
          replaypost.ParentId = newPost.PostId;
          replaypost.PostType = PostType.HTML;
          foreach (string body in blog.ReplayContent)
          {
            replaypost.Body = body;
            ForumPost newPostReplay = ForumPostManager.AddPost(replaypost, user);
            // QQ blocks hot-linking of its images, so download them and attach them to the post.
            DownloadImages(newPostReplay);
          }
        }
        CreateSyncHistory(zone.QQ, blogId);
      }
    }

    /// <summary>
    /// Removes <paramref name="prefix"/> from <paramref name="line"/> and strips the trailing
    /// commas and then the trailing double quotes, leaving the field's text.
    /// </summary>
    /// <param name="line">One line of the response.</param>
    /// <param name="prefix">Field name, colon and opening quote exactly as they appear in the line.</param>
    /// <returns>The remaining text of the line.</returns>
    private static string ExtractQuotedValue(string line, string prefix)
    {
      return line.Replace(prefix, "").TrimEnd(new char[] { ',' }).TrimEnd(new char[] { '"' });
    }

本文作者:未知
咨询热线:020-85648757 85648755 85648616 0755-27912581 客服:020-85648756 0755-27912581 业务传真:020-32579052
广州市网景网络科技有限公司 Copyright◎2003-2008 Veelink.com. All Rights Reserved.
广州商务地址:广东省广州市黄埔大道中203号(海景园区)海景花园C栋501室
= 深圳商务地址:深圳市宝源路华丰宝源大厦606
研发中心:广东广州市天河软件园海景园区 粤ICP备05103322号 工商注册